├── mmdet ├── ops │ ├── dcn │ │ ├── modules │ │ │ └── __init__.py │ │ ├── functions │ │ │ ├── __init__.py │ │ │ └── deform_pool.py │ │ ├── setup.py │ │ ├── __init__.py │ │ └── src │ │ │ └── deform_pool_cuda.cpp │ ├── nms │ │ ├── .gitignore │ │ ├── __init__.py │ │ ├── Makefile │ │ ├── gpu_nms.hpp │ │ ├── gpu_nms.pyx │ │ ├── nms_wrapper.py │ │ ├── cpu_nms.pyx │ │ └── setup.py │ ├── roi_pool │ │ ├── modules │ │ │ ├── __init__.py │ │ │ └── roi_pool.py │ │ ├── functions │ │ │ ├── __init__.py │ │ │ └── roi_pool.py │ │ ├── __init__.py │ │ ├── setup.py │ │ ├── gradcheck.py │ │ └── src │ │ │ └── roi_pool_cuda.cpp │ ├── roi_align │ │ ├── functions │ │ │ ├── __init__.py │ │ │ └── roi_align.py │ │ ├── modules │ │ │ ├── __init__.py │ │ │ └── roi_align.py │ │ ├── __init__.py │ │ ├── setup.py │ │ ├── gradcheck.py │ │ └── src │ │ │ └── roi_align_cuda.cpp │ └── __init__.py ├── models │ ├── necks │ │ └── __init__.py │ ├── mask_heads │ │ └── __init__.py │ ├── roi_extractors │ │ ├── __init__.py │ │ └── single_level.py │ ├── bbox_heads │ │ └── __init__.py │ ├── anchor_heads │ │ ├── __init__.py │ │ ├── retina_head.py │ │ └── rpn_head.py │ ├── backbones │ │ └── __init__.py │ ├── utils │ │ ├── __init__.py │ │ ├── weight_init.py │ │ ├── norm.py │ │ └── conv_module.py │ ├── detectors │ │ ├── __init__.py │ │ ├── retinanet.py │ │ ├── faster_rcnn.py │ │ ├── mask_rcnn.py │ │ ├── fast_rcnn.py │ │ ├── single_stage.py │ │ └── rpn.py │ ├── __init__.py │ ├── registry.py │ └── builder.py ├── __init__.py ├── core │ ├── mask │ │ ├── __init__.py │ │ ├── utils.py │ │ └── mask_target.py │ ├── anchor │ │ └── __init__.py │ ├── bbox │ │ ├── assigners │ │ │ ├── __init__.py │ │ │ ├── base_assigner.py │ │ │ └── assign_result.py │ │ ├── samplers │ │ │ ├── combined_sampler.py │ │ │ ├── __init__.py │ │ │ ├── sampling_result.py │ │ │ ├── pseudo_sampler.py │ │ │ ├── instance_balanced_pos_sampler.py │ │ │ ├── random_sampler.py │ │ │ ├── ohem_sampler.py │ │ │ ├── iou_balanced_neg_sampler.py │ │ │ └── base_sampler.py │ │ ├── __init__.py │ │ ├── assign_sampling.py │ │ ├── geometry.py │ │ └── bbox_target.py │ ├── utils │ │ ├── __init__.py │ │ ├── misc.py │ │ └── dist_utils.py │ ├── __init__.py │ ├── post_processing │ │ ├── __init__.py │ │ ├── bbox_nms.py │ │ └── merge_augs.py │ ├── loss │ │ └── __init__.py │ └── evaluation │ │ ├── __init__.py │ │ └── bbox_overlaps.py ├── datasets │ ├── loader │ │ ├── __init__.py │ │ └── build_loader.py │ ├── SAR.py │ ├── repeat_dataset.py │ ├── voc.py │ ├── __init__.py │ ├── concat_dataset.py │ ├── xml_style.py │ └── transforms.py └── apis │ ├── __init__.py │ ├── env.py │ ├── inference.py │ └── train.py ├── demo ├── 1.png ├── 2.png ├── 3.png ├── 4.png ├── V2.png ├── V3.png ├── V4.png ├── v1.png └── coco_test_12510.jpg ├── .travis.yml ├── tools ├── dist_train.sh ├── coco_eval.py ├── voc_eval.py └── train.py ├── compile.sh ├── ssd_debug ├── compute_mean.py ├── assign_sample_debug.py └── test_ssd流程.py ├── test_images.py ├── INSTALL.md ├── .gitignore ├── README.md ├── RetinaNet_debug └── compute_Receptive_field.py ├── setup.py ├── configs ├── retinanet_r101_fpn_1x.py ├── retinanet_r50_fpn_1x.py ├── retinanet_mobileV2_fpn_1x.py ├── retinanet_x101_32x4d_fpn_1x.py ├── retinanet_x101_64x4d_fpn_1x.py ├── rpn_r50_fpn_1x.py ├── rpn_r101_fpn_1x.py ├── rpn_x101_32x4d_fpn_1x.py ├── rpn_x101_64x4d_fpn_1x.py ├── fast_rcnn_r50_fpn_1x.py ├── fast_rcnn_r101_fpn_1x.py └── ssd300_coco.py └── TECHNICAL_DETAILS.md /mmdet/ops/dcn/modules/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /mmdet/ops/dcn/functions/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mmdet/ops/nms/.gitignore: -------------------------------------------------------------------------------- 1 | *.cpp 2 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/modules/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/functions/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/modules/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/functions/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mmdet/models/necks/__init__.py: -------------------------------------------------------------------------------- 1 | from .fpn import FPN 2 | 3 | __all__ = ['FPN'] 4 | -------------------------------------------------------------------------------- /demo/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/237014845/MobilenetV2-Retina-Pytorch/HEAD/demo/1.png -------------------------------------------------------------------------------- /demo/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/237014845/MobilenetV2-Retina-Pytorch/HEAD/demo/2.png -------------------------------------------------------------------------------- /demo/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/237014845/MobilenetV2-Retina-Pytorch/HEAD/demo/3.png -------------------------------------------------------------------------------- /demo/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/237014845/MobilenetV2-Retina-Pytorch/HEAD/demo/4.png -------------------------------------------------------------------------------- /demo/V2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/237014845/MobilenetV2-Retina-Pytorch/HEAD/demo/V2.png -------------------------------------------------------------------------------- /demo/V3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/237014845/MobilenetV2-Retina-Pytorch/HEAD/demo/V3.png -------------------------------------------------------------------------------- /demo/V4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/237014845/MobilenetV2-Retina-Pytorch/HEAD/demo/V4.png -------------------------------------------------------------------------------- /demo/v1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/237014845/MobilenetV2-Retina-Pytorch/HEAD/demo/v1.png -------------------------------------------------------------------------------- /mmdet/ops/nms/__init__.py: -------------------------------------------------------------------------------- 1 | from .nms_wrapper import nms, soft_nms 2 | 3 | __all__ = ['nms', 'soft_nms'] 4 | -------------------------------------------------------------------------------- /mmdet/models/mask_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .fcn_mask_head import FCNMaskHead 2 | 3 | __all__ = ['FCNMaskHead'] 4 | -------------------------------------------------------------------------------- /demo/coco_test_12510.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/237014845/MobilenetV2-Retina-Pytorch/HEAD/demo/coco_test_12510.jpg -------------------------------------------------------------------------------- /mmdet/__init__.py: -------------------------------------------------------------------------------- 1 | from .version import __version__, short_version 2 | 3 | __all__ = ['__version__', 'short_version'] 4 | -------------------------------------------------------------------------------- /mmdet/models/roi_extractors/__init__.py: -------------------------------------------------------------------------------- 1 | from .single_level import SingleRoIExtractor 2 | 3 | __all__ = ['SingleRoIExtractor'] 4 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions.roi_pool import roi_pool 2 | from .modules.roi_pool import RoIPool 3 | 4 | __all__ = ['roi_pool', 'RoIPool'] 5 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions.roi_align import roi_align 2 | from .modules.roi_align import RoIAlign 3 | 4 | __all__ = ['roi_align', 'RoIAlign'] 5 | -------------------------------------------------------------------------------- /mmdet/core/mask/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils import split_combined_polys 2 | from .mask_target import mask_target 3 | 4 | __all__ = ['split_combined_polys', 'mask_target'] 5 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: trusty 2 | language: python 3 | 4 | install: 5 | - pip install flake8 6 | 7 | python: 8 | - "3.5" 9 | - "3.6" 10 | 11 | script: 12 | - flake8 -------------------------------------------------------------------------------- /mmdet/ops/nms/Makefile: -------------------------------------------------------------------------------- 1 | PYTHON=${PYTHON:-python} 2 | 3 | all: 4 | echo "Compiling nms kernels..." 
5 | $(PYTHON) setup.py build_ext --inplace 6 | 7 | clean: 8 | rm -f *.so 9 | -------------------------------------------------------------------------------- /mmdet/core/anchor/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor_generator import AnchorGenerator 2 | from .anchor_target import anchor_target 3 | 4 | __all__ = ['AnchorGenerator', 'anchor_target'] 5 | -------------------------------------------------------------------------------- /tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | PYTHON=${PYTHON:-"python"} 4 | 5 | $PYTHON -m torch.distributed.launch --nproc_per_node=$2 $(dirname "$0")/train.py $1 --launcher pytorch ${@:3} 6 | -------------------------------------------------------------------------------- /mmdet/models/bbox_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .bbox_head import BBoxHead 2 | from .convfc_bbox_head import ConvFCBBoxHead, SharedFCBBoxHead 3 | 4 | __all__ = ['BBoxHead', 'ConvFCBBoxHead', 'SharedFCBBoxHead'] 5 | -------------------------------------------------------------------------------- /mmdet/ops/nms/gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id, size_t base); 3 | size_t nms_Malloc(); 4 | -------------------------------------------------------------------------------- /mmdet/datasets/loader/__init__.py: -------------------------------------------------------------------------------- 1 | from .build_loader import build_dataloader 2 | from .sampler import GroupSampler, DistributedGroupSampler 3 | 4 | __all__ = [ 5 | 'GroupSampler', 'DistributedGroupSampler', 'build_dataloader' 6 | ] 7 | -------------------------------------------------------------------------------- /mmdet/core/bbox/assigners/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_assigner import BaseAssigner 2 | from .max_iou_assigner import MaxIoUAssigner 3 | from .assign_result import AssignResult 4 | 5 | __all__ = ['BaseAssigner', 'MaxIoUAssigner', 'AssignResult'] 6 | -------------------------------------------------------------------------------- /mmdet/models/anchor_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor_head import AnchorHead 2 | from .rpn_head import RPNHead 3 | from .retina_head import RetinaHead 4 | from .ssd_head import SSDHead 5 | 6 | __all__ = ['AnchorHead', 'RPNHead', 'RetinaHead', 'SSDHead'] 7 | -------------------------------------------------------------------------------- /mmdet/core/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .dist_utils import allreduce_grads, DistOptimizerHook 2 | from .misc import tensor2imgs, unmap, multi_apply 3 | 4 | __all__ = [ 5 | 'allreduce_grads', 'DistOptimizerHook', 'tensor2imgs', 'unmap', 6 | 'multi_apply' 7 | ] 8 | -------------------------------------------------------------------------------- /mmdet/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .resnet import ResNet 2 | from .resnext import ResNeXt 3 | from .ssd_vgg import SSDVGG 4 | from .mobilenet import MobileNetV2 5 | from 
.shufflenet import ShuffleNetV2 6 | 7 | __all__ = ['ResNet', 'ResNeXt', 'SSDVGG', 'ShuffleNetV2', 'MobileNetV2'] 8 | -------------------------------------------------------------------------------- /mmdet/datasets/SAR.py: -------------------------------------------------------------------------------- 1 | from .xml_style import XMLDataset 2 | # from .voc import VOCDataset 3 | 4 | 5 | class SARDataset(XMLDataset): 6 | 7 | CLASSES = ('ship',) 8 | 9 | def __init__(self, **kwargs): 10 | super(SARDataset, self).__init__(**kwargs) 11 | self.abc = 1 -------------------------------------------------------------------------------- /mmdet/apis/__init__.py: -------------------------------------------------------------------------------- 1 | from .env import init_dist, get_root_logger, set_random_seed 2 | from .train import train_detector 3 | from .inference import inference_detector, show_result 4 | 5 | __all__ = [ 6 | 'init_dist', 'get_root_logger', 'set_random_seed', 'train_detector', 7 | 'inference_detector', 'show_result' 8 | ] 9 | -------------------------------------------------------------------------------- /mmdet/core/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor import * # noqa: F401, F403 2 | from .bbox import * # noqa: F401, F403 3 | from .mask import * # noqa: F401, F403 4 | from .loss import * # noqa: F401, F403 5 | from .evaluation import * # noqa: F401, F403 6 | from .post_processing import * # noqa: F401, F403 7 | from .utils import * # noqa: F401, F403 8 | -------------------------------------------------------------------------------- /mmdet/core/post_processing/__init__.py: -------------------------------------------------------------------------------- 1 | from .bbox_nms import multiclass_nms 2 | from .merge_augs import (merge_aug_proposals, merge_aug_bboxes, 3 | merge_aug_scores, merge_aug_masks) 4 | 5 | __all__ = [ 6 | 'multiclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes', 7 | 'merge_aug_scores', 'merge_aug_masks' 8 | ] 9 | -------------------------------------------------------------------------------- /mmdet/core/bbox/assigners/base_assigner.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | # A class is made an abstract base class by setting its metaclass to abc.ABCMeta 4 | class BaseAssigner(metaclass=ABCMeta): 5 | # @abstractmethod declares a base-class method as abstract, so every subclass must override it; similarly, @abstractproperty declares an abstract property. 6 | @abstractmethod 7 | def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None): 8 | pass 9 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='roi_pool', 6 | ext_modules=[ 7 | CUDAExtension('roi_pool_cuda', [ 8 | 'src/roi_pool_cuda.cpp', 9 | 'src/roi_pool_kernel.cu', 10 | ]) 11 | ], 12 | cmdclass={'build_ext': BuildExtension}) 13 | -------------------------------------------------------------------------------- /mmdet/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .conv_module import ConvModule 2 | from .norm import build_norm_layer 3 | from .weight_init import (xavier_init, normal_init, uniform_init, kaiming_init, 4 | bias_init_with_prob) 5 | 6 | __all__ = [ 7 | 'ConvModule', 'build_norm_layer', 'xavier_init', 'normal_init', 8 | 
'uniform_init', 'kaiming_init', 'bias_init_with_prob' 9 | ] 10 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='roi_align_cuda', 6 | ext_modules=[ 7 | CUDAExtension('roi_align_cuda', [ 8 | 'src/roi_align_cuda.cpp', 9 | 'src/roi_align_kernel.cu', 10 | ]), 11 | ], 12 | cmdclass={'build_ext': BuildExtension}) 13 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/modules/roi_pool.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from ..functions.roi_pool import roi_pool 3 | 4 | 5 | class RoIPool(Module): 6 | 7 | def __init__(self, out_size, spatial_scale): 8 | super(RoIPool, self).__init__() 9 | 10 | self.out_size = out_size 11 | self.spatial_scale = float(spatial_scale) 12 | 13 | def forward(self, features, rois): 14 | return roi_pool(features, rois, self.out_size, self.spatial_scale) 15 | -------------------------------------------------------------------------------- /mmdet/models/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import BaseDetector 2 | from .single_stage import SingleStageDetector 3 | from .two_stage import TwoStageDetector 4 | from .rpn import RPN 5 | from .fast_rcnn import FastRCNN 6 | from .faster_rcnn import FasterRCNN 7 | from .mask_rcnn import MaskRCNN 8 | from .cascade_rcnn import CascadeRCNN 9 | from .retinanet import RetinaNet 10 | 11 | __all__ = [ 12 | 'BaseDetector', 'SingleStageDetector', 'TwoStageDetector', 'RPN', 13 | 'FastRCNN', 'FasterRCNN', 'MaskRCNN', 'CascadeRCNN', 'RetinaNet' 14 | ] 15 | -------------------------------------------------------------------------------- /mmdet/ops/dcn/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='deform_conv', 6 | ext_modules=[ 7 | CUDAExtension('deform_conv_cuda', [ 8 | 'src/deform_conv_cuda.cpp', 9 | 'src/deform_conv_cuda_kernel.cu', 10 | ]), 11 | CUDAExtension('deform_pool_cuda', [ 12 | 'src/deform_pool_cuda.cpp', 'src/deform_pool_cuda_kernel.cu' 13 | ]), 14 | ], 15 | cmdclass={'build_ext': BuildExtension}) 16 | -------------------------------------------------------------------------------- /mmdet/core/loss/__init__.py: -------------------------------------------------------------------------------- 1 | from .losses import (weighted_nll_loss, weighted_cross_entropy, 2 | weighted_binary_cross_entropy, sigmoid_focal_loss, 3 | weighted_sigmoid_focal_loss, mask_cross_entropy, 4 | smooth_l1_loss, weighted_smoothl1, accuracy) 5 | 6 | __all__ = [ 7 | 'weighted_nll_loss', 'weighted_cross_entropy', 8 | 'weighted_binary_cross_entropy', 'sigmoid_focal_loss', 9 | 'weighted_sigmoid_focal_loss', 'mask_cross_entropy', 'smooth_l1_loss', 10 | 'weighted_smoothl1', 'accuracy' 11 | ] 12 | -------------------------------------------------------------------------------- /mmdet/models/detectors/retinanet.py: -------------------------------------------------------------------------------- 1 | from .single_stage import SingleStageDetector 2 | from ..registry import DETECTORS 3 | 4 | 5 | 
@DETECTORS.register_module 6 | class RetinaNet(SingleStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | neck, 11 | bbox_head, 12 | train_cfg=None, 13 | test_cfg=None, 14 | pretrained=None): 15 | super(RetinaNet, self).__init__(backbone, neck, bbox_head, train_cfg, 16 | test_cfg, pretrained) 17 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/gradcheck.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import gradcheck 3 | 4 | import os.path as osp 5 | import sys 6 | sys.path.append(osp.abspath(osp.join(__file__, '../../'))) 7 | from roi_pool import RoIPool # noqa: E402 8 | 9 | feat = torch.randn(4, 16, 15, 15, requires_grad=True).cuda() 10 | rois = torch.Tensor([[0, 0, 0, 50, 50], [0, 10, 30, 43, 55], 11 | [1, 67, 40, 110, 120]]).cuda() 12 | inputs = (feat, rois) 13 | print('Gradcheck for roi pooling...') 14 | test = gradcheck(RoIPool(4, 1.0 / 8), inputs, eps=1e-5, atol=1e-3) 15 | print(test) 16 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/combined_sampler.py: -------------------------------------------------------------------------------- 1 | from .base_sampler import BaseSampler 2 | from ..assign_sampling import build_sampler 3 | 4 | 5 | class CombinedSampler(BaseSampler): 6 | 7 | def __init__(self, pos_sampler, neg_sampler, **kwargs): 8 | super(CombinedSampler, self).__init__(**kwargs) 9 | self.pos_sampler = build_sampler(pos_sampler, **kwargs) 10 | self.neg_sampler = build_sampler(neg_sampler, **kwargs) 11 | 12 | def _sample_pos(self, **kwargs): 13 | raise NotImplementedError 14 | 15 | def _sample_neg(self, **kwargs): 16 | raise NotImplementedError 17 | -------------------------------------------------------------------------------- /mmdet/datasets/repeat_dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class RepeatDataset(object): 5 | 6 | def __init__(self, dataset, times): 7 | self.dataset = dataset 8 | self.times = times 9 | self.CLASSES = dataset.CLASSES 10 | if hasattr(self.dataset, 'flag'): 11 | # np.tile replicates an array along each axis; here the group flags are repeated 'times' times 12 | self.flag = np.tile(self.dataset.flag, times) 13 | 14 | self._ori_len = len(self.dataset) 15 | 16 | def __getitem__(self, idx): 17 | return self.dataset[idx % self._ori_len] 18 | 19 | def __len__(self): 20 | return self.times * self._ori_len 21 | -------------------------------------------------------------------------------- /compile.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | PYTHON=${PYTHON:-"python"} 4 | 5 | echo "Building roi align op..." 6 | cd mmdet/ops/roi_align 7 | if [ -d "build" ]; then 8 | rm -r build 9 | fi 10 | $PYTHON setup.py build_ext --inplace 11 | 12 | echo "Building roi pool op..." 13 | cd ../roi_pool 14 | if [ -d "build" ]; then 15 | rm -r build 16 | fi 17 | $PYTHON setup.py build_ext --inplace 18 | 19 | echo "Building nms op..." 20 | cd ../nms 21 | make clean 22 | make PYTHON=${PYTHON} 23 | 24 | echo "Building dcn..." 
25 | cd ../dcn 26 | if [ -d "build" ]; then 27 | rm -r build 28 | fi 29 | $PYTHON setup.py build_ext --inplace 30 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/modules/roi_align.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from ..functions.roi_align import RoIAlignFunction 3 | 4 | 5 | class RoIAlign(Module): 6 | 7 | def __init__(self, out_size, spatial_scale, sample_num=0): 8 | super(RoIAlign, self).__init__() 9 | 10 | self.out_size = out_size 11 | self.spatial_scale = float(spatial_scale) 12 | self.sample_num = int(sample_num) 13 | 14 | def forward(self, features, rois): 15 | return RoIAlignFunction.apply(features, rois, self.out_size, 16 | self.spatial_scale, self.sample_num) 17 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_sampler import BaseSampler 2 | from .pseudo_sampler import PseudoSampler 3 | from .random_sampler import RandomSampler 4 | from .instance_balanced_pos_sampler import InstanceBalancedPosSampler 5 | from .iou_balanced_neg_sampler import IoUBalancedNegSampler 6 | from .combined_sampler import CombinedSampler 7 | from .ohem_sampler import OHEMSampler 8 | from .sampling_result import SamplingResult 9 | 10 | __all__ = [ 11 | 'BaseSampler', 'PseudoSampler', 'RandomSampler', 12 | 'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler', 13 | 'OHEMSampler', 'SamplingResult' 14 | ] 15 | -------------------------------------------------------------------------------- /mmdet/ops/dcn/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions.deform_conv import deform_conv, modulated_deform_conv 2 | from .functions.deform_pool import deform_roi_pooling 3 | from .modules.deform_conv import (DeformConv, ModulatedDeformConv, 4 | ModulatedDeformConvPack) 5 | from .modules.deform_pool import (DeformRoIPooling, DeformRoIPoolingPack, 6 | ModulatedDeformRoIPoolingPack) 7 | 8 | __all__ = [ 9 | 'DeformConv', 'DeformRoIPooling', 'DeformRoIPoolingPack', 10 | 'ModulatedDeformRoIPoolingPack', 'ModulatedDeformConv', 11 | 'ModulatedDeformConvPack', 'deform_conv', 12 | 'modulated_deform_conv', 'deform_roi_pooling' 13 | ] 14 | -------------------------------------------------------------------------------- /mmdet/ops/__init__.py: -------------------------------------------------------------------------------- 1 | from .dcn import (DeformConv, DeformRoIPooling, DeformRoIPoolingPack, 2 | ModulatedDeformRoIPoolingPack, ModulatedDeformConv, 3 | ModulatedDeformConvPack, deform_conv, modulated_deform_conv, 4 | deform_roi_pooling) 5 | from .nms import nms, soft_nms 6 | from .roi_align import RoIAlign, roi_align 7 | from .roi_pool import RoIPool, roi_pool 8 | 9 | __all__ = [ 10 | 'nms', 'soft_nms', 'RoIAlign', 'roi_align', 'RoIPool', 'roi_pool', 11 | 'DeformConv', 'DeformRoIPooling', 'DeformRoIPoolingPack', 12 | 'ModulatedDeformRoIPoolingPack', 'ModulatedDeformConv', 13 | 'ModulatedDeformConvPack', 'deform_conv', 'modulated_deform_conv', 14 | 'deform_roi_pooling' 15 | ] 16 | -------------------------------------------------------------------------------- /mmdet/datasets/voc.py: -------------------------------------------------------------------------------- 1 | from .xml_style import XMLDataset 2 | 3 | 4 | class 
VOCDataset(XMLDataset): 5 | 6 | CLASSES = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 7 | 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 8 | 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 9 | 'tvmonitor') 10 | # CLASSES = ('ship',) 11 | 12 | 13 | def __init__(self, **kwargs): 14 | super(VOCDataset, self).__init__(**kwargs) 15 | if 'VOC2007' in self.img_prefix: 16 | self.year = 2007 17 | elif 'VOC2012' in self.img_prefix: 18 | self.year = 2012 19 | else: 20 | raise ValueError('Cannot infer dataset year from img_prefix') 21 | -------------------------------------------------------------------------------- /mmdet/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .backbones import * # noqa: F401,F403 2 | from .necks import * # noqa: F401,F403 3 | from .roi_extractors import * # noqa: F401,F403 4 | from .anchor_heads import * # noqa: F401,F403 5 | from .bbox_heads import * # noqa: F401,F403 6 | from .mask_heads import * # noqa: F401,F403 7 | from .detectors import * # noqa: F401,F403 8 | from .registry import BACKBONES, NECKS, ROI_EXTRACTORS, HEADS, DETECTORS 9 | from .builder import (build_backbone, build_neck, build_roi_extractor, 10 | build_head, build_detector) 11 | 12 | __all__ = [ 13 | 'BACKBONES', 'NECKS', 'ROI_EXTRACTORS', 'HEADS', 'DETECTORS', 14 | 'build_backbone', 'build_neck', 'build_roi_extractor', 'build_head', 15 | 'build_detector' 16 | ] 17 | -------------------------------------------------------------------------------- /mmdet/core/bbox/assigners/assign_result.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class AssignResult(object): 5 | 6 | def __init__(self, num_gts, gt_inds, max_overlaps, labels=None): 7 | self.num_gts = num_gts 8 | self.gt_inds = gt_inds 9 | self.max_overlaps = max_overlaps 10 | self.labels = labels 11 | 12 | def add_gt_(self, gt_labels): 13 | self_inds = torch.arange( 14 | 1, len(gt_labels) + 1, dtype=torch.long, device=gt_labels.device) 15 | self.gt_inds = torch.cat([self_inds, self.gt_inds]) 16 | self.max_overlaps = torch.cat( 17 | [self.max_overlaps.new_ones(self.num_gts), self.max_overlaps]) 18 | if self.labels is not None: 19 | self.labels = torch.cat([gt_labels, self.labels]) 20 | -------------------------------------------------------------------------------- /mmdet/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .custom import CustomDataset 2 | from .xml_style import XMLDataset 3 | from .coco import CocoDataset 4 | from .voc import VOCDataset 5 | from .loader import GroupSampler, DistributedGroupSampler, build_dataloader 6 | from .utils import to_tensor, random_scale, show_ann, get_dataset 7 | from .concat_dataset import ConcatDataset 8 | from .repeat_dataset import RepeatDataset 9 | from .extra_aug import ExtraAugmentation 10 | from .SAR import SARDataset 11 | 12 | __all__ = [ 13 | 'CustomDataset', 'XMLDataset', 'CocoDataset', 'SARDataset', 'VOCDataset', 'GroupSampler', 14 | 'DistributedGroupSampler', 'build_dataloader', 'to_tensor', 'random_scale', 15 | 'show_ann', 'get_dataset', 'ConcatDataset', 'RepeatDataset', 16 | 'ExtraAugmentation', 17 | ] 18 | -------------------------------------------------------------------------------- /ssd_debug/compute_mean.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import numpy as np 4 | 5 | path = 
'/home/hs/data/SAR/JPEGImages' 6 | 7 | 8 | def compute(path): 9 | file_names = os.listdir(path) 10 | per_image_Rmean = [] 11 | per_image_Gmean = [] 12 | per_image_Bmean = [] 13 | for file_name in file_names: 14 | img = cv2.imread(os.path.join(path, file_name), 1) 15 | per_image_Bmean.append(np.mean(img[:, :, 0])) 16 | per_image_Gmean.append(np.mean(img[:, :, 1])) 17 | per_image_Rmean.append(np.mean(img[:, :, 2])) 18 | R_mean = np.mean(per_image_Rmean) 19 | G_mean = np.mean(per_image_Gmean) 20 | B_mean = np.mean(per_image_Bmean) 21 | return R_mean, G_mean, B_mean 22 | 23 | 24 | if __name__ == '__main__': 25 | R, G, B = compute(path) 26 | print(R, G, B) 27 | 28 | -------------------------------------------------------------------------------- /mmdet/models/detectors/faster_rcnn.py: -------------------------------------------------------------------------------- 1 | from .two_stage import TwoStageDetector 2 | from ..registry import DETECTORS 3 | 4 | 5 | @DETECTORS.register_module 6 | class FasterRCNN(TwoStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | neck, 11 | rpn_head, 12 | bbox_roi_extractor, 13 | bbox_head, 14 | train_cfg, 15 | test_cfg, 16 | pretrained=None): 17 | super(FasterRCNN, self).__init__( 18 | backbone=backbone, 19 | neck=neck, 20 | rpn_head=rpn_head, 21 | bbox_roi_extractor=bbox_roi_extractor, 22 | bbox_head=bbox_head, 23 | train_cfg=train_cfg, 24 | test_cfg=test_cfg, 25 | pretrained=pretrained) 26 | -------------------------------------------------------------------------------- /test_images.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | from mmcv.runner import load_checkpoint 3 | from mmdet.models import build_detector 4 | from mmdet.apis import inference_detector, show_result 5 | 6 | cfg = mmcv.Config.fromfile('configs/faster_rcnn_r50_fpn_1x.py') 7 | cfg.model.pretrained = None 8 | 9 | # construct the model and load checkpoint 10 | model = build_detector(cfg.model, test_cfg=cfg.test_cfg) 11 | _ = load_checkpoint(model, 'https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/faster_rcnn_r50_fpn_1x_20181010-3d1b3351.pth') 12 | 13 | # test a single image 14 | img = mmcv.imread('test.jpg') 15 | result = inference_detector(model, img, cfg) 16 | show_result(img, result) 17 | 18 | # test a list of images 19 | imgs = ['test1.jpg', 'test2.jpg'] 20 | for i, result in enumerate(inference_detector(model, imgs, cfg, device='cuda:0')): 21 | print(i, imgs[i]) 22 | show_result(imgs[i], result) -------------------------------------------------------------------------------- /tools/coco_eval.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser 2 | 3 | from mmdet.core import coco_eval 4 | 5 | 6 | def main(): 7 | parser = ArgumentParser(description='COCO Evaluation') 8 | parser.add_argument('result', help='result file path') 9 | parser.add_argument('--ann', help='annotation file path') 10 | parser.add_argument( 11 | '--types', 12 | type=str, 13 | nargs='+', 14 | choices=['proposal_fast', 'proposal', 'bbox', 'segm', 'keypoint'], 15 | default=['bbox'], 16 | help='result types') 17 | parser.add_argument( 18 | '--max-dets', 19 | type=int, 20 | nargs='+', 21 | default=[100, 300, 1000], 22 | help='proposal numbers, only used for recall evaluation') 23 | args = parser.parse_args() 24 | coco_eval(args.result, args.types, args.ann, args.max_dets) 25 | 26 | 27 | if __name__ == '__main__': 28 | main() 29 | 
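A usage sketch for `tools/coco_eval.py` above — a minimal invocation under assumed paths (`results.pkl` stands in for whatever result file your test run dumps; the annotation path follows the `data/coco` layout from INSTALL.md):

```shell
# Hypothetical file names; --types may be any of proposal_fast, proposal, bbox, segm, keypoint.
python tools/coco_eval.py results.pkl --ann data/coco/annotations/instances_val2017.json --types bbox
```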
-------------------------------------------------------------------------------- /mmdet/datasets/concat_dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset 3 | 4 | 5 | class ConcatDataset(_ConcatDataset): 6 | """A wrapper of concatenated datasets. 7 | 8 | Same as :obj:`torch.utils.data.dataset.ConcatDataset`, but 9 | also concatenates the group flags for image aspect ratio. 10 | 11 | Args: 12 | datasets (list[:obj:`Dataset`]): A list of datasets. 13 | """ 14 | 15 | def __init__(self, datasets): 16 | super(ConcatDataset, self).__init__(datasets) 17 | self.CLASSES = datasets[0].CLASSES 18 | # hasattr checks whether datasets[0] has a 'flag' attribute and returns True if it does 19 | # 'flag' records, for each image in the dataset, whether its width is greater than its height 20 | # (in this training set, every image is wider than it is tall) 21 | if hasattr(datasets[0], 'flag'): 22 | flags = [] 23 | for i in range(0, len(datasets)): 24 | flags.append(datasets[i].flag) 25 | self.flag = np.concatenate(flags) 26 | -------------------------------------------------------------------------------- /mmdet/models/detectors/mask_rcnn.py: -------------------------------------------------------------------------------- 1 | from .two_stage import TwoStageDetector 2 | from ..registry import DETECTORS 3 | 4 | 5 | @DETECTORS.register_module 6 | class MaskRCNN(TwoStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | neck, 11 | rpn_head, 12 | bbox_roi_extractor, 13 | bbox_head, 14 | mask_roi_extractor, 15 | mask_head, 16 | train_cfg, 17 | test_cfg, 18 | pretrained=None): 19 | super(MaskRCNN, self).__init__( 20 | backbone=backbone, 21 | neck=neck, 22 | rpn_head=rpn_head, 23 | bbox_roi_extractor=bbox_roi_extractor, 24 | bbox_head=bbox_head, 25 | mask_roi_extractor=mask_roi_extractor, 26 | mask_head=mask_head, 27 | train_cfg=train_cfg, 28 | test_cfg=test_cfg, 29 | pretrained=pretrained) 30 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/gradcheck.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch.autograd import gradcheck 4 | 5 | import os.path as osp 6 | import sys 7 | sys.path.append(osp.abspath(osp.join(__file__, '../../'))) 8 | from roi_align import RoIAlign # noqa: E402 9 | 10 | feat_size = 15 11 | spatial_scale = 1.0 / 8 12 | img_size = feat_size / spatial_scale 13 | num_imgs = 2 14 | num_rois = 20 15 | 16 | batch_ind = np.random.randint(num_imgs, size=(num_rois, 1)) 17 | rois = np.random.rand(num_rois, 4) * img_size * 0.5 18 | rois[:, 2:] += img_size * 0.5 19 | rois = np.hstack((batch_ind, rois)) 20 | 21 | feat = torch.randn( 22 | num_imgs, 16, feat_size, feat_size, requires_grad=True, device='cuda:0') 23 | rois = torch.from_numpy(rois).float().cuda() 24 | inputs = (feat, rois) 25 | print('Gradcheck for roi align...') 26 | test = gradcheck(RoIAlign(3, spatial_scale), inputs, atol=1e-3, eps=1e-3) 27 | print(test) 28 | test = gradcheck(RoIAlign(3, spatial_scale, 2), inputs, atol=1e-3, eps=1e-3) 29 | print(test) 30 | -------------------------------------------------------------------------------- /mmdet/core/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .class_names import (voc_classes, imagenet_det_classes, 2 | imagenet_vid_classes, coco_classes, dataset_aliases, 3 | get_classes) 4 | from .coco_utils import coco_eval, fast_eval_recall, results2json 5 | from .eval_hooks import (DistEvalHook, DistEvalmAPHook, 
CocoDistEvalRecallHook, 6 | CocoDistEvalmAPHook) 7 | from .mean_ap import average_precision, eval_map, print_map_summary 8 | from .recall import (eval_recalls, print_recall_summary, plot_num_recall, 9 | plot_iou_recall) 10 | 11 | __all__ = [ 12 | 'voc_classes', 'imagenet_det_classes', 'imagenet_vid_classes', 13 | 'coco_classes', 'dataset_aliases', 'get_classes', 'coco_eval', 14 | 'fast_eval_recall', 'results2json', 'DistEvalHook', 'DistEvalmAPHook', 15 | 'CocoDistEvalRecallHook', 'CocoDistEvalmAPHook', 'average_precision', 16 | 'eval_map', 'print_map_summary', 'eval_recalls', 'print_recall_summary', 17 | 'plot_num_recall', 'plot_iou_recall' 18 | ] 19 | -------------------------------------------------------------------------------- /mmdet/core/bbox/__init__.py: -------------------------------------------------------------------------------- 1 | from .geometry import bbox_overlaps 2 | from .assigners import BaseAssigner, MaxIoUAssigner, AssignResult 3 | from .samplers import (BaseSampler, PseudoSampler, RandomSampler, 4 | InstanceBalancedPosSampler, IoUBalancedNegSampler, 5 | CombinedSampler, SamplingResult) 6 | from .assign_sampling import build_assigner, build_sampler, assign_and_sample 7 | from .transforms import (bbox2delta, delta2bbox, bbox_flip, bbox_mapping, 8 | bbox_mapping_back, bbox2roi, roi2bbox, bbox2result) 9 | from .bbox_target import bbox_target 10 | 11 | __all__ = [ 12 | 'bbox_overlaps', 'BaseAssigner', 'MaxIoUAssigner', 'AssignResult', 13 | 'BaseSampler', 'PseudoSampler', 'RandomSampler', 14 | 'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler', 15 | 'SamplingResult', 'build_assigner', 'build_sampler', 'assign_and_sample', 16 | 'bbox2delta', 'delta2bbox', 'bbox_flip', 'bbox_mapping', 17 | 'bbox_mapping_back', 'bbox2roi', 'roi2bbox', 'bbox2result', 'bbox_target' 18 | ] 19 | -------------------------------------------------------------------------------- /mmdet/core/mask/utils.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | 4 | def split_combined_polys(polys, poly_lens, polys_per_mask): 5 | """Split the combined 1-D polys into masks. 6 | 7 | A mask is represented as a list of polys, and a poly is represented as 8 | a 1-D array. In dataset, all masks are concatenated into a single 1-D 9 | tensor. Here we need to split the tensor into original representations. 10 | 11 | Args: 12 | polys (list): a list (length = image num) of 1-D tensors 13 | poly_lens (list): a list (length = image num) of poly length 14 | polys_per_mask (list): a list (length = image num) of poly number 15 | of each mask 16 | 17 | Returns: 18 | list: a list (length = image num) of list (length = mask num) of 19 | list (length = poly num) of numpy array 20 | """ 21 | mask_polys_list = [] 22 | for img_id in range(len(polys)): 23 | polys_single = polys[img_id] 24 | polys_lens_single = poly_lens[img_id].tolist() 25 | polys_per_mask_single = polys_per_mask[img_id].tolist() 26 | 27 | split_polys = mmcv.slice_list(polys_single, polys_lens_single) 28 | mask_polys = mmcv.slice_list(split_polys, polys_per_mask_single) 29 | mask_polys_list.append(mask_polys) 30 | return mask_polys_list 31 | -------------------------------------------------------------------------------- /mmdet/core/bbox/assign_sampling.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | from . 
import assigners, samplers 4 | 5 | 6 | def build_assigner(cfg, **kwargs): 7 | if isinstance(cfg, assigners.BaseAssigner): 8 | return cfg 9 | elif isinstance(cfg, dict): 10 | return mmcv.runner.obj_from_dict( 11 | cfg, assigners, default_args=kwargs) 12 | else: 13 | raise TypeError('Invalid type {} for building an assigner'.format( 14 | type(cfg))) 15 | 16 | 17 | def build_sampler(cfg, **kwargs): 18 | if isinstance(cfg, samplers.BaseSampler): 19 | return cfg 20 | elif isinstance(cfg, dict): 21 | return mmcv.runner.obj_from_dict( 22 | cfg, samplers, default_args=kwargs) 23 | else: 24 | raise TypeError('Invalid type {} for building a sampler'.format( 25 | type(cfg))) 26 | 27 | # cfg = train_cfg 28 | def assign_and_sample(bboxes, gt_bboxes, gt_bboxes_ignore, gt_labels, cfg): 29 | bbox_assigner = build_assigner(cfg.assigner) 30 | bbox_sampler = build_sampler(cfg.sampler) 31 | assign_result = bbox_assigner.assign(bboxes, gt_bboxes, gt_bboxes_ignore, 32 | gt_labels) 33 | sampling_result = bbox_sampler.sample(assign_result, bboxes, gt_bboxes, 34 | gt_labels) 35 | return assign_result, sampling_result 36 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/sampling_result.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class SamplingResult(object): 5 | 6 | def __init__(self, pos_inds, neg_inds, bboxes, gt_bboxes, assign_result, 7 | gt_flags): 8 | # indices (into the anchor list) of the positive samples in assigned_gt_inds 9 | self.pos_inds = pos_inds 10 | # indices (into the anchor list) of the negative samples in assigned_gt_inds 11 | self.neg_inds = neg_inds 12 | # coordinates of the anchors sampled as positives 13 | self.pos_bboxes = bboxes[pos_inds] 14 | # coordinates of the anchors sampled as negatives 15 | self.neg_bboxes = bboxes[neg_inds] 16 | # self.pos_is_gt: gt_flags of the sampled positive anchors (0 for ordinary anchors) 17 | self.pos_is_gt = gt_flags[pos_inds] 18 | 19 | self.num_gts = gt_bboxes.shape[0] 20 | # self.pos_assigned_gt_inds: index of the gt box matched to each positive anchor, 21 | # i.e. the value assigned in gt_inds minus 1 22 | self.pos_assigned_gt_inds = assign_result.gt_inds[pos_inds] - 1 23 | # self.pos_gt_bboxes: the [xmin, ymin, xmax, ymax] of the gt box matched to each positive anchor 24 | self.pos_gt_bboxes = gt_bboxes[self.pos_assigned_gt_inds, :] 25 | if assign_result.labels is not None: 26 | # self.pos_gt_labels: the ground-truth label matched to each positive anchor 27 | self.pos_gt_labels = assign_result.labels[pos_inds] 28 | else: 29 | self.pos_gt_labels = None 30 | 31 | @property 32 | def bboxes(self): 33 | return torch.cat([self.pos_bboxes, self.neg_bboxes]) 34 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/pseudo_sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .base_sampler import BaseSampler 4 | from .sampling_result import SamplingResult 5 | 6 | 7 | class PseudoSampler(BaseSampler): 8 | 9 | def __init__(self, **kwargs): 10 | pass 11 | 12 | def _sample_pos(self, **kwargs): 13 | raise NotImplementedError 14 | 15 | def _sample_neg(self, **kwargs): 16 | raise NotImplementedError 17 | 18 | def sample(self, assign_result, bboxes, gt_bboxes, **kwargs): 19 | # torch.nonzero(input) returns the indices of the elements of input that are nonzero (here, > 0) 20 | # indices of the anchors assigned as positive samples 21 | pos_inds = torch.nonzero( 22 | assign_result.gt_inds > 0).squeeze(-1).unique() 23 | # indices of the anchors assigned as negative samples 24 | neg_inds = torch.nonzero( 25 | assign_result.gt_inds == 0).squeeze(-1).unique() 26 | # gt_flags.shape: [8732] (one flag per anchor; 8732 is the SSD300 anchor count) 27 | gt_flags = bboxes.new_zeros(bboxes.shape[0], dtype=torch.uint8) 28 | sampling_result = 
SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes, 29 | assign_result, gt_flags) 30 | return sampling_result 31 | 32 | # bboxes = torch.randn(8, 4) 33 | # gt_flags = bboxes.new_zeros(bboxes.shape[0], dtype=torch.uint8) 34 | # pos_inds = torch.LongTensor([7, 5, 3, 1]) 35 | # print(gt_flags, gt_flags.shape) 36 | # print(gt_flags[pos_inds]) -------------------------------------------------------------------------------- /mmdet/models/registry.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class Registry(object): 5 | 6 | def __init__(self, name): 7 | self._name = name 8 | self._module_dict = dict() 9 | 10 | @property 11 | def name(self): 12 | return self._name 13 | 14 | @property 15 | def module_dict(self): 16 | return self._module_dict 17 | 18 | def _register_module(self, module_class): 19 | """Register a module. 20 | 21 | Args: 22 | module (:obj:`nn.Module`): Module to be registered. 23 | """ 24 | if not issubclass(module_class, nn.Module): 25 | raise TypeError( 26 | 'module must be a child of nn.Module, but got {}'.format( 27 | module_class)) 28 | module_name = module_class.__name__ 29 | if module_name in self._module_dict: 30 | raise KeyError('{} is already registered in {}'.format( 31 | module_name, self.name)) 32 | self._module_dict[module_name] = module_class 33 | 34 | def register_module(self, cls): 35 | self._register_module(cls) 36 | return cls 37 | 38 | 39 | BACKBONES = Registry('backbone') 40 | NECKS = Registry('neck') 41 | ROI_EXTRACTORS = Registry('roi_extractor') 42 | HEADS = Registry('head') 43 | DETECTORS = Registry('detector') 44 | 45 | # print(DETECTORS.module_dict) 46 | # print(HEADS.module_dict) 47 | 48 | -------------------------------------------------------------------------------- /mmdet/datasets/loader/build_loader.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | 3 | from mmcv.runner import get_dist_info 4 | from mmcv.parallel import collate 5 | from torch.utils.data import DataLoader 6 | 7 | from .sampler import GroupSampler, DistributedGroupSampler 8 | 9 | # https://github.com/pytorch/pytorch/issues/973 10 | import resource 11 | rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) 12 | resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1])) 13 | 14 | 15 | def build_dataloader(dataset, 16 | imgs_per_gpu, 17 | workers_per_gpu, 18 | num_gpus=1, 19 | dist=True, 20 | **kwargs): 21 | if dist: 22 | rank, world_size = get_dist_info() 23 | sampler = DistributedGroupSampler(dataset, imgs_per_gpu, world_size, 24 | rank) 25 | batch_size = imgs_per_gpu 26 | num_workers = workers_per_gpu 27 | else: 28 | if not kwargs.get('shuffle', True): 29 | sampler = None 30 | else: 31 | sampler = GroupSampler(dataset, imgs_per_gpu) 32 | batch_size = num_gpus * imgs_per_gpu 33 | num_workers = num_gpus * workers_per_gpu 34 | 35 | data_loader = DataLoader( 36 | dataset, 37 | batch_size=batch_size, 38 | sampler=sampler, 39 | num_workers=num_workers, 40 | collate_fn=partial(collate, samples_per_gpu=imgs_per_gpu), 41 | pin_memory=False, 42 | **kwargs) 43 | 44 | return data_loader 45 | -------------------------------------------------------------------------------- /mmdet/core/mask/mask_target.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import mmcv 4 | 5 | 6 | def mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_masks_list, 7 | cfg): 8 | cfg_list = 
[cfg for _ in range(len(pos_proposals_list))] 9 | mask_targets = map(mask_target_single, pos_proposals_list, 10 | pos_assigned_gt_inds_list, gt_masks_list, cfg_list) 11 | mask_targets = torch.cat(list(mask_targets)) 12 | return mask_targets 13 | 14 | 15 | def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_masks, cfg): 16 | mask_size = cfg.mask_size 17 | num_pos = pos_proposals.size(0) 18 | mask_targets = [] 19 | if num_pos > 0: 20 | proposals_np = pos_proposals.cpu().numpy() 21 | pos_assigned_gt_inds = pos_assigned_gt_inds.cpu().numpy() 22 | for i in range(num_pos): 23 | gt_mask = gt_masks[pos_assigned_gt_inds[i]] 24 | bbox = proposals_np[i, :].astype(np.int32) 25 | x1, y1, x2, y2 = bbox 26 | w = np.maximum(x2 - x1 + 1, 1) 27 | h = np.maximum(y2 - y1 + 1, 1) 28 | # mask is uint8 both before and after resizing 29 | target = mmcv.imresize(gt_mask[y1:y1 + h, x1:x1 + w], 30 | (mask_size, mask_size)) 31 | mask_targets.append(target) 32 | mask_targets = torch.from_numpy(np.stack(mask_targets)).float().to( 33 | pos_proposals.device) 34 | else: 35 | mask_targets = pos_proposals.new_zeros((0, mask_size, mask_size)) 36 | return mask_targets 37 | -------------------------------------------------------------------------------- /mmdet/core/utils/misc.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | 3 | import mmcv 4 | import numpy as np 5 | from six.moves import map, zip 6 | 7 | 8 | def tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True): 9 | num_imgs = tensor.size(0) 10 | mean = np.array(mean, dtype=np.float32) 11 | std = np.array(std, dtype=np.float32) 12 | imgs = [] 13 | for img_id in range(num_imgs): 14 | img = tensor[img_id, ...].cpu().numpy().transpose(1, 2, 0) 15 | img = mmcv.imdenormalize( 16 | img, mean, std, to_bgr=to_rgb).astype(np.uint8) 17 | imgs.append(np.ascontiguousarray(img)) 18 | return imgs 19 | 20 | 21 | def multi_apply(func, *args, **kwargs): 22 | pfunc = partial(func, **kwargs) if kwargs else func 23 | # map() is a Python built-in higher-order function: it takes a function f and a list, 24 | # applies f to every element of the list in turn, and returns the results as a new list 25 | map_results = map(pfunc, *args) 26 | # >>> a = [1, 2, 3] 27 | # >>> b = [4, 5, 6] 28 | # >>> zipped = zip(a, b) # pairs up elements as a list of tuples 29 | # [(1, 4), (2, 5), (3, 6)] 30 | # i.e. the per-image outputs (all_labels, all_label_weights, all_bbox_targets, all_bbox_weights, 31 | # pos_inds_list, neg_inds_list) are zipped into tuples, then gathered into a tuple of lists 32 | return tuple(map(list, zip(*map_results))) 33 | 34 | 35 | def unmap(data, count, inds, fill=0): 36 | """ Unmap a subset of items (data) back to the original set of items (of 37 | size count) """ 38 | if data.dim() == 1: 39 | ret = data.new_full((count, ), fill) 40 | ret[inds] = data 41 | else: 42 | new_size = (count, ) + data.size()[1:] 43 | ret = data.new_full(new_size, fill) 44 | ret[inds, :] = data 45 | return ret 46 | 47 | -------------------------------------------------------------------------------- /mmdet/models/utils/weight_init.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch.nn as nn 3 | 4 | 5 | def xavier_init(module, gain=1, bias=0, distribution='normal'): 6 | assert distribution in ['uniform', 'normal'] 7 | if distribution == 'uniform': 8 | nn.init.xavier_uniform_(module.weight, gain=gain) 9 | else: 10 | nn.init.xavier_normal_(module.weight, gain=gain) 11 | if hasattr(module, 'bias'): 12 | nn.init.constant_(module.bias, bias) 13 | 14 | 15 | def normal_init(module, mean=0, std=1, bias=0): 16 | 
nn.init.normal_(module.weight, mean, std) 17 | if hasattr(module, 'bias'): 18 | nn.init.constant_(module.bias, bias) 19 | 20 | 21 | def uniform_init(module, a=0, b=1, bias=0): 22 | nn.init.uniform_(module.weight, a, b) 23 | if hasattr(module, 'bias'): 24 | nn.init.constant_(module.bias, bias) 25 | 26 | 27 | def kaiming_init(module, 28 | mode='fan_out', 29 | nonlinearity='relu', 30 | bias=0, 31 | distribution='normal'): 32 | assert distribution in ['uniform', 'normal'] 33 | if distribution == 'uniform': 34 | nn.init.kaiming_uniform_( 35 | module.weight, mode=mode, nonlinearity=nonlinearity) 36 | else: 37 | nn.init.kaiming_normal_( 38 | module.weight, mode=mode, nonlinearity=nonlinearity) 39 | if hasattr(module, 'bias'): 40 | nn.init.constant_(module.bias, bias) 41 | 42 | 43 | def bias_init_with_prob(prior_prob): 44 | """Initialize conv/fc bias value according to a given probability.""" 45 | bias_init = float(-np.log((1 - prior_prob) / prior_prob)) 46 | return bias_init 47 | -------------------------------------------------------------------------------- /mmdet/ops/nms/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | # cython: language_level=3, boundscheck=False 9 | 10 | import numpy as np 11 | cimport numpy as np 12 | 13 | assert sizeof(int) == sizeof(np.int32_t) 14 | 15 | cdef extern from "gpu_nms.hpp": 16 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int, size_t) nogil 17 | size_t nms_Malloc() nogil 18 | 19 | memory_pool = {} 20 | 21 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 22 | np.int32_t device_id=0): 23 | cdef int boxes_num = dets.shape[0] 24 | cdef int boxes_dim = 5 25 | cdef int num_out 26 | cdef size_t base 27 | cdef np.ndarray[np.int32_t, ndim=1] \ 28 | keep = np.zeros(boxes_num, dtype=np.int32) 29 | cdef np.ndarray[np.float32_t, ndim=1] \ 30 | scores = dets[:, 4] 31 | cdef np.ndarray[np.int_t, ndim=1] \ 32 | order = scores.argsort()[::-1] 33 | cdef np.ndarray[np.float32_t, ndim=2] \ 34 | sorted_dets = dets[order, :5] 35 | cdef float cthresh = thresh 36 | if device_id not in memory_pool: 37 | with nogil: 38 | base = nms_Malloc() 39 | memory_pool[device_id] = base 40 | # print "malloc", base 41 | base = memory_pool[device_id] 42 | with nogil: 43 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, cthresh, device_id, base) 44 | keep = keep[:num_out] 45 | return list(order[keep]) 46 | -------------------------------------------------------------------------------- /INSTALL.md: -------------------------------------------------------------------------------- 1 | ## Installation 2 | 3 | ### Requirements 4 | 5 | - Linux (tested on Ubuntu 16.04 and CentOS 7.2) 6 | - Python 3.4+ 7 | - PyTorch 1.0 8 | - Cython 9 | - [mmcv](https://github.com/open-mmlab/mmcv) >= 0.2.2 10 | 11 | ### Install mmdetection 12 | 13 | a. Install PyTorch 1.0 and torchvision following the [official instructions](https://pytorch.org/). 14 | 15 | b. Clone the mmdetection repository. 16 | 17 | ```shell 18 | git clone https://github.com/open-mmlab/mmdetection.git 19 | ``` 20 | 21 | c. Compile CUDA extensions. 
22 | 23 | ```shell 24 | cd mmdetection 25 | pip install cython # or "conda install cython" if you prefer conda 26 | ./compile.sh # or "PYTHON=python3 ./compile.sh" if you use system python3 without virtual environments 27 | ``` 28 | 29 | d. Install mmdetection (other dependencies will be installed automatically). 30 | 31 | ```shell 32 | python(3) setup.py install # add --user if you want to install it locally 33 | # or "pip install ." 34 | ``` 35 | 36 | Note: You need to run the last step each time you pull updates from GitHub. 37 | The git commit id will be written to the version number and also saved in trained models. 38 | 39 | ### Prepare COCO dataset 40 | 41 | It is recommended to symlink the dataset root to `$MMDETECTION/data`. 42 | 43 | ``` 44 | mmdetection 45 | ├── mmdet 46 | ├── tools 47 | ├── configs 48 | ├── data 49 | │ ├── coco 50 | │ │ ├── annotations 51 | │ │ ├── train2017 52 | │ │ ├── val2017 53 | │ │ ├── test2017 54 | │ ├── VOCdevkit 55 | │ │ ├── VOC2007 56 | │ │ ├── VOC2012 57 | 58 | ``` 59 | 60 | ### Scripts 61 | Just for reference, [here](https://gist.github.com/hellock/bf23cd7348c727d69d48682cb6909047) is 62 | a script for setting up mmdetection with conda. 63 | -------------------------------------------------------------------------------- /mmdet/models/builder.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | from torch import nn 3 | 4 | from .registry import BACKBONES, NECKS, ROI_EXTRACTORS, HEADS, DETECTORS 5 | 6 | 7 | def _build_module(cfg, registry, default_args): 8 | assert isinstance(cfg, dict) and 'type' in cfg 9 | assert isinstance(default_args, dict) or default_args is None 10 | args = cfg.copy() 11 | obj_type = args.pop('type') 12 | if mmcv.is_str(obj_type): 13 | if obj_type not in registry.module_dict: 14 | raise KeyError('{} is not in the {} registry'.format( 15 | obj_type, registry.name)) 16 | obj_type = registry.module_dict[obj_type] 17 | elif not isinstance(obj_type, type): 18 | raise TypeError('type must be a str or valid type, but got {}'.format( 19 | type(obj_type))) 20 | if default_args is not None: 21 | for name, value in default_args.items(): 22 | args.setdefault(name, value) 23 | return obj_type(**args) 24 | 25 | 26 | def build(cfg, registry, default_args=None): 27 | if isinstance(cfg, list): 28 | modules = [_build_module(cfg_, registry, default_args) for cfg_ in cfg] 29 | return nn.Sequential(*modules) 30 | else: 31 | return _build_module(cfg, registry, default_args) 32 | 33 | 34 | def build_backbone(cfg): 35 | return build(cfg, BACKBONES) 36 | 37 | 38 | def build_neck(cfg): 39 | return build(cfg, NECKS) 40 | 41 | 42 | def build_roi_extractor(cfg): 43 | return build(cfg, ROI_EXTRACTORS) 44 | 45 | 46 | def build_head(cfg): 47 | return build(cfg, HEADS) 48 | 49 | 50 | def build_detector(cfg, train_cfg=None, test_cfg=None): 51 | return build(cfg, DETECTORS, dict(train_cfg=train_cfg, test_cfg=test_cfg)) 52 | 53 | 54 | 55 | -------------------------------------------------------------------------------- /mmdet/apis/env.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import random 4 | 5 | import numpy as np 6 | import torch 7 | import torch.distributed as dist 8 | import torch.multiprocessing as mp 9 | from mmcv.runner import get_dist_info 10 | 11 | 12 | def init_dist(launcher, backend='nccl', **kwargs): 13 | if mp.get_start_method(allow_none=True) is None: 14 | mp.set_start_method('spawn') 15 | if launcher == 'pytorch': 
16 | _init_dist_pytorch(backend, **kwargs) 17 | elif launcher == 'mpi': 18 | _init_dist_mpi(backend, **kwargs) 19 | elif launcher == 'slurm': 20 | _init_dist_slurm(backend, **kwargs) 21 | else: 22 | raise ValueError('Invalid launcher type: {}'.format(launcher)) 23 | 24 | 25 | def _init_dist_pytorch(backend, **kwargs): 26 | # TODO: use local_rank instead of rank % num_gpus 27 | rank = int(os.environ['RANK']) 28 | num_gpus = torch.cuda.device_count() 29 | torch.cuda.set_device(rank % num_gpus) 30 | dist.init_process_group(backend=backend, **kwargs) 31 | 32 | 33 | def _init_dist_mpi(backend, **kwargs): 34 | raise NotImplementedError 35 | 36 | 37 | def _init_dist_slurm(backend, **kwargs): 38 | raise NotImplementedError 39 | 40 | 41 | def set_random_seed(seed): 42 | random.seed(seed) 43 | np.random.seed(seed) 44 | torch.manual_seed(seed) 45 | torch.cuda.manual_seed_all(seed) 46 | 47 | 48 | def get_root_logger(log_level=logging.INFO): 49 | logger = logging.getLogger() 50 | if not logger.hasHandlers(): 51 | logging.basicConfig( 52 | format='%(asctime)s - %(levelname)s - %(message)s', 53 | level=log_level) 54 | rank, _ = get_dist_info() 55 | if rank != 0: 56 | logger.setLevel('ERROR') 57 | return logger 58 | -------------------------------------------------------------------------------- /mmdet/core/evaluation/bbox_overlaps.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def bbox_overlaps(bboxes1, bboxes2, mode='iou'): 5 | """Calculate the ious between each bbox of bboxes1 and bboxes2. 6 | 7 | Args: 8 | bboxes1(ndarray): shape (n, 4) 9 | bboxes2(ndarray): shape (k, 4) 10 | mode(str): iou (intersection over union) or iof (intersection 11 | over foreground) 12 | 13 | Returns: 14 | ious(ndarray): shape (n, k) 15 | """ 16 | 17 | assert mode in ['iou', 'iof'] 18 | 19 | bboxes1 = bboxes1.astype(np.float32) 20 | bboxes2 = bboxes2.astype(np.float32) 21 | rows = bboxes1.shape[0] 22 | cols = bboxes2.shape[0] 23 | ious = np.zeros((rows, cols), dtype=np.float32) 24 | if rows * cols == 0: 25 | return ious 26 | exchange = False 27 | if bboxes1.shape[0] > bboxes2.shape[0]: 28 | bboxes1, bboxes2 = bboxes2, bboxes1 29 | ious = np.zeros((cols, rows), dtype=np.float32) 30 | exchange = True 31 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * ( 32 | bboxes1[:, 3] - bboxes1[:, 1] + 1) 33 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * ( 34 | bboxes2[:, 3] - bboxes2[:, 1] + 1) 35 | for i in range(bboxes1.shape[0]): 36 | x_start = np.maximum(bboxes1[i, 0], bboxes2[:, 0]) 37 | y_start = np.maximum(bboxes1[i, 1], bboxes2[:, 1]) 38 | x_end = np.minimum(bboxes1[i, 2], bboxes2[:, 2]) 39 | y_end = np.minimum(bboxes1[i, 3], bboxes2[:, 3]) 40 | overlap = np.maximum(x_end - x_start + 1, 0) * np.maximum( 41 | y_end - y_start + 1, 0) 42 | if mode == 'iou': 43 | union = area1[i] + area2 - overlap 44 | else: 45 | union = area1[i] if not exchange else area2 46 | ious[i, :] = overlap / union 47 | if exchange: 48 | ious = ious.T 49 | return ious 50 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | 
.installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | # cython generated cpp 107 | mmdet/ops/nms/*.cpp 108 | mmdet/version.py 109 | data 110 | .vscode 111 | .idea 112 | -------------------------------------------------------------------------------- /mmdet/models/detectors/fast_rcnn.py: -------------------------------------------------------------------------------- 1 | from .two_stage import TwoStageDetector 2 | from ..registry import DETECTORS 3 | 4 | 5 | @DETECTORS.register_module 6 | class FastRCNN(TwoStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | neck, 11 | bbox_roi_extractor, 12 | bbox_head, 13 | train_cfg, 14 | test_cfg, 15 | mask_roi_extractor=None, 16 | mask_head=None, 17 | pretrained=None): 18 | super(FastRCNN, self).__init__( 19 | backbone=backbone, 20 | neck=neck, 21 | bbox_roi_extractor=bbox_roi_extractor, 22 | bbox_head=bbox_head, 23 | train_cfg=train_cfg, 24 | test_cfg=test_cfg, 25 | mask_roi_extractor=mask_roi_extractor, 26 | mask_head=mask_head, 27 | pretrained=pretrained) 28 | 29 | def forward_test(self, imgs, img_metas, proposals, **kwargs): 30 | for var, name in [(imgs, 'imgs'), (img_metas, 'img_metas')]: 31 | if not isinstance(var, list): 32 | raise TypeError('{} must be a list, but got {}'.format( 33 | name, type(var))) 34 | 35 | num_augs = len(imgs) 36 | if num_augs != len(img_metas): 37 | raise ValueError( 38 | 'num of augmentations ({}) != num of image meta ({})'.format( 39 | len(imgs), len(img_metas))) 40 | # TODO: remove the restriction of imgs_per_gpu == 1 when prepared 41 | imgs_per_gpu = imgs[0].size(0) 42 | assert imgs_per_gpu == 1 43 | 44 | if num_augs == 1: 45 | return self.simple_test(imgs[0], img_metas[0], proposals[0], 46 | **kwargs) 47 | else: 48 | return self.aug_test(imgs, img_metas, proposals, **kwargs) 49 | -------------------------------------------------------------------------------- /mmdet/models/utils/norm.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | norm_cfg = { 5 | # format: layer_type: (abbreviation, module) 6 | 'BN': ('bn', nn.BatchNorm2d), 7 | 'SyncBN': ('bn', None), 8 | 'GN': ('gn', nn.GroupNorm), 9 | # and potentially 'SN' 10 | } 11 | 12 | 13 | def 
build_norm_layer(cfg, num_features, postfix=''): 14 | """ Build normalization layer 15 | 16 | Args: 17 | cfg (dict): cfg should contain: 18 | type (str): identify norm layer type. 19 | layer args: args needed to instantiate a norm layer. 20 | frozen (bool): [optional] whether stop gradient updates 21 | of norm layer, it is helpful to set frozen mode 22 | in backbone's norms. 23 | num_features (int): number of channels from input 24 | postfix (int, str): appended into norm abbreation to 25 | create named layer. 26 | 27 | Returns: 28 | name (str): abbreation + postfix 29 | layer (nn.Module): created norm layer 30 | """ 31 | assert isinstance(cfg, dict) and 'type' in cfg 32 | cfg_ = cfg.copy() 33 | 34 | layer_type = cfg_.pop('type') 35 | if layer_type not in norm_cfg: 36 | raise KeyError('Unrecognized norm type {}'.format(layer_type)) 37 | else: 38 | abbr, norm_layer = norm_cfg[layer_type] 39 | if norm_layer is None: 40 | raise NotImplementedError 41 | 42 | assert isinstance(postfix, (int, str)) 43 | name = abbr + str(postfix) 44 | 45 | frozen = cfg_.pop('frozen', False) 46 | cfg_.setdefault('eps', 1e-5) 47 | if layer_type != 'GN': 48 | layer = norm_layer(num_features, **cfg_) 49 | else: 50 | assert 'num_groups' in cfg_ 51 | layer = norm_layer(num_channels=num_features, **cfg_) 52 | 53 | if frozen: 54 | for param in layer.parameters(): 55 | param.requires_grad = False 56 | 57 | return name, layer 58 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/instance_balanced_pos_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from .random_sampler import RandomSampler 5 | 6 | 7 | class InstanceBalancedPosSampler(RandomSampler): 8 | 9 | def _sample_pos(self, assign_result, num_expected, **kwargs): 10 | pos_inds = torch.nonzero(assign_result.gt_inds > 0) 11 | if pos_inds.numel() != 0: 12 | pos_inds = pos_inds.squeeze(1) 13 | if pos_inds.numel() <= num_expected: 14 | return pos_inds 15 | else: 16 | unique_gt_inds = assign_result.gt_inds[pos_inds].unique() 17 | num_gts = len(unique_gt_inds) 18 | num_per_gt = int(round(num_expected / float(num_gts)) + 1) 19 | sampled_inds = [] 20 | for i in unique_gt_inds: 21 | inds = torch.nonzero(assign_result.gt_inds == i.item()) 22 | if inds.numel() != 0: 23 | inds = inds.squeeze(1) 24 | else: 25 | continue 26 | if len(inds) > num_per_gt: 27 | inds = self.random_choice(inds, num_per_gt) 28 | sampled_inds.append(inds) 29 | sampled_inds = torch.cat(sampled_inds) 30 | if len(sampled_inds) < num_expected: 31 | num_extra = num_expected - len(sampled_inds) 32 | extra_inds = np.array( 33 | list(set(pos_inds.cpu()) - set(sampled_inds.cpu()))) 34 | if len(extra_inds) > num_extra: 35 | extra_inds = self.random_choice(extra_inds, num_extra) 36 | extra_inds = torch.from_numpy(extra_inds).to( 37 | assign_result.gt_inds.device).long() 38 | sampled_inds = torch.cat([sampled_inds, extra_inds]) 39 | elif len(sampled_inds) > num_expected: 40 | sampled_inds = self.random_choice(sampled_inds, num_expected) 41 | return sampled_inds 42 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/functions/roi_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | 4 | from .. 
import roi_pool_cuda 5 | 6 | 7 | class RoIPoolFunction(Function): 8 | 9 | @staticmethod 10 | def forward(ctx, features, rois, out_size, spatial_scale): 11 | if isinstance(out_size, int): 12 | out_h = out_size 13 | out_w = out_size 14 | elif isinstance(out_size, tuple): 15 | assert len(out_size) == 2 16 | assert isinstance(out_size[0], int) 17 | assert isinstance(out_size[1], int) 18 | out_h, out_w = out_size 19 | else: 20 | raise TypeError( 21 | '"out_size" must be an integer or tuple of integers') 22 | assert features.is_cuda 23 | ctx.save_for_backward(rois) 24 | num_channels = features.size(1) 25 | num_rois = rois.size(0) 26 | out_size = (num_rois, num_channels, out_h, out_w) 27 | output = features.new_zeros(out_size) 28 | argmax = features.new_zeros(out_size, dtype=torch.int) 29 | roi_pool_cuda.forward(features, rois, out_h, out_w, spatial_scale, 30 | output, argmax) 31 | ctx.spatial_scale = spatial_scale 32 | ctx.feature_size = features.size() 33 | ctx.argmax = argmax 34 | 35 | return output 36 | 37 | @staticmethod 38 | def backward(ctx, grad_output): 39 | assert grad_output.is_cuda 40 | spatial_scale = ctx.spatial_scale 41 | feature_size = ctx.feature_size 42 | argmax = ctx.argmax 43 | rois = ctx.saved_tensors[0] 44 | assert feature_size is not None 45 | 46 | grad_input = grad_rois = None 47 | if ctx.needs_input_grad[0]: 48 | grad_input = grad_output.new_zeros(feature_size) 49 | roi_pool_cuda.backward(grad_output.contiguous(), rois, argmax, 50 | spatial_scale, grad_input) 51 | 52 | return grad_input, grad_rois, None, None 53 | 54 | 55 | roi_pool = RoIPoolFunction.apply 56 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/random_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from .base_sampler import BaseSampler 5 | 6 | 7 | class RandomSampler(BaseSampler): 8 | 9 | def __init__(self, 10 | num, 11 | pos_fraction, 12 | neg_pos_ub=-1, 13 | add_gt_as_proposals=True, 14 | **kwargs): 15 | super(RandomSampler, self).__init__(num, pos_fraction, neg_pos_ub, 16 | add_gt_as_proposals) 17 | 18 | @staticmethod 19 | def random_choice(gallery, num): 20 | """Random select some elements from the gallery. 21 | 22 | It seems that Pytorch's implementation is slower than numpy so we use 23 | numpy to randperm the indices. 
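        If ``gallery`` is a Tensor, the sampled indices are moved to
        ``gallery``'s device before indexing, so the returned elements
        stay on the same device as the input.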
24 | """ 25 | assert len(gallery) >= num 26 | if isinstance(gallery, list): 27 | gallery = np.array(gallery) 28 | cands = np.arange(len(gallery)) 29 | np.random.shuffle(cands) 30 | rand_inds = cands[:num] 31 | if not isinstance(gallery, np.ndarray): 32 | rand_inds = torch.from_numpy(rand_inds).long().to(gallery.device) 33 | return gallery[rand_inds] 34 | 35 | def _sample_pos(self, assign_result, num_expected, **kwargs): 36 | """Randomly sample some positive samples.""" 37 | pos_inds = torch.nonzero(assign_result.gt_inds > 0) 38 | if pos_inds.numel() != 0: 39 | pos_inds = pos_inds.squeeze(1) 40 | if pos_inds.numel() <= num_expected: 41 | return pos_inds 42 | else: 43 | return self.random_choice(pos_inds, num_expected) 44 | 45 | def _sample_neg(self, assign_result, num_expected, **kwargs): 46 | """Randomly sample some negative samples.""" 47 | neg_inds = torch.nonzero(assign_result.gt_inds == 0) 48 | if neg_inds.numel() != 0: 49 | neg_inds = neg_inds.squeeze(1) 50 | if len(neg_inds) <= num_expected: 51 | return neg_inds 52 | else: 53 | return self.random_choice(neg_inds, num_expected) 54 | -------------------------------------------------------------------------------- /mmdet/core/utils/dist_utils.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import torch.distributed as dist 4 | from torch._utils import (_flatten_dense_tensors, _unflatten_dense_tensors, 5 | _take_tensors) 6 | from mmcv.runner import OptimizerHook 7 | 8 | 9 | def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1): 10 | if bucket_size_mb > 0: 11 | bucket_size_bytes = bucket_size_mb * 1024 * 1024 12 | buckets = _take_tensors(tensors, bucket_size_bytes) 13 | else: 14 | buckets = OrderedDict() 15 | for tensor in tensors: 16 | tp = tensor.type() 17 | if tp not in buckets: 18 | buckets[tp] = [] 19 | buckets[tp].append(tensor) 20 | buckets = buckets.values() 21 | 22 | for bucket in buckets: 23 | flat_tensors = _flatten_dense_tensors(bucket) 24 | dist.all_reduce(flat_tensors) 25 | flat_tensors.div_(world_size) 26 | for tensor, synced in zip( 27 | bucket, _unflatten_dense_tensors(flat_tensors, bucket)): 28 | tensor.copy_(synced) 29 | 30 | 31 | def allreduce_grads(model, coalesce=True, bucket_size_mb=-1): 32 | grads = [ 33 | param.grad.data for param in model.parameters() 34 | if param.requires_grad and param.grad is not None 35 | ] 36 | world_size = dist.get_world_size() 37 | if coalesce: 38 | _allreduce_coalesced(grads, world_size, bucket_size_mb) 39 | else: 40 | for tensor in grads: 41 | dist.all_reduce(tensor.div_(world_size)) 42 | 43 | 44 | class DistOptimizerHook(OptimizerHook): 45 | 46 | def __init__(self, grad_clip=None, coalesce=True, bucket_size_mb=-1): 47 | self.grad_clip = grad_clip 48 | self.coalesce = coalesce 49 | self.bucket_size_mb = bucket_size_mb 50 | 51 | def after_train_iter(self, runner): 52 | runner.optimizer.zero_grad() 53 | runner.outputs['loss'].backward() 54 | allreduce_grads(runner.model, self.coalesce, self.bucket_size_mb) 55 | if self.grad_clip is not None: 56 | self.clip_grads(runner.model.parameters()) 57 | runner.optimizer.step() 58 | -------------------------------------------------------------------------------- /mmdet/ops/nms/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from .gpu_nms import gpu_nms 5 | from .cpu_nms import cpu_nms 6 | from .cpu_soft_nms import cpu_soft_nms 7 | 8 | 9 | def nms(dets, iou_thr, 
device_id=None): 10 | """Dispatch to either CPU or GPU NMS implementations.""" 11 | if isinstance(dets, torch.Tensor): 12 | is_tensor = True 13 | if dets.is_cuda: 14 | device_id = dets.get_device() 15 | dets_np = dets.detach().cpu().numpy() 16 | elif isinstance(dets, np.ndarray): 17 | is_tensor = False 18 | dets_np = dets 19 | else: 20 | raise TypeError( 21 | 'dets must be either a Tensor or numpy array, but got {}'.format( 22 | type(dets))) 23 | 24 | if dets_np.shape[0] == 0: 25 | inds = [] 26 | else: 27 | inds = (gpu_nms(dets_np, iou_thr, device_id=device_id) 28 | if device_id is not None else cpu_nms(dets_np, iou_thr)) 29 | 30 | if is_tensor: 31 | inds = dets.new_tensor(inds, dtype=torch.long) 32 | else: 33 | inds = np.array(inds, dtype=np.int64) 34 | return dets[inds, :], inds 35 | 36 | 37 | def soft_nms(dets, iou_thr, method='linear', sigma=0.5, min_score=1e-3): 38 | if isinstance(dets, torch.Tensor): 39 | is_tensor = True 40 | dets_np = dets.detach().cpu().numpy() 41 | elif isinstance(dets, np.ndarray): 42 | is_tensor = False 43 | dets_np = dets 44 | else: 45 | raise TypeError( 46 | 'dets must be either a Tensor or numpy array, but got {}'.format( 47 | type(dets))) 48 | 49 | method_codes = {'linear': 1, 'gaussian': 2} 50 | if method not in method_codes: 51 | raise ValueError('Invalid method for SoftNMS: {}'.format(method)) 52 | new_dets, inds = cpu_soft_nms( 53 | dets_np, 54 | iou_thr, 55 | method=method_codes[method], 56 | sigma=sigma, 57 | min_score=min_score) 58 | 59 | if is_tensor: 60 | return dets.new_tensor(new_dets), dets.new_tensor( 61 | inds, dtype=torch.long) 62 | else: 63 | return new_dets.astype(np.float32), inds.astype(np.int64) 64 | -------------------------------------------------------------------------------- /mmdet/core/post_processing/bbox_nms.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from mmdet.ops.nms import nms_wrapper 4 | 5 | 6 | def multiclass_nms(multi_bboxes, multi_scores, score_thr, nms_cfg, max_num=-1): 7 | """NMS for multi-class bboxes. 8 | 9 | Args: 10 | multi_bboxes (Tensor): shape (n, #class*4) or (n, 4) 11 | multi_scores (Tensor): shape (n, #class) 12 | score_thr (float): bbox threshold, bboxes with scores lower than it 13 | will not be considered. 14 | nms_thr (float): NMS IoU threshold 15 | max_num (int): if there are more than max_num bboxes after NMS, 16 | only top max_num will be kept. 17 | 18 | Returns: 19 | tuple: (bboxes, labels), tensors of shape (k, 5) and (k, 1). Labels 20 | are 0-based. 
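        Note: NMS parameters come from ``nms_cfg``, e.g.
        ``dict(type='nms', iou_thr=0.5)``; ``type`` selects the function in
        ``nms_wrapper`` and the remaining keys are forwarded to it.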
21 | """ 22 | num_classes = multi_scores.shape[1] 23 | bboxes, labels = [], [] 24 | nms_cfg_ = nms_cfg.copy() 25 | nms_type = nms_cfg_.pop('type', 'nms') 26 | nms_op = getattr(nms_wrapper, nms_type) 27 | for i in range(1, num_classes): 28 | cls_inds = multi_scores[:, i] > score_thr 29 | if not cls_inds.any(): 30 | continue 31 | # get bboxes and scores of this class 32 | if multi_bboxes.shape[1] == 4: 33 | _bboxes = multi_bboxes[cls_inds, :] 34 | else: 35 | _bboxes = multi_bboxes[cls_inds, i * 4:(i + 1) * 4] 36 | _scores = multi_scores[cls_inds, i] 37 | cls_dets = torch.cat([_bboxes, _scores[:, None]], dim=1) 38 | cls_dets, _ = nms_op(cls_dets, **nms_cfg_) 39 | cls_labels = multi_bboxes.new_full( 40 | (cls_dets.shape[0], ), i - 1, dtype=torch.long) 41 | bboxes.append(cls_dets) 42 | labels.append(cls_labels) 43 | if bboxes: 44 | bboxes = torch.cat(bboxes) 45 | labels = torch.cat(labels) 46 | if bboxes.shape[0] > max_num: 47 | _, inds = bboxes[:, -1].sort(descending=True) 48 | inds = inds[:max_num] 49 | bboxes = bboxes[inds] 50 | labels = labels[inds] 51 | else: 52 | bboxes = multi_bboxes.new_zeros((0, 5)) 53 | labels = multi_bboxes.new_zeros((0, ), dtype=torch.long) 54 | 55 | return bboxes, labels 56 | -------------------------------------------------------------------------------- /tools/voc_eval.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser 2 | 3 | import mmcv 4 | import numpy as np 5 | 6 | from mmdet import datasets 7 | from mmdet.core import eval_map 8 | 9 | 10 | def voc_eval(result_file, dataset, iou_thr=0.5): 11 | det_results = mmcv.load(result_file) 12 | gt_bboxes = [] 13 | gt_labels = [] 14 | gt_ignore = [] 15 | for i in range(len(dataset)): 16 | ann = dataset.get_ann_info(i) 17 | bboxes = ann['bboxes'] 18 | labels = ann['labels'] 19 | if 'bboxes_ignore' in ann: 20 | ignore = np.concatenate([ 21 | np.zeros(bboxes.shape[0], dtype=np.bool), 22 | np.ones(ann['bboxes_ignore'].shape[0], dtype=np.bool) 23 | ]) 24 | gt_ignore.append(ignore) 25 | bboxes = np.vstack([bboxes, ann['bboxes_ignore']]) 26 | labels = np.concatenate([labels, ann['labels_ignore']]) 27 | gt_bboxes.append(bboxes) 28 | gt_labels.append(labels) 29 | if not gt_ignore: 30 | gt_ignore = gt_ignore 31 | if hasattr(dataset, 'year') and dataset.year == 2007: 32 | # dataset_name = 'voc07' 33 | dataset_name = '2class' 34 | # sar 图像 35 | elif hasattr(dataset, 'abc') and dataset.abc == 1: 36 | dataset_name = 'sar' 37 | else: 38 | dataset_name = dataset.CLASSES 39 | eval_map( 40 | det_results, 41 | gt_bboxes, 42 | gt_labels, 43 | gt_ignore=gt_ignore, 44 | scale_ranges=None, 45 | iou_thr=iou_thr, 46 | dataset=dataset_name, 47 | print_summary=True) 48 | 49 | 50 | def main(): 51 | parser = ArgumentParser(description='VOC Evaluation') 52 | parser.add_argument('result', help='result file path') 53 | parser.add_argument('config', help='config file path') 54 | parser.add_argument( 55 | '--iou-thr', 56 | type=float, 57 | default=0.5, 58 | help='IoU threshold for evaluation') 59 | args = parser.parse_args() 60 | cfg = mmcv.Config.fromfile(args.config) 61 | test_dataset = mmcv.runner.obj_from_dict(cfg.data.test, datasets) 62 | voc_eval(args.result, test_dataset, args.iou_thr) 63 | 64 | 65 | if __name__ == '__main__': 66 | main() 67 | -------------------------------------------------------------------------------- /mmdet/apis/inference.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | import numpy as np 3 | 
import torch 4 | 5 | from mmdet.datasets import to_tensor 6 | from mmdet.datasets.transforms import ImageTransform 7 | from mmdet.core import get_classes 8 | 9 | 10 | def _prepare_data(img, img_transform, cfg, device): 11 | ori_shape = img.shape 12 | img, img_shape, pad_shape, scale_factor = img_transform( 13 | img, 14 | scale=cfg.data.test.img_scale, 15 | keep_ratio=cfg.data.test.get('resize_keep_ratio', True)) 16 | img = to_tensor(img).to(device).unsqueeze(0) 17 | img_meta = [ 18 | dict( 19 | ori_shape=ori_shape, 20 | img_shape=img_shape, 21 | pad_shape=pad_shape, 22 | scale_factor=scale_factor, 23 | flip=False) 24 | ] 25 | return dict(img=[img], img_meta=[img_meta]) 26 | 27 | 28 | def _inference_single(model, img, img_transform, cfg, device): 29 | img = mmcv.imread(img) 30 | data = _prepare_data(img, img_transform, cfg, device) 31 | with torch.no_grad(): 32 | result = model(return_loss=False, rescale=True, **data) 33 | return result 34 | 35 | 36 | def _inference_generator(model, imgs, img_transform, cfg, device): 37 | for img in imgs: 38 | yield _inference_single(model, img, img_transform, cfg, device) 39 | 40 | 41 | def inference_detector(model, imgs, cfg, device='cuda:0'): 42 | img_transform = ImageTransform( 43 | size_divisor=cfg.data.test.size_divisor, **cfg.img_norm_cfg) 44 | model = model.to(device) 45 | model.eval() 46 | 47 | if not isinstance(imgs, list): 48 | return _inference_single(model, imgs, img_transform, cfg, device) 49 | else: 50 | return _inference_generator(model, imgs, img_transform, cfg, device) 51 | 52 | 53 | def show_result(img, result, dataset='coco', score_thr=0.3): 54 | class_names = get_classes(dataset) 55 | labels = [ 56 | np.full(bbox.shape[0], i, dtype=np.int32) 57 | for i, bbox in enumerate(result) 58 | ] 59 | labels = np.concatenate(labels) 60 | bboxes = np.vstack(result) 61 | img = mmcv.imread(img) 62 | mmcv.imshow_det_bboxes( 63 | img.copy(), 64 | bboxes, 65 | labels, 66 | class_names=class_names, 67 | score_thr=score_thr) 68 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/functions/roi_align.py: -------------------------------------------------------------------------------- 1 | from torch.autograd import Function 2 | 3 | from .. 
import roi_align_cuda 4 | 5 | 6 | class RoIAlignFunction(Function): 7 | 8 | @staticmethod 9 | def forward(ctx, features, rois, out_size, spatial_scale, sample_num=0): 10 | if isinstance(out_size, int): 11 | out_h = out_size 12 | out_w = out_size 13 | elif isinstance(out_size, tuple): 14 | assert len(out_size) == 2 15 | assert isinstance(out_size[0], int) 16 | assert isinstance(out_size[1], int) 17 | out_h, out_w = out_size 18 | else: 19 | raise TypeError( 20 | '"out_size" must be an integer or tuple of integers') 21 | ctx.spatial_scale = spatial_scale 22 | ctx.sample_num = sample_num 23 | ctx.save_for_backward(rois) 24 | ctx.feature_size = features.size() 25 | 26 | batch_size, num_channels, data_height, data_width = features.size() 27 | num_rois = rois.size(0) 28 | 29 | output = features.new_zeros(num_rois, num_channels, out_h, out_w) 30 | if features.is_cuda: 31 | roi_align_cuda.forward(features, rois, out_h, out_w, spatial_scale, 32 | sample_num, output) 33 | else: 34 | raise NotImplementedError 35 | 36 | return output 37 | 38 | @staticmethod 39 | def backward(ctx, grad_output): 40 | feature_size = ctx.feature_size 41 | spatial_scale = ctx.spatial_scale 42 | sample_num = ctx.sample_num 43 | rois = ctx.saved_tensors[0] 44 | assert (feature_size is not None and grad_output.is_cuda) 45 | 46 | batch_size, num_channels, data_height, data_width = feature_size 47 | out_w = grad_output.size(3) 48 | out_h = grad_output.size(2) 49 | 50 | grad_input = grad_rois = None 51 | if ctx.needs_input_grad[0]: 52 | grad_input = rois.new_zeros(batch_size, num_channels, data_height, 53 | data_width) 54 | roi_align_cuda.backward(grad_output.contiguous(), rois, out_h, 55 | out_w, spatial_scale, sample_num, 56 | grad_input) 57 | 58 | return grad_input, grad_rois, None, None, None 59 | 60 | 61 | roi_align = RoIAlignFunction.apply 62 | -------------------------------------------------------------------------------- /mmdet/core/bbox/geometry.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False): 5 | """Calculate overlap between two set of bboxes. 6 | 7 | If ``is_aligned`` is ``False``, then calculate the ious between each bbox 8 | of bboxes1 and bboxes2, otherwise the ious between each aligned pair of 9 | bboxes1 and bboxes2. 10 | 11 | Args: 12 | bboxes1 (Tensor): shape (m, 4) 13 | bboxes2 (Tensor): shape (n, 4), if is_aligned is ``True``, then m and n 14 | must be equal. 15 | mode (str): "iou" (intersection over union) or iof (intersection over 16 | foreground). 
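        is_aligned (bool): If True, ``m`` must equal ``n`` and the overlaps
            are computed element-wise for each aligned pair instead of over
            the full m x n matrix.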
17 | 18 | Returns: 19 | ious(Tensor): shape (m, n) if is_aligned == False else shape (m, 1) 20 | """ 21 | 22 | assert mode in ['iou', 'iof'] 23 | 24 | rows = bboxes1.size(0) 25 | cols = bboxes2.size(0) 26 | if is_aligned: 27 | assert rows == cols 28 | 29 | if rows * cols == 0: 30 | return bboxes1.new(rows, 1) if is_aligned else bboxes1.new(rows, cols) 31 | 32 | if is_aligned: 33 | lt = torch.max(bboxes1[:, :2], bboxes2[:, :2]) # [rows, 2] 34 | rb = torch.min(bboxes1[:, 2:], bboxes2[:, 2:]) # [rows, 2] 35 | 36 | wh = (rb - lt + 1).clamp(min=0) # [rows, 2] 37 | overlap = wh[:, 0] * wh[:, 1] 38 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * ( 39 | bboxes1[:, 3] - bboxes1[:, 1] + 1) 40 | 41 | if mode == 'iou': 42 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * ( 43 | bboxes2[:, 3] - bboxes2[:, 1] + 1) 44 | ious = overlap / (area1 + area2 - overlap) 45 | else: 46 | ious = overlap / area1 47 | else: 48 | lt = torch.max(bboxes1[:, None, :2], bboxes2[:, :2]) # [rows, cols, 2] 49 | rb = torch.min(bboxes1[:, None, 2:], bboxes2[:, 2:]) # [rows, cols, 2] 50 | 51 | wh = (rb - lt + 1).clamp(min=0) # [rows, cols, 2] 52 | overlap = wh[:, :, 0] * wh[:, :, 1] 53 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * ( 54 | bboxes1[:, 3] - bboxes1[:, 1] + 1) 55 | 56 | if mode == 'iou': 57 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * ( 58 | bboxes2[:, 3] - bboxes2[:, 1] + 1) 59 | ious = overlap / (area1[:, None] + area2 - overlap) 60 | else: 61 | ious = overlap / (area1[:, None]) 62 | 63 | return ious 64 | -------------------------------------------------------------------------------- /mmdet/ops/nms/cpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | # cython: language_level=3, boundscheck=False 9 | 10 | import numpy as np 11 | cimport numpy as np 12 | 13 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 14 | return a if a >= b else b 15 | 16 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 17 | return a if a <= b else b 18 | 19 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 20 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 21 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 22 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 23 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 24 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 25 | 26 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 27 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 28 | 29 | cdef int ndets = dets.shape[0] 30 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 31 | np.zeros((ndets), dtype=np.int) 32 | 33 | # nominal indices 34 | cdef int _i, _j 35 | # sorted indices 36 | cdef int i, j 37 | # temp variables for box i's (the box currently under consideration) 38 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 39 | # variables for computing overlap with box j (lower scoring box) 40 | cdef np.float32_t xx1, yy1, xx2, yy2 41 | cdef np.float32_t w, h 42 | cdef np.float32_t inter, ovr 43 | 44 | keep = [] 45 | for _i in range(ndets): 46 | i = order[_i] 47 | if suppressed[i] == 1: 48 | continue 49 | keep.append(i) 50 | ix1 = x1[i] 51 | iy1 = y1[i] 52 | ix2 = x2[i] 53 | iy2 = y2[i] 54 | iarea = 
areas[i] 55 | for _j in range(_i + 1, ndets): 56 | j = order[_j] 57 | if suppressed[j] == 1: 58 | continue 59 | xx1 = max(ix1, x1[j]) 60 | yy1 = max(iy1, y1[j]) 61 | xx2 = min(ix2, x2[j]) 62 | yy2 = min(iy2, y2[j]) 63 | w = max(0.0, xx2 - xx1 + 1) 64 | h = max(0.0, yy2 - yy1 + 1) 65 | inter = w * h 66 | ovr = inter / (iarea + areas[j] - inter) 67 | if ovr >= thresh: 68 | suppressed[j] = 1 69 | 70 | return keep 71 | -------------------------------------------------------------------------------- /mmdet/ops/dcn/functions/deform_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | 4 | from .. import deform_pool_cuda 5 | 6 | 7 | class DeformRoIPoolingFunction(Function): 8 | 9 | @staticmethod 10 | def forward(ctx, 11 | data, 12 | rois, 13 | offset, 14 | spatial_scale, 15 | out_size, 16 | out_channels, 17 | no_trans, 18 | group_size=1, 19 | part_size=None, 20 | sample_per_part=4, 21 | trans_std=.0): 22 | ctx.spatial_scale = spatial_scale 23 | ctx.out_size = out_size 24 | ctx.out_channels = out_channels 25 | ctx.no_trans = no_trans 26 | ctx.group_size = group_size 27 | ctx.part_size = out_size if part_size is None else part_size 28 | ctx.sample_per_part = sample_per_part 29 | ctx.trans_std = trans_std 30 | 31 | assert 0.0 <= ctx.trans_std <= 1.0 32 | if not data.is_cuda: 33 | raise NotImplementedError 34 | 35 | n = rois.shape[0] 36 | output = data.new_empty(n, out_channels, out_size, out_size) 37 | output_count = data.new_empty(n, out_channels, out_size, out_size) 38 | deform_pool_cuda.deform_psroi_pooling_cuda_forward( 39 | data, rois, offset, output, output_count, ctx.no_trans, 40 | ctx.spatial_scale, ctx.out_channels, ctx.group_size, ctx.out_size, 41 | ctx.part_size, ctx.sample_per_part, ctx.trans_std) 42 | 43 | if data.requires_grad or rois.requires_grad or offset.requires_grad: 44 | ctx.save_for_backward(data, rois, offset) 45 | ctx.output_count = output_count 46 | 47 | return output 48 | 49 | @staticmethod 50 | def backward(ctx, grad_output): 51 | if not grad_output.is_cuda: 52 | raise NotImplementedError 53 | 54 | data, rois, offset = ctx.saved_tensors 55 | output_count = ctx.output_count 56 | grad_input = torch.zeros_like(data) 57 | grad_rois = None 58 | grad_offset = torch.zeros_like(offset) 59 | 60 | deform_pool_cuda.deform_psroi_pooling_cuda_backward( 61 | grad_output, data, rois, offset, output_count, grad_input, 62 | grad_offset, ctx.no_trans, ctx.spatial_scale, ctx.out_channels, 63 | ctx.group_size, ctx.out_size, ctx.part_size, ctx.sample_per_part, 64 | ctx.trans_std) 65 | return (grad_input, grad_rois, grad_offset, None, None, None, None, 66 | None, None, None, None) 67 | 68 | 69 | deform_roi_pooling = DeformRoIPoolingFunction.apply 70 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # Lightweight Object Detection(One-Stage) 3 | 4 | ## Introduction 5 | 6 | The code is based on the mmdetection. 7 | 8 | mmdetection is an open source object detection toolbox based on PyTorch. It is 9 | a part of the open-mmlab project developed by [Multimedia Laboratory, CUHK](http://mmlab.ie.cuhk.edu.hk/). 
10 | 
11 | Currently, it contains these features:
12 | - **Multiple Base Networks**: MobileNet V2, ShuffleNet V2
13 | - **One-Stage Lightweight Detectors**: MobileV2-SSD, MobileV2-RetinaNet
14 | 
15 | 
16 | ## Performance
17 | 
18 | | VOC2007      | SSD   | RetinaNet |
19 | |--------------|-------|-----------|
20 | | MobilenetV2  |       | 81.9%     |
21 | | ShufflenetV2 |       |           |
22 | 
23 | 
24 | 
25 | | SAR (SSDD)   | SSD   | RetinaNet |
26 | |--------------|-------|-----------|
27 | | MobilenetV2  | 90.4% | 91.7%     |
28 | | ShufflenetV2 |       |           |
29 | 
30 | 
31 | | COCO2017     | SSD   | RetinaNet |
32 | |--------------|-------|-----------|
33 | | MobilenetV2  |       | 31.7      |
34 | | ShufflenetV2 |       |           |
35 | 
36 | ## Demo
37 | ![demo image](demo/V3.png)
38 | ![demo image](demo/V4.png)
39 | ![demo image](demo/1.png)
40 | 
41 | ## TODO
42 | 
--------------------------------------------------------------------------------
/mmdet/ops/nms/setup.py:
--------------------------------------------------------------------------------
 1 | import os.path as osp
 2 | from distutils.core import setup, Extension
 3 | 
 4 | import numpy as np
 5 | from Cython.Build import cythonize
 6 | from Cython.Distutils import build_ext
 7 | 
 8 | # extensions
 9 | ext_args = dict(
10 |     include_dirs=[np.get_include()],
11 |     language='c++',
12 |     extra_compile_args={
13 |         'cc': ['-Wno-unused-function', '-Wno-write-strings'],
14 |         'nvcc': ['-c', '--compiler-options', '-fPIC'],
15 |     },
16 | )
17 | 
18 | extensions = [
19 |     Extension('cpu_nms', ['cpu_nms.pyx'], **ext_args),
20 |     Extension('cpu_soft_nms', ['cpu_soft_nms.pyx'], **ext_args),
21 |     Extension('gpu_nms', ['gpu_nms.pyx', 'nms_kernel.cu'], **ext_args),
22 | ]
23 | 
24 | 
25 | def customize_compiler_for_nvcc(self):
26 |     """Inject deep into distutils to customize how the dispatch
27 |     to cc/nvcc works.
28 |     If you subclass UnixCCompiler, it's not trivial to get your subclass
29 |     injected in, and still have the right customizations (i.e.
30 |     distutils.sysconfig.customize_compiler) run on it. So instead of going
31 |     the OO route, I have this. Note, it's kind of like a weird functional
32 |     subclassing going on."""
33 | 
34 |     # tell the compiler it can process .cu files
35 |     self.src_extensions.append('.cu')
36 | 
37 |     # save references to the default compiler_so and _compile methods
38 |     default_compiler_so = self.compiler_so
39 |     super = self._compile
40 | 
41 |     # now redefine the _compile method. This gets executed for each
42 |     # object but distutils doesn't have the ability to change compilers
43 |     # based on source extension: we add it.
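    # extra_postargs receives the dict built in ext_args above; _compile
    # picks the 'nvcc' list for .cu sources and the 'cc' list otherwise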
44 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 45 | if osp.splitext(src)[1] == '.cu': 46 | # use the cuda for .cu files 47 | self.set_executable('compiler_so', 'nvcc') 48 | # use only a subset of the extra_postargs, which are 1-1 translated 49 | # from the extra_compile_args in the Extension class 50 | postargs = extra_postargs['nvcc'] 51 | else: 52 | postargs = extra_postargs['cc'] 53 | 54 | super(obj, src, ext, cc_args, postargs, pp_opts) 55 | # reset the default compiler_so, which we might have changed for cuda 56 | self.compiler_so = default_compiler_so 57 | 58 | # inject our redefined _compile method into the class 59 | self._compile = _compile 60 | 61 | 62 | # run the customize_compiler 63 | class custom_build_ext(build_ext): 64 | 65 | def build_extensions(self): 66 | customize_compiler_for_nvcc(self.compiler) 67 | build_ext.build_extensions(self) 68 | 69 | 70 | setup( 71 | name='nms', 72 | cmdclass={'build_ext': custom_build_ext}, 73 | ext_modules=cythonize(extensions), 74 | ) 75 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/ohem_sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .base_sampler import BaseSampler 4 | from ..transforms import bbox2roi 5 | 6 | 7 | class OHEMSampler(BaseSampler): 8 | 9 | def __init__(self, 10 | num, 11 | pos_fraction, 12 | context, 13 | neg_pos_ub=-1, 14 | add_gt_as_proposals=True, 15 | **kwargs): 16 | super(OHEMSampler, self).__init__(num, pos_fraction, neg_pos_ub, 17 | add_gt_as_proposals) 18 | self.bbox_roi_extractor = context.bbox_roi_extractor 19 | self.bbox_head = context.bbox_head 20 | 21 | def hard_mining(self, inds, num_expected, bboxes, labels, feats): 22 | with torch.no_grad(): 23 | rois = bbox2roi([bboxes]) 24 | bbox_feats = self.bbox_roi_extractor( 25 | feats[:self.bbox_roi_extractor.num_inputs], rois) 26 | cls_score, _ = self.bbox_head(bbox_feats) 27 | loss = self.bbox_head.loss( 28 | cls_score=cls_score, 29 | bbox_pred=None, 30 | labels=labels, 31 | label_weights=cls_score.new_ones(cls_score.size(0)), 32 | bbox_targets=None, 33 | bbox_weights=None, 34 | reduce=False)['loss_cls'] 35 | _, topk_loss_inds = loss.topk(num_expected) 36 | return inds[topk_loss_inds] 37 | 38 | def _sample_pos(self, 39 | assign_result, 40 | num_expected, 41 | bboxes=None, 42 | feats=None, 43 | **kwargs): 44 | # Sample some hard positive samples 45 | pos_inds = torch.nonzero(assign_result.gt_inds > 0) 46 | if pos_inds.numel() != 0: 47 | pos_inds = pos_inds.squeeze(1) 48 | if pos_inds.numel() <= num_expected: 49 | return pos_inds 50 | else: 51 | return self.hard_mining(pos_inds, num_expected, bboxes[pos_inds], 52 | assign_result.labels[pos_inds], feats) 53 | 54 | def _sample_neg(self, 55 | assign_result, 56 | num_expected, 57 | bboxes=None, 58 | feats=None, 59 | **kwargs): 60 | # Sample some hard negative samples 61 | neg_inds = torch.nonzero(assign_result.gt_inds == 0) 62 | if neg_inds.numel() != 0: 63 | neg_inds = neg_inds.squeeze(1) 64 | if len(neg_inds) <= num_expected: 65 | return neg_inds 66 | else: 67 | return self.hard_mining(neg_inds, num_expected, bboxes[neg_inds], 68 | assign_result.labels[neg_inds], feats) 69 | -------------------------------------------------------------------------------- /ssd_debug/assign_sample_debug.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | # 5 | # base_anchors = torch.Tensor([[-11., -11., 
18., 18.], 6 | # [-17., -17., 24., 24.], 7 | # [-17., -7., 24., 14.], 8 | # [-7., -17., 14., 24.]]) 9 | # num_base_anchors = base_anchors.size(0) 10 | # print(num_base_anchors) 11 | # 12 | # def meshgrid(x, y, row_major=True): 13 | # xx = x.repeat(len(y)) 14 | # yy = y.view(-1, 1).repeat(1, len(x)).view(-1) 15 | # if row_major: 16 | # return xx, yy 17 | # else: 18 | # return yy, xx 19 | # 20 | # featmap_size = (38, 38) 21 | # valid_size = (37, 37) 22 | # 23 | # feat_h, feat_w = featmap_size 24 | # valid_h, valid_w = valid_size 25 | # assert valid_h <= feat_h and valid_w <= feat_w 26 | # valid_x = torch.zeros(feat_w, dtype=torch.uint8) 27 | # valid_y = torch.zeros(feat_h, dtype=torch.uint8) 28 | # # print(valid_x) 29 | # valid_x[:valid_w] = 1 30 | # valid_y[:valid_h] = 1 31 | # valid_xx, valid_yy = meshgrid(valid_x, valid_y) 32 | # # print(valid_xx[:100]) 33 | # # print(valid_yy[-100:]) 34 | # valid = valid_xx & valid_yy 35 | # print(valid.shape) 36 | # valid = valid[:, None].expand( 37 | # valid.size(0), num_base_anchors).contiguous().view(-1) 38 | # print(valid[:200], valid.shape) 39 | 40 | 41 | # x = torch.randn(8,4) 42 | # print(x) 43 | # a = torch.tensor([0, 1, 1, 1, 0, 1, 1, 1], dtype=torch.uint8) 44 | # print(a.shape) 45 | # c = x[a,:] 46 | # print(c, c.shape) 47 | torch.manual_seed(1314) 48 | x = torch.rand(4, 8) 49 | print(x) 50 | assigned_gt_inds = x.new_full((8,), -1, dtype=torch.long) 51 | print(assigned_gt_inds) 52 | max_overlaps, argmax_overlaps = x.max(dim=0) 53 | print(max_overlaps, argmax_overlaps) 54 | assigned_gt_inds[(max_overlaps >= 0) & (max_overlaps < 0.5)] = 0 55 | print(assigned_gt_inds) 56 | pos_inds = max_overlaps >= 0.5 57 | print(pos_inds) 58 | print(assigned_gt_inds[pos_inds]) 59 | print(argmax_overlaps[pos_inds]) 60 | assigned_gt_inds[pos_inds] = argmax_overlaps[pos_inds] + 1 61 | print(assigned_gt_inds[pos_inds]) 62 | gt_max_overlaps, gt_argmax_overlaps = x.max(dim=1) 63 | print(gt_max_overlaps, gt_argmax_overlaps) 64 | print() 65 | for i in range(4): 66 | if gt_max_overlaps[i] >= 0.: 67 | assigned_gt_inds[gt_argmax_overlaps[i]] = i + 1 68 | print(assigned_gt_inds) 69 | assigned_labels = assigned_gt_inds.new_zeros((8, )) 70 | print(assigned_labels) 71 | pos_inds = torch.nonzero(assigned_gt_inds > 0).squeeze(-1).unique() 72 | print(pos_inds, pos_inds.shape) 73 | gt_labels = torch.LongTensor([1, 1, 1, 1]) 74 | if pos_inds.numel() > 0: 75 | assigned_labels[pos_inds] = gt_labels[ 76 | assigned_gt_inds[pos_inds] - 1] 77 | print(assigned_labels) 78 | pos_assigned_gt_inds = assigned_gt_inds[pos_inds]-1 79 | print(pos_assigned_gt_inds) 80 | -------------------------------------------------------------------------------- /mmdet/models/detectors/single_stage.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from .base import BaseDetector 4 | from .. 
import builder
 5 | from ..registry import DETECTORS
 6 | from mmdet.core import bbox2result
 7 | 
 8 | 
 9 | @DETECTORS.register_module
10 | class SingleStageDetector(BaseDetector):
11 | 
12 |     def __init__(self,
13 |                  backbone,
14 |                  neck=None,
15 |                  bbox_head=None,
16 |                  train_cfg=None,
17 |                  test_cfg=None,
18 |                  pretrained=None):
19 |         super(SingleStageDetector, self).__init__()
20 |         # equivalent to SSDVGG(cfg.model.backbone)
21 |         # or ResNet(cfg.model.backbone)
22 |         self.backbone = builder.build_backbone(backbone)
23 |         if neck is not None:
24 |             # equivalent to the corresponding NECK(cfg.model.neck), e.g.
25 |             # FPN(cfg.model.neck)
26 |             self.neck = builder.build_neck(neck)
27 |         # equivalent to SSDHead(cfg.model.bbox_head)
28 |         # or RetinaHead(cfg.model.bbox_head)
29 |         self.bbox_head = builder.build_head(bbox_head)
30 |         self.train_cfg = train_cfg
31 |         self.test_cfg = test_cfg
32 |         self.init_weights(pretrained=pretrained)
33 | 
34 |     def init_weights(self, pretrained=None):
35 |         super(SingleStageDetector, self).init_weights(pretrained)
36 |         self.backbone.init_weights(pretrained=pretrained)
37 |         if self.with_neck:
38 |             if isinstance(self.neck, nn.Sequential):
39 |                 for m in self.neck:
40 |                     m.init_weights()
41 |             else:
42 |                 self.neck.init_weights()
43 |         self.bbox_head.init_weights()
44 | 
45 |     def extract_feat(self, img):
46 |         x = self.backbone(img)
47 |         if self.with_neck:
48 |             x = self.neck(x)
49 |         return x
50 | 
51 |     def forward_train(self,
52 |                       img,
53 |                       img_metas,
54 |                       gt_bboxes,
55 |                       gt_labels,
56 |                       gt_bboxes_ignore=None):
57 |         x = self.extract_feat(img)
58 |         outs = self.bbox_head(x)
59 |         loss_inputs = outs + (gt_bboxes, gt_labels, img_metas, self.train_cfg)
60 |         losses = self.bbox_head.loss(
61 |             *loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
62 |         return losses
63 | 
64 |     def simple_test(self, img, img_meta, rescale=False):
65 |         x = self.extract_feat(img)
66 |         outs = self.bbox_head(x)
67 |         bbox_inputs = outs + (img_meta, self.test_cfg, rescale)
68 |         bbox_list = self.bbox_head.get_bboxes(*bbox_inputs)
69 |         bbox_results = [
70 |             bbox2result(det_bboxes, det_labels, self.bbox_head.num_classes)
71 |             for det_bboxes, det_labels in bbox_list
72 |         ]
73 |         return bbox_results[0]
74 | 
75 |     def aug_test(self, imgs, img_metas, rescale=False):
76 |         raise NotImplementedError
77 | 
78 | 
--------------------------------------------------------------------------------
/mmdet/core/bbox/samplers/iou_balanced_neg_sampler.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import torch
 3 | 
 4 | from .random_sampler import RandomSampler
 5 | 
 6 | 
 7 | class IoUBalancedNegSampler(RandomSampler):
 8 | 
 9 |     def __init__(self,
10 |                  num,
11 |                  pos_fraction,
12 |                  hard_thr=0.1,
13 |                  hard_fraction=0.5,
14 |                  **kwargs):
15 |         super(IoUBalancedNegSampler, self).__init__(num, pos_fraction,
16 |                                                     **kwargs)
17 |         assert hard_thr > 0
18 |         assert 0 < hard_fraction < 1
19 |         self.hard_thr = hard_thr
20 |         self.hard_fraction = hard_fraction
21 | 
22 |     def _sample_neg(self, assign_result, num_expected, **kwargs):
23 |         neg_inds = torch.nonzero(assign_result.gt_inds == 0)
24 |         if neg_inds.numel() != 0:
25 |             neg_inds = neg_inds.squeeze(1)
26 |         if len(neg_inds) <= num_expected:
27 |             return neg_inds
28 |         else:
29 |             max_overlaps = assign_result.max_overlaps.cpu().numpy()
30 |             # balance sampling for negative samples
31 |             neg_set = set(neg_inds.cpu().numpy())
32 |             easy_set = set(
33 |                 np.where(
34 |                     np.logical_and(max_overlaps >= 0,
35 |                                    max_overlaps < self.hard_thr))[0])
36 |             hard_set = set(np.where(max_overlaps >= self.hard_thr)[0])
37 |             easy_neg_inds =
list(easy_set & neg_set) 38 | hard_neg_inds = list(hard_set & neg_set) 39 | 40 | num_expected_hard = int(num_expected * self.hard_fraction) 41 | if len(hard_neg_inds) > num_expected_hard: 42 | sampled_hard_inds = self.random_choice(hard_neg_inds, 43 | num_expected_hard) 44 | else: 45 | sampled_hard_inds = np.array(hard_neg_inds, dtype=np.int) 46 | num_expected_easy = num_expected - len(sampled_hard_inds) 47 | if len(easy_neg_inds) > num_expected_easy: 48 | sampled_easy_inds = self.random_choice(easy_neg_inds, 49 | num_expected_easy) 50 | else: 51 | sampled_easy_inds = np.array(easy_neg_inds, dtype=np.int) 52 | sampled_inds = np.concatenate((sampled_easy_inds, 53 | sampled_hard_inds)) 54 | if len(sampled_inds) < num_expected: 55 | num_extra = num_expected - len(sampled_inds) 56 | extra_inds = np.array(list(neg_set - set(sampled_inds))) 57 | if len(extra_inds) > num_extra: 58 | extra_inds = self.random_choice(extra_inds, num_extra) 59 | sampled_inds = np.concatenate((sampled_inds, extra_inds)) 60 | sampled_inds = torch.from_numpy(sampled_inds).long().to( 61 | assign_result.gt_inds.device) 62 | return sampled_inds 63 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/base_sampler.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | import torch 4 | 5 | from .sampling_result import SamplingResult 6 | 7 | 8 | class BaseSampler(metaclass=ABCMeta): 9 | 10 | def __init__(self, 11 | num, 12 | pos_fraction, 13 | neg_pos_ub=-1, 14 | add_gt_as_proposals=True, 15 | **kwargs): 16 | self.num = num 17 | self.pos_fraction = pos_fraction 18 | self.neg_pos_ub = neg_pos_ub 19 | self.add_gt_as_proposals = add_gt_as_proposals 20 | self.pos_sampler = self 21 | self.neg_sampler = self 22 | 23 | @abstractmethod 24 | def _sample_pos(self, assign_result, num_expected, **kwargs): 25 | pass 26 | 27 | @abstractmethod 28 | def _sample_neg(self, assign_result, num_expected, **kwargs): 29 | pass 30 | 31 | def sample(self, 32 | assign_result, 33 | bboxes, 34 | gt_bboxes, 35 | gt_labels=None, 36 | **kwargs): 37 | """Sample positive and negative bboxes. 38 | 39 | This is a simple implementation of bbox sampling given candidates, 40 | assigning results and ground truth bboxes. 41 | 42 | Args: 43 | assign_result (:obj:`AssignResult`): Bbox assigning results. 44 | bboxes (Tensor): Boxes to be sampled from. 45 | gt_bboxes (Tensor): Ground truth bboxes. 46 | gt_labels (Tensor, optional): Class labels of ground truth bboxes. 47 | 48 | Returns: 49 | :obj:`SamplingResult`: Sampling result. 50 | """ 51 | bboxes = bboxes[:, :4] 52 | 53 | gt_flags = bboxes.new_zeros((bboxes.shape[0], ), dtype=torch.uint8) 54 | if self.add_gt_as_proposals: 55 | bboxes = torch.cat([gt_bboxes, bboxes], dim=0) 56 | assign_result.add_gt_(gt_labels) 57 | gt_ones = bboxes.new_ones(gt_bboxes.shape[0], dtype=torch.uint8) 58 | gt_flags = torch.cat([gt_ones, gt_flags]) 59 | 60 | num_expected_pos = int(self.num * self.pos_fraction) 61 | pos_inds = self.pos_sampler._sample_pos( 62 | assign_result, num_expected_pos, bboxes=bboxes, **kwargs) 63 | # We found that sampled indices have duplicated items occasionally. 
64 |         # (may be a bug of PyTorch)
65 |         # unique() removes the duplicated indices
66 |         pos_inds = pos_inds.unique()
67 |         # numel() returns the number of elements in the tensor
68 |         num_sampled_pos = pos_inds.numel()
69 |         num_expected_neg = self.num - num_sampled_pos
70 |         if self.neg_pos_ub >= 0:
71 |             _pos = max(1, num_sampled_pos)
72 |             neg_upper_bound = int(self.neg_pos_ub * _pos)
73 |             if num_expected_neg > neg_upper_bound:
74 |                 num_expected_neg = neg_upper_bound
75 |         neg_inds = self.neg_sampler._sample_neg(
76 |             assign_result, num_expected_neg, bboxes=bboxes, **kwargs)
77 |         neg_inds = neg_inds.unique()
78 | 
79 |         return SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes,
80 |                               assign_result, gt_flags)
81 | 
--------------------------------------------------------------------------------
/mmdet/datasets/xml_style.py:
--------------------------------------------------------------------------------
 1 | import os.path as osp
 2 | import xml.etree.ElementTree as ET
 3 | 
 4 | import mmcv
 5 | import numpy as np
 6 | 
 7 | from .custom import CustomDataset
 8 | 
 9 | 
10 | class XMLDataset(CustomDataset):
11 | 
12 |     def __init__(self, **kwargs):
13 |         super(XMLDataset, self).__init__(**kwargs)
14 |         # self.cat2label = {cat: i + 1 for i, cat in enumerate(self.CLASSES)}
15 |         self.cat2label = {cat: 1 for i, cat in enumerate(self.CLASSES)}  # collapse all classes to one foreground label
16 | 
17 |     def load_annotations(self, ann_file):
18 |         img_infos = []
19 |         img_ids = mmcv.list_from_file(ann_file)
20 |         for img_id in img_ids:
21 |             filename = 'JPEGImages/{}.jpg'.format(img_id)
22 |             xml_path = osp.join(self.img_prefix, 'Annotations',
23 |                                 '{}.xml'.format(img_id))
24 |             tree = ET.parse(xml_path)
25 |             root = tree.getroot()
26 |             size = root.find('size')
27 |             width = int(size.find('width').text)
28 |             height = int(size.find('height').text)
29 |             img_infos.append(
30 |                 dict(id=img_id, filename=filename, width=width, height=height))
31 |         return img_infos
32 | 
33 |     def get_ann_info(self, idx):
34 |         img_id = self.img_infos[idx]['id']
35 |         xml_path = osp.join(self.img_prefix, 'Annotations',
36 |                             '{}.xml'.format(img_id))
37 |         tree = ET.parse(xml_path)
38 |         root = tree.getroot()
39 |         # gt boxes [xmin, ymin, xmax, ymax]
40 |         bboxes = []
41 |         # numeric labels of the gt classes
42 |         labels = []
43 |         # difficult gt boxes [xmin, ymin, xmax, ymax]
44 |         bboxes_ignore = []
45 |         # numeric labels of the difficult gt classes
46 |         labels_ignore = []
47 |         for obj in root.findall('object'):
48 |             name = obj.find('name').text
49 |             label = self.cat2label[name]
50 |             difficult = int(obj.find('difficult').text)
51 |             bnd_box = obj.find('bndbox')
52 |             bbox = [
53 |                 int(bnd_box.find('xmin').text),
54 |                 int(bnd_box.find('ymin').text),
55 |                 int(bnd_box.find('xmax').text),
56 |                 int(bnd_box.find('ymax').text)
57 |             ]
58 |             if difficult:
59 |                 bboxes_ignore.append(bbox)
60 |                 labels_ignore.append(label)
61 |             else:
62 |                 bboxes.append(bbox)
63 |                 labels.append(label)
64 |         if not bboxes:
65 |             bboxes = np.zeros((0, 4))
66 |             labels = np.zeros((0, ))
67 |         else:
68 |             bboxes = np.array(bboxes, ndmin=2) - 1
69 |             labels = np.array(labels)
70 |         if not bboxes_ignore:
71 |             bboxes_ignore = np.zeros((0, 4))
72 |             labels_ignore = np.zeros((0, ))
73 |         else:
74 |             bboxes_ignore = np.array(bboxes_ignore, ndmin=2) - 1
75 |             labels_ignore = np.array(labels_ignore)
76 |         ann = dict(
77 |             bboxes=bboxes.astype(np.float32),
78 |             labels=labels.astype(np.int64),
79 |             bboxes_ignore=bboxes_ignore.astype(np.float32),
80 |             labels_ignore=labels_ignore.astype(np.int64))
81 |         return ann
82 | 
--------------------------------------------------------------------------------
/mmdet/models/utils/conv_module.py:
-------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | import torch.nn as nn 4 | from mmcv.cnn import kaiming_init, constant_init 5 | 6 | from .norm import build_norm_layer 7 | 8 | 9 | class ConvModule(nn.Module): 10 | 11 | def __init__(self, 12 | in_channels, 13 | out_channels, 14 | kernel_size, 15 | stride=1, 16 | padding=0, 17 | dilation=1, 18 | groups=1, 19 | bias=True, 20 | normalize=None, 21 | activation='relu', 22 | inplace=True, 23 | activate_last=True): 24 | super(ConvModule, self).__init__() 25 | self.with_norm = normalize is not None 26 | self.with_activatation = activation is not None 27 | self.with_bias = bias 28 | self.activation = activation 29 | self.activate_last = activate_last 30 | 31 | if self.with_norm and self.with_bias: 32 | warnings.warn('ConvModule has norm and bias at the same time') 33 | 34 | self.conv = nn.Conv2d( 35 | in_channels, 36 | out_channels, 37 | kernel_size, 38 | stride, 39 | padding, 40 | dilation, 41 | groups, 42 | bias=bias) 43 | 44 | self.in_channels = self.conv.in_channels 45 | self.out_channels = self.conv.out_channels 46 | self.kernel_size = self.conv.kernel_size 47 | self.stride = self.conv.stride 48 | self.padding = self.conv.padding 49 | self.dilation = self.conv.dilation 50 | self.transposed = self.conv.transposed 51 | self.output_padding = self.conv.output_padding 52 | self.groups = self.conv.groups 53 | 54 | if self.with_norm: 55 | norm_channels = out_channels if self.activate_last else in_channels 56 | self.norm_name, norm = build_norm_layer(normalize, norm_channels) 57 | self.add_module(self.norm_name, norm) 58 | 59 | if self.with_activatation: 60 | assert activation in ['relu'], 'Only ReLU supported.' 61 | if self.activation == 'relu': 62 | self.activate = nn.ReLU(inplace=inplace) 63 | 64 | # Default using msra init 65 | self.init_weights() 66 | 67 | @property 68 | def norm(self): 69 | return getattr(self, self.norm_name) 70 | 71 | def init_weights(self): 72 | nonlinearity = 'relu' if self.activation is None else self.activation 73 | kaiming_init(self.conv, nonlinearity=nonlinearity) 74 | if self.with_norm: 75 | constant_init(self.norm, 1, bias=0) 76 | 77 | def forward(self, x, activate=True, norm=True): 78 | if self.activate_last: 79 | x = self.conv(x) 80 | if norm and self.with_norm: 81 | x = self.norm(x) 82 | if activate and self.with_activatation: 83 | x = self.activate(x) 84 | else: 85 | if norm and self.with_norm: 86 | x = self.norm(x) 87 | if activate and self.with_activatation: 88 | x = self.activate(x) 89 | x = self.conv(x) 90 | return x 91 | -------------------------------------------------------------------------------- /mmdet/core/bbox/bbox_target.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .transforms import bbox2delta 4 | from ..utils import multi_apply 5 | 6 | 7 | def bbox_target(pos_bboxes_list, 8 | neg_bboxes_list, 9 | pos_gt_bboxes_list, 10 | pos_gt_labels_list, 11 | cfg, 12 | reg_classes=1, 13 | target_means=[.0, .0, .0, .0], 14 | target_stds=[1.0, 1.0, 1.0, 1.0], 15 | concat=True): 16 | labels, label_weights, bbox_targets, bbox_weights = multi_apply( 17 | bbox_target_single, 18 | pos_bboxes_list, 19 | neg_bboxes_list, 20 | pos_gt_bboxes_list, 21 | pos_gt_labels_list, 22 | cfg=cfg, 23 | reg_classes=reg_classes, 24 | target_means=target_means, 25 | target_stds=target_stds) 26 | 27 | if concat: 28 | labels = torch.cat(labels, 0) 29 | label_weights = torch.cat(label_weights, 0) 30 | bbox_targets = 
torch.cat(bbox_targets, 0) 31 | bbox_weights = torch.cat(bbox_weights, 0) 32 | return labels, label_weights, bbox_targets, bbox_weights 33 | 34 | 35 | def bbox_target_single(pos_bboxes, 36 | neg_bboxes, 37 | pos_gt_bboxes, 38 | pos_gt_labels, 39 | cfg, 40 | reg_classes=1, 41 | target_means=[.0, .0, .0, .0], 42 | target_stds=[1.0, 1.0, 1.0, 1.0]): 43 | num_pos = pos_bboxes.size(0) 44 | num_neg = neg_bboxes.size(0) 45 | num_samples = num_pos + num_neg 46 | labels = pos_bboxes.new_zeros(num_samples, dtype=torch.long) 47 | label_weights = pos_bboxes.new_zeros(num_samples) 48 | bbox_targets = pos_bboxes.new_zeros(num_samples, 4) 49 | bbox_weights = pos_bboxes.new_zeros(num_samples, 4) 50 | if num_pos > 0: 51 | labels[:num_pos] = pos_gt_labels 52 | pos_weight = 1.0 if cfg.pos_weight <= 0 else cfg.pos_weight 53 | label_weights[:num_pos] = pos_weight 54 | pos_bbox_targets = bbox2delta(pos_bboxes, pos_gt_bboxes, target_means, 55 | target_stds) 56 | bbox_targets[:num_pos, :] = pos_bbox_targets 57 | bbox_weights[:num_pos, :] = 1 58 | if num_neg > 0: 59 | label_weights[-num_neg:] = 1.0 60 | if reg_classes > 1: 61 | bbox_targets, bbox_weights = expand_target(bbox_targets, bbox_weights, 62 | labels, reg_classes) 63 | 64 | return labels, label_weights, bbox_targets, bbox_weights 65 | 66 | 67 | def expand_target(bbox_targets, bbox_weights, labels, num_classes): 68 | bbox_targets_expand = bbox_targets.new_zeros((bbox_targets.size(0), 69 | 4 * num_classes)) 70 | bbox_weights_expand = bbox_weights.new_zeros((bbox_weights.size(0), 71 | 4 * num_classes)) 72 | for i in torch.nonzero(labels > 0).squeeze(-1): 73 | start, end = labels[i] * 4, (labels[i] + 1) * 4 74 | bbox_targets_expand[i, start:end] = bbox_targets[i, :] 75 | bbox_weights_expand[i, start:end] = bbox_weights[i, :] 76 | return bbox_targets_expand, bbox_weights_expand 77 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/src/roi_pool_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | int ROIPoolForwardLaucher(const at::Tensor features, const at::Tensor rois, 7 | const float spatial_scale, const int channels, 8 | const int height, const int width, const int num_rois, 9 | const int pooled_h, const int pooled_w, 10 | at::Tensor output, at::Tensor argmax); 11 | 12 | int ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, 13 | const at::Tensor argmax, const float spatial_scale, 14 | const int batch_size, const int channels, 15 | const int height, const int width, 16 | const int num_rois, const int pooled_h, 17 | const int pooled_w, at::Tensor bottom_grad); 18 | 19 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 20 | #define CHECK_CONTIGUOUS(x) \ 21 | AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") 22 | #define CHECK_INPUT(x) \ 23 | CHECK_CUDA(x); \ 24 | CHECK_CONTIGUOUS(x) 25 | 26 | int roi_pooling_forward_cuda(at::Tensor features, at::Tensor rois, 27 | int pooled_height, int pooled_width, 28 | float spatial_scale, at::Tensor output, 29 | at::Tensor argmax) { 30 | CHECK_INPUT(features); 31 | CHECK_INPUT(rois); 32 | CHECK_INPUT(output); 33 | CHECK_INPUT(argmax); 34 | 35 | // Number of ROIs 36 | int num_rois = rois.size(0); 37 | int size_rois = rois.size(1); 38 | 39 | if (size_rois != 5) { 40 | printf("wrong roi size\n"); 41 | return 0; 42 | } 43 | 44 | int channels = features.size(1); 45 | int height = features.size(2); 46 | int width = 
features.size(3); 47 | 48 | ROIPoolForwardLaucher(features, rois, spatial_scale, channels, height, width, 49 | num_rois, pooled_height, pooled_width, output, argmax); 50 | 51 | return 1; 52 | } 53 | 54 | int roi_pooling_backward_cuda(at::Tensor top_grad, at::Tensor rois, 55 | at::Tensor argmax, float spatial_scale, 56 | at::Tensor bottom_grad) { 57 | CHECK_INPUT(top_grad); 58 | CHECK_INPUT(rois); 59 | CHECK_INPUT(argmax); 60 | CHECK_INPUT(bottom_grad); 61 | 62 | int pooled_height = top_grad.size(2); 63 | int pooled_width = top_grad.size(3); 64 | int num_rois = rois.size(0); 65 | int size_rois = rois.size(1); 66 | 67 | if (size_rois != 5) { 68 | printf("wrong roi size\n"); 69 | return 0; 70 | } 71 | int batch_size = bottom_grad.size(0); 72 | int channels = bottom_grad.size(1); 73 | int height = bottom_grad.size(2); 74 | int width = bottom_grad.size(3); 75 | 76 | ROIPoolBackwardLaucher(top_grad, rois, argmax, spatial_scale, batch_size, 77 | channels, height, width, num_rois, pooled_height, 78 | pooled_width, bottom_grad); 79 | 80 | return 1; 81 | } 82 | 83 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 84 | m.def("forward", &roi_pooling_forward_cuda, "Roi_Pooling forward (CUDA)"); 85 | m.def("backward", &roi_pooling_backward_cuda, "Roi_Pooling backward (CUDA)"); 86 | } 87 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/src/roi_align_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | int ROIAlignForwardLaucher(const at::Tensor features, const at::Tensor rois, 7 | const float spatial_scale, const int sample_num, 8 | const int channels, const int height, 9 | const int width, const int num_rois, 10 | const int pooled_height, const int pooled_width, 11 | at::Tensor output); 12 | 13 | int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, 14 | const float spatial_scale, const int sample_num, 15 | const int channels, const int height, 16 | const int width, const int num_rois, 17 | const int pooled_height, const int pooled_width, 18 | at::Tensor bottom_grad); 19 | 20 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 21 | #define CHECK_CONTIGUOUS(x) \ 22 | AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") 23 | #define CHECK_INPUT(x) \ 24 | CHECK_CUDA(x); \ 25 | CHECK_CONTIGUOUS(x) 26 | 27 | int roi_align_forward_cuda(at::Tensor features, at::Tensor rois, 28 | int pooled_height, int pooled_width, 29 | float spatial_scale, int sample_num, 30 | at::Tensor output) { 31 | CHECK_INPUT(features); 32 | CHECK_INPUT(rois); 33 | CHECK_INPUT(output); 34 | 35 | // Number of ROIs 36 | int num_rois = rois.size(0); 37 | int size_rois = rois.size(1); 38 | 39 | if (size_rois != 5) { 40 | printf("wrong roi size\n"); 41 | return 0; 42 | } 43 | 44 | int num_channels = features.size(1); 45 | int data_height = features.size(2); 46 | int data_width = features.size(3); 47 | 48 | ROIAlignForwardLaucher(features, rois, spatial_scale, sample_num, 49 | num_channels, data_height, data_width, num_rois, 50 | pooled_height, pooled_width, output); 51 | 52 | return 1; 53 | } 54 | 55 | int roi_align_backward_cuda(at::Tensor top_grad, at::Tensor rois, 56 | int pooled_height, int pooled_width, 57 | float spatial_scale, int sample_num, 58 | at::Tensor bottom_grad) { 59 | CHECK_INPUT(top_grad); 60 | CHECK_INPUT(rois); 61 | CHECK_INPUT(bottom_grad); 62 | 63 | // Number of ROIs 64 | int num_rois = rois.size(0); 65 | int size_rois = 
rois.size(1); 66 | if (size_rois != 5) { 67 | printf("wrong roi size\n"); 68 | return 0; 69 | } 70 | 71 | int num_channels = bottom_grad.size(1); 72 | int data_height = bottom_grad.size(2); 73 | int data_width = bottom_grad.size(3); 74 | 75 | ROIAlignBackwardLaucher(top_grad, rois, spatial_scale, sample_num, 76 | num_channels, data_height, data_width, num_rois, 77 | pooled_height, pooled_width, bottom_grad); 78 | 79 | return 1; 80 | } 81 | 82 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 83 | m.def("forward", &roi_align_forward_cuda, "Roi_Align forward (CUDA)"); 84 | m.def("backward", &roi_align_backward_cuda, "Roi_Align backward (CUDA)"); 85 | } 86 | -------------------------------------------------------------------------------- /mmdet/models/anchor_heads/retina_head.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch.nn as nn 3 | from mmcv.cnn import normal_init 4 | 5 | from .anchor_head import AnchorHead 6 | from ..registry import HEADS 7 | from ..utils import bias_init_with_prob 8 | 9 | 10 | @HEADS.register_module 11 | class RetinaHead(AnchorHead): 12 | # num_classes=2, 13 | # in_channels=256, 14 | # stacked_convs=4, 15 | # feat_channels=256, 16 | # octave_base_scale=4, 17 | # scales_per_octave=3, 18 | # anchor_ratios=[0.5, 1.0, 2.0], 19 | # anchor_strides=[8, 16, 32, 64, 128], 20 | # target_means=[.0, .0, .0, .0], 21 | # target_stds=[1.0, 1.0, 1.0, 1.0] 22 | def __init__(self, 23 | num_classes, 24 | in_channels, 25 | stacked_convs=4, 26 | octave_base_scale=4, 27 | scales_per_octave=3, 28 | **kwargs): 29 | self.stacked_convs = stacked_convs # 4 30 | self.octave_base_scale = octave_base_scale # 4 31 | self.scales_per_octave = scales_per_octave # 3 32 | # octave_scales = [1, 2^(1/3), 2^(2/3)] 33 | octave_scales = np.array( 34 | [2**(i / scales_per_octave) for i in range(scales_per_octave)]) 35 | # anchor_scales=[4, 4*2^(1/3), 4*2^(2/3)] 36 | anchor_scales = octave_scales * octave_base_scale 37 | super(RetinaHead, self).__init__( 38 | num_classes, 39 | in_channels, 40 | anchor_scales=anchor_scales, 41 | use_sigmoid_cls=True, 42 | use_focal_loss=True, 43 | **kwargs) 44 | 45 | def _init_layers(self): 46 | self.relu = nn.ReLU(inplace=True) 47 | self.cls_convs = nn.ModuleList() 48 | self.reg_convs = nn.ModuleList() 49 | # two parallel subnets, each a stack of 4 conv(256, 256, 3, s=1, p=1) layers 50 | # for i in range(4) 51 | for i in range(self.stacked_convs): 52 | # self.in_channels = self.feat_channels = 256 53 | chn = self.in_channels if i == 0 else self.feat_channels 54 | self.cls_convs.append( 55 | nn.Conv2d(chn, self.feat_channels, 3, stride=1, padding=1)) 56 | self.reg_convs.append( 57 | nn.Conv2d(chn, self.feat_channels, 3, stride=1, padding=1)) 58 | self.retina_cls = nn.Conv2d( 59 | self.feat_channels, 60 | self.num_anchors * self.cls_out_channels, 61 | 3, 62 | padding=1) 63 | self.retina_reg = nn.Conv2d( 64 | self.feat_channels, self.num_anchors * 4, 3, padding=1) 65 | 66 | def init_weights(self): 67 | for m in self.cls_convs: 68 | normal_init(m, std=0.01) 69 | for m in self.reg_convs: 70 | normal_init(m, std=0.01) 71 | bias_cls = bias_init_with_prob(0.01) 72 | normal_init(self.retina_cls, std=0.01, bias=bias_cls) 73 | normal_init(self.retina_reg, std=0.01) 74 | 75 | def forward_single(self, x): 76 | cls_feat = x 77 | reg_feat = x 78 | for cls_conv in self.cls_convs: 79 | cls_feat = self.relu(cls_conv(cls_feat)) 80 | for reg_conv in self.reg_convs: 81 | reg_feat = self.relu(reg_conv(reg_feat)) 82 | cls_score = self.retina_cls(cls_feat) 
83 | bbox_pred = self.retina_reg(reg_feat) 84 | return cls_score, bbox_pred 85 | -------------------------------------------------------------------------------- /RetinaNet_debug/compute_Receptive_field.py: -------------------------------------------------------------------------------- 1 | net_struct = { 2 | 'alexnet': {'net': [[11, 4, 0], [3, 2, 0], [5, 1, 2], [3, 2, 0], [3, 1, 1], [3, 1, 1], [3, 1, 1], [3, 2, 0]], 3 | 'name': ['conv1', 'pool1', 'conv2', 'pool2', 'conv3', 'conv4', 'conv5', 'pool5']}, 4 | 'vgg16': {'net': [[3, 1, 1], [3, 1, 1], [2, 2, 0], [3, 1, 1], [3, 1, 1], [2, 2, 0], [3, 1, 1], [3, 1, 1], [3, 1, 1], 5 | [2, 2, 0], [3, 1, 1], [3, 1, 1], [3, 1, 1], [2, 2, 0], [3, 1, 1], [3, 1, 1], [3, 1, 1], 6 | [2, 2, 0]], 7 | 'name': ['conv1_1', 'conv1_2', 'pool1', 'conv2_1', 'conv2_2', 'pool2', 'conv3_1', 'conv3_2', 8 | 'conv3_3', 'pool3', 'conv4_1', 'conv4_2', 'conv4_3', 'pool4', 'conv5_1', 'conv5_2', 'conv5_3', 9 | 'pool5']}, 10 | 'resnet50': {'net': [[7,2,3], [3,2,1], [1,1,0], [3,1,1], [1,1,0], [1,1,0], [3,1,1], [1,1,0], [1,1,0], [3,1,1], [1,1,0], 11 | [1,1,0], [3,2,1], [1,1,0],[1,1,0], [3,2,1], [1,1,0],[1,1,0], [3,2,1], [1,1,0],[1,1,0], [3,2,1], [1,1,0], 12 | [1, 1, 0], [3, 2, 1], [1, 1, 0],[1,1,0], [3,2,1], [1,1,0],[1,1,0], [3,2,1], [1,1,0],[1,1,0], [3,2,1], [1,1,0],[1,1,0], [3,2,1], [1,1,0],[1,1,0], [3,2,1], [1,1,0], 13 | [1, 1, 0], [3, 2, 1], [1, 1, 0],[1,1,0], [3,2,1], [1,1,0],[1,1,0], [3,2,1], [1,1,0], [3,2,1], [3,2,1]], 14 | 'name':['conv1', 'pool', 'conv1_1', 'conv1_2', 'conv1_3', 'conv2_1', 'conv2_2', 'conv2_3', 'conv3_1', 'conv3_2', 'conv3_3', 15 | 'conv4_1', 'conv4_2', 'conv4_3', 'conv5_1', 'conv5_2', 'conv5_3', 'conv6_1', 'conv6_2', 'conv6_3', 'conv7_1', 'conv7_2', 'conv7_3', 16 | 'conv8_1', 'conv8_2', 'conv8_3', 'conv9_1', 'conv9_2', 'conv9_3', 'conv10_1', 'conv10_2', 'conv10_3', 'conv11_1', 'conv11_2', 'conv11_3', 'conv12_1', 'conv12_2', 'conv12_3', 'conv13_1', 'conv13_2', 'conv13_3', 17 | 'conv14_1', 'conv14_2', 'conv14_3', 'conv15_1', 'conv15_2', 'conv15_3', 'conv16_1', 'conv16_2', 'conv16_3', 'conv17', 'conv18' 18 | ]}, 19 | 20 | 'zf-5': {'net': [[7, 2, 3], [3, 2, 1], [5, 2, 2], [3, 2, 1], [3, 1, 1], [3, 1, 1], [3, 1, 1]], 21 | 'name': ['conv1', 'pool1', 'conv2', 'pool2', 'conv3', 'conv4', 'conv5']}} 22 | imsize = 640 23 | 24 | 25 | def outFromIn(isz, net, layernum): 26 | totstride = 1 27 | insize = isz 28 | for layer in range(layernum): 29 | fsize, stride, pad = net[layer] 30 | outsize = (insize - fsize + 2 * pad) // stride + 1 # floor division keeps sizes integral on Python 3 31 | insize = outsize 32 | totstride = totstride * stride 33 | return outsize, totstride 34 | 35 | 36 | def inFromOut(net, layernum): 37 | RF = 1 38 | for layer in reversed(range(layernum)): 39 | fsize, stride, pad = net[layer] 40 | RF = ((RF - 1) * stride) + fsize 41 | return RF 42 | 43 | 44 | if __name__ == '__main__': 45 | print("layer output sizes given image = %dx%d" % (imsize, imsize)) 46 | 47 | for net in net_struct.keys(): 48 | print('************net structure name is %s**************' % net) 49 | for i in range(len(net_struct[net]['net'])): 50 | p = outFromIn(imsize, net_struct[net]['net'], i + 1) 51 | rf = inFromOut(net_struct[net]['net'], i + 1) 52 | print("Layer Name = %s, Output size = %3d, Stride = %3d, RF size = %3d" % ( 53 | net_struct[net]['name'][i], p[0], p[1], rf)) -------------------------------------------------------------------------------- /mmdet/models/roi_extractors/single_level.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import 
torch 4 | import torch.nn as nn 5 | 6 | from mmdet import ops 7 | from ..registry import ROI_EXTRACTORS 8 | 9 | 10 | @ROI_EXTRACTORS.register_module 11 | class SingleRoIExtractor(nn.Module): 12 | """Extract RoI features from a single level feature map. 13 | 14 | If there are multiple input feature levels, each RoI is mapped to a level 15 | according to its scale. 16 | 17 | Args: 18 | roi_layer (dict): Specify RoI layer type and arguments. 19 | out_channels (int): Output channels of RoI layers. 20 | featmap_strides (list[int]): Strides of input feature maps. 21 | finest_scale (int): Scale threshold of mapping to level 0. 22 | """ 23 | 24 | def __init__(self, 25 | roi_layer, 26 | out_channels, 27 | featmap_strides, 28 | finest_scale=56): 29 | super(SingleRoIExtractor, self).__init__() 30 | self.roi_layers = self.build_roi_layers(roi_layer, featmap_strides) 31 | self.out_channels = out_channels 32 | self.featmap_strides = featmap_strides 33 | self.finest_scale = finest_scale 34 | 35 | @property 36 | def num_inputs(self): 37 | """int: Input feature map levels.""" 38 | return len(self.featmap_strides) 39 | 40 | def init_weights(self): 41 | pass 42 | 43 | def build_roi_layers(self, layer_cfg, featmap_strides): 44 | cfg = layer_cfg.copy() 45 | layer_type = cfg.pop('type') 46 | assert hasattr(ops, layer_type) 47 | layer_cls = getattr(ops, layer_type) 48 | roi_layers = nn.ModuleList( 49 | [layer_cls(spatial_scale=1 / s, **cfg) for s in featmap_strides]) 50 | return roi_layers 51 | 52 | def map_roi_levels(self, rois, num_levels): 53 | """Map rois to corresponding feature levels by scales. 54 | 55 | - scale < finest_scale: level 0 56 | - finest_scale <= scale < finest_scale * 2: level 1 57 | - finest_scale * 2 <= scale < finest_scale * 4: level 2 58 | - scale >= finest_scale * 4: level 3 59 | 60 | Args: 61 | rois (Tensor): Input RoIs, shape (k, 5). 62 | num_levels (int): Total level number. 
63 | 64 | Returns: 65 | Tensor: Level index (0-based) of each RoI, shape (k, ) 66 | """ 67 | scale = torch.sqrt( 68 | (rois[:, 3] - rois[:, 1] + 1) * (rois[:, 4] - rois[:, 2] + 1)) 69 | target_lvls = torch.floor(torch.log2(scale / self.finest_scale + 1e-6)) 70 | target_lvls = target_lvls.clamp(min=0, max=num_levels - 1).long() 71 | return target_lvls 72 | 73 | def forward(self, feats, rois): 74 | if len(feats) == 1: 75 | return self.roi_layers[0](feats[0], rois) 76 | 77 | out_size = self.roi_layers[0].out_size 78 | num_levels = len(feats) 79 | target_lvls = self.map_roi_levels(rois, num_levels) 80 | roi_feats = torch.cuda.FloatTensor(rois.size()[0], self.out_channels, 81 | out_size, out_size).fill_(0) 82 | for i in range(num_levels): 83 | inds = target_lvls == i 84 | if inds.any(): 85 | rois_ = rois[inds, :] 86 | roi_feats_t = self.roi_layers[i](feats[i], rois_) 87 | roi_feats[inds] += roi_feats_t 88 | return roi_feats 89 | -------------------------------------------------------------------------------- /tools/train.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import argparse 4 | from mmcv import Config 5 | 6 | from mmdet import __version__ 7 | from mmdet.datasets import get_dataset 8 | from mmdet.apis import (train_detector, init_dist, get_root_logger, 9 | set_random_seed) 10 | from mmdet.models import build_detector 11 | import torch 12 | 13 | 14 | def parse_args(): 15 | parser = argparse.ArgumentParser(description='Train a detector') 16 | parser.add_argument('config', help='train config file path') 17 | parser.add_argument('--work_dir', help='the dir to save logs and models') 18 | parser.add_argument( 19 | '--resume_from', help='the checkpoint file to resume from') 20 | parser.add_argument( 21 | '--validate', 22 | action='store_true', 23 | help='whether to evaluate the checkpoint during training') 24 | parser.add_argument( 25 | '--gpus', 26 | type=int, 27 | default=1, 28 | help='number of gpus to use ' 29 | '(only applicable to non-distributed training)') 30 | parser.add_argument('--seed', type=int, default=None, help='random seed') 31 | parser.add_argument( 32 | '--launcher', 33 | choices=['none', 'pytorch', 'slurm', 'mpi'], 34 | default='none', 35 | help='job launcher') 36 | parser.add_argument('--local_rank', type=int, default=0) 37 | args = parser.parse_args() 38 | 39 | return args 40 | 41 | 42 | def main(): 43 | args = parse_args() 44 | 45 | cfg = Config.fromfile(args.config) 46 | # set cudnn_benchmark 47 | if cfg.get('cudnn_benchmark', False): 48 | torch.backends.cudnn.benchmark = True 49 | # update configs according to CLI args 50 | if args.work_dir is not None: 51 | cfg.work_dir = args.work_dir 52 | if args.resume_from is not None: 53 | cfg.resume_from = args.resume_from 54 | cfg.gpus = args.gpus 55 | if cfg.checkpoint_config is not None: 56 | # save mmdet version in checkpoints as meta data 57 | cfg.checkpoint_config.meta = dict( 58 | mmdet_version=__version__, config=cfg.text) 59 | 60 | # init distributed env first, since logger depends on the dist info. 
61 | if args.launcher == 'none': 62 | distributed = False 63 | else: 64 | distributed = True 65 | init_dist(args.launcher, **cfg.dist_params) 66 | 67 | # init logger before other steps 68 | logger = get_root_logger(cfg.log_level) 69 | logger.info('Distributed training: {}'.format(distributed)) 70 | 71 | # set random seeds 72 | if args.seed is not None: 73 | logger.info('Set random seed to {}'.format(args.seed)) 74 | set_random_seed(args.seed) 75 | # First, the registries BACKBONES, NECKS, ROI_EXTRACTORS, HEADS and DETECTORS are created. 76 | # Then classes register themselves, e.g. BACKBONES.register_module(class SSDVGG), @HEADS.register_module(class AnchorHead), 77 | # @HEADS.register_module(class SSDHead), @DETECTORS.register_module(class SingleStageDetector). 78 | # Finally, build_detector() is equivalent to calling SingleStageDetector(**args). 79 | 80 | model = build_detector( 81 | cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg) 82 | 83 | train_dataset = get_dataset(cfg.data.train) 84 | train_detector( 85 | model, 86 | train_dataset, 87 | cfg, 88 | distributed=distributed, 89 | validate=args.validate, 90 | logger=logger) 91 | 92 | 93 | if __name__ == '__main__': 94 | main() 95 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import time 4 | from setuptools import find_packages, setup 5 | 6 | 7 | def readme(): 8 | with open('README.md', encoding='utf-8') as f: 9 | content = f.read() 10 | return content 11 | 12 | 13 | MAJOR = 0 14 | MINOR = 6 15 | PATCH = 'rc0' 16 | SUFFIX = '' 17 | SHORT_VERSION = '{}.{}.{}{}'.format(MAJOR, MINOR, PATCH, SUFFIX) 18 | 19 | version_file = 'mmdet/version.py' 20 | 21 | 22 | def get_git_hash(): 23 | 24 | def _minimal_ext_cmd(cmd): 25 | # construct minimal environment 26 | env = {} 27 | for k in ['SYSTEMROOT', 'PATH', 'HOME']: 28 | v = os.environ.get(k) 29 | if v is not None: 30 | env[k] = v 31 | # LANGUAGE is used on win32 32 | env['LANGUAGE'] = 'C' 33 | env['LANG'] = 'C' 34 | env['LC_ALL'] = 'C' 35 | out = subprocess.Popen( 36 | cmd, stdout=subprocess.PIPE, env=env).communicate()[0] 37 | return out 38 | 39 | try: 40 | out = _minimal_ext_cmd(['git', 'rev-parse', 'HEAD']) 41 | sha = out.strip().decode('ascii') 42 | except OSError: 43 | sha = 'unknown' 44 | 45 | return sha 46 | 47 | 48 | def get_hash(): 49 | if os.path.exists('.git'): 50 | sha = get_git_hash()[:7] 51 | elif os.path.exists(version_file): 52 | try: 53 | from mmdet.version import __version__ 54 | sha = __version__.split('+')[-1] 55 | except ImportError: 56 | raise ImportError('Unable to get git version') 57 | else: 58 | sha = 'unknown' 59 | 60 | return sha 61 | 62 | 63 | def write_version_py(): 64 | content = """# GENERATED VERSION FILE 65 | # TIME: {} 66 | 67 | __version__ = '{}' 68 | short_version = '{}' 69 | """ 70 | sha = get_hash() 71 | VERSION = SHORT_VERSION + '+' + sha 72 | 73 | with open(version_file, 'w') as f: 74 | f.write(content.format(time.asctime(), VERSION, SHORT_VERSION)) 75 | 76 | 77 | def get_version(): 78 | with open(version_file, 'r') as f: 79 | exec(compile(f.read(), version_file, 'exec')) 80 | return locals()['__version__'] 81 | 82 | 83 | if __name__ == '__main__': 84 | write_version_py() 85 | setup( 86 | name='mmdet', 87 | version=get_version(), 88 | description='Open MMLab Detection Toolbox', 89 | long_description=readme(), 90 | keywords='computer vision, object detection', 91 | url='https://github.com/open-mmlab/mmdetection', 92 | packages=find_packages(exclude=('configs', 'tools', 'demo')), 93 
| package_data={'mmdet.ops': ['*/*.so']}, 94 | classifiers=[ 95 | 'Development Status :: 4 - Beta', 96 | 'License :: OSI Approved :: Apache Software License', 97 | 'Operating System :: OS Independent', 98 | 'Programming Language :: Python :: 2', 99 | 'Programming Language :: Python :: 2.7', 100 | 'Programming Language :: Python :: 3', 101 | 'Programming Language :: Python :: 3.4', 102 | 'Programming Language :: Python :: 3.5', 103 | 'Programming Language :: Python :: 3.6', 104 | ], 105 | license='GPLv3', 106 | setup_requires=['pytest-runner'], 107 | tests_require=['pytest'], 108 | install_requires=[ 109 | 'mmcv', 'numpy', 'matplotlib', 'six', 'terminaltables', 110 | 'pycocotools' 111 | ], 112 | zip_safe=False) 113 | -------------------------------------------------------------------------------- /mmdet/core/post_processing/merge_augs.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | import numpy as np 4 | 5 | from mmdet.ops import nms 6 | from ..bbox import bbox_mapping_back 7 | 8 | 9 | def merge_aug_proposals(aug_proposals, img_metas, rpn_test_cfg): 10 | """Merge augmented proposals (multiscale, flip, etc.) 11 | 12 | Args: 13 | aug_proposals (list[Tensor]): proposals from different testing 14 | schemes, shape (n, 5). Note that they are not rescaled to the 15 | original image size. 16 | img_metas (list[dict]): image info including "shape_scale" and "flip". 17 | rpn_test_cfg (dict): rpn test config. 18 | 19 | Returns: 20 | Tensor: shape (n, 4), proposals corresponding to original image scale. 21 | """ 22 | recovered_proposals = [] 23 | for proposals, img_info in zip(aug_proposals, img_metas): 24 | img_shape = img_info['img_shape'] 25 | scale_factor = img_info['scale_factor'] 26 | flip = img_info['flip'] 27 | _proposals = proposals.clone() 28 | _proposals[:, :4] = bbox_mapping_back(_proposals[:, :4], img_shape, 29 | scale_factor, flip) 30 | recovered_proposals.append(_proposals) 31 | aug_proposals = torch.cat(recovered_proposals, dim=0) 32 | merged_proposals, _ = nms(aug_proposals, rpn_test_cfg.nms_thr) 33 | scores = merged_proposals[:, 4] 34 | _, order = scores.sort(0, descending=True) 35 | num = min(rpn_test_cfg.max_num, merged_proposals.shape[0]) 36 | order = order[:num] 37 | merged_proposals = merged_proposals[order, :] 38 | return merged_proposals 39 | 40 | 41 | def merge_aug_bboxes(aug_bboxes, aug_scores, img_metas, rcnn_test_cfg): 42 | """Merge augmented detection bboxes and scores. 43 | 44 | Args: 45 | aug_bboxes (list[Tensor]): shape (n, 4*#class) 46 | aug_scores (list[Tensor] or None): shape (n, #class) 47 | img_shapes (list[Tensor]): shape (3, ). 48 | rcnn_test_cfg (dict): rcnn test config. 
49 | 50 | Returns: 51 | tuple: (bboxes, scores) 52 | """ 53 | recovered_bboxes = [] 54 | for bboxes, img_info in zip(aug_bboxes, img_metas): 55 | img_shape = img_info[0]['img_shape'] 56 | scale_factor = img_info[0]['scale_factor'] 57 | flip = img_info[0]['flip'] 58 | bboxes = bbox_mapping_back(bboxes, img_shape, scale_factor, flip) 59 | recovered_bboxes.append(bboxes) 60 | bboxes = torch.stack(recovered_bboxes).mean(dim=0) 61 | if aug_scores is None: 62 | return bboxes 63 | else: 64 | scores = torch.stack(aug_scores).mean(dim=0) 65 | return bboxes, scores 66 | 67 | 68 | def merge_aug_scores(aug_scores): 69 | """Merge augmented bbox scores.""" 70 | if isinstance(aug_scores[0], torch.Tensor): 71 | return torch.mean(torch.stack(aug_scores), dim=0) 72 | else: 73 | return np.mean(aug_scores, axis=0) 74 | 75 | 76 | def merge_aug_masks(aug_masks, img_metas, rcnn_test_cfg, weights=None): 77 | """Merge augmented mask prediction. 78 | 79 | Args: 80 | aug_masks (list[ndarray]): shape (n, #class, h, w) 81 | img_shapes (list[ndarray]): shape (3, ). 82 | rcnn_test_cfg (dict): rcnn test config. 83 | 84 | Returns: 85 | tuple: (bboxes, scores) 86 | """ 87 | recovered_masks = [ 88 | mask if not img_info[0]['flip'] else mask[..., ::-1] 89 | for mask, img_info in zip(aug_masks, img_metas) 90 | ] 91 | if weights is None: 92 | merged_masks = np.mean(recovered_masks, axis=0) 93 | else: 94 | merged_masks = np.average( 95 | np.array(recovered_masks), axis=0, weights=np.array(weights)) 96 | return merged_masks 97 | -------------------------------------------------------------------------------- /mmdet/models/detectors/rpn.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | from mmdet.core import tensor2imgs, bbox_mapping 4 | from .base import BaseDetector 5 | from .test_mixins import RPNTestMixin 6 | from .. 
import builder 7 | from ..registry import DETECTORS 8 | 9 | 10 | @DETECTORS.register_module 11 | class RPN(BaseDetector, RPNTestMixin): 12 | 13 | def __init__(self, 14 | backbone, 15 | neck, 16 | rpn_head, 17 | train_cfg, 18 | test_cfg, 19 | pretrained=None): 20 | super(RPN, self).__init__() 21 | self.backbone = builder.build_backbone(backbone) 22 | self.neck = builder.build_neck(neck) if neck is not None else None 23 | self.rpn_head = builder.build_head(rpn_head) 24 | self.train_cfg = train_cfg 25 | self.test_cfg = test_cfg 26 | self.init_weights(pretrained=pretrained) 27 | 28 | def init_weights(self, pretrained=None): 29 | super(RPN, self).init_weights(pretrained) 30 | self.backbone.init_weights(pretrained=pretrained) 31 | if self.with_neck: 32 | self.neck.init_weights() 33 | self.rpn_head.init_weights() 34 | 35 | def extract_feat(self, img): 36 | x = self.backbone(img) 37 | if self.with_neck: 38 | x = self.neck(x) 39 | return x 40 | 41 | def forward_train(self, 42 | img, 43 | img_meta, 44 | gt_bboxes=None, 45 | gt_bboxes_ignore=None): 46 | if self.train_cfg.rpn.get('debug', False): 47 | self.rpn_head.debug_imgs = tensor2imgs(img) 48 | 49 | x = self.extract_feat(img) 50 | rpn_outs = self.rpn_head(x) 51 | 52 | rpn_loss_inputs = rpn_outs + (gt_bboxes, img_meta, self.train_cfg.rpn) 53 | losses = self.rpn_head.loss( 54 | *rpn_loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore) 55 | return losses 56 | 57 | def simple_test(self, img, img_meta, rescale=False): 58 | x = self.extract_feat(img) 59 | proposal_list = self.simple_test_rpn(x, img_meta, self.test_cfg.rpn) 60 | if rescale: 61 | for proposals, meta in zip(proposal_list, img_meta): 62 | proposals[:, :4] /= meta['scale_factor'] 63 | # TODO: remove this restriction 64 | return proposal_list[0].cpu().numpy() 65 | 66 | def aug_test(self, imgs, img_metas, rescale=False): 67 | proposal_list = self.aug_test_rpn( 68 | self.extract_feats(imgs), img_metas, self.test_cfg.rpn) 69 | if not rescale: 70 | for proposals, img_meta in zip(proposal_list, img_metas[0]): 71 | img_shape = img_meta['img_shape'] 72 | scale_factor = img_meta['scale_factor'] 73 | flip = img_meta['flip'] 74 | proposals[:, :4] = bbox_mapping(proposals[:, :4], img_shape, 75 | scale_factor, flip) 76 | # TODO: remove this restriction 77 | return proposal_list[0].cpu().numpy() 78 | 79 | def show_result(self, data, result, img_norm_cfg, dataset=None, top_k=20): 80 | """Show RPN proposals on the image. 81 | 82 | Although we assume batch size is 1, this method supports arbitrary 83 | batch size. 
84 | """ 85 | img_tensor = data['img'][0] 86 | img_metas = data['img_meta'][0].data[0] 87 | imgs = tensor2imgs(img_tensor, **img_norm_cfg) 88 | assert len(imgs) == len(img_metas) 89 | for img, img_meta in zip(imgs, img_metas): 90 | h, w, _ = img_meta['img_shape'] 91 | img_show = img[:h, :w, :] 92 | mmcv.imshow_bboxes(img_show, result, top_k=top_k) 93 | -------------------------------------------------------------------------------- /configs/retinanet_r101_fpn_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RetinaNet', 4 | pretrained='modelzoo://resnet101', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=101, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | style='pytorch'), 12 | neck=dict( 13 | type='FPN', 14 | in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | start_level=1, 17 | add_extra_convs=True, 18 | num_outs=5), 19 | bbox_head=dict( 20 | type='RetinaHead', 21 | num_classes=81, 22 | in_channels=256, 23 | stacked_convs=4, 24 | feat_channels=256, 25 | octave_base_scale=4, 26 | scales_per_octave=3, 27 | anchor_ratios=[0.5, 1.0, 2.0], 28 | anchor_strides=[8, 16, 32, 64, 128], 29 | target_means=[.0, .0, .0, .0], 30 | target_stds=[1.0, 1.0, 1.0, 1.0])) 31 | # training and testing settings 32 | train_cfg = dict( 33 | assigner=dict( 34 | type='MaxIoUAssigner', 35 | pos_iou_thr=0.5, 36 | neg_iou_thr=0.4, 37 | min_pos_iou=0, 38 | ignore_iof_thr=-1), 39 | smoothl1_beta=0.11, 40 | gamma=2.0, 41 | alpha=0.25, 42 | allowed_border=-1, 43 | pos_weight=-1, 44 | debug=False) 45 | test_cfg = dict( 46 | nms_pre=1000, 47 | min_bbox_size=0, 48 | score_thr=0.05, 49 | nms=dict(type='nms', iou_thr=0.5), 50 | max_per_img=100) 51 | # dataset settings 52 | dataset_type = 'CocoDataset' 53 | data_root = 'data/coco/' 54 | img_norm_cfg = dict( 55 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 56 | data = dict( 57 | imgs_per_gpu=2, 58 | workers_per_gpu=2, 59 | train=dict( 60 | type=dataset_type, 61 | ann_file=data_root + 'annotations/instances_train2017.json', 62 | img_prefix=data_root + 'train2017/', 63 | img_scale=(1333, 800), 64 | img_norm_cfg=img_norm_cfg, 65 | size_divisor=32, 66 | flip_ratio=0.5, 67 | with_mask=False, 68 | with_crowd=False, 69 | with_label=True), 70 | val=dict( 71 | type=dataset_type, 72 | ann_file=data_root + 'annotations/instances_val2017.json', 73 | img_prefix=data_root + 'val2017/', 74 | img_scale=(1333, 800), 75 | img_norm_cfg=img_norm_cfg, 76 | size_divisor=32, 77 | flip_ratio=0, 78 | with_mask=False, 79 | with_crowd=False, 80 | with_label=True), 81 | test=dict( 82 | type=dataset_type, 83 | ann_file=data_root + 'annotations/instances_val2017.json', 84 | img_prefix=data_root + 'val2017/', 85 | img_scale=(1333, 800), 86 | img_norm_cfg=img_norm_cfg, 87 | size_divisor=32, 88 | flip_ratio=0, 89 | with_mask=False, 90 | with_crowd=False, 91 | with_label=False, 92 | test_mode=True)) 93 | # optimizer 94 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) 95 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 96 | # learning policy 97 | lr_config = dict( 98 | policy='step', 99 | warmup='linear', 100 | warmup_iters=500, 101 | warmup_ratio=1.0 / 3, 102 | step=[8, 11]) 103 | checkpoint_config = dict(interval=1) 104 | # yapf:disable 105 | log_config = dict( 106 | interval=50, 107 | hooks=[ 108 | dict(type='TextLoggerHook'), 109 | # dict(type='TensorboardLoggerHook') 110 | ]) 111 | # yapf:enable 112 | # 
runtime settings 113 | total_epochs = 12 114 | device_ids = range(8) 115 | dist_params = dict(backend='nccl') 116 | log_level = 'INFO' 117 | work_dir = './work_dirs/retinanet_r101_fpn_1x' 118 | load_from = None 119 | resume_from = None 120 | workflow = [('train', 1)] 121 | -------------------------------------------------------------------------------- /configs/retinanet_r50_fpn_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RetinaNet', 4 | pretrained='modelzoo://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | style='pytorch'), 12 | neck=dict( 13 | type='FPN', 14 | in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | start_level=1, 17 | add_extra_convs=True, 18 | num_outs=5), 19 | bbox_head=dict( 20 | type='RetinaHead', 21 | num_classes=81, 22 | in_channels=256, 23 | stacked_convs=4, 24 | feat_channels=256, 25 | octave_base_scale=4, 26 | scales_per_octave=3, 27 | anchor_ratios=[0.5, 1.0, 2.0], 28 | anchor_strides=[8, 16, 32, 64, 128], 29 | target_means=[.0, .0, .0, .0], 30 | target_stds=[1.0, 1.0, 1.0, 1.0])) 31 | # training and testing settings 32 | train_cfg = dict( 33 | assigner=dict( 34 | type='MaxIoUAssigner', 35 | pos_iou_thr=0.5, 36 | neg_iou_thr=0.4, 37 | min_pos_iou=0, 38 | ignore_iof_thr=-1), 39 | smoothl1_beta=0.11, 40 | gamma=2.0, 41 | alpha=0.25, 42 | allowed_border=-1, 43 | pos_weight=-1, 44 | debug=False) 45 | test_cfg = dict( 46 | nms_pre=1000, 47 | min_bbox_size=0, 48 | score_thr=0.05, 49 | nms=dict(type='nms', iou_thr=0.5), 50 | max_per_img=100) 51 | # dataset settings 52 | dataset_type = 'CocoDataset' 53 | data_root = '/home/hs/data/COCO/coco2017/' 54 | img_norm_cfg = dict( 55 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 56 | data = dict( 57 | imgs_per_gpu=4, 58 | workers_per_gpu=8, 59 | train=dict( 60 | type=dataset_type, 61 | ann_file=data_root + 'annotations/instances_train2017.json', 62 | img_prefix=data_root + 'train2017/', 63 | img_scale=(1333, 800), 64 | img_norm_cfg=img_norm_cfg, 65 | size_divisor=32, 66 | flip_ratio=0.5, 67 | with_mask=False, 68 | with_crowd=False, 69 | with_label=True), 70 | val=dict( 71 | type=dataset_type, 72 | ann_file=data_root + 'annotations/instances_val2017.json', 73 | img_prefix=data_root + 'val2017/', 74 | img_scale=(1333, 800), 75 | img_norm_cfg=img_norm_cfg, 76 | size_divisor=32, 77 | flip_ratio=0, 78 | with_mask=False, 79 | with_crowd=False, 80 | with_label=True), 81 | test=dict( 82 | type=dataset_type, 83 | ann_file=data_root + 'annotations/instances_val2017.json', 84 | img_prefix=data_root + 'val2017/', 85 | img_scale=(1333, 800), 86 | img_norm_cfg=img_norm_cfg, 87 | size_divisor=32, 88 | flip_ratio=0, 89 | with_mask=False, 90 | with_crowd=False, 91 | with_label=False, 92 | test_mode=True)) 93 | # optimizer 94 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) 95 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 96 | # learning policy 97 | lr_config = dict( 98 | policy='step', 99 | warmup='linear', 100 | warmup_iters=500, 101 | warmup_ratio=1.0 / 3, 102 | step=[8, 11]) 103 | checkpoint_config = dict(interval=1) 104 | # yapf:disable 105 | log_config = dict( 106 | interval=50, 107 | hooks=[ 108 | dict(type='TextLoggerHook'), 109 | # dict(type='TensorboardLoggerHook') 110 | ]) 111 | # yapf:enable 112 | # runtime settings 113 | total_epochs = 12 114 | device_ids = range(8) 
115 | dist_params = dict(backend='nccl') 116 | log_level = 'INFO' 117 | work_dir = './work_dirs/retinanet_r50_fpn_1x' 118 | load_from = None 119 | resume_from = None 120 | workflow = [('train', 1)] 121 | -------------------------------------------------------------------------------- /configs/retinanet_mobileV2_fpn_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RetinaNet', 4 | pretrained='/home/hs/hs/237014845/HuaWei/mmdetection-master/weights/mobileV2_retina_coco/mobilev2/mobilenet_v2.pth', 5 | backbone=dict( 6 | type='MobileNetV2', 7 | out_indices=(3, 6, 13, 17), 8 | width_mult=1., 9 | ), 10 | neck=dict( 11 | type='FPN', 12 | in_channels=[56, 32, 96, 320], 13 | out_channels=256, 14 | start_level=1, 15 | add_extra_convs=True, 16 | num_outs=5), 17 | bbox_head=dict( 18 | type='RetinaHead', 19 | num_classes=2, 20 | in_channels=256, 21 | stacked_convs=4, 22 | feat_channels=256, 23 | octave_base_scale=4, 24 | scales_per_octave=3, 25 | anchor_ratios=[0.5, 1.0, 2.0], 26 | anchor_strides=[8, 16, 32, 64, 128], 27 | target_means=[.0, .0, .0, .0], 28 | target_stds=[1.0, 1.0, 1.0, 1.0])) 29 | # training and testing settings 30 | train_cfg = dict( 31 | assigner=dict( 32 | type='MaxIoUAssigner', 33 | pos_iou_thr=0.5, 34 | neg_iou_thr=0.4, 35 | min_pos_iou=0, 36 | ignore_iof_thr=-1), 37 | smoothl1_beta=0.11, 38 | gamma=2.0, 39 | alpha=0.25, 40 | allowed_border=-1, 41 | pos_weight=-1, 42 | debug=False) 43 | test_cfg = dict( 44 | nms_pre=1000, 45 | min_bbox_size=0, 46 | score_thr=0.05, 47 | nms=dict(type='nms', iou_thr=0.5), 48 | max_per_img=100) 49 | # dataset settings 50 | dataset_type = 'CocoDataset' 51 | data_root = '/home/hs/data/COCO/coco2017/' 52 | img_norm_cfg = dict( 53 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 54 | data = dict( 55 | imgs_per_gpu=16, 56 | workers_per_gpu=8, 57 | train=dict( 58 | type=dataset_type, 59 | ann_file=data_root + 'annotations/instances_train2017.json', 60 | img_prefix=data_root + 'train2017/', 61 | img_scale=(640, 640), 62 | img_norm_cfg=img_norm_cfg, 63 | size_divisor=32, 64 | flip_ratio=0.5, 65 | with_mask=False, 66 | with_crowd=False, 67 | with_label=True), 68 | val=dict( 69 | type=dataset_type, 70 | ann_file=data_root + 'annotations/instances_val2017.json', 71 | img_prefix=data_root + 'val2017/', 72 | img_scale=(640, 640), 73 | img_norm_cfg=img_norm_cfg, 74 | size_divisor=32, 75 | flip_ratio=0, 76 | with_mask=False, 77 | with_crowd=False, 78 | with_label=True), 79 | test=dict( 80 | type=dataset_type, 81 | ann_file=data_root + 'annotations/instances_val2017.json', 82 | img_prefix=data_root + 'val2017/', 83 | img_scale=(640, 640), 84 | img_norm_cfg=img_norm_cfg, 85 | size_divisor=32, 86 | flip_ratio=0, 87 | with_mask=False, 88 | with_crowd=False, 89 | with_label=False, 90 | test_mode=True)) 91 | # optimizer 92 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) 93 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 94 | # learning policy 95 | lr_config = dict( 96 | policy='step', 97 | warmup='linear', 98 | warmup_iters=500, 99 | warmup_ratio=1.0 / 3, 100 | step=[8, 11]) 101 | checkpoint_config = dict(interval=1) 102 | # yapf:disable 103 | log_config = dict( 104 | interval=50, 105 | hooks=[ 106 | dict(type='TextLoggerHook'), 107 | # dict(type='TensorboardLoggerHook') 108 | ]) 109 | # yapf:enable 110 | # runtime settings 111 | total_epochs = 12 112 | device_ids = range(8) 113 | dist_params = 
dict(backend='nccl') 114 | log_level = 'INFO' 115 | work_dir = './work_dirs/retinanet_r50_fpn_1x' 116 | load_from = None 117 | resume_from = None 118 | workflow = [('train', 1)] 119 | -------------------------------------------------------------------------------- /configs/retinanet_x101_32x4d_fpn_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RetinaNet', 4 | pretrained='open-mmlab://resnext101_32x4d', 5 | backbone=dict( 6 | type='ResNeXt', 7 | depth=101, 8 | groups=32, 9 | base_width=4, 10 | num_stages=4, 11 | out_indices=(0, 1, 2, 3), 12 | frozen_stages=1, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | start_level=1, 19 | add_extra_convs=True, 20 | num_outs=5), 21 | bbox_head=dict( 22 | type='RetinaHead', 23 | num_classes=81, 24 | in_channels=256, 25 | stacked_convs=4, 26 | feat_channels=256, 27 | octave_base_scale=4, 28 | scales_per_octave=3, 29 | anchor_ratios=[0.5, 1.0, 2.0], 30 | anchor_strides=[8, 16, 32, 64, 128], 31 | target_means=[.0, .0, .0, .0], 32 | target_stds=[1.0, 1.0, 1.0, 1.0])) 33 | # training and testing settings 34 | train_cfg = dict( 35 | assigner=dict( 36 | type='MaxIoUAssigner', 37 | pos_iou_thr=0.5, 38 | neg_iou_thr=0.4, 39 | min_pos_iou=0, 40 | ignore_iof_thr=-1), 41 | smoothl1_beta=0.11, 42 | gamma=2.0, 43 | alpha=0.25, 44 | allowed_border=-1, 45 | pos_weight=-1, 46 | debug=False) 47 | test_cfg = dict( 48 | nms_pre=1000, 49 | min_bbox_size=0, 50 | score_thr=0.05, 51 | nms=dict(type='nms', iou_thr=0.5), 52 | max_per_img=100) 53 | # dataset settings 54 | dataset_type = 'CocoDataset' 55 | data_root = 'data/coco/' 56 | img_norm_cfg = dict( 57 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 58 | data = dict( 59 | imgs_per_gpu=2, 60 | workers_per_gpu=2, 61 | train=dict( 62 | type=dataset_type, 63 | ann_file=data_root + 'annotations/instances_train2017.json', 64 | img_prefix=data_root + 'train2017/', 65 | img_scale=(1333, 800), 66 | img_norm_cfg=img_norm_cfg, 67 | size_divisor=32, 68 | flip_ratio=0.5, 69 | with_mask=False, 70 | with_crowd=False, 71 | with_label=True), 72 | val=dict( 73 | type=dataset_type, 74 | ann_file=data_root + 'annotations/instances_val2017.json', 75 | img_prefix=data_root + 'val2017/', 76 | img_scale=(1333, 800), 77 | img_norm_cfg=img_norm_cfg, 78 | size_divisor=32, 79 | flip_ratio=0, 80 | with_mask=False, 81 | with_crowd=False, 82 | with_label=True), 83 | test=dict( 84 | type=dataset_type, 85 | ann_file=data_root + 'annotations/instances_val2017.json', 86 | img_prefix=data_root + 'val2017/', 87 | img_scale=(1333, 800), 88 | img_norm_cfg=img_norm_cfg, 89 | size_divisor=32, 90 | flip_ratio=0, 91 | with_mask=False, 92 | with_crowd=False, 93 | with_label=False, 94 | test_mode=True)) 95 | # optimizer 96 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) 97 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 98 | # learning policy 99 | lr_config = dict( 100 | policy='step', 101 | warmup='linear', 102 | warmup_iters=500, 103 | warmup_ratio=1.0 / 3, 104 | step=[8, 11]) 105 | checkpoint_config = dict(interval=1) 106 | # yapf:disable 107 | log_config = dict( 108 | interval=50, 109 | hooks=[ 110 | dict(type='TextLoggerHook'), 111 | # dict(type='TensorboardLoggerHook') 112 | ]) 113 | # yapf:enable 114 | # runtime settings 115 | total_epochs = 12 116 | device_ids = range(8) 117 | dist_params = dict(backend='nccl') 118 | 
log_level = 'INFO' 119 | work_dir = './work_dirs/retinanet_r50_fpn_1x' 120 | load_from = None 121 | resume_from = None 122 | workflow = [('train', 1)] 123 | -------------------------------------------------------------------------------- /configs/retinanet_x101_64x4d_fpn_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RetinaNet', 4 | pretrained='open-mmlab://resnext101_64x4d', 5 | backbone=dict( 6 | type='ResNeXt', 7 | depth=101, 8 | groups=64, 9 | base_width=4, 10 | num_stages=4, 11 | out_indices=(0, 1, 2, 3), 12 | frozen_stages=1, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | start_level=1, 19 | add_extra_convs=True, 20 | num_outs=5), 21 | bbox_head=dict( 22 | type='RetinaHead', 23 | num_classes=2, 24 | in_channels=256, 25 | stacked_convs=4, 26 | feat_channels=256, 27 | octave_base_scale=4, 28 | scales_per_octave=3, 29 | anchor_ratios=[0.5, 1.0, 2.0], 30 | anchor_strides=[8, 16, 32, 64, 128], 31 | target_means=[.0, .0, .0, .0], 32 | target_stds=[1.0, 1.0, 1.0, 1.0])) 33 | # training and testing settings 34 | train_cfg = dict( 35 | assigner=dict( 36 | type='MaxIoUAssigner', 37 | pos_iou_thr=0.5, 38 | neg_iou_thr=0.4, 39 | min_pos_iou=0, 40 | ignore_iof_thr=-1), 41 | smoothl1_beta=0.11, 42 | gamma=2.0, 43 | alpha=0.25, 44 | allowed_border=-1, 45 | pos_weight=-1, 46 | debug=False) 47 | test_cfg = dict( 48 | nms_pre=1000, 49 | min_bbox_size=0, 50 | score_thr=0.05, 51 | nms=dict(type='nms', iou_thr=0.5), 52 | max_per_img=100) 53 | # dataset settings 54 | dataset_type = 'CocoDataset' 55 | data_root = 'data/coco/' 56 | img_norm_cfg = dict( 57 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 58 | data = dict( 59 | imgs_per_gpu=2, 60 | workers_per_gpu=2, 61 | train=dict( 62 | type=dataset_type, 63 | ann_file=data_root + 'annotations/instances_train2017.json', 64 | img_prefix=data_root + 'train2017/', 65 | img_scale=(1333, 800), 66 | img_norm_cfg=img_norm_cfg, 67 | size_divisor=32, 68 | flip_ratio=0.5, 69 | with_mask=False, 70 | with_crowd=False, 71 | with_label=True), 72 | val=dict( 73 | type=dataset_type, 74 | ann_file=data_root + 'annotations/instances_val2017.json', 75 | img_prefix=data_root + 'val2017/', 76 | img_scale=(1333, 800), 77 | img_norm_cfg=img_norm_cfg, 78 | size_divisor=32, 79 | flip_ratio=0, 80 | with_mask=False, 81 | with_crowd=False, 82 | with_label=True), 83 | test=dict( 84 | type=dataset_type, 85 | ann_file=data_root + 'annotations/instances_val2017.json', 86 | img_prefix=data_root + 'val2017/', 87 | img_scale=(1333, 800), 88 | img_norm_cfg=img_norm_cfg, 89 | size_divisor=32, 90 | flip_ratio=0, 91 | with_mask=False, 92 | with_crowd=False, 93 | with_label=False, 94 | test_mode=True)) 95 | # optimizer 96 | optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) 97 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 98 | # learning policy 99 | lr_config = dict( 100 | policy='step', 101 | warmup='linear', 102 | warmup_iters=500, 103 | warmup_ratio=1.0 / 3, 104 | step=[8, 11]) 105 | checkpoint_config = dict(interval=1) 106 | # yapf:disable 107 | log_config = dict( 108 | interval=50, 109 | hooks=[ 110 | dict(type='TextLoggerHook'), 111 | # dict(type='TensorboardLoggerHook') 112 | ]) 113 | # yapf:enable 114 | # runtime settings 115 | total_epochs = 12 116 | device_ids = range(8) 117 | dist_params = dict(backend='nccl') 118 | log_level = 'INFO' 119 | work_dir = 
'./work_dirs/retinanet_r50_fpn_1x' 120 | load_from = None 121 | resume_from = None 122 | workflow = [('train', 1)] 123 | -------------------------------------------------------------------------------- /configs/rpn_r50_fpn_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RPN', 4 | pretrained='modelzoo://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | style='pytorch'), 12 | neck=dict( 13 | type='FPN', 14 | in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | num_outs=5), 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=256, 20 | feat_channels=256, 21 | anchor_scales=[8], 22 | anchor_ratios=[0.5, 1.0, 2.0], 23 | anchor_strides=[4, 8, 16, 32, 64], 24 | target_means=[.0, .0, .0, .0], 25 | target_stds=[1.0, 1.0, 1.0, 1.0], 26 | use_sigmoid_cls=True)) 27 | # model training and testing settings 28 | train_cfg = dict( 29 | rpn=dict( 30 | assigner=dict( 31 | type='MaxIoUAssigner', 32 | pos_iou_thr=0.7, 33 | neg_iou_thr=0.3, 34 | min_pos_iou=0.3, 35 | ignore_iof_thr=-1), 36 | sampler=dict( 37 | type='RandomSampler', 38 | num=256, 39 | pos_fraction=0.5, 40 | neg_pos_ub=-1, 41 | add_gt_as_proposals=False), 42 | allowed_border=0, 43 | pos_weight=-1, 44 | smoothl1_beta=1 / 9.0, 45 | debug=False)) 46 | test_cfg = dict( 47 | rpn=dict( 48 | nms_across_levels=False, 49 | nms_pre=2000, 50 | nms_post=2000, 51 | max_num=2000, 52 | nms_thr=0.7, 53 | min_bbox_size=0)) 54 | # dataset settings 55 | dataset_type = 'CocoDataset' 56 | data_root = 'data/coco/' 57 | img_norm_cfg = dict( 58 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 59 | data = dict( 60 | imgs_per_gpu=2, 61 | workers_per_gpu=2, 62 | train=dict( 63 | type=dataset_type, 64 | ann_file=data_root + 'annotations/instances_train2017.json', 65 | img_prefix=data_root + 'train2017/', 66 | img_scale=(1333, 800), 67 | img_norm_cfg=img_norm_cfg, 68 | size_divisor=32, 69 | flip_ratio=0.5, 70 | with_mask=False, 71 | with_crowd=False, 72 | with_label=False), 73 | val=dict( 74 | type=dataset_type, 75 | ann_file=data_root + 'annotations/instances_val2017.json', 76 | img_prefix=data_root + 'val2017/', 77 | img_scale=(1333, 800), 78 | img_norm_cfg=img_norm_cfg, 79 | size_divisor=32, 80 | flip_ratio=0, 81 | with_mask=False, 82 | with_crowd=False, 83 | with_label=False), 84 | test=dict( 85 | type=dataset_type, 86 | ann_file=data_root + 'annotations/instances_val2017.json', 87 | img_prefix=data_root + 'val2017/', 88 | img_scale=(1333, 800), 89 | img_norm_cfg=img_norm_cfg, 90 | size_divisor=32, 91 | flip_ratio=0, 92 | with_mask=False, 93 | with_label=False, 94 | test_mode=True)) 95 | # optimizer 96 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 97 | # runner configs 98 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 99 | lr_config = dict( 100 | policy='step', 101 | warmup='linear', 102 | warmup_iters=500, 103 | warmup_ratio=1.0 / 3, 104 | step=[8, 11]) 105 | checkpoint_config = dict(interval=1) 106 | # yapf:disable 107 | log_config = dict( 108 | interval=50, 109 | hooks=[ 110 | dict(type='TextLoggerHook'), 111 | # dict(type='TensorboardLoggerHook') 112 | ]) 113 | # yapf:enable 114 | # runtime settings 115 | total_epochs = 12 116 | dist_params = dict(backend='nccl') 117 | log_level = 'INFO' 118 | work_dir = './work_dirs/rpn_r50_fpn_1x' 119 | load_from = None 120 | resume_from = None 121 | workflow = 
[('train', 1)] 122 | -------------------------------------------------------------------------------- /configs/rpn_r101_fpn_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RPN', 4 | pretrained='modelzoo://resnet101', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=101, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | style='pytorch'), 12 | neck=dict( 13 | type='FPN', 14 | in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | num_outs=5), 17 | rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=256, 20 | feat_channels=256, 21 | anchor_scales=[8], 22 | anchor_ratios=[0.5, 1.0, 2.0], 23 | anchor_strides=[4, 8, 16, 32, 64], 24 | target_means=[.0, .0, .0, .0], 25 | target_stds=[1.0, 1.0, 1.0, 1.0], 26 | use_sigmoid_cls=True)) 27 | # model training and testing settings 28 | train_cfg = dict( 29 | rpn=dict( 30 | assigner=dict( 31 | type='MaxIoUAssigner', 32 | pos_iou_thr=0.7, 33 | neg_iou_thr=0.3, 34 | min_pos_iou=0.3, 35 | ignore_iof_thr=-1), 36 | sampler=dict( 37 | type='RandomSampler', 38 | num=256, 39 | pos_fraction=0.5, 40 | neg_pos_ub=-1, 41 | add_gt_as_proposals=False), 42 | allowed_border=0, 43 | pos_weight=-1, 44 | smoothl1_beta=1 / 9.0, 45 | debug=False)) 46 | test_cfg = dict( 47 | rpn=dict( 48 | nms_across_levels=False, 49 | nms_pre=2000, 50 | nms_post=2000, 51 | max_num=2000, 52 | nms_thr=0.7, 53 | min_bbox_size=0)) 54 | # dataset settings 55 | dataset_type = 'CocoDataset' 56 | data_root = 'data/coco/' 57 | img_norm_cfg = dict( 58 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 59 | data = dict( 60 | imgs_per_gpu=2, 61 | workers_per_gpu=2, 62 | train=dict( 63 | type=dataset_type, 64 | ann_file=data_root + 'annotations/instances_train2017.json', 65 | img_prefix=data_root + 'train2017/', 66 | img_scale=(1333, 800), 67 | img_norm_cfg=img_norm_cfg, 68 | size_divisor=32, 69 | flip_ratio=0.5, 70 | with_mask=False, 71 | with_crowd=False, 72 | with_label=False), 73 | val=dict( 74 | type=dataset_type, 75 | ann_file=data_root + 'annotations/instances_val2017.json', 76 | img_prefix=data_root + 'val2017/', 77 | img_scale=(1333, 800), 78 | img_norm_cfg=img_norm_cfg, 79 | size_divisor=32, 80 | flip_ratio=0, 81 | with_mask=False, 82 | with_crowd=False, 83 | with_label=False), 84 | test=dict( 85 | type=dataset_type, 86 | ann_file=data_root + 'annotations/instances_val2017.json', 87 | img_prefix=data_root + 'val2017/', 88 | img_scale=(1333, 800), 89 | img_norm_cfg=img_norm_cfg, 90 | size_divisor=32, 91 | flip_ratio=0, 92 | with_mask=False, 93 | with_label=False, 94 | test_mode=True)) 95 | # optimizer 96 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 97 | # runner configs 98 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 99 | lr_config = dict( 100 | policy='step', 101 | warmup='linear', 102 | warmup_iters=500, 103 | warmup_ratio=1.0 / 3, 104 | step=[8, 11]) 105 | checkpoint_config = dict(interval=1) 106 | # yapf:disable 107 | log_config = dict( 108 | interval=50, 109 | hooks=[ 110 | dict(type='TextLoggerHook'), 111 | # dict(type='TensorboardLoggerHook') 112 | ]) 113 | # yapf:enable 114 | # runtime settings 115 | total_epochs = 12 116 | dist_params = dict(backend='nccl') 117 | log_level = 'INFO' 118 | work_dir = './work_dirs/rpn_r101_fpn_1x' 119 | load_from = None 120 | resume_from = None 121 | workflow = [('train', 1)] 122 | 
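The RPN configs above are plain Python files that `tools/train.py` (shown earlier) turns into a model and a training run. Below is a minimal sketch of that flow, assuming a single GPU and non-distributed training; the config path is just an example:

```python
from mmcv import Config

from mmdet.apis import train_detector
from mmdet.datasets import get_dataset
from mmdet.models import build_detector

cfg = Config.fromfile('configs/rpn_r101_fpn_1x.py')
cfg.gpus = 1  # non-distributed, single GPU

# build_detector() looks up type='RPN' in the DETECTORS registry and
# instantiates it with the backbone/neck/rpn_head sub-dicts plus the
# train/test settings from the same config file.
model = build_detector(cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)

train_dataset = get_dataset(cfg.data.train)
train_detector(model, train_dataset, cfg, distributed=False, validate=False)
```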
-------------------------------------------------------------------------------- /configs/rpn_x101_32x4d_fpn_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='RPN', 4 | pretrained='open-mmlab://resnext101_32x4d', 5 | backbone=dict( 6 | type='ResNeXt', 7 | depth=101, 8 | groups=32, 9 | base_width=4, 10 | num_stages=4, 11 | out_indices=(0, 1, 2, 3), 12 | frozen_stages=1, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_scales=[8], 24 | anchor_ratios=[0.5, 1.0, 2.0], 25 | anchor_strides=[4, 8, 16, 32, 64], 26 | target_means=[.0, .0, .0, .0], 27 | target_stds=[1.0, 1.0, 1.0, 1.0], 28 | use_sigmoid_cls=True)) 29 | # model training and testing settings 30 | train_cfg = dict( 31 | rpn=dict( 32 | assigner=dict( 33 | type='MaxIoUAssigner', 34 | pos_iou_thr=0.7, 35 | neg_iou_thr=0.3, 36 | min_pos_iou=0.3, 37 | ignore_iof_thr=-1), 38 | sampler=dict( 39 | type='RandomSampler', 40 | num=256, 41 | pos_fraction=0.5, 42 | neg_pos_ub=-1, 43 | add_gt_as_proposals=False), 44 | allowed_border=0, 45 | pos_weight=-1, 46 | smoothl1_beta=1 / 9.0, 47 | debug=False)) 48 | test_cfg = dict( 49 | rpn=dict( 50 | nms_across_levels=False, 51 | nms_pre=2000, 52 | nms_post=2000, 53 | max_num=2000, 54 | nms_thr=0.7, 55 | min_bbox_size=0)) 56 | # dataset settings 57 | dataset_type = 'CocoDataset' 58 | data_root = 'data/coco/' 59 | img_norm_cfg = dict( 60 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 61 | data = dict( 62 | imgs_per_gpu=2, 63 | workers_per_gpu=2, 64 | train=dict( 65 | type=dataset_type, 66 | ann_file=data_root + 'annotations/instances_train2017.json', 67 | img_prefix=data_root + 'train2017/', 68 | img_scale=(1333, 800), 69 | img_norm_cfg=img_norm_cfg, 70 | size_divisor=32, 71 | flip_ratio=0.5, 72 | with_mask=False, 73 | with_crowd=False, 74 | with_label=False), 75 | val=dict( 76 | type=dataset_type, 77 | ann_file=data_root + 'annotations/instances_val2017.json', 78 | img_prefix=data_root + 'val2017/', 79 | img_scale=(1333, 800), 80 | img_norm_cfg=img_norm_cfg, 81 | size_divisor=32, 82 | flip_ratio=0, 83 | with_mask=False, 84 | with_crowd=False, 85 | with_label=False), 86 | test=dict( 87 | type=dataset_type, 88 | ann_file=data_root + 'annotations/instances_val2017.json', 89 | img_prefix=data_root + 'val2017/', 90 | img_scale=(1333, 800), 91 | img_norm_cfg=img_norm_cfg, 92 | size_divisor=32, 93 | flip_ratio=0, 94 | with_mask=False, 95 | with_label=False, 96 | test_mode=True)) 97 | # optimizer 98 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 99 | # runner configs 100 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 101 | lr_config = dict( 102 | policy='step', 103 | warmup='linear', 104 | warmup_iters=500, 105 | warmup_ratio=1.0 / 3, 106 | step=[8, 11]) 107 | checkpoint_config = dict(interval=1) 108 | # yapf:disable 109 | log_config = dict( 110 | interval=50, 111 | hooks=[ 112 | dict(type='TextLoggerHook'), 113 | # dict(type='TensorboardLoggerHook') 114 | ]) 115 | # yapf:enable 116 | # runtime settings 117 | total_epochs = 12 118 | dist_params = dict(backend='nccl') 119 | log_level = 'INFO' 120 | work_dir = './work_dirs/rpn_r101_fpn_1x' 121 | load_from = None 122 | resume_from = None 123 | workflow = [('train', 1)] 124 | 
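Every config in this family shares the same optimization recipe: SGD, a linear warmup over the first 500 iterations, then step decay at epochs 8 and 11. The sketch below (not mmcv code) computes the learning rate this implies; the warmup formula follows mmcv's linear rule, and the decay factor gamma=0.1 is an assumed default since the configs do not set it explicitly.

```python
def lr_at(epoch, cur_iter, base_lr=0.02, warmup_iters=500,
          warmup_ratio=1.0 / 3, steps=(8, 11), gamma=0.1):
    # step policy: decay by gamma at each milestone epoch passed
    regular = base_lr * gamma ** sum(epoch >= s for s in steps)
    if cur_iter >= warmup_iters:
        return regular
    # linear warmup: ramp from base_lr * warmup_ratio up to the regular lr
    k = (1 - cur_iter / warmup_iters) * (1 - warmup_ratio)
    return regular * (1 - k)

print(lr_at(0, 0))       # 0.00667 -> base_lr / 3 at the first iteration
print(lr_at(0, 500))     # 0.02    -> warmup finished
print(lr_at(8, 50000))   # 0.002   -> after the first decay step
print(lr_at(11, 70000))  # 0.0002  -> after the second decay step
```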
-------------------------------------------------------------------------------- /configs/rpn_x101_64x4d_fpn_1x.py: --------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 |     type='RPN',
4 |     pretrained='open-mmlab://resnext101_64x4d',
5 |     backbone=dict(
6 |         type='ResNeXt',
7 |         depth=101,
8 |         groups=64,
9 |         base_width=4,
10 |         num_stages=4,
11 |         out_indices=(0, 1, 2, 3),
12 |         frozen_stages=1,
13 |         style='pytorch'),
14 |     neck=dict(
15 |         type='FPN',
16 |         in_channels=[256, 512, 1024, 2048],
17 |         out_channels=256,
18 |         num_outs=5),
19 |     rpn_head=dict(
20 |         type='RPNHead',
21 |         in_channels=256,
22 |         feat_channels=256,
23 |         anchor_scales=[8],
24 |         anchor_ratios=[0.5, 1.0, 2.0],
25 |         anchor_strides=[4, 8, 16, 32, 64],
26 |         target_means=[.0, .0, .0, .0],
27 |         target_stds=[1.0, 1.0, 1.0, 1.0],
28 |         use_sigmoid_cls=True))
29 | # model training and testing settings
30 | train_cfg = dict(
31 |     rpn=dict(
32 |         assigner=dict(
33 |             type='MaxIoUAssigner',
34 |             pos_iou_thr=0.7,
35 |             neg_iou_thr=0.3,
36 |             min_pos_iou=0.3,
37 |             ignore_iof_thr=-1),
38 |         sampler=dict(
39 |             type='RandomSampler',
40 |             num=256,
41 |             pos_fraction=0.5,
42 |             neg_pos_ub=-1,
43 |             add_gt_as_proposals=False),
44 |         allowed_border=0,
45 |         pos_weight=-1,
46 |         smoothl1_beta=1 / 9.0,
47 |         debug=False))
48 | test_cfg = dict(
49 |     rpn=dict(
50 |         nms_across_levels=False,
51 |         nms_pre=2000,
52 |         nms_post=2000,
53 |         max_num=2000,
54 |         nms_thr=0.7,
55 |         min_bbox_size=0))
56 | # dataset settings
57 | dataset_type = 'CocoDataset'
58 | data_root = 'data/coco/'
59 | img_norm_cfg = dict(
60 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
61 | data = dict(
62 |     imgs_per_gpu=2,
63 |     workers_per_gpu=2,
64 |     train=dict(
65 |         type=dataset_type,
66 |         ann_file=data_root + 'annotations/instances_train2017.json',
67 |         img_prefix=data_root + 'train2017/',
68 |         img_scale=(1333, 800),
69 |         img_norm_cfg=img_norm_cfg,
70 |         size_divisor=32,
71 |         flip_ratio=0.5,
72 |         with_mask=False,
73 |         with_crowd=False,
74 |         with_label=False),
75 |     val=dict(
76 |         type=dataset_type,
77 |         ann_file=data_root + 'annotations/instances_val2017.json',
78 |         img_prefix=data_root + 'val2017/',
79 |         img_scale=(1333, 800),
80 |         img_norm_cfg=img_norm_cfg,
81 |         size_divisor=32,
82 |         flip_ratio=0,
83 |         with_mask=False,
84 |         with_crowd=False,
85 |         with_label=False),
86 |     test=dict(
87 |         type=dataset_type,
88 |         ann_file=data_root + 'annotations/instances_val2017.json',
89 |         img_prefix=data_root + 'val2017/',
90 |         img_scale=(1333, 800),
91 |         img_norm_cfg=img_norm_cfg,
92 |         size_divisor=32,
93 |         flip_ratio=0,
94 |         with_mask=False,
95 |         with_label=False,
96 |         test_mode=True))
97 | # optimizer
98 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
99 | # runner configs
100 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
101 | lr_config = dict(
102 |     policy='step',
103 |     warmup='linear',
104 |     warmup_iters=500,
105 |     warmup_ratio=1.0 / 3,
106 |     step=[8, 11])
107 | checkpoint_config = dict(interval=1)
108 | # yapf:disable
109 | log_config = dict(
110 |     interval=50,
111 |     hooks=[
112 |         dict(type='TextLoggerHook'),
113 |         # dict(type='TensorboardLoggerHook')
114 |     ])
115 | # yapf:enable
116 | # runtime settings
117 | total_epochs = 12
118 | dist_params = dict(backend='nccl')
119 | log_level = 'INFO'
120 | work_dir = './work_dirs/rpn_x101_64x4d_fpn_1x'
121 | load_from = None
122 | resume_from = None
123 | workflow = [('train', 1)]
124 | 
-------------------------------------------------------------------------------- /TECHNICAL_DETAILS.md: --------------------------------------------------------------------------------
1 | ## Overview
2 | 
3 | In this section, we will introduce the main units of training a detector:
4 | data loading, the model, and the iteration pipeline.
5 | 
6 | ## Data loading
7 | 
8 | Following typical conventions, we use `Dataset` and `DataLoader` for data loading
9 | with multiple workers. `Dataset` returns a dict of data items corresponding
10 | to the arguments of the model's forward method.
11 | Since the data in object detection may not be the same size (image size, gt bbox size, etc.),
12 | we introduce a new `DataContainer` type in `mmcv` to help collect and distribute
13 | data of different sizes.
14 | See [here](https://github.com/open-mmlab/mmcv/blob/master/mmcv/parallel/data_container.py) for more details.
15 | 
16 | ## Model
17 | 
18 | In mmdetection, model components are basically categorized into 4 types.
19 | 
20 | - backbone: usually a fully convolutional network (FCN) that extracts feature maps, e.g., ResNet.
21 | - neck: the part between backbones and heads, e.g., FPN, ASPP.
22 | - head: the part for specific tasks, e.g., bbox prediction and mask prediction.
23 | - roi extractor: the part for extracting features from feature maps, e.g., RoI Align.
24 | 
25 | We also implement some general detection pipelines with the above components,
26 | such as `SingleStageDetector` and `TwoStageDetector`.
27 | 
28 | ### Build a model with basic components
29 | 
30 | Following some basic pipelines (e.g., two-stage detectors), the model structure
31 | can be customized through config files with little effort.
32 | 
33 | If we want to implement some new components, e.g., the path aggregation
34 | FPN structure in [Path Aggregation Network for Instance Segmentation](https://arxiv.org/abs/1803.01534), there are two things to do.
35 | 
36 | 1. create a new file `mmdet/models/necks/pafpn.py`.
37 | 
38 | ```python
39 | class PAFPN(nn.Module):
40 | 
41 |     def __init__(self,
42 |                  in_channels,
43 |                  out_channels,
44 |                  num_outs,
45 |                  start_level=0,
46 |                  end_level=-1,
47 |                  add_extra_convs=False):
48 |         pass
49 | 
50 |     def forward(self, inputs):
51 |         # implementation is ignored
52 |         pass
53 | ```
54 | 
55 | 2. modify the config file from
56 | 
57 | ```python
58 | neck=dict(
59 |     type='FPN',
60 |     in_channels=[256, 512, 1024, 2048],
61 |     out_channels=256,
62 |     num_outs=5)
63 | ```
64 | 
65 | to
66 | 
67 | ```python
68 | neck=dict(
69 |     type='PAFPN',
70 |     in_channels=[256, 512, 1024, 2048],
71 |     out_channels=256,
72 |     num_outs=5)
73 | ```
74 | 
75 | We will release more components (backbones, necks, heads) for research purposes.
76 | 
77 | ### Write a new model
78 | 
79 | To write a new detection pipeline, you need to inherit from `BaseDetector`,
80 | which defines the following abstract methods.
81 | 
82 | - `extract_feat()`: given an image batch of shape (n, c, h, w), extract the feature map(s).
83 | - `forward_train()`: forward method for training mode
84 | - `simple_test()`: single-scale testing without augmentation
85 | - `aug_test()`: testing with augmentation (multi-scale, flip, etc.)
86 | 
87 | [TwoStageDetector](https://github.com/hellock/mmdetection/blob/master/mmdet/models/detectors/two_stage.py)
88 | is a good example which shows how to do that; a minimal skeleton is sketched below.
89 | 
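For orientation only, here is a minimal sketch of such a subclass. Method bodies, the registry decorator, and loss computation are omitted, and `ToyDetector` plus its argument names are illustrative, not part of the codebase:

```python
import torch.nn as nn


class ToyDetector(nn.Module):  # a real pipeline inherits from BaseDetector

    def extract_feat(self, img):
        # img: (n, c, h, w) -> feature map(s), usually backbone + neck
        raise NotImplementedError

    def forward_train(self, img, img_meta, gt_bboxes, gt_labels, **kwargs):
        # returns a dict of losses, e.g. dict(loss_cls=..., loss_reg=...)
        raise NotImplementedError

    def simple_test(self, img, img_meta, **kwargs):
        # single-scale testing without augmentation
        raise NotImplementedError

    def aug_test(self, imgs, img_metas, **kwargs):
        # testing with augmentation (multi-scale, flip, etc.)
        raise NotImplementedError
```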
90 | ## Iteration pipeline
91 | 
92 | We adopt distributed training for both single-machine and multi-machine setups.
93 | Supposing that the server has 8 GPUs, 8 processes will be started and each process runs on a single GPU.
94 | 
95 | Each process keeps an isolated model, data loader, and optimizer.
96 | Model parameters are only synchronized once at the beginning.
97 | After a forward and backward pass, gradients will be allreduced among all GPUs,
98 | and the optimizer will update model parameters.
99 | Since the gradients are allreduced, the model parameters stay the same for all processes after the iteration.
100 | 
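One iteration of this scheme looks roughly like the sketch below. It uses raw `torch.distributed` calls for clarity; the actual implementation delegates the allreduce to `MMDistributedDataParallel` and the loop to the mmcv `Runner`, and the loss extraction here is a simplifying assumption:

```python
import torch.distributed as dist


def train_iteration(model, data, optimizer):
    # forward + backward on this process's own mini-batch
    loss = model(**data)['loss']  # assume losses already reduced to a scalar
    optimizer.zero_grad()
    loss.backward()
    # sum gradients over all processes, then divide by the world size;
    # afterwards every process holds identical (averaged) gradients
    world_size = dist.get_world_size()
    for param in model.parameters():
        if param.grad is not None:
            dist.all_reduce(param.grad.data)  # default op is SUM
            param.grad.data.div_(world_size)
    # identical gradients -> identical parameter updates on every process
    optimizer.step()
```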
-------------------------------------------------------------------------------- /mmdet/ops/dcn/src/deform_pool_cuda.cpp: --------------------------------------------------------------------------------
1 | // modify from
2 | // https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/modulated_dcn_cuda.c
3 | 
4 | // based on
5 | // author: Charles Shang
6 | // https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu
7 | 
8 | #include <torch/extension.h>
9 | 
10 | #include <cmath>
11 | #include <vector>
12 | 
13 | void DeformablePSROIPoolForward(
14 |     const at::Tensor data, const at::Tensor bbox, const at::Tensor trans,
15 |     at::Tensor out, at::Tensor top_count, const int batch, const int channels,
16 |     const int height, const int width, const int num_bbox,
17 |     const int channels_trans, const int no_trans, const float spatial_scale,
18 |     const int output_dim, const int group_size, const int pooled_size,
19 |     const int part_size, const int sample_per_part, const float trans_std);
20 | 
21 | void DeformablePSROIPoolBackwardAcc(
22 |     const at::Tensor out_grad, const at::Tensor data, const at::Tensor bbox,
23 |     const at::Tensor trans, const at::Tensor top_count, at::Tensor in_grad,
24 |     at::Tensor trans_grad, const int batch, const int channels,
25 |     const int height, const int width, const int num_bbox,
26 |     const int channels_trans, const int no_trans, const float spatial_scale,
27 |     const int output_dim, const int group_size, const int pooled_size,
28 |     const int part_size, const int sample_per_part, const float trans_std);
29 | 
30 | void deform_psroi_pooling_cuda_forward(
31 |     at::Tensor input, at::Tensor bbox, at::Tensor trans, at::Tensor out,
32 |     at::Tensor top_count, const int no_trans, const float spatial_scale,
33 |     const int output_dim, const int group_size, const int pooled_size,
34 |     const int part_size, const int sample_per_part, const float trans_std) {
35 |   AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
36 | 
37 |   const int batch = input.size(0);
38 |   const int channels = input.size(1);
39 |   const int height = input.size(2);
40 |   const int width = input.size(3);
41 |   const int channels_trans = no_trans ? 2 : trans.size(1);
42 | 
43 |   const int num_bbox = bbox.size(0);
44 |   if (num_bbox != out.size(0))
45 |     AT_ERROR("Output shape and bbox number won't match: (%d vs %d).",
46 |              out.size(0), num_bbox);
47 | 
48 |   DeformablePSROIPoolForward(
49 |       input, bbox, trans, out, top_count, batch, channels, height, width,
50 |       num_bbox, channels_trans, no_trans, spatial_scale, output_dim, group_size,
51 |       pooled_size, part_size, sample_per_part, trans_std);
52 | }
53 | 
54 | void deform_psroi_pooling_cuda_backward(
55 |     at::Tensor out_grad, at::Tensor input, at::Tensor bbox, at::Tensor trans,
56 |     at::Tensor top_count, at::Tensor input_grad, at::Tensor trans_grad,
57 |     const int no_trans, const float spatial_scale, const int output_dim,
58 |     const int group_size, const int pooled_size, const int part_size,
59 |     const int sample_per_part, const float trans_std) {
60 |   AT_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous");
61 |   AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
62 | 
63 |   const int batch = input.size(0);
64 |   const int channels = input.size(1);
65 |   const int height = input.size(2);
66 |   const int width = input.size(3);
67 |   const int channels_trans = no_trans ? 2 : trans.size(1);
68 | 
69 |   const int num_bbox = bbox.size(0);
70 |   if (num_bbox != out_grad.size(0))
71 |     AT_ERROR("Output shape and bbox number won't match: (%d vs %d).",
72 |              out_grad.size(0), num_bbox);
73 | 
74 |   DeformablePSROIPoolBackwardAcc(
75 |       out_grad, input, bbox, trans, top_count, input_grad, trans_grad, batch,
76 |       channels, height, width, num_bbox, channels_trans, no_trans,
77 |       spatial_scale, output_dim, group_size, pooled_size, part_size,
78 |       sample_per_part, trans_std);
79 | }
80 | 
81 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
82 |   m.def("deform_psroi_pooling_cuda_forward", &deform_psroi_pooling_cuda_forward,
83 |         "deform psroi pooling forward(CUDA)");
84 |   m.def("deform_psroi_pooling_cuda_backward",
85 |         &deform_psroi_pooling_cuda_backward,
86 |         "deform psroi pooling backward(CUDA)");
87 | }
-------------------------------------------------------------------------------- /ssd_debug/test_ssd流程.py: --------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import mmcv
4 | 
5 | class Registry(object):
6 | 
7 |     def __init__(self, name):
8 |         self._name = name
9 |         self._module_dict = dict()
10 | 
11 |     @property
12 |     def name(self):
13 |         return self._name
14 | 
15 |     @property
16 |     def module_dict(self):
17 |         return self._module_dict
18 | 
19 |     def _register_module(self, module_class):
20 |         """Register a module.
21 | 
22 |         Args:
23 |             module_class (class): Module class to be registered; must subclass :obj:`nn.Module`.
24 |         """
25 |         if not issubclass(module_class, nn.Module):
26 |             raise TypeError(
27 |                 'module must be a child of nn.Module, but got {}'.format(
28 |                     module_class))
29 |         module_name = module_class.__name__
30 |         if module_name in self._module_dict:
31 |             raise KeyError('{} is already registered in {}'.format(
32 |                 module_name, self.name))
33 |         self._module_dict[module_name] = module_class
34 | 
35 |     def register_module(self, cls):
36 |         self._register_module(cls)
37 |         return cls
38 | 
39 | 
40 | BACKBONES = Registry('backbone')
41 | NECKS = Registry('neck')
42 | ROI_EXTRACTORS = Registry('roi_extractor')
43 | HEADS = Registry('head')
44 | DETECTORS = Registry('detector')
45 | 
46 | 
47 | # same as obj_from_dict
48 | def build_module(cfg, registry, default_args):
49 |     assert isinstance(cfg, dict) and 'type' in cfg
50 |     assert isinstance(default_args, dict) or default_args is None
51 |     args = cfg.copy()
52 |     obj_type = args.pop('type')
53 |     if mmcv.is_str(obj_type):
54 |         if obj_type not in registry.module_dict:
55 |             raise KeyError('{} is not in the {} registry'.format(
56 |                 obj_type, registry.name))
57 |         obj_type = registry.module_dict[obj_type]
58 |     elif not isinstance(obj_type, type):
59 |         raise TypeError('type must be a str or valid type, but got {}'.format(
60 |             type(obj_type)))
61 |     if default_args is not None:
62 |         for name, value in default_args.items():
63 |             args.setdefault(name, value)
64 |     return obj_type(**args)
65 | 
66 | 
67 | def build(cfg, registry, default_args=None):
68 |     if isinstance(cfg, list):
69 |         modules = [build_module(cfg_, registry, default_args) for cfg_ in cfg]
70 |         return nn.Sequential(*modules)
71 |     else:
72 |         return build_module(cfg, registry, default_args)
73 | 
74 | cfg = dict(
75 |     type='SingleStageDetector',
76 |     pretrained='open-mmlab://vgg16_caffe',
77 |     backbone=dict(
78 |         type='SSDVGG',
79 |         input_size=300,
80 |         depth=16,
81 |         with_last_pool=False,
82 |         ceil_mode=True,
83 |         out_indices=(3, 4),
84 |         out_feature_indices=(22, 34),
85 |         l2_norm_scale=20),
86 |     neck=None,
87 |     bbox_head=dict(
88 |         type='SSDHead',
89 |         input_size=300,
90 |         in_channels=(512, 1024, 512, 256, 256, 256),
91 |         num_classes=2,
92 |         anchor_strides=(8, 16, 32, 64, 100, 300),
93 |         basesize_ratio_range=(0.2, 0.9),
94 |         anchor_ratios=([2], [2, 3], [2, 3], [2, 3], [2], [2]),
95 |         target_means=(.0, .0, .0, .0),
96 |         target_stds=(0.1, 0.1, 0.2, 0.2)))
97 | 
98 | default_args = dict(
99 |     train_cfg=dict(
100 |         assigner=dict(
101 |             type='MaxIoUAssigner',
102 |             pos_iou_thr=0.5,
103 |             neg_iou_thr=0.5,
104 |             min_pos_iou=0.,
105 |             ignore_iof_thr=-1,
106 |             gt_max_assign_all=False),
107 |         smoothl1_beta=1.,
108 |         allowed_border=-1,
109 |         pos_weight=-1,
110 |         neg_pos_ratio=3,
111 |         debug=False),
112 |     test_cfg=dict(
113 |         nms=dict(type='nms', iou_thr=0.45),
114 |         min_bbox_size=0,
115 |         score_thr=0.02,
116 |         max_per_img=200))
117 | 
118 | args = cfg.copy()
119 | obj_type = args.pop('type')
120 | print(obj_type)
121 | 
122 | if default_args is not None:
123 |     for name, value in default_args.items():
124 |         args.setdefault(name, value)
125 | print(args)
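
# --- minimal usage sketch (added for illustration; the decorator and
#     build_module behave exactly as defined above, but 'DummyBackbone'
#     is a made-up class, not part of mmdet) ---
@BACKBONES.register_module
class DummyBackbone(nn.Module):

    def __init__(self, depth):
        super(DummyBackbone, self).__init__()
        self.depth = depth

    def forward(self, x):
        return x


# build_module looks the 'type' string up in the registry, merges in
# default_args, and instantiates the class with the remaining kwargs
backbone = build_module(dict(type='DummyBackbone', depth=50), BACKBONES, None)
print(backbone.depth)  # -> 50
-------------------------------------------------------------------------------- /configs/fast_rcnn_r50_fpn_1x.py: --------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 |     type='FastRCNN',
4 |     pretrained='modelzoo://resnet50',
5 |     backbone=dict(
6 |         type='ResNet',
7 |         depth=50,
8 |         num_stages=4,
9 |         out_indices=(0, 1, 2, 3),
10 |         frozen_stages=1,
11 |         style='pytorch'),
12 |     neck=dict(
13 |         type='FPN',
14 | 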
in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | num_outs=5), 17 | bbox_roi_extractor=dict( 18 | type='SingleRoIExtractor', 19 | roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), 20 | out_channels=256, 21 | featmap_strides=[4, 8, 16, 32]), 22 | bbox_head=dict( 23 | type='SharedFCBBoxHead', 24 | num_fcs=2, 25 | in_channels=256, 26 | fc_out_channels=1024, 27 | roi_feat_size=7, 28 | num_classes=81, 29 | target_means=[0., 0., 0., 0.], 30 | target_stds=[0.1, 0.1, 0.2, 0.2], 31 | reg_class_agnostic=False)) 32 | # model training and testing settings 33 | train_cfg = dict( 34 | rcnn=dict( 35 | assigner=dict( 36 | type='MaxIoUAssigner', 37 | pos_iou_thr=0.5, 38 | neg_iou_thr=0.5, 39 | min_pos_iou=0.5, 40 | ignore_iof_thr=-1), 41 | sampler=dict( 42 | type='RandomSampler', 43 | num=512, 44 | pos_fraction=0.25, 45 | neg_pos_ub=-1, 46 | add_gt_as_proposals=True), 47 | pos_weight=-1, 48 | debug=False)) 49 | test_cfg = dict( 50 | rcnn=dict( 51 | score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)) 52 | # dataset settings 53 | dataset_type = 'CocoDataset' 54 | data_root = 'data/coco/' 55 | img_norm_cfg = dict( 56 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 57 | data = dict( 58 | imgs_per_gpu=2, 59 | workers_per_gpu=2, 60 | train=dict( 61 | type=dataset_type, 62 | ann_file=data_root + 'annotations/instances_train2017.json', 63 | img_prefix=data_root + 'train2017/', 64 | img_scale=(1333, 800), 65 | img_norm_cfg=img_norm_cfg, 66 | size_divisor=32, 67 | proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_train2017.pkl', 68 | flip_ratio=0.5, 69 | with_mask=False, 70 | with_crowd=True, 71 | with_label=True), 72 | val=dict( 73 | type=dataset_type, 74 | ann_file=data_root + 'annotations/instances_val2017.json', 75 | img_prefix=data_root + 'val2017/', 76 | img_scale=(1333, 800), 77 | img_norm_cfg=img_norm_cfg, 78 | proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl', 79 | size_divisor=32, 80 | flip_ratio=0, 81 | with_mask=False, 82 | with_crowd=True, 83 | with_label=True), 84 | test=dict( 85 | type=dataset_type, 86 | ann_file=data_root + 'annotations/instances_val2017.json', 87 | img_prefix=data_root + 'val2017/', 88 | img_scale=(1333, 800), 89 | img_norm_cfg=img_norm_cfg, 90 | proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl', 91 | size_divisor=32, 92 | flip_ratio=0, 93 | with_mask=False, 94 | with_label=False, 95 | test_mode=True)) 96 | # optimizer 97 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 98 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 99 | # learning policy 100 | lr_config = dict( 101 | policy='step', 102 | warmup='linear', 103 | warmup_iters=500, 104 | warmup_ratio=1.0 / 3, 105 | step=[8, 11]) 106 | checkpoint_config = dict(interval=1) 107 | # yapf:disable 108 | log_config = dict( 109 | interval=50, 110 | hooks=[ 111 | dict(type='TextLoggerHook'), 112 | # dict(type='TensorboardLoggerHook') 113 | ]) 114 | # yapf:enable 115 | # runtime settings 116 | total_epochs = 12 117 | dist_params = dict(backend='nccl') 118 | log_level = 'INFO' 119 | work_dir = './work_dirs/fast_rcnn_r50_fpn_1x' 120 | load_from = None 121 | resume_from = None 122 | workflow = [('train', 1)] 123 | -------------------------------------------------------------------------------- /configs/fast_rcnn_r101_fpn_1x.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='FastRCNN', 4 | 
pretrained='modelzoo://resnet101', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=101, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | style='pytorch'), 12 | neck=dict( 13 | type='FPN', 14 | in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | num_outs=5), 17 | bbox_roi_extractor=dict( 18 | type='SingleRoIExtractor', 19 | roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), 20 | out_channels=256, 21 | featmap_strides=[4, 8, 16, 32]), 22 | bbox_head=dict( 23 | type='SharedFCBBoxHead', 24 | num_fcs=2, 25 | in_channels=256, 26 | fc_out_channels=1024, 27 | roi_feat_size=7, 28 | num_classes=81, 29 | target_means=[0., 0., 0., 0.], 30 | target_stds=[0.1, 0.1, 0.2, 0.2], 31 | reg_class_agnostic=False)) 32 | # model training and testing settings 33 | train_cfg = dict( 34 | rcnn=dict( 35 | assigner=dict( 36 | type='MaxIoUAssigner', 37 | pos_iou_thr=0.5, 38 | neg_iou_thr=0.5, 39 | min_pos_iou=0.5, 40 | ignore_iof_thr=-1), 41 | sampler=dict( 42 | type='RandomSampler', 43 | num=512, 44 | pos_fraction=0.25, 45 | neg_pos_ub=-1, 46 | add_gt_as_proposals=True), 47 | pos_weight=-1, 48 | debug=False)) 49 | test_cfg = dict( 50 | rcnn=dict( 51 | score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)) 52 | # dataset settings 53 | dataset_type = 'CocoDataset' 54 | data_root = 'data/coco/' 55 | img_norm_cfg = dict( 56 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 57 | data = dict( 58 | imgs_per_gpu=2, 59 | workers_per_gpu=2, 60 | train=dict( 61 | type=dataset_type, 62 | ann_file=data_root + 'annotations/instances_train2017.json', 63 | img_prefix=data_root + 'train2017/', 64 | img_scale=(1333, 800), 65 | img_norm_cfg=img_norm_cfg, 66 | size_divisor=32, 67 | proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_train2017.pkl', 68 | flip_ratio=0.5, 69 | with_mask=False, 70 | with_crowd=True, 71 | with_label=True), 72 | val=dict( 73 | type=dataset_type, 74 | ann_file=data_root + 'annotations/instances_val2017.json', 75 | img_prefix=data_root + 'val2017/', 76 | img_scale=(1333, 800), 77 | img_norm_cfg=img_norm_cfg, 78 | proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl', 79 | size_divisor=32, 80 | flip_ratio=0, 81 | with_mask=False, 82 | with_crowd=True, 83 | with_label=True), 84 | test=dict( 85 | type=dataset_type, 86 | ann_file=data_root + 'annotations/instances_val2017.json', 87 | img_prefix=data_root + 'val2017/', 88 | img_scale=(1333, 800), 89 | img_norm_cfg=img_norm_cfg, 90 | proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl', 91 | size_divisor=32, 92 | flip_ratio=0, 93 | with_mask=False, 94 | with_label=False, 95 | test_mode=True)) 96 | # optimizer 97 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 98 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 99 | # learning policy 100 | lr_config = dict( 101 | policy='step', 102 | warmup='linear', 103 | warmup_iters=500, 104 | warmup_ratio=1.0 / 3, 105 | step=[8, 11]) 106 | checkpoint_config = dict(interval=1) 107 | # yapf:disable 108 | log_config = dict( 109 | interval=50, 110 | hooks=[ 111 | dict(type='TextLoggerHook'), 112 | # dict(type='TensorboardLoggerHook') 113 | ]) 114 | # yapf:enable 115 | # runtime settings 116 | total_epochs = 12 117 | dist_params = dict(backend='nccl') 118 | log_level = 'INFO' 119 | work_dir = './work_dirs/fast_rcnn_r101_fpn_1x' 120 | load_from = None 121 | resume_from = None 122 | workflow = [('train', 1)] 123 | 
-------------------------------------------------------------------------------- /mmdet/datasets/transforms.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | import numpy as np 3 | import torch 4 | 5 | __all__ = ['ImageTransform', 'BboxTransform', 'MaskTransform', 'Numpy2Tensor'] 6 | 7 | 8 | class ImageTransform(object): 9 | """Preprocess an image. 10 | 11 | 1. rescale the image to expected size 12 | 2. normalize the image 13 | 3. flip the image (if needed) 14 | 4. pad the image (if needed) 15 | 5. transpose to (c, h, w) 16 | """ 17 | 18 | def __init__(self, 19 | mean=(0, 0, 0), 20 | std=(1, 1, 1), 21 | to_rgb=True, 22 | size_divisor=None): 23 | self.mean = np.array(mean, dtype=np.float32) 24 | self.std = np.array(std, dtype=np.float32) 25 | self.to_rgb = to_rgb 26 | self.size_divisor = size_divisor 27 | 28 | def __call__(self, img, scale, flip=False, keep_ratio=True): 29 | if keep_ratio: 30 | img, scale_factor = mmcv.imrescale(img, scale, return_scale=True) 31 | else: 32 | img, w_scale, h_scale = mmcv.imresize( 33 | img, scale, return_scale=True) 34 | scale_factor = np.array([w_scale, h_scale, w_scale, h_scale], 35 | dtype=np.float32) 36 | img_shape = img.shape 37 | img = mmcv.imnormalize(img, self.mean, self.std, self.to_rgb) 38 | if flip: 39 | img = mmcv.imflip(img) 40 | if self.size_divisor is not None: 41 | img = mmcv.impad_to_multiple(img, self.size_divisor) 42 | pad_shape = img.shape 43 | else: 44 | pad_shape = img_shape 45 | img = img.transpose(2, 0, 1) 46 | return img, img_shape, pad_shape, scale_factor 47 | 48 | 49 | def bbox_flip(bboxes, img_shape): 50 | """Flip bboxes horizontally. 51 | 52 | Args: 53 | bboxes(ndarray): shape (..., 4*k) 54 | img_shape(tuple): (height, width) 55 | """ 56 | assert bboxes.shape[-1] % 4 == 0 57 | w = img_shape[1] 58 | flipped = bboxes.copy() 59 | flipped[..., 0::4] = w - bboxes[..., 2::4] - 1 60 | flipped[..., 2::4] = w - bboxes[..., 0::4] - 1 61 | return flipped 62 | 63 | 64 | class BboxTransform(object): 65 | """Preprocess gt bboxes. 66 | 67 | 1. rescale bboxes according to image size 68 | 2. flip bboxes (if needed) 69 | 3. pad the first dimension to `max_num_gts` 70 | """ 71 | 72 | def __init__(self, max_num_gts=None): 73 | self.max_num_gts = max_num_gts 74 | 75 | def __call__(self, bboxes, img_shape, scale_factor, flip=False): 76 | gt_bboxes = bboxes * scale_factor 77 | if flip: 78 | gt_bboxes = bbox_flip(gt_bboxes, img_shape) 79 | gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_shape[1]) 80 | gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_shape[0]) 81 | if self.max_num_gts is None: 82 | return gt_bboxes 83 | else: 84 | num_gts = gt_bboxes.shape[0] 85 | padded_bboxes = np.zeros((self.max_num_gts, 4), dtype=np.float32) 86 | padded_bboxes[:num_gts, :] = gt_bboxes 87 | return padded_bboxes 88 | 89 | 90 | class MaskTransform(object): 91 | """Preprocess masks. 92 | 93 | 1. resize masks to expected size and stack to a single array 94 | 2. flip the masks (if needed) 95 | 3. 
pad the masks (if needed) 96 | """ 97 | 98 | def __call__(self, masks, pad_shape, scale_factor, flip=False): 99 | masks = [ 100 | mmcv.imrescale(mask, scale_factor, interpolation='nearest') 101 | for mask in masks 102 | ] 103 | if flip: 104 | masks = [mask[:, ::-1] for mask in masks] 105 | padded_masks = [ 106 | mmcv.impad(mask, pad_shape[:2], pad_val=0) for mask in masks 107 | ] 108 | padded_masks = np.stack(padded_masks, axis=0) 109 | return padded_masks 110 | 111 | 112 | class Numpy2Tensor(object): 113 | 114 | def __init__(self): 115 | pass 116 | 117 | def __call__(self, *args): 118 | if len(args) == 1: 119 | return torch.from_numpy(args[0]) 120 | else: 121 | return tuple([torch.from_numpy(np.array(array)) for array in args]) 122 | -------------------------------------------------------------------------------- /mmdet/apis/train.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | from collections import OrderedDict 4 | 5 | import torch 6 | from mmcv.runner import Runner, DistSamplerSeedHook 7 | from mmcv.parallel import MMDataParallel, MMDistributedDataParallel 8 | 9 | from mmdet.core import (DistOptimizerHook, DistEvalmAPHook, 10 | CocoDistEvalRecallHook, CocoDistEvalmAPHook) 11 | from mmdet.datasets import build_dataloader 12 | from mmdet.models import RPN 13 | from .env import get_root_logger 14 | 15 | 16 | def parse_losses(losses): 17 | log_vars = OrderedDict() 18 | for loss_name, loss_value in losses.items(): 19 | if isinstance(loss_value, torch.Tensor): 20 | log_vars[loss_name] = loss_value.mean() 21 | elif isinstance(loss_value, list): 22 | log_vars[loss_name] = sum(_loss.mean() for _loss in loss_value) 23 | else: 24 | raise TypeError( 25 | '{} is not a tensor or list of tensors'.format(loss_name)) 26 | 27 | loss = sum(_value for _key, _value in log_vars.items() if 'loss' in _key) 28 | 29 | log_vars['loss'] = loss 30 | for name in log_vars: 31 | log_vars[name] = log_vars[name].item() 32 | 33 | return loss, log_vars 34 | 35 | 36 | def batch_processor(model, data, train_mode): 37 | losses = model(**data) 38 | loss, log_vars = parse_losses(losses) 39 | 40 | outputs = dict( 41 | loss=loss, log_vars=log_vars, num_samples=len(data['img'].data)) 42 | 43 | return outputs 44 | 45 | 46 | def train_detector(model, 47 | dataset, 48 | cfg, 49 | distributed=False, 50 | validate=False, 51 | logger=None): 52 | if logger is None: 53 | logger = get_root_logger(cfg.log_level) 54 | 55 | # start training 56 | if distributed: 57 | _dist_train(model, dataset, cfg, validate=validate) 58 | else: 59 | _non_dist_train(model, dataset, cfg, validate=validate) 60 | 61 | 62 | def _dist_train(model, dataset, cfg, validate=False): 63 | # prepare data loaders 64 | data_loaders = [ 65 | build_dataloader( 66 | dataset, 67 | cfg.data.imgs_per_gpu, 68 | cfg.data.workers_per_gpu, 69 | dist=True) 70 | ] 71 | # put model on gpus 72 | model = MMDistributedDataParallel(model.cuda()) 73 | # build runner 74 | runner = Runner(model, batch_processor, cfg.optimizer, cfg.work_dir, 75 | cfg.log_level) 76 | # register hooks 77 | optimizer_config = DistOptimizerHook(**cfg.optimizer_config) 78 | runner.register_training_hooks(cfg.lr_config, optimizer_config, 79 | cfg.checkpoint_config, cfg.log_config) 80 | runner.register_hook(DistSamplerSeedHook()) 81 | # register eval hooks 82 | if validate: 83 | if isinstance(model.module, RPN): 84 | # TODO: implement recall hooks for other datasets 85 | runner.register_hook(CocoDistEvalRecallHook(cfg.data.val)) 86 | 
else: 87 | if cfg.data.val.type == 'CocoDataset': 88 | runner.register_hook(CocoDistEvalmAPHook(cfg.data.val)) 89 | else: 90 | runner.register_hook(DistEvalmAPHook(cfg.data.val)) 91 | 92 | if cfg.resume_from: 93 | runner.resume(cfg.resume_from) 94 | elif cfg.load_from: 95 | runner.load_checkpoint(cfg.load_from) 96 | runner.run(data_loaders, cfg.workflow, cfg.total_epochs) 97 | 98 | 99 | def _non_dist_train(model, dataset, cfg, validate=False): 100 | # prepare data loaders 101 | data_loaders = [ 102 | build_dataloader( 103 | dataset, 104 | cfg.data.imgs_per_gpu, 105 | cfg.data.workers_per_gpu, 106 | cfg.gpus, 107 | dist=False) 108 | ] 109 | # put model on gpus 110 | model = MMDataParallel(model, device_ids=range(cfg.gpus)).cuda() 111 | # build runner 112 | runner = Runner(model, batch_processor, cfg.optimizer, cfg.work_dir, 113 | cfg.log_level) 114 | runner.register_training_hooks(cfg.lr_config, cfg.optimizer_config, 115 | cfg.checkpoint_config, cfg.log_config) 116 | 117 | if cfg.resume_from: 118 | runner.resume(cfg.resume_from) 119 | elif cfg.load_from: 120 | runner.load_checkpoint(cfg.load_from) 121 | runner.run(data_loaders, cfg.workflow, cfg.total_epochs) 122 | -------------------------------------------------------------------------------- /mmdet/models/anchor_heads/rpn_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from mmcv.cnn import normal_init 5 | 6 | from mmdet.core import delta2bbox 7 | from mmdet.ops import nms 8 | from .anchor_head import AnchorHead 9 | from ..registry import HEADS 10 | 11 | 12 | @HEADS.register_module 13 | class RPNHead(AnchorHead): 14 | 15 | def __init__(self, in_channels, **kwargs): 16 | super(RPNHead, self).__init__(2, in_channels, **kwargs) 17 | 18 | def _init_layers(self): 19 | self.rpn_conv = nn.Conv2d( 20 | self.in_channels, self.feat_channels, 3, padding=1) 21 | self.rpn_cls = nn.Conv2d(self.feat_channels, 22 | self.num_anchors * self.cls_out_channels, 1) 23 | self.rpn_reg = nn.Conv2d(self.feat_channels, self.num_anchors * 4, 1) 24 | 25 | def init_weights(self): 26 | normal_init(self.rpn_conv, std=0.01) 27 | normal_init(self.rpn_cls, std=0.01) 28 | normal_init(self.rpn_reg, std=0.01) 29 | 30 | def forward_single(self, x): 31 | x = self.rpn_conv(x) 32 | x = F.relu(x, inplace=True) 33 | rpn_cls_score = self.rpn_cls(x) 34 | rpn_bbox_pred = self.rpn_reg(x) 35 | return rpn_cls_score, rpn_bbox_pred 36 | 37 | def loss(self, 38 | cls_scores, 39 | bbox_preds, 40 | gt_bboxes, 41 | img_metas, 42 | cfg, 43 | gt_bboxes_ignore=None): 44 | losses = super(RPNHead, self).loss( 45 | cls_scores, 46 | bbox_preds, 47 | gt_bboxes, 48 | None, 49 | img_metas, 50 | cfg, 51 | gt_bboxes_ignore=gt_bboxes_ignore) 52 | return dict( 53 | loss_rpn_cls=losses['loss_cls'], loss_rpn_reg=losses['loss_reg']) 54 | 55 | def get_bboxes_single(self, 56 | cls_scores, 57 | bbox_preds, 58 | mlvl_anchors, 59 | img_shape, 60 | scale_factor, 61 | cfg, 62 | rescale=False): 63 | mlvl_proposals = [] 64 | for idx in range(len(cls_scores)): 65 | rpn_cls_score = cls_scores[idx] 66 | rpn_bbox_pred = bbox_preds[idx] 67 | assert rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:] 68 | anchors = mlvl_anchors[idx] 69 | rpn_cls_score = rpn_cls_score.permute(1, 2, 0) 70 | if self.use_sigmoid_cls: 71 | rpn_cls_score = rpn_cls_score.reshape(-1) 72 | scores = rpn_cls_score.sigmoid() 73 | else: 74 | rpn_cls_score = rpn_cls_score.reshape(-1, 2) 75 | scores = 
rpn_cls_score.softmax(dim=1)[:, 1] 76 | rpn_bbox_pred = rpn_bbox_pred.permute(1, 2, 0).reshape(-1, 4) 77 | if cfg.nms_pre > 0 and scores.shape[0] > cfg.nms_pre: 78 | _, topk_inds = scores.topk(cfg.nms_pre) 79 | rpn_bbox_pred = rpn_bbox_pred[topk_inds, :] 80 | anchors = anchors[topk_inds, :] 81 | scores = scores[topk_inds] 82 | proposals = delta2bbox(anchors, rpn_bbox_pred, self.target_means, 83 | self.target_stds, img_shape) 84 | if cfg.min_bbox_size > 0: 85 | w = proposals[:, 2] - proposals[:, 0] + 1 86 | h = proposals[:, 3] - proposals[:, 1] + 1 87 | valid_inds = torch.nonzero((w >= cfg.min_bbox_size) & 88 | (h >= cfg.min_bbox_size)).squeeze() 89 | proposals = proposals[valid_inds, :] 90 | scores = scores[valid_inds] 91 | proposals = torch.cat([proposals, scores.unsqueeze(-1)], dim=-1) 92 | proposals, _ = nms(proposals, cfg.nms_thr) 93 | proposals = proposals[:cfg.nms_post, :] 94 | mlvl_proposals.append(proposals) 95 | proposals = torch.cat(mlvl_proposals, 0) 96 | if cfg.nms_across_levels: 97 | proposals, _ = nms(proposals, cfg.nms_thr) 98 | proposals = proposals[:cfg.max_num, :] 99 | else: 100 | scores = proposals[:, 4] 101 | num = min(cfg.max_num, proposals.shape[0]) 102 | _, topk_inds = scores.topk(num) 103 | proposals = proposals[topk_inds, :] 104 | return proposals 105 | -------------------------------------------------------------------------------- /configs/ssd300_coco.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | input_size = 300 3 | model = dict( 4 | type='SingleStageDetector', 5 | pretrained='open-mmlab://vgg16_caffe', 6 | backbone=dict( 7 | type='SSDVGG', 8 | input_size=input_size, 9 | depth=16, 10 | with_last_pool=False, 11 | ceil_mode=True, 12 | out_indices=(3, 4), 13 | out_feature_indices=(22, 34), 14 | l2_norm_scale=20), 15 | neck=None, 16 | bbox_head=dict( 17 | type='SSDHead', 18 | input_size=input_size, 19 | in_channels=(512, 1024, 512, 256, 256, 256), 20 | num_classes=81, 21 | anchor_strides=(8, 16, 32, 64, 100, 300), 22 | basesize_ratio_range=(0.15, 0.9), 23 | anchor_ratios=([2], [2, 3], [2, 3], [2, 3], [2], [2]), 24 | target_means=(.0, .0, .0, .0), 25 | target_stds=(0.1, 0.1, 0.2, 0.2))) 26 | cudnn_benchmark = True 27 | train_cfg = dict( 28 | assigner=dict( 29 | type='MaxIoUAssigner', 30 | pos_iou_thr=0.5, 31 | neg_iou_thr=0.5, 32 | min_pos_iou=0., 33 | ignore_iof_thr=-1, 34 | gt_max_assign_all=False), 35 | smoothl1_beta=1., 36 | allowed_border=-1, 37 | pos_weight=-1, 38 | neg_pos_ratio=3, 39 | debug=False) 40 | test_cfg = dict( 41 | nms=dict(type='nms', iou_thr=0.45), 42 | min_bbox_size=0, 43 | score_thr=0.02, 44 | max_per_img=200) 45 | # model training and testing settings 46 | # dataset settings 47 | dataset_type = 'CocoDataset' 48 | data_root = 'data/coco/' 49 | img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True) 50 | data = dict( 51 | imgs_per_gpu=8, 52 | workers_per_gpu=3, 53 | train=dict( 54 | type='RepeatDataset', 55 | times=5, 56 | dataset=dict( 57 | type=dataset_type, 58 | ann_file=data_root + 'annotations/instances_train2017.json', 59 | img_prefix=data_root + 'train2017/', 60 | img_scale=(300, 300), 61 | img_norm_cfg=img_norm_cfg, 62 | size_divisor=None, 63 | flip_ratio=0.5, 64 | with_mask=False, 65 | with_crowd=False, 66 | with_label=True, 67 | test_mode=False, 68 | extra_aug=dict( 69 | photo_metric_distortion=dict( 70 | brightness_delta=32, 71 | contrast_range=(0.5, 1.5), 72 | saturation_range=(0.5, 1.5), 73 | hue_delta=18), 74 | expand=dict( 75 | 
mean=img_norm_cfg['mean'], 76 | to_rgb=img_norm_cfg['to_rgb'], 77 | ratio_range=(1, 4)), 78 | random_crop=dict( 79 | min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), min_crop_size=0.3)), 80 | resize_keep_ratio=False)), 81 | val=dict( 82 | type=dataset_type, 83 | ann_file=data_root + 'annotations/instances_val2017.json', 84 | img_prefix=data_root + 'val2017/', 85 | img_scale=(300, 300), 86 | img_norm_cfg=img_norm_cfg, 87 | size_divisor=None, 88 | flip_ratio=0, 89 | with_mask=False, 90 | with_label=False, 91 | test_mode=True, 92 | resize_keep_ratio=False), 93 | test=dict( 94 | type=dataset_type, 95 | ann_file=data_root + 'annotations/instances_val2017.json', 96 | img_prefix=data_root + 'val2017/', 97 | img_scale=(300, 300), 98 | img_norm_cfg=img_norm_cfg, 99 | size_divisor=None, 100 | flip_ratio=0, 101 | with_mask=False, 102 | with_label=False, 103 | test_mode=True, 104 | resize_keep_ratio=False)) 105 | # optimizer 106 | optimizer = dict(type='SGD', lr=2e-3, momentum=0.9, weight_decay=5e-4) 107 | optimizer_config = dict() 108 | # learning policy 109 | lr_config = dict( 110 | policy='step', 111 | warmup='linear', 112 | warmup_iters=500, 113 | warmup_ratio=1.0 / 3, 114 | step=[16, 22]) 115 | checkpoint_config = dict(interval=1) 116 | # yapf:disable 117 | log_config = dict( 118 | interval=50, 119 | hooks=[ 120 | dict(type='TextLoggerHook'), 121 | # dict(type='TensorboardLoggerHook') 122 | ]) 123 | # yapf:enable 124 | # runtime settings 125 | total_epochs = 24 126 | dist_params = dict(backend='nccl') 127 | log_level = 'INFO' 128 | work_dir = './work_dirs/ssd300_coco' 129 | load_from = None 130 | resume_from = None 131 | workflow = [('train', 1)] 132 | --------------------------------------------------------------------------------
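As a closing pointer, the configs above are consumed by `tools/train.py` (see the repository tree). A minimal sketch of that flow follows; `build_detector` and `get_dataset` are assumed entry points (only `train_detector` appears verbatim in `mmdet/apis/train.py` above), so check the repo's `mmdet/models/builder.py` and `mmdet/datasets` for the exact names:

```python
import mmcv
from mmdet.apis import train_detector      # defined in mmdet/apis/train.py
from mmdet.models import build_detector    # assumed export of mmdet/models/builder.py
from mmdet.datasets import get_dataset     # assumed dataset-construction helper

cfg = mmcv.Config.fromfile('configs/ssd300_coco.py')
cfg.gpus = 1  # tools/train.py normally sets this from the command line

model = build_detector(cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
dataset = get_dataset(cfg.data.train)
train_detector(model, dataset, cfg, distributed=False, validate=False)
```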