├── mmdet
    ├── VERSION
    ├── models
    │   ├── utils
    │   │   └── __init__.py
    │   ├── roi_heads
    │   │   ├── shared_heads
    │   │   │   ├── __init__.py
    │   │   │   └── res_layer.py
    │   │   ├── roi_extractors
    │   │   │   ├── __init__.py
    │   │   │   ├── relative_roi_extractor.py
    │   │   │   ├── base_roi_extractor.py
    │   │   │   ├── single_level_roi_extractor.py
    │   │   │   ├── generic_roi_extractor.py
    │   │   │   └── sg_single_level_roi_extractor.py
    │   │   ├── mask_heads
    │   │   │   ├── __init__.py
    │   │   │   ├── dsc_mask_head.py
    │   │   │   └── coarse_mask_head.py
    │   │   ├── bbox_heads
    │   │   │   └── __init__.py
    │   │   ├── __init__.py
    │   │   └── base_roi_head.py
    │   ├── dense_heads
    │   │   ├── __init__.py
    │   │   ├── base_dense_head.py
    │   │   └── rpn_test_mixin.py
    │   ├── detectors
    │   │   ├── __init__.py
    │   │   └── dsc.py
    │   ├── necks
    │   │   └── __init__.py
    │   ├── backbones
    │   │   └── __init__.py
    │   ├── __init__.py
    │   ├── losses
    │   │   ├── __init__.py
    │   │   ├── mse_loss.py
    │   │   ├── accuracy.py
    │   │   ├── utils.py
    │   │   └── gaussian_focal_loss.py
    │   └── builder.py
    ├── ops
    │   ├── corner_pool
    │   │   ├── __init__.py
    │   │   └── corner_pool.py
    │   ├── roi_pool
    │   │   ├── __init__.py
    │   │   ├── gradcheck.py
    │   │   └── roi_pool.py
    │   ├── roi_align
    │   │   ├── __init__.py
    │   │   └── gradcheck.py
    │   ├── masked_conv
    │   │   ├── __init__.py
    │   │   ├── src
    │   │   │   ├── masked_conv2d_ext.cpp
    │   │   │   └── cuda
    │   │   │   │   └── masked_conv2d_cuda.cpp
    │   │   └── masked_conv.py
    │   ├── nms
    │   │   ├── __init__.py
    │   │   └── src
    │   │   │   ├── cuda
    │   │   │       └── nms_cuda.cpp
    │   │   │   └── nms_ext.cpp
    │   ├── sigmoid_focal_loss
    │   │   ├── __init__.py
    │   │   ├── sigmoid_focal_loss.py
    │   │   └── src
    │   │   │   └── sigmoid_focal_loss_ext.cpp
    │   ├── carafe
    │   │   ├── __init__.py
    │   │   ├── setup.py
    │   │   ├── src
    │   │   │   ├── carafe_naive_ext.cpp
    │   │   │   ├── carafe_ext.cpp
    │   │   │   └── cuda
    │   │   │   │   └── carafe_naive_cuda.cpp
    │   │   └── grad_check.py
    │   ├── utils
    │   │   ├── __init__.py
    │   │   └── src
    │   │   │   └── compiling_info.cpp
    │   ├── dcn
    │   │   ├── __init__.py
    │   │   └── src
    │   │   │   └── deform_pool_ext.cpp
    │   ├── plugin.py
    │   ├── conv_ws.py
    │   └── __init__.py
    ├── __init__.py
    ├── core
    │   ├── fp16
    │   │   ├── __init__.py
    │   │   └── utils.py
    │   ├── bbox
    │   │   ├── iou_calculators
    │   │   │   ├── __init__.py
    │   │   │   └── builder.py
    │   │   ├── assigners
    │   │   │   ├── base_assigner.py
    │   │   │   └── __init__.py
    │   │   ├── coder
    │   │   │   ├── __init__.py
    │   │   │   ├── base_bbox_coder.py
    │   │   │   └── pseudo_bbox_coder.py
    │   │   ├── builder.py
    │   │   ├── samplers
    │   │   │   ├── __init__.py
    │   │   │   ├── combined_sampler.py
    │   │   │   ├── instance_balanced_pos_sampler.py
    │   │   │   ├── pseudo_sampler.py
    │   │   │   └── random_sampler.py
    │   │   ├── __init__.py
    │   │   └── demodata.py
    │   ├── utils
    │   │   ├── __init__.py
    │   │   ├── dist_utils.py
    │   │   └── misc.py
    │   ├── anchor
    │   │   ├── builder.py
    │   │   ├── __init__.py
    │   │   ├── point_generator.py
    │   │   └── utils.py
    │   ├── mask
    │   │   ├── __init__.py
    │   │   ├── utils.py
    │   │   └── mask_target.py
    │   ├── __init__.py
    │   ├── post_processing
    │   │   ├── __init__.py
    │   │   └── bbox_nms.py
    │   └── evaluation
    │   │   ├── __init__.py
    │   │   ├── bbox_overlaps.py
    │   │   └── eval_hooks.py
    ├── datasets
    │   ├── samplers
    │   │   ├── __init__.py
    │   │   └── distributed_sampler.py
    │   ├── deepfashion.py
    │   ├── pipelines
    │   │   ├── __init__.py
    │   │   ├── compose.py
    │   │   └── test_time_aug.py
    │   ├── __init__.py
    │   ├── wider_face.py
    │   └── voc.py
    ├── utils
    │   ├── __init__.py
    │   ├── logger.py
    │   ├── profiling.py
    │   └── collect_env.py
    └── apis
    │   └── __init__.py
├── requirements
    ├── readthedocs.txt
    ├── docs.txt
    ├── optional.txt
    ├── build.txt
    ├── runtime.txt
    └── tests.txt
├── img
    └── DSC_top.png
├── requirements.txt
├── configs
    ├── _base_
    │   ├── schedules
    │   │   ├── schedule_20e.py
    │   │   ├── schedule_2x.py
    │   │   └── schedule_1x.py
    │   ├── default_runtime.py
    │   ├── datasets
    │   │   ├── coco_instance.py
    │   │   ├── cityscapes_instance.py
    │   │   ├── lvis_instance.py
    │   │   ├── coco_instance_semantic.py
    │   │   ├── coco_detection.py
    │   │   ├── deepfashion.py
    │   │   ├── voc0712.py
    │   │   ├── cityscapes_detection.py
    │   │   └── wider_face.py
    │   └── models
    │   │   ├── ssd300.py
    │   │   ├── rpn_r50_caffe_c4.py
    │   │   ├── retinanet_r50_fpn.py
    │   │   ├── rpn_r50_fpn.py
    │   │   └── fast_rcnn_r50_fpn.py
    └── dsc
    │   ├── dsc_r101_fpn_20e_coco.py
    │   ├── dsc_r50_fpn_20e_coco.py
    │   ├── fast_dsc_r50_fpn_20e_coco.py
    │   ├── dsc_x101_32x4d_fpn_20e_coco.py
    │   └── dsc_x101_64x4d_fpn_dconv_c3-c5_mstrain_400_1400_16x1_20e_coco.py
├── tools
    ├── dist_train.sh
    ├── dist_test.sh
    ├── slurm_test.sh
    ├── slurm_train.sh
    ├── print_config.py
    ├── publish_model.py
    ├── get_flops.py
    ├── browse_dataset.py
    ├── fuse_conv_bn.py
    ├── benchmark.py
    └── regnet2mmdet.py
├── LICENSE
├── demo
    └── image_demo.py
├── .gitignore
└── conda_list.txt


/mmdet/VERSION:
--------------------------------------------------------------------------------
1 | 2.1.0
2 | 


--------------------------------------------------------------------------------
/requirements/readthedocs.txt:
--------------------------------------------------------------------------------
1 | mmcv
2 | torch
3 | torchvision
4 | 


--------------------------------------------------------------------------------
/img/DSC_top.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hding2455/DSC/HEAD/img/DSC_top.png


--------------------------------------------------------------------------------
/mmdet/models/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .res_layer import ResLayer
2 | 
3 | __all__ = ['ResLayer']
4 | 


--------------------------------------------------------------------------------
/requirements/docs.txt:
--------------------------------------------------------------------------------
1 | recommonmark
2 | sphinx
3 | sphinx_markdown_tables
4 | sphinx_rtd_theme
5 | 


--------------------------------------------------------------------------------
/requirements/optional.txt:
--------------------------------------------------------------------------------
1 | albumentations>=0.3.2
2 | cityscapesscripts
3 | imagecorruptions
4 | 


--------------------------------------------------------------------------------
/mmdet/ops/corner_pool/__init__.py:
--------------------------------------------------------------------------------
1 | from .corner_pool import CornerPool
2 | 
3 | __all__ = ['CornerPool']
4 | 


--------------------------------------------------------------------------------
/requirements/build.txt:
--------------------------------------------------------------------------------
1 | # These must be installed before building mmdetection
2 | numpy
3 | torch>=1.3
4 | 


--------------------------------------------------------------------------------
/mmdet/models/roi_heads/shared_heads/__init__.py:
--------------------------------------------------------------------------------
1 | from .res_layer import ResLayer
2 | 
3 | __all__ = ['ResLayer']
4 | 


--------------------------------------------------------------------------------
/mmdet/ops/roi_pool/__init__.py:
--------------------------------------------------------------------------------
1 | from .roi_pool import RoIPool, roi_pool
2 | 
3 | __all__ = ['roi_pool', 'RoIPool']
4 | 


--------------------------------------------------------------------------------
/mmdet/__init__.py:
--------------------------------------------------------------------------------
1 | from .version import __version__, short_version
2 | 
3 | __all__ = ['__version__', 'short_version']
4 | 


--------------------------------------------------------------------------------
/mmdet/ops/roi_align/__init__.py:
--------------------------------------------------------------------------------
1 | from .roi_align import RoIAlign, roi_align
2 | 
3 | __all__ = ['roi_align', 'RoIAlign']
4 | 


--------------------------------------------------------------------------------
/mmdet/ops/masked_conv/__init__.py:
--------------------------------------------------------------------------------
1 | from .masked_conv import MaskedConv2d, masked_conv2d
2 | 
3 | __all__ = ['masked_conv2d', 'MaskedConv2d']
4 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | -r requirements/build.txt
2 | -r requirements/optional.txt
3 | -r requirements/runtime.txt
4 | -r requirements/tests.txt
5 | 


--------------------------------------------------------------------------------
/configs/_base_/schedules/schedule_20e.py:
--------------------------------------------------------------------------------
# 20-epoch variant of the base schedule: everything not set here is
# inherited from ./schedule_1x.py through the config loader's `_base_` key.
_base_ = './schedule_1x.py'
# learning policy: step milestones 16 and 19, training ends at 20
lr_config = dict(step=[16, 19])
total_epochs = 20
5 | 


--------------------------------------------------------------------------------
/configs/_base_/schedules/schedule_2x.py:
--------------------------------------------------------------------------------
# 24-epoch variant of the base schedule: everything not set here is
# inherited from ./schedule_1x.py through the config loader's `_base_` key.
_base_ = './schedule_1x.py'
# learning policy: step milestones 16 and 22, training ends at 24
lr_config = dict(step=[16, 22])
total_epochs = 24
5 | 


--------------------------------------------------------------------------------
/mmdet/ops/nms/__init__.py:
--------------------------------------------------------------------------------
1 | from .nms_wrapper import batched_nms, nms, nms_match, soft_nms
2 | 
3 | __all__ = ['nms', 'soft_nms', 'batched_nms', 'nms_match']
4 | 


--------------------------------------------------------------------------------
/mmdet/models/dense_heads/__init__.py:
--------------------------------------------------------------------------------
1 | from .anchor_head import AnchorHead
2 | from .rpn_head import RPNHead
3 | 
4 | __all__ = [
5 |     'AnchorHead', 'RPNHead',
6 | ]
7 | 


--------------------------------------------------------------------------------
/mmdet/ops/sigmoid_focal_loss/__init__.py:
--------------------------------------------------------------------------------
1 | from .sigmoid_focal_loss import SigmoidFocalLoss, sigmoid_focal_loss
2 | 
3 | __all__ = ['SigmoidFocalLoss', 'sigmoid_focal_loss']
4 | 


--------------------------------------------------------------------------------
/mmdet/ops/carafe/__init__.py:
--------------------------------------------------------------------------------
1 | from .carafe import CARAFE, CARAFENaive, CARAFEPack, carafe, carafe_naive
2 | 
3 | __all__ = ['carafe', 'carafe_naive', 'CARAFE', 'CARAFENaive', 'CARAFEPack']
4 | 


--------------------------------------------------------------------------------
/requirements/runtime.txt:
--------------------------------------------------------------------------------
 1 | matplotlib
 2 | mmcv>=0.6.0
 3 | numpy
 4 | # need older pillow until torchvision is fixed
 5 | Pillow<=6.2.2
 6 | six
 7 | terminaltables
 8 | torch>=1.3
 9 | torchvision
10 | 


--------------------------------------------------------------------------------
/mmdet/models/detectors/__init__.py:
--------------------------------------------------------------------------------
1 | from .dsc import DSC
2 | from .base import BaseDetector
3 | from .two_stage import TwoStageDetector
4 | 
5 | __all__ = [
6 |     'DSC','BaseDetector','TwoStageDetector'
7 | ]
8 | 


--------------------------------------------------------------------------------
/mmdet/core/fp16/__init__.py:
--------------------------------------------------------------------------------
1 | from .decorators import auto_fp16, force_fp32
2 | from .hooks import Fp16OptimizerHook, wrap_fp16_model
3 | 
4 | __all__ = ['auto_fp16', 'force_fp32', 'Fp16OptimizerHook', 'wrap_fp16_model']
5 | 


--------------------------------------------------------------------------------
/mmdet/core/bbox/iou_calculators/__init__.py:
--------------------------------------------------------------------------------
1 | from .builder import build_iou_calculator
2 | from .iou2d_calculator import BboxOverlaps2D, bbox_overlaps
3 | 
4 | __all__ = ['build_iou_calculator', 'BboxOverlaps2D', 'bbox_overlaps']
5 | 


--------------------------------------------------------------------------------
/configs/dsc/dsc_r101_fpn_20e_coco.py:
--------------------------------------------------------------------------------
# DSC with a depth-101 backbone, 20-epoch schedule.
# NOTE(review): the base file below is not visible in this listing's
# directory tree -- confirm it exists next to this config.
_base_ = './dsc_r50_fpn_1x_coco.py'
# Swap the pretrained weights and backbone depth to the 101-layer variant.
model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101))
# learning policy
lr_config = dict(step=[16, 19])
total_epochs = 20
6 | 


--------------------------------------------------------------------------------
/configs/dsc/dsc_r50_fpn_20e_coco.py:
--------------------------------------------------------------------------------
# DSC with the base file's model, 20-epoch schedule.
# NOTE(review): the base file below is not visible in this listing's
# directory tree -- confirm it exists next to this config.
_base_ = './dsc_r50_fpn_1x_coco.py'
# Disabled backbone override (would switch to the depth-101 variant):
#model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101))
# learning policy
lr_config = dict(step=[16, 19])
total_epochs = 20
6 | 


--------------------------------------------------------------------------------
/mmdet/datasets/samplers/__init__.py:
--------------------------------------------------------------------------------
1 | from .distributed_sampler import DistributedSampler
2 | from .group_sampler import DistributedGroupSampler, GroupSampler
3 | 
4 | __all__ = ['DistributedSampler', 'DistributedGroupSampler', 'GroupSampler']
5 | 


--------------------------------------------------------------------------------
/mmdet/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .collect_env import collect_env
2 | from .flops_counter import get_model_complexity_info
3 | from .logger import get_root_logger
4 | 
5 | __all__ = ['get_model_complexity_info', 'get_root_logger', 'collect_env']
6 | 


--------------------------------------------------------------------------------
/configs/dsc/fast_dsc_r50_fpn_20e_coco.py:
--------------------------------------------------------------------------------
# Fast DSC variant, 20-epoch schedule.
# NOTE(review): the base file name below does not follow the `dsc_*` naming
# of the sibling configs and is not visible in this listing's directory
# tree -- confirm the path is correct.
_base_ = './small_fast_htc_dsg_r50_fpn_1x_coco.py'
# Disabled backbone override (would switch to the depth-101 variant):
#model = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101))
# learning policy
lr_config = dict(step=[16, 19])
total_epochs = 20
6 | 


--------------------------------------------------------------------------------
/mmdet/core/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .dist_utils import DistOptimizerHook, allreduce_grads
2 | from .misc import multi_apply, tensor2imgs, unmap
3 | 
4 | __all__ = [
5 |     'allreduce_grads', 'DistOptimizerHook', 'tensor2imgs', 'multi_apply',
6 |     'unmap'
7 | ]
8 | 


--------------------------------------------------------------------------------
/requirements/tests.txt:
--------------------------------------------------------------------------------
 1 | asynctest
 2 | codecov
 3 | flake8
 4 | isort
 5 | # Note: used for kwarray.group_items, this may be ported to mmcv in the future.
 6 | kwarray
 7 | pytest
 8 | pytest-cov
 9 | pytest-runner
10 | ubelt
11 | xdoctest >= 0.10.0
12 | yapf
13 | 


--------------------------------------------------------------------------------
/mmdet/core/anchor/builder.py:
--------------------------------------------------------------------------------
1 | from mmcv.utils import Registry, build_from_cfg
2 | 
3 | ANCHOR_GENERATORS = Registry('Anchor generator')
4 | 
5 | 
def build_anchor_generator(cfg, default_args=None):
    """Builder of anchor generator.

    ``cfg`` and ``default_args`` are forwarded unchanged to
    ``build_from_cfg`` together with the ``ANCHOR_GENERATORS`` registry.
    """
    generator = build_from_cfg(cfg, ANCHOR_GENERATORS, default_args)
    return generator
8 | 


--------------------------------------------------------------------------------
/tools/dist_train.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | CONFIG=$1
 4 | GPUS=$2
 5 | PORT=${PORT:-29500}
 6 | 
 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
 8 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \
 9 |     $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3}
10 | 


--------------------------------------------------------------------------------
/mmdet/core/bbox/iou_calculators/builder.py:
--------------------------------------------------------------------------------
1 | from mmcv.utils import Registry, build_from_cfg
2 | 
3 | IOU_CALCULATORS = Registry('IoU calculator')
4 | 
5 | 
def build_iou_calculator(cfg, default_args=None):
    """Builder of IoU calculator.

    ``cfg`` and ``default_args`` are forwarded unchanged to
    ``build_from_cfg`` together with the ``IOU_CALCULATORS`` registry.
    """
    calculator = build_from_cfg(cfg, IOU_CALCULATORS, default_args)
    return calculator
9 | 


--------------------------------------------------------------------------------
/mmdet/core/mask/__init__.py:
--------------------------------------------------------------------------------
1 | from .mask_target import mask_target
2 | from .structures import BitmapMasks, PolygonMasks
3 | from .utils import encode_mask_results, split_combined_polys
4 | 
5 | __all__ = [
6 |     'split_combined_polys', 'mask_target', 'BitmapMasks', 'PolygonMasks',
7 |     'encode_mask_results'
8 | ]
9 | 


--------------------------------------------------------------------------------
/tools/dist_test.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | CONFIG=$1
 4 | CHECKPOINT=$2
 5 | GPUS=$3
 6 | PORT=${PORT:-29500}
 7 | 
 8 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
 9 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \
10 |     $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4}
11 | 


--------------------------------------------------------------------------------
/mmdet/core/__init__.py:
--------------------------------------------------------------------------------
1 | from .anchor import *  # noqa: F401, F403
2 | from .bbox import *  # noqa: F401, F403
3 | from .evaluation import *  # noqa: F401, F403
4 | from .fp16 import *  # noqa: F401, F403
5 | from .mask import *  # noqa: F401, F403
6 | from .post_processing import *  # noqa: F401, F403
7 | from .utils import *  # noqa: F401, F403
8 | 


--------------------------------------------------------------------------------
/mmdet/core/post_processing/__init__.py:
--------------------------------------------------------------------------------
1 | from .bbox_nms import multiclass_nms
2 | from .merge_augs import (merge_aug_bboxes, merge_aug_masks,
3 |                          merge_aug_proposals, merge_aug_scores)
4 | 
5 | __all__ = [
6 |     'multiclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes',
7 |     'merge_aug_scores', 'merge_aug_masks'
8 | ]
9 | 


--------------------------------------------------------------------------------
/mmdet/models/necks/__init__.py:
--------------------------------------------------------------------------------
 1 | from .bfp import BFP
 2 | from .fpn import FPN
 3 | from .fpn_carafe import FPN_CARAFE
 4 | from .hrfpn import HRFPN
 5 | from .nas_fpn import NASFPN
 6 | from .nasfcos_fpn import NASFCOS_FPN
 7 | from .pafpn import PAFPN
 8 | 
 9 | __all__ = [
10 |     'FPN', 'BFP', 'HRFPN', 'NASFPN', 'FPN_CARAFE', 'PAFPN', 'NASFCOS_FPN'
11 | ]
12 | 


--------------------------------------------------------------------------------
/mmdet/ops/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # from . import compiling_info
2 | from .compiling_info import get_compiler_version, get_compiling_cuda_version
3 | 
4 | # get_compiler_version = compiling_info.get_compiler_version
5 | # get_compiling_cuda_version = compiling_info.get_compiling_cuda_version
6 | 
7 | __all__ = ['get_compiler_version', 'get_compiling_cuda_version']
8 | 


--------------------------------------------------------------------------------
/mmdet/datasets/deepfashion.py:
--------------------------------------------------------------------------------
 1 | from .builder import DATASETS
 2 | from .coco import CocoDataset
 3 | 
 4 | 
 5 | @DATASETS.register_module()
 6 | class DeepFashionDataset(CocoDataset):
 7 | 
 8 |     CLASSES = ('top', 'skirt', 'leggings', 'dress', 'outer', 'pants', 'bag',
 9 |                'neckwear', 'headwear', 'eyeglass', 'belt', 'footwear', 'hair',
10 |                'skin', 'face')
11 | 


--------------------------------------------------------------------------------
/configs/_base_/default_runtime.py:
--------------------------------------------------------------------------------
# Runtime settings shared by the configs that inherit from this file.

# Checkpoint hook settings (interval of 1; presumably counted in epochs --
# confirm against the runner in use).
checkpoint_config = dict(interval=1)
# yapf:disable
# Logging hook settings: only the text logger is active; the TensorBoard
# hook is left commented out.
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# Distributed backend selection.
dist_params = dict(backend='nccl')
log_level = 'INFO'
# No checkpoint is loaded or resumed by default.
load_from = None
resume_from = None
# Single 'train' phase entry.
workflow = [('train', 1)]
15 | 


--------------------------------------------------------------------------------
/mmdet/models/backbones/__init__.py:
--------------------------------------------------------------------------------
 1 | from .hourglass import HourglassNet
 2 | from .hrnet import HRNet
 3 | from .regnet import RegNet
 4 | from .res2net import Res2Net
 5 | from .resnet import ResNet, ResNetV1d
 6 | from .resnext import ResNeXt
 7 | from .ssd_vgg import SSDVGG
 8 | 
 9 | __all__ = [
10 |     'RegNet', 'ResNet', 'ResNetV1d', 'ResNeXt', 'SSDVGG', 'HRNet', 'Res2Net',
11 |     'HourglassNet'
12 | ]
13 | 


--------------------------------------------------------------------------------
/mmdet/core/bbox/assigners/base_assigner.py:
--------------------------------------------------------------------------------
 1 | from abc import ABCMeta, abstractmethod
 2 | 
 3 | 
 4 | class BaseAssigner(metaclass=ABCMeta):
 5 |     """Base assigner that assigns boxes to ground truth boxes"""
 6 | 
 7 |     @abstractmethod
 8 |     def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None):
 9 |         """Assign boxes to either a ground truth boxe or a negative boxes"""
10 |         pass
11 | 


--------------------------------------------------------------------------------
/configs/_base_/schedules/schedule_1x.py:
--------------------------------------------------------------------------------
 1 | # optimizer
# Base 12-epoch schedule; the 20e/2x schedules inherit from this file and
# only replace `lr_config.step` and `total_epochs`.

# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
# Alternative kept for reference: no gradient clipping.
#optimizer_config = dict(grad_clip=None)
# Active setting: clip gradients with max_norm=35 using the 2-norm.
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy: step policy with a 500-iteration linear warmup
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[8, 11])
total_epochs = 12
13 | 


--------------------------------------------------------------------------------
/mmdet/models/roi_heads/roi_extractors/__init__.py:
--------------------------------------------------------------------------------
 1 | from .generic_roi_extractor import GenericRoIExtractor
 2 | from .single_level_roi_extractor import SingleRoIExtractor
 3 | from .sg_single_level_roi_extractor import SgSingleRoIExtractor
 4 | from .relative_roi_extractor import RelativeRoIExtractor
 5 | __all__ = [
 6 |     'RelativeRoIExtractor',
 7 |     'SingleRoIExtractor',
 8 |     'GenericRoIExtractor',
 9 |     'SgSingleRoIExtractor',
10 | ]
11 | 


--------------------------------------------------------------------------------
/mmdet/core/bbox/coder/__init__.py:
--------------------------------------------------------------------------------
 1 | from .base_bbox_coder import BaseBBoxCoder
 2 | from .delta_xywh_bbox_coder import DeltaXYWHBBoxCoder
 3 | from .legacy_delta_xywh_bbox_coder import LegacyDeltaXYWHBBoxCoder
 4 | from .pseudo_bbox_coder import PseudoBBoxCoder
 5 | from .tblr_bbox_coder import TBLRBBoxCoder
 6 | 
 7 | __all__ = [
 8 |     'BaseBBoxCoder', 'PseudoBBoxCoder', 'DeltaXYWHBBoxCoder',
 9 |     'LegacyDeltaXYWHBBoxCoder', 'TBLRBBoxCoder'
10 | ]
11 | 


--------------------------------------------------------------------------------
/mmdet/core/anchor/__init__.py:
--------------------------------------------------------------------------------
 1 | from .anchor_generator import AnchorGenerator, LegacyAnchorGenerator
 2 | from .builder import ANCHOR_GENERATORS, build_anchor_generator
 3 | from .point_generator import PointGenerator
 4 | from .utils import anchor_inside_flags, calc_region, images_to_levels
 5 | 
 6 | __all__ = [
 7 |     'AnchorGenerator', 'LegacyAnchorGenerator', 'anchor_inside_flags',
 8 |     'PointGenerator', 'images_to_levels', 'calc_region',
 9 |     'build_anchor_generator', 'ANCHOR_GENERATORS'
10 | ]
11 | 


--------------------------------------------------------------------------------
/mmdet/apis/__init__.py:
--------------------------------------------------------------------------------
 1 | from .inference import (async_inference_detector, inference_detector,
 2 |                         init_detector, show_result_pyplot)
 3 | from .test import multi_gpu_test, single_gpu_test
 4 | from .train import get_root_logger, set_random_seed, train_detector
 5 | 
 6 | __all__ = [
 7 |     'get_root_logger', 'set_random_seed', 'train_detector', 'init_detector',
 8 |     'async_inference_detector', 'inference_detector', 'show_result_pyplot',
 9 |     'multi_gpu_test', 'single_gpu_test'
10 | ]
11 | 


--------------------------------------------------------------------------------
/mmdet/models/roi_heads/mask_heads/__init__.py:
--------------------------------------------------------------------------------
 1 | from .coarse_mask_head import CoarseMaskHead
 2 | from .fcn_mask_head import FCNMaskHead
 3 | from .fused_semantic_head import FusedSemanticHead
 4 | from .grid_head import GridHead
 5 | from .dsc_mask_head import DSCMaskHead
 6 | from .mask_point_head import MaskPointHead
 7 | from .maskiou_head import MaskIoUHead
 8 | 
 9 | __all__ = [
10 |     'FCNMaskHead', 'DSCMaskHead', 'FusedSemanticHead', 'GridHead',
11 |     'MaskIoUHead', 'CoarseMaskHead', 'MaskPointHead'
12 | ]
13 | 


--------------------------------------------------------------------------------
/mmdet/core/bbox/assigners/__init__.py:
--------------------------------------------------------------------------------
 1 | from .approx_max_iou_assigner import ApproxMaxIoUAssigner
 2 | from .assign_result import AssignResult
 3 | from .atss_assigner import ATSSAssigner
 4 | from .base_assigner import BaseAssigner
 5 | from .center_region_assigner import CenterRegionAssigner
 6 | from .max_iou_assigner import MaxIoUAssigner
 7 | from .point_assigner import PointAssigner
 8 | 
 9 | __all__ = [
10 |     'BaseAssigner', 'MaxIoUAssigner', 'ApproxMaxIoUAssigner', 'AssignResult',
11 |     'PointAssigner', 'ATSSAssigner', 'CenterRegionAssigner'
12 | ]
13 | 


--------------------------------------------------------------------------------
/configs/dsc/dsc_x101_32x4d_fpn_20e_coco.py:
--------------------------------------------------------------------------------
# DSC with a ResNeXt-101 (32 groups, base width 4) backbone, 20-epoch
# schedule.
# NOTE(review): the base file below is not visible in this listing's
# directory tree -- confirm it exists next to this config.
_base_ = './dsc_r50_fpn_1x_coco.py'
# Replace the pretrained weights and the backbone settings of the base model.
model = dict(
    pretrained='open-mmlab://resnext101_32x4d',
    backbone=dict(
        type='ResNeXt',
        depth=101,
        groups=32,
        base_width=4,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=True,
        style='pytorch'))
# learning policy
lr_config = dict(step=[16, 19])
total_epochs = 20
18 | 


--------------------------------------------------------------------------------
/mmdet/utils/logger.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | from mmcv.utils import get_logger
 4 | 
 5 | 
 6 | def get_root_logger(log_file=None, log_level=logging.INFO):
 7 |     """Get root logger
 8 | 
 9 |     Args:
10 |         log_file (str, optional): File path of log. Defaults to None.
11 |         log_level (int, optional): The level of logger.
12 |             Defaults to logging.INFO.
13 | 
14 |     Returns:
15 |         :obj:`logging.Logger`: The obtained logger
16 |     """
17 |     logger = get_logger(name='mmdet', log_file=log_file, log_level=log_level)
18 | 
19 |     return logger
20 | 


--------------------------------------------------------------------------------
/mmdet/models/roi_heads/bbox_heads/__init__.py:
--------------------------------------------------------------------------------
 1 | from .bbox_head import BBoxHead
 2 | from .convfc_bbox_head import (ConvFCBBoxHead, Shared2FCBBoxHead,
 3 |                                Shared4Conv1FCBBoxHead)
 4 | from .double_bbox_head import DoubleConvFCBBoxHead
 5 | from .dsc_bbox_head import (DSCBBoxHead, Shared2FCDSCBBoxHead, Shared4Conv1FCDSCBBoxHead)
 6 | 
 7 | __all__ = [
 8 |     'BBoxHead', 
 9 |     'ConvFCBBoxHead', 'Shared2FCBBoxHead','Shared4Conv1FCBBoxHead', 
10 |     'DSCBBoxHead', 'Shared2FCDSCBBoxHead', 'Shared4Conv1FCDSCBBoxHead',
11 |     'DoubleConvFCBBoxHead',
12 | ]
13 | 


--------------------------------------------------------------------------------
/mmdet/ops/nms/src/cuda/nms_cuda.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | #include <torch/extension.h>
 3 | 
 4 | #define CHECK_CUDA(x) TORCH_CHECK(x.device().is_cuda(), #x, " must be a CUDAtensor ")
 5 | 
 6 | at::Tensor nms_cuda_forward(const at::Tensor boxes, float nms_overlap_thresh);
 7 | 
 8 | at::Tensor nms_cuda(const at::Tensor& dets, const float threshold) {
 9 |   CHECK_CUDA(dets);
10 |   if (dets.numel() == 0)
11 |     return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
12 |   return nms_cuda_forward(dets, threshold);
13 | }
14 | 


--------------------------------------------------------------------------------
/mmdet/core/bbox/coder/base_bbox_coder.py:
--------------------------------------------------------------------------------
 1 | from abc import ABCMeta, abstractmethod
 2 | 
 3 | 
 4 | class BaseBBoxCoder(metaclass=ABCMeta):
 5 |     """Base bounding box coder"""
 6 | 
 7 |     def __init__(self, **kwargs):
 8 |         pass
 9 | 
10 |     @abstractmethod
11 |     def encode(self, bboxes, gt_bboxes):
12 |         """Encode deltas between bboxes and ground truth boxes"""
13 |         pass
14 | 
15 |     @abstractmethod
16 |     def decode(self, bboxes, bboxes_pred):
17 |         """
18 |         Decode the predicted bboxes according to prediction and base boxes
19 |         """
20 |         pass
21 | 


--------------------------------------------------------------------------------
/mmdet/ops/roi_pool/gradcheck.py:
--------------------------------------------------------------------------------
 1 | import os.path as osp
 2 | import sys
 3 | 
 4 | import torch
 5 | from torch.autograd import gradcheck
 6 | 
 7 | sys.path.append(osp.abspath(osp.join(__file__, '../../')))
 8 | from roi_pool import RoIPool  # noqa: E402, isort:skip
 9 | 
10 | # Random feature map created with requires_grad=True and moved to the GPU.
11 | feat = torch.randn(4, 16, 15, 15, requires_grad=True).cuda()
12 | # Three RoIs with five values each; presumably (batch_idx, x1, y1, x2, y2) --
13 | # TODO confirm against the RoIPool implementation.
14 | rois = torch.Tensor([[0, 0, 0, 50, 50], [0, 10, 30, 43, 55],
15 |                      [1, 67, 40, 110, 120]]).cuda()
16 | inputs = (feat, rois)
17 | print('Gradcheck for roi pooling...')
18 | # Run torch.autograd.gradcheck on RoIPool(4, 1.0 / 8) with the inputs above.
19 | test = gradcheck(RoIPool(4, 1.0 / 8), inputs, eps=1e-5, atol=1e-3)
20 | print(test)
21 | 


--------------------------------------------------------------------------------
/mmdet/core/bbox/coder/pseudo_bbox_coder.py:
--------------------------------------------------------------------------------
 1 | from ..builder import BBOX_CODERS
 2 | from .base_bbox_coder import BaseBBoxCoder
 3 | 
 4 | 
 5 | @BBOX_CODERS.register_module()
 6 | class PseudoBBoxCoder(BaseBBoxCoder):
 7 |     """Pseudo bounding box coder"""
 8 | 
 9 |     def __init__(self, **kwargs):
10 |         super(BaseBBoxCoder, self).__init__(**kwargs)
11 | 
12 |     def encode(self, bboxes, gt_bboxes):
13 |         """torch.Tensor: return the given ``bboxes``"""
14 |         return gt_bboxes
15 | 
16 |     def decode(self, bboxes, pred_bboxes):
17 |         """torch.Tensor: return the given ``pred_bboxes``"""
18 |         return pred_bboxes
19 | 


--------------------------------------------------------------------------------
/mmdet/ops/dcn/__init__.py:
--------------------------------------------------------------------------------
 1 | from .deform_conv import (DeformConv, DeformConvPack, ModulatedDeformConv,
 2 |                           ModulatedDeformConvPack, deform_conv,
 3 |                           modulated_deform_conv)
 4 | from .deform_pool import (DeformRoIPooling, DeformRoIPoolingPack,
 5 |                           ModulatedDeformRoIPoolingPack, deform_roi_pooling)
 6 | 
 7 | __all__ = [
 8 |     'DeformConv', 'DeformConvPack', 'ModulatedDeformConv',
 9 |     'ModulatedDeformConvPack', 'DeformRoIPooling', 'DeformRoIPoolingPack',
10 |     'ModulatedDeformRoIPoolingPack', 'deform_conv', 'modulated_deform_conv',
11 |     'deform_roi_pooling'
12 | ]
13 | 


--------------------------------------------------------------------------------
/mmdet/core/bbox/builder.py:
--------------------------------------------------------------------------------
 1 | from mmcv.utils import Registry, build_from_cfg
 2 | 
 3 | BBOX_ASSIGNERS = Registry('bbox_assigner')
 4 | BBOX_SAMPLERS = Registry('bbox_sampler')
 5 | BBOX_CODERS = Registry('bbox_coder')
 6 | 
 7 | 
 8 | def build_assigner(cfg, **default_args):
 9 |     """Build a box assigner from ``cfg`` via the BBOX_ASSIGNERS registry"""
10 |     return build_from_cfg(cfg, BBOX_ASSIGNERS, default_args)
11 | 
12 | 
13 | def build_sampler(cfg, **default_args):
14 |     """Build a box sampler from ``cfg`` via the BBOX_SAMPLERS registry"""
15 |     return build_from_cfg(cfg, BBOX_SAMPLERS, default_args)
16 | 
17 | 
18 | def build_bbox_coder(cfg, **default_args):
19 |     """Build a box coder from ``cfg`` via the BBOX_CODERS registry"""
20 |     return build_from_cfg(cfg, BBOX_CODERS, default_args)
21 | 


--------------------------------------------------------------------------------
/tools/slurm_test.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | set -x
 4 | 
 5 | # Positional arguments: partition, job name, config file and checkpoint file;
 6 | # everything from the fifth argument on is passed through to tools/test.py.
 7 | PARTITION=$1
 8 | JOB_NAME=$2
 9 | CONFIG=$3
10 | CHECKPOINT=$4
11 | # Resource settings are taken from the environment, with these defaults.
12 | GPUS=${GPUS:-8}
13 | GPUS_PER_NODE=${GPUS_PER_NODE:-8}
14 | CPUS_PER_TASK=${CPUS_PER_TASK:-5}
15 | PY_ARGS=${@:5}
16 | # Extra options for srun itself (empty unless SRUN_ARGS is set).
17 | SRUN_ARGS=${SRUN_ARGS:-""}
18 | 
19 | # Prepend the parent of this script's directory to PYTHONPATH for the job and
20 | # start ${GPUS} tasks of tools/test.py through srun.
21 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
22 | srun -p ${PARTITION} \
23 |     --job-name=${JOB_NAME} \
24 |     --gres=gpu:${GPUS_PER_NODE} \
25 |     --ntasks=${GPUS} \
26 |     --ntasks-per-node=${GPUS_PER_NODE} \
27 |     --cpus-per-task=${CPUS_PER_TASK} \
28 |     --kill-on-bad-exit=1 \
29 |     ${SRUN_ARGS} \
30 |     python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS}
31 | 


--------------------------------------------------------------------------------
/tools/slurm_train.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | set -x
 4 | 
 5 | # Positional arguments: partition, job name, config file and work directory;
 6 | # everything from the fifth argument on is passed through to tools/train.py.
 7 | PARTITION=$1
 8 | JOB_NAME=$2
 9 | CONFIG=$3
10 | WORK_DIR=$4
11 | # Resource settings are taken from the environment, with these defaults.
12 | GPUS=${GPUS:-8}
13 | GPUS_PER_NODE=${GPUS_PER_NODE:-8}
14 | CPUS_PER_TASK=${CPUS_PER_TASK:-5}
15 | # Extra options for srun itself (empty unless SRUN_ARGS is set).
16 | SRUN_ARGS=${SRUN_ARGS:-""}
17 | PY_ARGS=${@:5}
18 | 
19 | # Prepend the parent of this script's directory to PYTHONPATH for the job and
20 | # start ${GPUS} tasks of tools/train.py through srun.
21 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
22 | srun -p ${PARTITION} \
23 |     --job-name=${JOB_NAME} \
24 |     --gres=gpu:${GPUS_PER_NODE} \
25 |     --ntasks=${GPUS} \
26 |     --ntasks-per-node=${GPUS_PER_NODE} \
27 |     --cpus-per-task=${CPUS_PER_TASK} \
28 |     --kill-on-bad-exit=1 \
29 |     ${SRUN_ARGS} \
30 |     python -u tools/train.py ${CONFIG} --work-dir=${WORK_DIR} --launcher="slurm" ${PY_ARGS}
31 | 


--------------------------------------------------------------------------------
/mmdet/core/bbox/samplers/__init__.py:
--------------------------------------------------------------------------------
 1 | from .base_sampler import BaseSampler
 2 | from .combined_sampler import CombinedSampler
 3 | from .instance_balanced_pos_sampler import InstanceBalancedPosSampler
 4 | from .iou_balanced_neg_sampler import IoUBalancedNegSampler
 5 | from .ohem_sampler import OHEMSampler
 6 | from .pseudo_sampler import PseudoSampler
 7 | from .random_sampler import RandomSampler
 8 | from .sampling_result import SamplingResult
 9 | from .score_hlr_sampler import ScoreHLRSampler
10 | 
11 | __all__ = [
12 |     'BaseSampler', 'PseudoSampler', 'RandomSampler',
13 |     'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler',
14 |     'OHEMSampler', 'SamplingResult', 'ScoreHLRSampler'
15 | ]
16 | 


--------------------------------------------------------------------------------
/tools/print_config.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | 
 3 | from mmcv import Config, DictAction
 4 | 
 5 | 
 6 | def parse_args():
 7 |     parser = argparse.ArgumentParser(description='Print the whole config')
 8 |     parser.add_argument('config', help='config file path')
 9 |     parser.add_argument(
10 |         '--options', nargs='+', action=DictAction, help='arguments in dict')
11 |     args = parser.parse_args()
12 | 
13 |     return args
14 | 
15 | 
16 | def main():
17 |     """Print the config after applying any ``--options`` overrides."""
18 |     args = parse_args()
19 |     cfg = Config.fromfile(args.config)
20 |     if args.options is not None:
21 |         cfg.merge_from_dict(args.options)
22 |     print(f'Config:\n{cfg.pretty_text}')
23 | 
24 | 
25 | if __name__ == '__main__':
26 |     main()
27 | 


--------------------------------------------------------------------------------
/configs/_base_/datasets/coco_instance.py:
--------------------------------------------------------------------------------
 1 | _base_ = 'coco_detection.py'
 2 | img_norm_cfg = dict(
 3 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 4 | train_pipeline = [
 5 |     dict(type='LoadImageFromFile'),
 6 |     dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
 7 |     dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
 8 |     dict(type='RandomFlip', flip_ratio=0.5),
 9 |     dict(type='Normalize', **img_norm_cfg),
10 |     dict(type='Pad', size_divisor=32),
11 |     dict(type='DefaultFormatBundle'),
12 |     dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
13 | ]
14 | data = dict(train=dict(pipeline=train_pipeline))
15 | evaluation = dict(metric=['bbox', 'segm'])
16 | 


--------------------------------------------------------------------------------
/configs/_base_/datasets/cityscapes_instance.py:
--------------------------------------------------------------------------------
 1 | _base_ = './cityscapes_detection.py'
 2 | img_norm_cfg = dict(
 3 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 4 | train_pipeline = [
 5 |     dict(type='LoadImageFromFile'),
 6 |     dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
 7 |     dict(
 8 |         type='Resize', img_scale=[(2048, 800), (2048, 1024)], keep_ratio=True),
 9 |     dict(type='RandomFlip', flip_ratio=0.5),
10 |     dict(type='Normalize', **img_norm_cfg),
11 |     dict(type='Pad', size_divisor=32),
12 |     dict(type='DefaultFormatBundle'),
13 |     dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
14 | ]
15 | data = dict(train=dict(dataset=dict(pipeline=train_pipeline)))
16 | evaluation = dict(metric=['bbox', 'segm'])
17 | 


--------------------------------------------------------------------------------
/mmdet/core/bbox/samplers/combined_sampler.py:
--------------------------------------------------------------------------------
 1 | from ..builder import BBOX_SAMPLERS, build_sampler
 2 | from .base_sampler import BaseSampler
 3 | 
 4 | 
 5 | @BBOX_SAMPLERS.register_module()
 6 | class CombinedSampler(BaseSampler):
 7 |     """A sampler that combines positive sampler and negative sampler.
 8 | 
 9 |     Args:
10 |         pos_sampler: Config passed to ``build_sampler`` to create
11 |             ``self.pos_sampler``.
12 |         neg_sampler: Config passed to ``build_sampler`` to create
13 |             ``self.neg_sampler``.
14 |         **kwargs: Forwarded to ``BaseSampler.__init__`` and to both
15 |             ``build_sampler`` calls.
16 |     """
17 | 
18 |     def __init__(self, pos_sampler, neg_sampler, **kwargs):
19 |         super(CombinedSampler, self).__init__(**kwargs)
20 |         self.pos_sampler = build_sampler(pos_sampler, **kwargs)
21 |         self.neg_sampler = build_sampler(neg_sampler, **kwargs)
22 | 
23 |     def _sample_pos(self, **kwargs):
24 |         """Sample positive samples (not implemented on this class)"""
25 |         raise NotImplementedError
26 | 
27 |     def _sample_neg(self, **kwargs):
28 |         """Sample negative samples (not implemented on this class)"""
29 |         raise NotImplementedError
30 | 


--------------------------------------------------------------------------------
/mmdet/core/evaluation/__init__.py:
--------------------------------------------------------------------------------
 1 | from .class_names import (cityscapes_classes, coco_classes, dataset_aliases,
 2 |                           get_classes, imagenet_det_classes,
 3 |                           imagenet_vid_classes, voc_classes)
 4 | from .eval_hooks import DistEvalHook, EvalHook
 5 | from .mean_ap import average_precision, eval_map, print_map_summary
 6 | from .recall import (eval_recalls, plot_iou_recall, plot_num_recall,
 7 |                      print_recall_summary)
 8 | 
 9 | __all__ = [
10 |     'voc_classes', 'imagenet_det_classes', 'imagenet_vid_classes',
11 |     'coco_classes', 'cityscapes_classes', 'dataset_aliases', 'get_classes',
12 |     'DistEvalHook', 'EvalHook', 'average_precision', 'eval_map',
13 |     'print_map_summary', 'eval_recalls', 'print_recall_summary',
14 |     'plot_num_recall', 'plot_iou_recall'
15 | ]
16 | 


--------------------------------------------------------------------------------
/mmdet/models/__init__.py:
--------------------------------------------------------------------------------
 1 | from .backbones import *  # noqa: F401,F403
 2 | from .builder import (BACKBONES, DETECTORS, HEADS, LOSSES, NECKS,
 3 |                       ROI_EXTRACTORS, SHARED_HEADS, build_backbone,
 4 |                       build_detector, build_head, build_loss, build_neck,
 5 |                       build_roi_extractor, build_shared_head)
 6 | from .dense_heads import *  # noqa: F401,F403
 7 | from .detectors import *  # noqa: F401,F403
 8 | from .losses import *  # noqa: F401,F403
 9 | from .necks import *  # noqa: F401,F403
10 | from .roi_heads import *  # noqa: F401,F403
11 | 
12 | __all__ = [
13 |     'BACKBONES', 'NECKS', 'ROI_EXTRACTORS', 'SHARED_HEADS', 'HEADS', 'LOSSES',
14 |     'DETECTORS', 'build_backbone', 'build_neck', 'build_roi_extractor',
15 |     'build_shared_head', 'build_head', 'build_loss', 'build_detector'
16 | ]
17 | 


--------------------------------------------------------------------------------
/configs/_base_/datasets/lvis_instance.py:
--------------------------------------------------------------------------------
 1 | _base_ = 'coco_instance.py'
 2 | dataset_type = 'LVISDataset'
 3 | data_root = 'data/lvis/'
 4 | data = dict(
 5 |     samples_per_gpu=2,
 6 |     workers_per_gpu=2,
 7 |     train=dict(
 8 |         type='ClassBalancedDataset',
 9 |         oversample_thr=1e-3,
10 |         dataset=dict(
11 |             type=dataset_type,
12 |             ann_file=data_root + 'annotations/lvis_v0.5_train.json',
13 |             img_prefix=data_root + 'train2017/')),
14 |     val=dict(
15 |         type=dataset_type,
16 |         ann_file=data_root + 'annotations/lvis_v0.5_val.json',
17 |         img_prefix=data_root + 'val2017/'),
18 |     test=dict(
19 |         type=dataset_type,
20 |         ann_file=data_root + 'annotations/lvis_v0.5_val.json',
21 |         img_prefix=data_root + 'val2017/'))
22 | evaluation = dict(metric=['bbox', 'segm'])
23 | 


--------------------------------------------------------------------------------
/mmdet/models/detectors/dsc.py:
--------------------------------------------------------------------------------
 1 | from ..builder import DETECTORS
 2 | from .two_stage import TwoStageDetector
 3 | 
 4 | 
 5 | @DETECTORS.register_module()
 6 | class DSC(TwoStageDetector):
 7 |     """Two-stage detector registered as ``DSC``.
 8 | 
 9 |     The constructor forwards every argument unchanged to
10 |     ``TwoStageDetector.__init__``; the only addition in this class is the
11 |     ``with_semantic`` property.
12 |     """
13 | 
14 |     def __init__(self,
15 |                  backbone,
16 |                  neck=None,
17 |                  rpn_head=None,
18 |                  roi_head=None,
19 |                  train_cfg=None,
20 |                  test_cfg=None,
21 |                  pretrained=None):
22 |         super(DSC, self).__init__(
23 |             backbone=backbone,
24 |             neck=neck,
25 |             rpn_head=rpn_head,
26 |             roi_head=roi_head,
27 |             train_cfg=train_cfg,
28 |             test_cfg=test_cfg,
29 |             pretrained=pretrained)
30 | 
31 |     @property
32 |     def with_semantic(self):
33 |         """bool: whether the detector has a semantic head"""
34 |         # Delegates to the RoI head, which owns the flag.
35 |         return self.roi_head.with_semantic
36 | 


--------------------------------------------------------------------------------
/mmdet/models/roi_heads/__init__.py:
--------------------------------------------------------------------------------
 1 | from .base_roi_head import BaseRoIHead
 2 | from .dsc_roi_head import DSCRoIHead
 3 | from .bbox_heads import (BBoxHead, ConvFCBBoxHead, DoubleConvFCBBoxHead,
 4 |                          Shared2FCBBoxHead, Shared4Conv1FCBBoxHead, DSCBBoxHead, Shared2FCDSCBBoxHead, Shared4Conv1FCDSCBBoxHead)
 5 | from .mask_heads import (CoarseMaskHead, FCNMaskHead, FusedSemanticHead,
 6 |                          GridHead, DSCMaskHead, MaskIoUHead, MaskPointHead)
 7 | from .roi_extractors import SingleRoIExtractor, SgSingleRoIExtractor
 8 | from .shared_heads import ResLayer
 9 | 
10 | __all__ = [
11 |     'BaseRoIHead', 'ResLayer', 
12 |     'BBoxHead','ConvFCBBoxHead', 'Shared2FCBBoxHead', 'Shared4Conv1FCBBoxHead',
13 |     'DoubleConvFCBBoxHead','DSCBBoxHead', 'Shared2FCDSCBBoxHead', 'Shared4Conv1FCDSCBBoxHead',
14 |     'FCNMaskHead', 'DSCMaskHead', 'FusedSemanticHead',
15 |     'GridHead', 'MaskIoUHead', 'SingleRoIExtractor','SgSingleRoIExtractor'
16 | ]
17 | 


--------------------------------------------------------------------------------
/mmdet/ops/roi_align/gradcheck.py:
--------------------------------------------------------------------------------
 1 | import os.path as osp
 2 | import sys
 3 | 
 4 | import numpy as np
 5 | import torch
 6 | from torch.autograd import gradcheck
 7 | 
 8 | sys.path.append(osp.abspath(osp.join(__file__, '../../')))
 9 | from roi_align import RoIAlign  # noqa: E402, isort:skip
10 | 
11 | feat_size = 15
12 | spatial_scale = 1.0 / 8
13 | # Image extent corresponding to the feature map at this scale (15 * 8 = 120).
14 | img_size = feat_size / spatial_scale
15 | num_imgs = 2
16 | num_rois = 20
17 | 
18 | # One random image index per RoI, in [0, num_imgs).
19 | batch_ind = np.random.randint(num_imgs, size=(num_rois, 1))
20 | # Four random values per RoI in [0, img_size / 2) ...
21 | rois = np.random.rand(num_rois, 4) * img_size * 0.5
22 | # ... then the last two columns are shifted into [img_size / 2, img_size).
23 | rois[:, 2:] += img_size * 0.5
24 | # Prepend the image index so each row has five values.
25 | rois = np.hstack((batch_ind, rois))
26 | 
27 | feat = torch.randn(
28 |     num_imgs, 16, feat_size, feat_size, requires_grad=True, device='cuda:0')
29 | rois = torch.from_numpy(rois).float().cuda()
30 | inputs = (feat, rois)
31 | print('Gradcheck for roi align...')
32 | # The check is run twice: with two and with three constructor arguments.
33 | test = gradcheck(RoIAlign(3, spatial_scale), inputs, atol=1e-3, eps=1e-3)
34 | print(test)
35 | test = gradcheck(RoIAlign(3, spatial_scale, 2), inputs, atol=1e-3, eps=1e-3)
36 | print(test)
37 | 


--------------------------------------------------------------------------------
/mmdet/datasets/pipelines/__init__.py:
--------------------------------------------------------------------------------
 1 | from .compose import Compose
 2 | from .formating import (Collect, ImageToTensor, ToDataContainer, ToTensor,
 3 |                         Transpose, to_tensor)
 4 | from .instaboost import InstaBoost
 5 | from .loading import (LoadAnnotations, LoadImageFromFile,
 6 |                       LoadMultiChannelImageFromFiles, LoadProposals)
 7 | from .test_time_aug import MultiScaleFlipAug
 8 | from .transforms import (Albu, Expand, MinIoURandomCrop, Normalize, Pad,
 9 |                          PhotoMetricDistortion, RandomCrop, RandomFlip, Resize,
10 |                          SegRescale)
11 | 
12 | __all__ = [
13 |     'Compose', 'to_tensor', 'ToTensor', 'ImageToTensor', 'ToDataContainer',
14 |     'Transpose', 'Collect', 'LoadAnnotations', 'LoadImageFromFile',
15 |     'LoadMultiChannelImageFromFiles', 'LoadProposals', 'MultiScaleFlipAug',
16 |     'Resize', 'RandomFlip', 'Pad', 'RandomCrop', 'Normalize', 'SegRescale',
17 |     'MinIoURandomCrop', 'Expand', 'PhotoMetricDistortion', 'Albu', 'InstaBoost'
18 | ]
19 | 


--------------------------------------------------------------------------------
/mmdet/datasets/samplers/distributed_sampler.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch.utils.data import DistributedSampler as _DistributedSampler
 3 | 
 4 | 
 5 | class DistributedSampler(_DistributedSampler):
 6 | 
 7 |     def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True):
 8 |         super().__init__(dataset, num_replicas=num_replicas, rank=rank)
 9 |         self.shuffle = shuffle
10 | 
11 |     def __iter__(self):
12 |         # deterministically shuffle based on epoch
13 |         if self.shuffle:
14 |             g = torch.Generator()
15 |             g.manual_seed(self.epoch)
16 |             indices = torch.randperm(len(self.dataset), generator=g).tolist()
17 |         else:
18 |             indices = torch.arange(len(self.dataset)).tolist()
19 | 
20 |         # add extra samples to make it evenly divisible
21 |         indices += indices[:(self.total_size - len(indices))]
22 |         assert len(indices) == self.total_size
23 | 
24 |         # subsample
25 |         indices = indices[self.rank:self.total_size:self.num_replicas]
26 |         assert len(indices) == self.num_samples
27 | 
28 |         return iter(indices)
29 | 


--------------------------------------------------------------------------------
/mmdet/core/fp16/utils.py:
--------------------------------------------------------------------------------
 1 | from collections import abc
 2 | 
 3 | import numpy as np
 4 | import torch
 5 | 
 6 | 
 7 | def cast_tensor_type(inputs, src_type, dst_type):
 8 |     """Recursively convert Tensor in inputs from src_type to dst_type.
 9 | 
10 |     Args:
11 |         inputs: Inputs that to be casted.
12 |         src_type (torch.dtype): Source type..
13 |         dst_type (torch.dtype): Destination type.
14 | 
15 |     Returns:
16 |         The same type with inputs, but all contained Tensors have been cast.
17 |     """
18 |     if isinstance(inputs, torch.Tensor):
19 |         return inputs.to(dst_type)
20 |     elif isinstance(inputs, str):
21 |         return inputs
22 |     elif isinstance(inputs, np.ndarray):
23 |         return inputs
24 |     elif isinstance(inputs, abc.Mapping):
25 |         return type(inputs)({
26 |             k: cast_tensor_type(v, src_type, dst_type)
27 |             for k, v in inputs.items()
28 |         })
29 |     elif isinstance(inputs, abc.Iterable):
30 |         return type(inputs)(
31 |             cast_tensor_type(item, src_type, dst_type) for item in inputs)
32 |     else:
33 |         return inputs
34 | 


--------------------------------------------------------------------------------
/mmdet/datasets/__init__.py:
--------------------------------------------------------------------------------
 1 | from .builder import DATASETS, PIPELINES, build_dataloader, build_dataset
 2 | from .cityscapes import CityscapesDataset
 3 | from .coco import CocoDataset
 4 | from .custom import CustomDataset
 5 | from .dataset_wrappers import (ClassBalancedDataset, ConcatDataset,
 6 |                                RepeatDataset)
 7 | from .deepfashion import DeepFashionDataset
 8 | from .lvis import LVISDataset
 9 | from .samplers import DistributedGroupSampler, DistributedSampler, GroupSampler
10 | from .voc import VOCDataset
11 | from .wider_face import WIDERFaceDataset
12 | from .xml_style import XMLDataset
13 | 
14 | __all__ = [
15 |     'CustomDataset', 'XMLDataset', 'CocoDataset', 'DeepFashionDataset',
16 |     'VOCDataset', 'CityscapesDataset', 'LVISDataset', 'GroupSampler',
17 |     'CustomDataset', 'XMLDataset', 'CocoDataset', 'VOCDataset',
18 |     'CityscapesDataset', 'LVISDataset', 'DeepFashionDataset', 'GroupSampler',
19 |     'DistributedGroupSampler', 'DistributedSampler', 'build_dataloader',
20 |     'ConcatDataset', 'RepeatDataset', 'ClassBalancedDataset',
21 |     'WIDERFaceDataset', 'DATASETS', 'PIPELINES', 'build_dataset'
22 | ]
23 | 


--------------------------------------------------------------------------------
/mmdet/ops/carafe/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup
 2 | 
 3 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension
 4 | 
 5 | NVCC_ARGS = [
 6 |     '-D__CUDA_NO_HALF_OPERATORS__',
 7 |     '-D__CUDA_NO_HALF_CONVERSIONS__',
 8 |     '-D__CUDA_NO_HALF2_OPERATORS__',
 9 | ]
10 | 
11 | setup(
12 |     name='carafe',
13 |     ext_modules=[
14 |         CUDAExtension(
15 |             'carafe_ext', [
16 |                 'src/cuda/carafe_cuda.cpp', 'src/cuda/carafe_cuda_kernel.cu',
17 |                 'src/carafe_ext.cpp'
18 |             ],
19 |             define_macros=[('WITH_CUDA', None)],
20 |             extra_compile_args={
21 |                 'cxx': [],
22 |                 'nvcc': NVCC_ARGS
23 |             }),
24 |         CUDAExtension(
25 |             'carafe_naive_ext', [
26 |                 'src/cuda/carafe_naive_cuda.cpp',
27 |                 'src/cuda/carafe_naive_cuda_kernel.cu',
28 |                 'src/carafe_naive_ext.cpp'
29 |             ],
30 |             define_macros=[('WITH_CUDA', None)],
31 |             extra_compile_args={
32 |                 'cxx': [],
33 |                 'nvcc': NVCC_ARGS
34 |             })
35 |     ],
36 |     cmdclass={'build_ext': BuildExtension})
37 | 


--------------------------------------------------------------------------------
/mmdet/datasets/pipelines/compose.py:
--------------------------------------------------------------------------------
 1 | import collections
 2 | 
 3 | from mmcv.utils import build_from_cfg
 4 | 
 5 | from ..builder import PIPELINES
 6 | 
 7 | 
 8 | @PIPELINES.register_module()
 9 | class Compose(object):
10 | 
11 |     def __init__(self, transforms):
12 |         assert isinstance(transforms, collections.abc.Sequence)
13 |         self.transforms = []
14 |         for transform in transforms:
15 |             if isinstance(transform, dict):
16 |                 transform = build_from_cfg(transform, PIPELINES)
17 |                 self.transforms.append(transform)
18 |             elif callable(transform):
19 |                 self.transforms.append(transform)
20 |             else:
21 |                 raise TypeError('transform must be callable or a dict')
22 | 
23 |     def __call__(self, data):
24 |         for t in self.transforms:
25 |             data = t(data)
26 |             if data is None:
27 |                 return None
28 |         return data
29 | 
30 |     def __repr__(self):
31 |         format_string = self.__class__.__name__ + '('
32 |         for t in self.transforms:
33 |             format_string += '\n'
34 |             format_string += f'    {t}'
35 |         format_string += '\n)'
36 |         return format_string
37 | 


--------------------------------------------------------------------------------
/tools/publish_model.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import subprocess
 3 | 
 4 | import torch
 5 | 
 6 | 
 7 | def parse_args():
 8 |     parser = argparse.ArgumentParser(
 9 |         description='Process a checkpoint to be published')
10 |     parser.add_argument('in_file', help='input checkpoint filename')
11 |     parser.add_argument('out_file', help='output checkpoint filename')
12 |     args = parser.parse_args()
13 |     return args
14 | 
15 | 
16 | def process_checkpoint(in_file, out_file):
17 |     checkpoint = torch.load(in_file, map_location='cpu')
18 |     # remove optimizer for smaller file size
19 |     if 'optimizer' in checkpoint:
20 |         del checkpoint['optimizer']
21 |     # if it is necessary to remove some sensitive data in checkpoint['meta'],
22 |     # add the code here.
23 |     torch.save(checkpoint, out_file)
24 |     sha = subprocess.check_output(['sha256sum', out_file]).decode()
25 |     if out_file.endswith('.pth'):
26 |         out_file = out_file[:-4]
27 |     final_file = out_file + f'-{sha[:8]}.pth'
28 |     subprocess.Popen(['mv', out_file, final_file])
29 | 
30 | 
31 | def main():
32 |     """Parse the command line and process the checkpoint it names."""
33 |     args = parse_args()
34 |     process_checkpoint(args.in_file, args.out_file)
35 | 
36 | 
37 | # Run only when executed as a script.
38 | if __name__ == '__main__':
39 |     main()
40 | 


--------------------------------------------------------------------------------
/mmdet/core/bbox/__init__.py:
--------------------------------------------------------------------------------
 1 | from .assigners import (AssignResult, BaseAssigner, CenterRegionAssigner,
 2 |                         MaxIoUAssigner)
 3 | from .builder import build_assigner, build_bbox_coder, build_sampler
 4 | from .coder import (BaseBBoxCoder, DeltaXYWHBBoxCoder, PseudoBBoxCoder,
 5 |                     TBLRBBoxCoder)
 6 | from .iou_calculators import BboxOverlaps2D, bbox_overlaps
 7 | from .samplers import (BaseSampler, CombinedSampler,
 8 |                        InstanceBalancedPosSampler, IoUBalancedNegSampler,
 9 |                        PseudoSampler, RandomSampler, SamplingResult)
10 | from .transforms import (bbox2result, bbox2roi, bbox_flip, bbox_mapping,
11 |                          bbox_mapping_back, distance2bbox, roi2bbox)
12 | 
13 | __all__ = [
14 |     'bbox_overlaps', 'BboxOverlaps2D', 'BaseAssigner', 'MaxIoUAssigner',
15 |     'AssignResult', 'BaseSampler', 'PseudoSampler', 'RandomSampler',
16 |     'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler',
17 |     'SamplingResult', 'build_assigner', 'build_sampler', 'bbox_flip',
18 |     'bbox_mapping', 'bbox_mapping_back', 'bbox2roi', 'roi2bbox', 'bbox2result',
19 |     'distance2bbox', 'build_bbox_coder', 'BaseBBoxCoder', 'PseudoBBoxCoder',
20 |     'DeltaXYWHBBoxCoder', 'TBLRBBoxCoder', 'CenterRegionAssigner'
21 | ]
22 | 


--------------------------------------------------------------------------------
/mmdet/models/losses/__init__.py:
--------------------------------------------------------------------------------
 1 | from .accuracy import Accuracy, accuracy
 2 | from .ae_loss import AssociativeEmbeddingLoss
 3 | from .balanced_l1_loss import BalancedL1Loss, balanced_l1_loss
 4 | from .cross_entropy_loss import (CrossEntropyLoss, binary_cross_entropy,
 5 |                                  cross_entropy, mask_cross_entropy)
 6 | from .focal_loss import FocalLoss, sigmoid_focal_loss
 7 | from .gaussian_focal_loss import GaussianFocalLoss
 8 | from .ghm_loss import GHMC, GHMR
 9 | from .iou_loss import (BoundedIoULoss, GIoULoss, IoULoss, bounded_iou_loss,
10 |                        iou_loss)
11 | from .mse_loss import MSELoss, mse_loss
12 | from .pisa_loss import carl_loss, isr_p
13 | from .smooth_l1_loss import L1Loss, SmoothL1Loss, l1_loss, smooth_l1_loss
14 | from .utils import reduce_loss, weight_reduce_loss, weighted_loss
15 | 
16 | __all__ = [
17 |     'accuracy', 'Accuracy', 'cross_entropy', 'binary_cross_entropy',
18 |     'mask_cross_entropy', 'CrossEntropyLoss', 'sigmoid_focal_loss',
19 |     'FocalLoss', 'smooth_l1_loss', 'SmoothL1Loss', 'balanced_l1_loss',
20 |     'BalancedL1Loss', 'mse_loss', 'MSELoss', 'iou_loss', 'bounded_iou_loss',
21 |     'IoULoss', 'BoundedIoULoss', 'GIoULoss', 'GHMC', 'GHMR', 'reduce_loss',
22 |     'weight_reduce_loss', 'weighted_loss', 'L1Loss', 'l1_loss', 'isr_p',
23 |     'carl_loss', 'AssociativeEmbeddingLoss', 'GaussianFocalLoss'
24 | ]
25 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | DSC for non-commercial purposes
 2 | 
 3 | Copyright (c) 2020 the authors
 4 | All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without
 7 | modification, are permitted provided that the following conditions are met:
 8 | 
 9 | * Redistributions of source code must retain the above copyright notice, this
10 |   list of conditions and the following disclaimer.
11 | 
12 | * Redistributions in binary form must reproduce the above copyright notice,
13 |   this list of conditions and the following disclaimer in the documentation
14 |   and/or other materials provided with the distribution.
15 | 
16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


--------------------------------------------------------------------------------
/mmdet/utils/profiling.py:
--------------------------------------------------------------------------------
 1 | import contextlib
 2 | import sys
 3 | import time
 4 | 
 5 | import torch
 6 | 
 7 | if sys.version_info >= (3, 7):
 8 | 
 9 |     @contextlib.contextmanager
10 |     def profile_time(trace_name,
11 |                      name,
12 |                      enabled=True,
13 |                      stream=None,
14 |                      end_stream=None):
15 |         """Print time spent by CPU and GPU.
16 | 
17 |         Useful as a temporary context manager to find sweet spots of
18 |         code suitable for async implementation.
19 | 
20 |         Args:
21 |             trace_name: First label in the printed message.
22 |             name: Second label in the printed message.
23 |             enabled (bool): When False the body runs without any timing.
24 |             stream: CUDA stream on which the start event is recorded;
25 |                 defaults to ``torch.cuda.current_stream()``.
26 |             end_stream: CUDA stream on which the end event is recorded;
27 |                 defaults to ``stream``.
28 | 
29 |         Timing is also skipped when ``torch.cuda.is_available()`` is False.
30 |         """
31 |         if (not enabled) or not torch.cuda.is_available():
32 |             yield
33 |             return
34 |         stream = stream if stream else torch.cuda.current_stream()
35 |         end_stream = end_stream if end_stream else stream
36 |         start = torch.cuda.Event(enable_timing=True)
37 |         end = torch.cuda.Event(enable_timing=True)
38 |         stream.record_event(start)
39 |         try:
40 |             cpu_start = time.monotonic()
41 |             yield
42 |         finally:
43 |             # Runs even if the wrapped body raised.
44 |             cpu_end = time.monotonic()
45 |             end_stream.record_event(end)
46 |             # Wait for the end event before reading the elapsed GPU time.
47 |             end.synchronize()
48 |             # time.monotonic() is in seconds; convert to milliseconds.
49 |             cpu_time = (cpu_end - cpu_start) * 1000
50 |             gpu_time = start.elapsed_time(end)
51 |             msg = f'{trace_name} {name} cpu_time {cpu_time:.2f} ms '
52 |             msg += f'gpu_time {gpu_time:.2f} ms stream {stream}'
53 |             # ``end_stream`` is printed after the message as a second item.
54 |             print(msg, end_stream)
55 | 


--------------------------------------------------------------------------------
/mmdet/datasets/wider_face.py:
--------------------------------------------------------------------------------
 1 | import os.path as osp
 2 | import xml.etree.ElementTree as ET
 3 | 
 4 | import mmcv
 5 | 
 6 | from .builder import DATASETS
 7 | from .xml_style import XMLDataset
 8 | 
 9 | 
10 | @DATASETS.register_module()
11 | class WIDERFaceDataset(XMLDataset):
12 |     """
13 |     Reader for the WIDER Face dataset in PASCAL VOC format.
14 |     Conversion scripts can be found in
15 |     https://github.com/sovrasov/wider-face-pascal-voc-annotations
16 |     """
17 |     CLASSES = ('face', )
18 | 
19 |     def __init__(self, **kwargs):
20 |         super(WIDERFaceDataset, self).__init__(**kwargs)
21 | 
22 |     def load_annotations(self, ann_file):
23 |         data_infos = []
24 |         img_ids = mmcv.list_from_file(ann_file)
25 |         for img_id in img_ids:
26 |             filename = f'{img_id}.jpg'
27 |             xml_path = osp.join(self.img_prefix, 'Annotations',
28 |                                 f'{img_id}.xml')
29 |             tree = ET.parse(xml_path)
30 |             root = tree.getroot()
31 |             size = root.find('size')
32 |             width = int(size.find('width').text)
33 |             height = int(size.find('height').text)
34 |             folder = root.find('folder').text
35 |             data_infos.append(
36 |                 dict(
37 |                     id=img_id,
38 |                     filename=osp.join(folder, filename),
39 |                     width=width,
40 |                     height=height))
41 | 
42 |         return data_infos
43 | 


--------------------------------------------------------------------------------
/mmdet/ops/plugin.py:
--------------------------------------------------------------------------------
 1 | from mmcv.cnn import ConvModule
 2 | 
 3 | from .context_block import ContextBlock
 4 | from .generalized_attention import GeneralizedAttention
 5 | from .non_local import NonLocal2D
 6 | 
 7 | plugin_cfg = {
 8 |     # format: layer_type: (abbreviation, module)
 9 |     'ContextBlock': ('context_block', ContextBlock),
10 |     'GeneralizedAttention': ('gen_attention_block', GeneralizedAttention),
11 |     'NonLocal2D': ('nonlocal_block', NonLocal2D),
12 |     'ConvModule': ('conv_block', ConvModule),
13 | }
14 | 
15 | 
16 | def build_plugin_layer(cfg, postfix='', **kwargs):
17 |     """ Build plugin layer
18 | 
19 |     Args:
20 |         cfg (None or dict): cfg should contain:
21 |             type (str): identify plugin layer type.
22 |             layer args: args needed to instantiate a plugin layer.
23 |         postfix (int, str): appended into plugin abbreviation to
24 |             create named layer.
25 | 
26 |     Returns:
27 |         name (str): abbreviation + postfix
28 |         layer (nn.Module): created plugin layer
29 |     """
30 |     assert isinstance(cfg, dict) and 'type' in cfg
31 |     cfg_ = cfg.copy()
32 | 
33 |     layer_type = cfg_.pop('type')
34 |     if layer_type not in plugin_cfg:
35 |         raise KeyError(f'Unrecognized plugin type {layer_type}')
36 |     else:
37 |         abbr, plugin_layer = plugin_cfg[layer_type]
38 | 
39 |     assert isinstance(postfix, (int, str))
40 |     name = abbr + str(postfix)
41 | 
42 |     layer = plugin_layer(**kwargs, **cfg_)
43 | 
44 |     return name, layer
45 | 


--------------------------------------------------------------------------------
/mmdet/core/anchor/point_generator.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | from .builder import ANCHOR_GENERATORS
 4 | 
 5 | 
 6 | @ANCHOR_GENERATORS.register_module()
 7 | class PointGenerator(object):
 8 | 
 9 |     def _meshgrid(self, x, y, row_major=True):
10 |         xx = x.repeat(len(y))
11 |         yy = y.view(-1, 1).repeat(1, len(x)).view(-1)
12 |         if row_major:
13 |             return xx, yy
14 |         else:
15 |             return yy, xx
16 | 
17 |     def grid_points(self, featmap_size, stride=16, device='cuda'):
18 |         feat_h, feat_w = featmap_size
19 |         shift_x = torch.arange(0., feat_w, device=device) * stride
20 |         shift_y = torch.arange(0., feat_h, device=device) * stride
21 |         shift_xx, shift_yy = self._meshgrid(shift_x, shift_y)
22 |         stride = shift_x.new_full((shift_xx.shape[0], ), stride)
23 |         shifts = torch.stack([shift_xx, shift_yy, stride], dim=-1)
24 |         all_points = shifts.to(device)
25 |         return all_points
26 | 
27 |     def valid_flags(self, featmap_size, valid_size, device='cuda'):
28 |         feat_h, feat_w = featmap_size
29 |         valid_h, valid_w = valid_size
30 |         assert valid_h <= feat_h and valid_w <= feat_w
31 |         valid_x = torch.zeros(feat_w, dtype=torch.bool, device=device)
32 |         valid_y = torch.zeros(feat_h, dtype=torch.bool, device=device)
33 |         valid_x[:valid_w] = 1
34 |         valid_y[:valid_h] = 1
35 |         valid_xx, valid_yy = self._meshgrid(valid_x, valid_y)
36 |         valid = valid_xx & valid_yy
37 |         return valid
38 | 


--------------------------------------------------------------------------------
/configs/_base_/models/ssd300.py:
--------------------------------------------------------------------------------
 1 | # model settings
 2 | input_size = 300
 3 | model = dict(
 4 |     type='SingleStageDetector',
 5 |     pretrained='open-mmlab://vgg16_caffe',
 6 |     backbone=dict(
 7 |         type='SSDVGG',
 8 |         input_size=input_size,
 9 |         depth=16,
10 |         with_last_pool=False,
11 |         ceil_mode=True,
12 |         out_indices=(3, 4),
13 |         out_feature_indices=(22, 34),
14 |         l2_norm_scale=20),
15 |     neck=None,
16 |     bbox_head=dict(
17 |         type='SSDHead',
18 |         in_channels=(512, 1024, 512, 256, 256, 256),
19 |         num_classes=80,
20 |         anchor_generator=dict(
21 |             type='SSDAnchorGenerator',
22 |             scale_major=False,
23 |             input_size=input_size,
24 |             basesize_ratio_range=(0.15, 0.9),
25 |             strides=[8, 16, 32, 64, 100, 300],
26 |             ratios=[[2], [2, 3], [2, 3], [2, 3], [2], [2]]),
27 |         bbox_coder=dict(
28 |             type='DeltaXYWHBBoxCoder',
29 |             target_means=[.0, .0, .0, .0],
30 |             target_stds=[0.1, 0.1, 0.2, 0.2])))
31 | cudnn_benchmark = True
32 | train_cfg = dict(
33 |     assigner=dict(
34 |         type='MaxIoUAssigner',
35 |         pos_iou_thr=0.5,
36 |         neg_iou_thr=0.5,
37 |         min_pos_iou=0.,
38 |         ignore_iof_thr=-1,
39 |         gt_max_assign_all=False),
40 |     smoothl1_beta=1.,
41 |     allowed_border=-1,
42 |     pos_weight=-1,
43 |     neg_pos_ratio=3,
44 |     debug=False)
45 | test_cfg = dict(
46 |     nms=dict(type='nms', iou_thr=0.45),
47 |     min_bbox_size=0,
48 |     score_thr=0.02,
49 |     max_per_img=200)
50 | 


--------------------------------------------------------------------------------
/mmdet/ops/conv_ws.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | import torch.nn.functional as F
 3 | from mmcv.cnn import CONV_LAYERS
 4 | 
 5 | 
 6 | def conv_ws_2d(input,
 7 |                weight,
 8 |                bias=None,
 9 |                stride=1,
10 |                padding=0,
11 |                dilation=1,
12 |                groups=1,
13 |                eps=1e-5):
14 |     c_in = weight.size(0)
15 |     weight_flat = weight.view(c_in, -1)
16 |     mean = weight_flat.mean(dim=1, keepdim=True).view(c_in, 1, 1, 1)
17 |     std = weight_flat.std(dim=1, keepdim=True).view(c_in, 1, 1, 1)
18 |     weight = (weight - mean) / (std + eps)
19 |     return F.conv2d(input, weight, bias, stride, padding, dilation, groups)
20 | 
21 | 
22 | @CONV_LAYERS.register_module('ConvWS')
23 | class ConvWS2d(nn.Conv2d):
24 | 
25 |     def __init__(self,
26 |                  in_channels,
27 |                  out_channels,
28 |                  kernel_size,
29 |                  stride=1,
30 |                  padding=0,
31 |                  dilation=1,
32 |                  groups=1,
33 |                  bias=True,
34 |                  eps=1e-5):
35 |         super(ConvWS2d, self).__init__(
36 |             in_channels,
37 |             out_channels,
38 |             kernel_size,
39 |             stride=stride,
40 |             padding=padding,
41 |             dilation=dilation,
42 |             groups=groups,
43 |             bias=bias)
44 |         self.eps = eps
45 | 
46 |     def forward(self, x):
47 |         return conv_ws_2d(x, self.weight, self.bias, self.stride, self.padding,
48 |                           self.dilation, self.groups, self.eps)
49 | 


--------------------------------------------------------------------------------
/configs/_base_/datasets/coco_instance_semantic.py:
--------------------------------------------------------------------------------
 1 | _base_ = 'coco_detection.py'
 2 | data_root = 'data/coco/'
 3 | img_norm_cfg = dict(
 4 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 5 | train_pipeline = [
 6 |     dict(type='LoadImageFromFile'),
 7 |     dict(
 8 |         type='LoadAnnotations', with_bbox=True, with_mask=True, with_seg=True),
 9 |     dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
10 |     dict(type='RandomFlip', flip_ratio=0.5),
11 |     dict(type='Normalize', **img_norm_cfg),
12 |     dict(type='Pad', size_divisor=32),
13 |     dict(type='SegRescale', scale_factor=1 / 8),
14 |     dict(type='DefaultFormatBundle'),
15 |     dict(
16 |         type='Collect',
17 |         keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']),
18 | ]
19 | test_pipeline = [
20 |     dict(type='LoadImageFromFile'),
21 |     dict(
22 |         type='MultiScaleFlipAug',
23 |         img_scale=(1333, 800),
24 |         flip=False,
25 |         transforms=[
26 |             dict(type='Resize', keep_ratio=True),
27 |             dict(type='RandomFlip', flip_ratio=0.5),
28 |             dict(type='Normalize', **img_norm_cfg),
29 |             dict(type='Pad', size_divisor=32),
30 |             dict(type='ImageToTensor', keys=['img']),
31 |             dict(type='Collect', keys=['img']),
32 |         ])
33 | ]
34 | data = dict(
35 |     train=dict(
36 |         seg_prefix=data_root + 'stuffthingmaps/train2017/',
37 |         pipeline=train_pipeline),
38 |     val=dict(pipeline=test_pipeline),
39 |     test=dict(pipeline=test_pipeline))
40 | evaluation = dict(metric=['bbox', 'segm'])
41 | 


--------------------------------------------------------------------------------
/mmdet/models/losses/mse_loss.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | import torch.nn.functional as F
 3 | 
 4 | from ..builder import LOSSES
 5 | from .utils import weighted_loss
 6 | 
 7 | 
 8 | @weighted_loss
 9 | def mse_loss(pred, target):
10 |     """Wrapper of mse loss"""
11 |     return F.mse_loss(pred, target, reduction='none')
12 | 
13 | 
14 | @LOSSES.register_module()
15 | class MSELoss(nn.Module):
16 |     """MSELoss
17 | 
18 |     Args:
19 |         reduction (str, optional): The method that reduces the loss to a
20 |             scalar. Options are "none", "mean" and "sum".
21 |         loss_weight (float, optional): The weight of the loss. Defaults to 1.0
22 |     """
23 | 
24 |     def __init__(self, reduction='mean', loss_weight=1.0):
25 |         super().__init__()
26 |         self.reduction = reduction
27 |         self.loss_weight = loss_weight
28 | 
29 |     def forward(self, pred, target, weight=None, avg_factor=None):
30 |         """Forward function of loss
31 | 
32 |         Args:
33 |             pred (torch.Tensor): The prediction.
34 |             target (torch.Tensor): The learning target of the prediction.
35 |             weight (torch.Tensor, optional): Weight of the loss for each
36 |                 prediction. Defaults to None.
37 |             avg_factor (int, optional): Average factor that is used to average
38 |                 the loss. Defaults to None.
39 | 
40 |         Returns:
41 |             torch.Tensor: The calculated loss
42 |         """
43 |         loss = self.loss_weight * mse_loss(
44 |             pred,
45 |             target,
46 |             weight,
47 |             reduction=self.reduction,
48 |             avg_factor=avg_factor)
49 |         return loss
50 | 


--------------------------------------------------------------------------------
/configs/dsc/dsc_x101_64x4d_fpn_dconv_c3-c5_mstrain_400_1400_16x1_20e_coco.py:
--------------------------------------------------------------------------------
 1 | _base_ = './dsc_r50_fpn_1x_coco.py'
 2 | model = dict(
 3 |     pretrained='open-mmlab://resnext101_64x4d',
 4 |     backbone=dict(
 5 |         type='ResNeXt',
 6 |         depth=101,
 7 |         groups=64,
 8 |         base_width=4,
 9 |         num_stages=4,
10 |         out_indices=(0, 1, 2, 3),
11 |         frozen_stages=1,
12 |         norm_cfg=dict(type='BN', requires_grad=True),
13 |         norm_eval=True,
14 |         style='pytorch',
15 |         dcn=dict(type='DCN', deformable_groups=1, fallback_on_stride=False),
16 |         stage_with_dcn=(False, True, True, True)))
17 | # dataset settings
18 | img_norm_cfg = dict(
19 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
20 | train_pipeline = [
21 |     dict(type='LoadImageFromFile'),
22 |     dict(
23 |         type='LoadAnnotations', with_bbox=True, with_mask=True, with_seg=True),
24 |     dict(
25 |         type='Resize',
26 |         img_scale=[(1600, 400), (1600, 1400)],
27 |         multiscale_mode='range',
28 |         keep_ratio=True),
29 |     dict(type='RandomFlip', flip_ratio=0.5),
30 |     dict(type='Normalize', **img_norm_cfg),
31 |     dict(type='Pad', size_divisor=32),
32 |     dict(type='SegRescale', scale_factor=1 / 8),
33 |     dict(type='DefaultFormatBundle'),
34 |     dict(
35 |         type='Collect',
36 |         keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']),
37 | ]
38 | data = dict(
39 |     samples_per_gpu=1, workers_per_gpu=1, train=dict(pipeline=train_pipeline))
40 | # learning policy
41 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
42 | lr_config = dict(step=[16, 19])
43 | total_epochs = 20
44 | 


--------------------------------------------------------------------------------
/mmdet/ops/utils/src/compiling_info.cpp:
--------------------------------------------------------------------------------
 1 | // modified from
 2 | // https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/vision.cpp
 3 | #include <torch/extension.h>
 4 | 
 5 | #ifdef WITH_CUDA
 6 | #include <cuda_runtime_api.h>
 7 | int get_cudart_version() { return CUDART_VERSION; }
 8 | #endif
 9 | 
10 | std::string get_compiling_cuda_version() {
11 | #ifdef WITH_CUDA
12 |   std::ostringstream oss;
13 | 
14 |   // copied from
15 |   // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/cuda/detail/CUDAHooks.cpp#L231
16 |   auto printCudaStyleVersion = [&](int v) {
17 |     oss << (v / 1000) << "." << (v / 10 % 100);
18 |     if (v % 10 != 0) {
19 |       oss << "." << (v % 10);
20 |     }
21 |   };
22 |   printCudaStyleVersion(get_cudart_version());
23 |   return oss.str();
24 | #else
25 |   return std::string("not available");
26 | #endif
27 | }
28 | 
29 | // similar to
30 | // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/Version.cpp
31 | std::string get_compiler_version() {
32 |   std::ostringstream ss;
33 | #if defined(__GNUC__)
34 | #ifndef __clang__
35 |   { ss << "GCC " << __GNUC__ << "." << __GNUC_MINOR__; }
36 | #endif
37 | #endif
38 | 
39 | #if defined(__clang_major__)
40 |   {
41 |     ss << "clang " << __clang_major__ << "." << __clang_minor__ << "."
42 |        << __clang_patchlevel__;
43 |   }
44 | #endif
45 | 
46 | #if defined(_MSC_VER)
47 |   { ss << "MSVC " << _MSC_FULL_VER; }
48 | #endif
49 |   return ss.str();
50 | }
51 | 
52 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
53 |   m.def("get_compiler_version", &get_compiler_version, "get_compiler_version");
54 |   m.def("get_compiling_cuda_version", &get_compiling_cuda_version,
55 |         "get_compiling_cuda_version");
56 | }
57 | 


--------------------------------------------------------------------------------
/demo/image_demo.py:
--------------------------------------------------------------------------------
 1 | from argparse import ArgumentParser
 2 | 
 3 | from mmdet.apis import inference_detector, init_detector, show_result_pyplot
 4 | 
 5 | 
 6 | def main():
 7 |     parser = ArgumentParser()
 8 |     parser.add_argument('img_path', help='Image file path')
 9 |     parser.add_argument('save_path', help='Image save path')
10 |     parser.add_argument('config', help='Config file')
11 |     parser.add_argument('checkpoint', help='Checkpoint file')
12 |     parser.add_argument(
13 |         '--device', default='cuda:0', help='Device used for inference')
14 |     parser.add_argument(
15 |         '--score-thr', type=float, default=0.7, help='bbox score threshold')
16 |     args = parser.parse_args()
17 | 
18 |     # build the model from a config file and a checkpoint file
19 |     import os
20 |     model = init_detector(args.config, args.checkpoint, device=args.device)
21 |     # test a single image
22 |     #image_names = ["000000433243","000000000776", "000000015497", "000000018193", "000000046497", "000000080274", "000000144300", "000000171757", "000000215723",
23 |     #"000000080274", "000000095786", "000000170278", "000000367082", "000000452891", "000000459153", "000000489339", "000000550714", "000000564280"]
24 |     #image_names = set(image_names)
25 |     for img_file in os.listdir(args.img_path):
26 |     #    if img_file[:-4] not in image_names:
27 |     #        continue
28 |     #    print(args.img_path + img_file)
29 |         img_path = args.img_path + img_file
30 |         save_path = args.save_path + img_file
31 |         result = inference_detector(model, img_path)
32 |         # show the results
33 |         show_result_pyplot(model, save_path ,img_path, result, score_thr=args.score_thr)
34 | 
35 | 
36 | if __name__ == '__main__':
37 |     main()
38 | 


--------------------------------------------------------------------------------
/mmdet/models/roi_heads/mask_heads/dsc_mask_head.py:
--------------------------------------------------------------------------------
 1 | from mmcv.cnn import ConvModule, build_conv_layer
 2 | import torch.nn as nn
 3 | 
 4 | from mmdet.models.builder import HEADS
 5 | from .fcn_mask_head import FCNMaskHead
 6 | 
 7 | 
 8 | @HEADS.register_module()
 9 | class DSCMaskHead(FCNMaskHead):
10 | 
11 |     def __init__(self, with_conv_res=True, *args, **kwargs):
12 |         super(DSCMaskHead, self).__init__(*args, **kwargs)
13 |         self.with_conv_res = with_conv_res
14 |         if self.with_conv_res:
15 |             norm_cfg = dict(type='BN', requires_grad=True) 
16 |             self.conv_res = ConvModule(
17 |                 self.conv_out_channels,
18 |                 self.conv_out_channels,
19 |                 1,
20 |                 conv_cfg=self.conv_cfg,
21 |                 norm_cfg=norm_cfg)
22 | 
23 |     def init_weights(self):
24 |         super(DSCMaskHead, self).init_weights()
25 |         if self.with_conv_res:
26 |             self.conv_res.init_weights()
27 | 
28 |     def forward(self, x, res_feat=None, return_logits=True, return_feat=True):
29 |         if res_feat is not None:
30 |             assert self.with_conv_res
31 |             res_feat = self.conv_res(res_feat)
32 |             res_feat = nn.functional.adaptive_avg_pool2d(res_feat, x.shape[-2:])
33 |             x = x + res_feat
34 |         for conv in self.convs:
35 |             x = conv(x)
36 |         res_feat = x
37 |         outs = []
38 |         if return_logits:
39 |             x = self.upsample(x)
40 |             if self.upsample_method == 'deconv':
41 |                 x = self.relu(x)
42 |             mask_pred = self.conv_logits(x)
43 |             outs.append(mask_pred)
44 |         if return_feat:
45 |             outs.append(res_feat)
46 |         return outs if len(outs) > 1 else outs[0]
47 | 


--------------------------------------------------------------------------------
/tools/get_flops.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | 
 3 | from mmcv import Config
 4 | 
 5 | from mmdet.models import build_detector
 6 | from mmdet.utils import get_model_complexity_info
 7 | 
 8 | 
 9 | def parse_args():
10 |     parser = argparse.ArgumentParser(description='Train a detector')
11 |     parser.add_argument('config', help='train config file path')
12 |     parser.add_argument(
13 |         '--shape',
14 |         type=int,
15 |         nargs='+',
16 |         default=[1280, 800],
17 |         help='input image size')
18 |     args = parser.parse_args()
19 |     return args
20 | 
21 | 
22 | def main():
23 | 
24 |     args = parse_args()
25 | 
26 |     if len(args.shape) == 1:
27 |         input_shape = (3, args.shape[0], args.shape[0])
28 |     elif len(args.shape) == 2:
29 |         input_shape = (3, ) + tuple(args.shape)
30 |     else:
31 |         raise ValueError('invalid input shape')
32 | 
33 |     cfg = Config.fromfile(args.config)
34 |     model = build_detector(
35 |         cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg).cuda()
36 |     model.eval()
37 | 
38 |     if hasattr(model, 'forward_dummy'):
39 |         model.forward = model.forward_dummy
40 |     else:
41 |         raise NotImplementedError(
42 |             'FLOPs counter is currently not currently supported with {}'.
43 |             format(model.__class__.__name__))
44 | 
45 |     flops, params = get_model_complexity_info(model, input_shape)
46 |     split_line = '=' * 30
47 |     print(f'{split_line}\nInput shape: {input_shape}\n'
48 |           f'Flops: {flops}\nParams: {params}\n{split_line}')
49 |     print('!!!Please be cautious if you use the results in papers. '
50 |           'You may need to check if all ops are supported and verify that the '
51 |           'flops computation is correct.')
52 | 
53 | 
54 | if __name__ == '__main__':
55 |     main()
56 | 


--------------------------------------------------------------------------------
/mmdet/ops/__init__.py:
--------------------------------------------------------------------------------
 1 | from .context_block import ContextBlock
 2 | from .conv_ws import ConvWS2d, conv_ws_2d
 3 | from .corner_pool import CornerPool
 4 | from .dcn import (DeformConv, DeformConvPack, DeformRoIPooling,
 5 |                   DeformRoIPoolingPack, ModulatedDeformConv,
 6 |                   ModulatedDeformConvPack, ModulatedDeformRoIPoolingPack,
 7 |                   deform_conv, deform_roi_pooling, modulated_deform_conv)
 8 | from .generalized_attention import GeneralizedAttention
 9 | from .masked_conv import MaskedConv2d
10 | from .nms import batched_nms, nms, nms_match, soft_nms
11 | from .non_local import NonLocal2D
12 | from .plugin import build_plugin_layer
13 | from .point_sample import (SimpleRoIAlign, point_sample,
14 |                            rel_roi_point_to_rel_img_point)
15 | from .roi_align import RoIAlign, roi_align
16 | from .roi_pool import RoIPool, roi_pool
17 | from .sigmoid_focal_loss import SigmoidFocalLoss, sigmoid_focal_loss
18 | from .utils import get_compiler_version, get_compiling_cuda_version
19 | from .wrappers import Conv2d, ConvTranspose2d, Linear, MaxPool2d
20 | 
21 | __all__ = [
22 |     'nms', 'soft_nms', 'RoIAlign', 'roi_align', 'RoIPool', 'roi_pool',
23 |     'DeformConv', 'DeformConvPack', 'DeformRoIPooling', 'DeformRoIPoolingPack',
24 |     'ModulatedDeformRoIPoolingPack', 'ModulatedDeformConv',
25 |     'ModulatedDeformConvPack', 'deform_conv', 'modulated_deform_conv',
26 |     'deform_roi_pooling', 'SigmoidFocalLoss', 'sigmoid_focal_loss',
27 |     'MaskedConv2d', 'ContextBlock', 'GeneralizedAttention', 'NonLocal2D',
28 |     'get_compiler_version', 'get_compiling_cuda_version', 'ConvWS2d',
29 |     'conv_ws_2d', 'build_plugin_layer', 'batched_nms', 'Conv2d',
30 |     'ConvTranspose2d', 'MaxPool2d', 'Linear', 'nms_match', 'CornerPool',
31 |     'point_sample', 'rel_roi_point_to_rel_img_point', 'SimpleRoIAlign'
32 | ]
33 | 


--------------------------------------------------------------------------------
/mmdet/core/evaluation/bbox_overlaps.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | 
 4 | def bbox_overlaps(bboxes1, bboxes2, mode='iou', eps=1e-6):
 5 |     """Calculate the ious between each bbox of bboxes1 and bboxes2.
 6 | 
 7 |     Args:
 8 |         bboxes1(ndarray): shape (n, 4)
 9 |         bboxes2(ndarray): shape (k, 4)
10 |         mode(str): iou (intersection over union) or iof (intersection
11 |             over foreground)
12 | 
13 |     Returns:
14 |         ious(ndarray): shape (n, k)
15 |     """
16 | 
17 |     assert mode in ['iou', 'iof']
18 | 
19 |     bboxes1 = bboxes1.astype(np.float32)
20 |     bboxes2 = bboxes2.astype(np.float32)
21 |     rows = bboxes1.shape[0]
22 |     cols = bboxes2.shape[0]
23 |     ious = np.zeros((rows, cols), dtype=np.float32)
24 |     if rows * cols == 0:
25 |         return ious
26 |     exchange = False
27 |     if bboxes1.shape[0] > bboxes2.shape[0]:
28 |         bboxes1, bboxes2 = bboxes2, bboxes1
29 |         ious = np.zeros((cols, rows), dtype=np.float32)
30 |         exchange = True
31 |     area1 = (bboxes1[:, 2] - bboxes1[:, 0]) * (bboxes1[:, 3] - bboxes1[:, 1])
32 |     area2 = (bboxes2[:, 2] - bboxes2[:, 0]) * (bboxes2[:, 3] - bboxes2[:, 1])
33 |     for i in range(bboxes1.shape[0]):
34 |         x_start = np.maximum(bboxes1[i, 0], bboxes2[:, 0])
35 |         y_start = np.maximum(bboxes1[i, 1], bboxes2[:, 1])
36 |         x_end = np.minimum(bboxes1[i, 2], bboxes2[:, 2])
37 |         y_end = np.minimum(bboxes1[i, 3], bboxes2[:, 3])
38 |         overlap = np.maximum(x_end - x_start, 0) * np.maximum(
39 |             y_end - y_start, 0)
40 |         if mode == 'iou':
41 |             union = area1[i] + area2 - overlap
42 |         else:
43 |             union = area1[i] if not exchange else area2
44 |         union = np.maximum(union, eps)
45 |         ious[i, :] = overlap / union
46 |     if exchange:
47 |         ious = ious.T
48 |     return ious
49 | 


--------------------------------------------------------------------------------
/mmdet/ops/sigmoid_focal_loss/sigmoid_focal_loss.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | from torch.autograd import Function
 3 | from torch.autograd.function import once_differentiable
 4 | 
 5 | from . import sigmoid_focal_loss_ext
 6 | 
 7 | 
 8 | class SigmoidFocalLossFunction(Function):
 9 | 
10 |     @staticmethod
11 |     def forward(ctx, input, target, gamma=2.0, alpha=0.25):
12 |         ctx.save_for_backward(input, target)
13 |         num_classes = input.shape[1]
14 |         ctx.num_classes = num_classes
15 |         ctx.gamma = gamma
16 |         ctx.alpha = alpha
17 | 
18 |         loss = sigmoid_focal_loss_ext.forward(input, target, num_classes,
19 |                                               gamma, alpha)
20 |         return loss
21 | 
22 |     @staticmethod
23 |     @once_differentiable
24 |     def backward(ctx, d_loss):
25 |         input, target = ctx.saved_tensors
26 |         num_classes = ctx.num_classes
27 |         gamma = ctx.gamma
28 |         alpha = ctx.alpha
29 |         d_loss = d_loss.contiguous()
30 |         d_input = sigmoid_focal_loss_ext.backward(input, target, d_loss,
31 |                                                   num_classes, gamma, alpha)
32 |         return d_input, None, None, None, None
33 | 
34 | 
35 | sigmoid_focal_loss = SigmoidFocalLossFunction.apply
36 | 
37 | 
38 | # TODO: remove this module
39 | class SigmoidFocalLoss(nn.Module):
40 | 
41 |     def __init__(self, gamma, alpha):
42 |         super(SigmoidFocalLoss, self).__init__()
43 |         self.gamma = gamma
44 |         self.alpha = alpha
45 | 
46 |     def forward(self, logits, targets):
47 |         assert logits.is_cuda
48 |         loss = sigmoid_focal_loss(logits, targets, self.gamma, self.alpha)
49 |         return loss.sum()
50 | 
51 |     def __repr__(self):
52 |         tmpstr = self.__class__.__name__
53 |         tmpstr += f'(gamma={self.gamma}, alpha={self.alpha})'
54 |         return tmpstr
55 | 


--------------------------------------------------------------------------------
/configs/_base_/datasets/coco_detection.py:
--------------------------------------------------------------------------------
 1 | dataset_type = 'CocoDataset'
 2 | data_root = 'data/coco/'
 3 | img_norm_cfg = dict(
 4 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 5 | train_pipeline = [
 6 |     dict(type='LoadImageFromFile'),
 7 |     dict(type='LoadAnnotations', with_bbox=True),
 8 |     dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
 9 |     dict(type='RandomFlip', flip_ratio=0.5),
10 |     dict(type='Normalize', **img_norm_cfg),
11 |     dict(type='Pad', size_divisor=32),
12 |     dict(type='DefaultFormatBundle'),
13 |     dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
14 | ]
15 | test_pipeline = [
16 |     dict(type='LoadImageFromFile'),
17 |     dict(
18 |         type='MultiScaleFlipAug',
19 |         img_scale=(1333, 800),
20 |         flip=False,
21 |         transforms=[
22 |             dict(type='Resize', keep_ratio=True),
23 |             dict(type='RandomFlip'),
24 |             dict(type='Normalize', **img_norm_cfg),
25 |             dict(type='Pad', size_divisor=32),
26 |             dict(type='ImageToTensor', keys=['img']),
27 |             dict(type='Collect', keys=['img']),
28 |         ])
29 | ]
30 | data = dict(
31 |     samples_per_gpu=2,
32 |     workers_per_gpu=2,
33 |     train=dict(
34 |         type=dataset_type,
35 |         ann_file=data_root + 'annotations/instances_train2017.json',
36 |         img_prefix=data_root + 'train2017/',
37 |         pipeline=train_pipeline),
38 |     val=dict(
39 |         type=dataset_type,
40 |         ann_file=data_root + 'annotations/instances_val2017.json',
41 |         img_prefix=data_root + 'val2017/',
42 |         pipeline=test_pipeline),
43 |     test=dict(
44 |         type=dataset_type,
45 |         ann_file=data_root + 'annotations/instances_val2017.json',
46 |         img_prefix=data_root + 'val2017/',
47 |         pipeline=test_pipeline))
48 | evaluation = dict(interval=1, metric='bbox')
49 | 


--------------------------------------------------------------------------------
/mmdet/models/builder.py:
--------------------------------------------------------------------------------
 1 | from mmcv.utils import Registry, build_from_cfg
 2 | from torch import nn
 3 | 
 4 | BACKBONES = Registry('backbone')
 5 | NECKS = Registry('neck')
 6 | ROI_EXTRACTORS = Registry('roi_extractor')
 7 | SHARED_HEADS = Registry('shared_head')
 8 | HEADS = Registry('head')
 9 | LOSSES = Registry('loss')
10 | DETECTORS = Registry('detector')
11 | 
12 | 
13 | def build(cfg, registry, default_args=None):
14 |     """Build a module
15 | 
16 |     Args:
17 |         cfg (dict, list[dict]): The config of modules, it is either a dict
18 |             or a list of configs.
19 |         registry (:obj:`Registry`): A registry the module belongs to.
20 |         default_args (dict, optional): Default arguments to build the module.
21 |             Defaults to None.
22 | 
23 |     Returns:
24 |         nn.Module: A built nn module.
25 |     """
26 |     if isinstance(cfg, list):
27 |         modules = [
28 |             build_from_cfg(cfg_, registry, default_args) for cfg_ in cfg
29 |         ]
30 |         return nn.Sequential(*modules)
31 |     else:
32 |         return build_from_cfg(cfg, registry, default_args)
33 | 
34 | 
35 | def build_backbone(cfg):
36 |     """Build a backbone from ``cfg`` via the ``BACKBONES`` registry"""
37 |     return build(cfg, BACKBONES)
38 | 
39 | 
40 | def build_neck(cfg):
41 |     """Build a neck from ``cfg`` via the ``NECKS`` registry"""
42 |     return build(cfg, NECKS)
43 | 
44 | 
45 | def build_roi_extractor(cfg):
46 |     """Build a RoI extractor from ``cfg`` via the ``ROI_EXTRACTORS`` registry"""
47 |     return build(cfg, ROI_EXTRACTORS)
48 | 
49 | 
50 | def build_shared_head(cfg):
51 |     """Build a shared head from ``cfg`` via the ``SHARED_HEADS`` registry"""
52 |     return build(cfg, SHARED_HEADS)
53 | 
54 | 
55 | def build_head(cfg):
56 |     """Build a head from ``cfg`` via the ``HEADS`` registry"""
57 |     return build(cfg, HEADS)
58 | 
59 | 
60 | def build_loss(cfg):
61 |     """Build a loss from ``cfg`` via the ``LOSSES`` registry"""
62 |     return build(cfg, LOSSES)
63 | 
64 | 
65 | def build_detector(cfg, train_cfg=None, test_cfg=None):
66 |     """Build a detector; train_cfg/test_cfg are passed as default args"""
67 |     return build(cfg, DETECTORS, dict(train_cfg=train_cfg, test_cfg=test_cfg))
68 | 


--------------------------------------------------------------------------------
/mmdet/ops/nms/src/nms_ext.cpp:
--------------------------------------------------------------------------------
 1 | // Modified from https://github.com/bharatsingh430/soft-nms/blob/master/lib/nms/cpu_nms.pyx, Soft-NMS is added
 2 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 3 | #include <torch/extension.h>
 4 | 
 5 | at::Tensor nms_cpu(const at::Tensor& dets, const float threshold);
 6 | 
 7 | at::Tensor soft_nms_cpu(const at::Tensor& dets, const float threshold,
 8 |                     const unsigned char method, const float sigma, const
 9 |                     float min_score);
10 | 
11 | std::vector<std::vector<int> > nms_match_cpu(const at::Tensor& dets, const float threshold);
12 | 
13 | 
14 | #ifdef WITH_CUDA
15 | at::Tensor nms_cuda(const at::Tensor& dets, const float threshold);
16 | #endif
17 | 
18 | at::Tensor nms(const at::Tensor& dets, const float threshold){
19 |   if (dets.device().is_cuda()) {  // CUDA tensors need the CUDA kernel
20 | #ifdef WITH_CUDA
21 |     return nms_cuda(dets, threshold);
22 | #else
23 |     AT_ERROR("nms is not compiled with GPU support");
24 | #endif
25 |   }
26 |   return nms_cpu(dets, threshold);  // every non-CUDA tensor takes the CPU path
27 | }
28 | 
29 | at::Tensor soft_nms(const at::Tensor& dets, const float threshold,
30 |                         const unsigned char method, const float sigma, const
31 |                         float min_score) {
32 |   if (dets.device().is_cuda()) {  // CPU-only op: reject CUDA tensors up front
33 |     AT_ERROR("soft_nms is not implemented on GPU");
34 |   }
35 |   return soft_nms_cpu(dets, threshold, method, sigma, min_score);
36 | }
37 | 
38 | std::vector<std::vector<int> > nms_match(const at::Tensor& dets, const float threshold) {
39 |   if (dets.device().is_cuda()) {  // CPU-only op; device() replaces the deprecated type()
40 |     AT_ERROR("nms_match is not implemented on GPU");
41 |   }
42 |   return nms_match_cpu(dets, threshold);
43 | }
44 | 
45 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
46 |   m.def("nms", &nms, "non-maximum suppression");
47 |   m.def("soft_nms", &soft_nms, "soft non-maximum suppression");
48 |   m.def("nms_match", &nms_match, "non-maximum suppression match");
49 | }
50 | 


--------------------------------------------------------------------------------
/configs/_base_/models/rpn_r50_caffe_c4.py:
--------------------------------------------------------------------------------
 1 | # model settings
 2 | model = dict(
 3 |     type='RPN',
 4 |     pretrained='open-mmlab://detectron2/resnet50_caffe',
 5 |     backbone=dict(
 6 |         type='ResNet',
 7 |         depth=50,
 8 |         num_stages=3,
 9 |         strides=(1, 2, 2),
10 |         dilations=(1, 1, 1),
11 |         out_indices=(2, ),
12 |         frozen_stages=1,
13 |         norm_cfg=dict(type='BN', requires_grad=False),
14 |         norm_eval=True,
15 |         style='caffe'),
16 |     neck=None,
17 |     rpn_head=dict(
18 |         type='RPNHead',
19 |         in_channels=1024,
20 |         feat_channels=1024,
21 |         anchor_generator=dict(
22 |             type='AnchorGenerator',
23 |             scales=[2, 4, 8, 16, 32],
24 |             ratios=[0.5, 1.0, 2.0],
25 |             strides=[16]),
26 |         bbox_coder=dict(
27 |             type='DeltaXYWHBBoxCoder',
28 |             target_means=[.0, .0, .0, .0],
29 |             target_stds=[1.0, 1.0, 1.0, 1.0]),
30 |         loss_cls=dict(
31 |             type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
32 |         loss_bbox=dict(type='L1Loss', loss_weight=1.0)))
33 | # model training and testing settings
34 | train_cfg = dict(
35 |     rpn=dict(
36 |         assigner=dict(
37 |             type='MaxIoUAssigner',
38 |             pos_iou_thr=0.7,
39 |             neg_iou_thr=0.3,
40 |             min_pos_iou=0.3,
41 |             ignore_iof_thr=-1),
42 |         sampler=dict(
43 |             type='RandomSampler',
44 |             num=256,
45 |             pos_fraction=0.5,
46 |             neg_pos_ub=-1,
47 |             add_gt_as_proposals=False),
48 |         allowed_border=0,
49 |         pos_weight=-1,
50 |         debug=False))
51 | test_cfg = dict(
52 |     rpn=dict(
53 |         nms_across_levels=False,
54 |         nms_pre=12000,
55 |         nms_post=2000,
56 |         max_num=2000,
57 |         nms_thr=0.7,
58 |         min_bbox_size=0))
59 | 


--------------------------------------------------------------------------------
/configs/_base_/models/retinanet_r50_fpn.py:
--------------------------------------------------------------------------------
 1 | # model settings
 2 | model = dict(
 3 |     type='RetinaNet',
 4 |     pretrained='torchvision://resnet50',
 5 |     backbone=dict(
 6 |         type='ResNet',
 7 |         depth=50,
 8 |         num_stages=4,
 9 |         out_indices=(0, 1, 2, 3),
10 |         frozen_stages=1,
11 |         norm_cfg=dict(type='BN', requires_grad=True),
12 |         norm_eval=True,
13 |         style='pytorch'),
14 |     neck=dict(
15 |         type='FPN',
16 |         in_channels=[256, 512, 1024, 2048],
17 |         out_channels=256,
18 |         start_level=1,
19 |         add_extra_convs='on_input',
20 |         num_outs=5),
21 |     bbox_head=dict(
22 |         type='RetinaHead',
23 |         num_classes=80,
24 |         in_channels=256,
25 |         stacked_convs=4,
26 |         feat_channels=256,
27 |         anchor_generator=dict(
28 |             type='AnchorGenerator',
29 |             octave_base_scale=4,
30 |             scales_per_octave=3,
31 |             ratios=[0.5, 1.0, 2.0],
32 |             strides=[8, 16, 32, 64, 128]),
33 |         bbox_coder=dict(
34 |             type='DeltaXYWHBBoxCoder',
35 |             target_means=[.0, .0, .0, .0],
36 |             target_stds=[1.0, 1.0, 1.0, 1.0]),
37 |         loss_cls=dict(
38 |             type='FocalLoss',
39 |             use_sigmoid=True,
40 |             gamma=2.0,
41 |             alpha=0.25,
42 |             loss_weight=1.0),
43 |         loss_bbox=dict(type='L1Loss', loss_weight=1.0)))
44 | # training and testing settings
45 | train_cfg = dict(
46 |     assigner=dict(
47 |         type='MaxIoUAssigner',
48 |         pos_iou_thr=0.5,
49 |         neg_iou_thr=0.4,
50 |         min_pos_iou=0,
51 |         ignore_iof_thr=-1),
52 |     allowed_border=-1,
53 |     pos_weight=-1,
54 |     debug=False)
55 | test_cfg = dict(
56 |     nms_pre=1000,
57 |     min_bbox_size=0,
58 |     score_thr=0.05,
59 |     nms=dict(type='nms', iou_thr=0.5),
60 |     max_per_img=100)
61 | 


--------------------------------------------------------------------------------
/configs/_base_/models/rpn_r50_fpn.py:
--------------------------------------------------------------------------------
 1 | # model settings
 2 | model = dict(
 3 |     type='RPN',
 4 |     pretrained='torchvision://resnet50',
 5 |     backbone=dict(
 6 |         type='ResNet',
 7 |         depth=50,
 8 |         num_stages=4,
 9 |         out_indices=(0, 1, 2, 3),
10 |         frozen_stages=1,
11 |         norm_cfg=dict(type='BN', requires_grad=True),
12 |         norm_eval=True,
13 |         style='pytorch'),
14 |     neck=dict(
15 |         type='FPN',
16 |         in_channels=[256, 512, 1024, 2048],
17 |         out_channels=256,
18 |         num_outs=5),
19 |     rpn_head=dict(
20 |         type='RPNHead',
21 |         in_channels=256,
22 |         feat_channels=256,
23 |         anchor_generator=dict(
24 |             type='AnchorGenerator',
25 |             scales=[8],
26 |             ratios=[0.5, 1.0, 2.0],
27 |             strides=[4, 8, 16, 32, 64]),
28 |         bbox_coder=dict(
29 |             type='DeltaXYWHBBoxCoder',
30 |             target_means=[.0, .0, .0, .0],
31 |             target_stds=[1.0, 1.0, 1.0, 1.0]),
32 |         loss_cls=dict(
33 |             type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
34 |         loss_bbox=dict(type='L1Loss', loss_weight=1.0)))
35 | # model training and testing settings
36 | train_cfg = dict(
37 |     rpn=dict(
38 |         assigner=dict(
39 |             type='MaxIoUAssigner',
40 |             pos_iou_thr=0.7,
41 |             neg_iou_thr=0.3,
42 |             min_pos_iou=0.3,
43 |             ignore_iof_thr=-1),
44 |         sampler=dict(
45 |             type='RandomSampler',
46 |             num=256,
47 |             pos_fraction=0.5,
48 |             neg_pos_ub=-1,
49 |             add_gt_as_proposals=False),
50 |         allowed_border=0,
51 |         pos_weight=-1,
52 |         debug=False))
53 | test_cfg = dict(
54 |     rpn=dict(
55 |         nms_across_levels=False,
56 |         nms_pre=2000,
57 |         nms_post=1000,
58 |         max_num=1000,
59 |         nms_thr=0.7,
60 |         min_bbox_size=0))
61 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | *.egg-info/
 24 | .installed.cfg
 25 | *.egg
 26 | MANIFEST
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | .pytest_cache/
 49 | 
 50 | # Translations
 51 | *.mo
 52 | *.pot
 53 | 
 54 | # Django stuff:
 55 | *.log
 56 | local_settings.py
 57 | db.sqlite3
 58 | 
 59 | # Flask stuff:
 60 | instance/
 61 | .webassets-cache
 62 | 
 63 | # Scrapy stuff:
 64 | .scrapy
 65 | 
 66 | # Sphinx documentation
 67 | docs/_build/
 68 | 
 69 | # PyBuilder
 70 | target/
 71 | 
 72 | # Jupyter Notebook
 73 | .ipynb_checkpoints
 74 | 
 75 | # pyenv
 76 | .python-version
 77 | 
 78 | # celery beat schedule file
 79 | celerybeat-schedule
 80 | 
 81 | # SageMath parsed files
 82 | *.sage.py
 83 | 
 84 | # Environments
 85 | .env
 86 | .venv
 87 | env/
 88 | venv/
 89 | ENV/
 90 | env.bak/
 91 | venv.bak/
 92 | 
 93 | # Spyder project settings
 94 | .spyderproject
 95 | .spyproject
 96 | 
 97 | # Rope project settings
 98 | .ropeproject
 99 | 
100 | # mkdocs documentation
101 | /site
102 | 
103 | # mypy
104 | .mypy_cache/
105 | 
106 | mmdet/version.py
107 | data/
108 | .vscode
109 | .idea
110 | .DS_Store
111 | 
112 | # custom
113 | *.pkl
114 | *.pkl.json
115 | *.log.json
116 | work_dirs/
117 | 
118 | # Pytorch
119 | *.pth
120 | *.py~
121 | *.sh~
122 | 


--------------------------------------------------------------------------------
/mmdet/core/bbox/demodata.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import torch
 3 | 
 4 | 
 5 | def ensure_rng(rng=None):
 6 |     """
 7 |     Simple version of the ``kwarray.ensure_rng``
 8 | 
 9 |     Args:
10 |         rng (int | numpy.random.RandomState | None):
11 |             if None, then defaults to the global rng. Otherwise this can be an
12 |             integer or a RandomState class
13 |     Returns:
14 |         (numpy.random.RandomState) : rng -
15 |             a numpy random number generator
16 | 
17 |     References:
18 |         https://gitlab.kitware.com/computer-vision/kwarray/blob/master/kwarray/util_random.py#L270
19 |     """
20 | 
21 |     if rng is None:
22 |         rng = np.random.mtrand._rand
23 |     elif isinstance(rng, int):
24 |         rng = np.random.RandomState(rng)
25 |     else:
26 |         rng = rng
27 |     return rng
28 | 
29 | 
30 | def random_boxes(num=1, scale=1, rng=None):
31 |     """
32 |     Simple version of ``kwimage.Boxes.random``
33 | 
34 |     Returns:
35 |         Tensor: shape (n, 4) in x1, y1, x2, y2 format.
36 | 
37 |     References:
38 |         https://gitlab.kitware.com/computer-vision/kwimage/blob/master/kwimage/structs/boxes.py#L1390
39 | 
40 |     Example:
41 |         >>> num = 3
42 |         >>> scale = 512
43 |         >>> rng = 0
44 |         >>> boxes = random_boxes(num, scale, rng)
45 |         >>> print(boxes)
46 |         tensor([[280.9925, 278.9802, 308.6148, 366.1769],
47 |                 [216.9113, 330.6978, 224.0446, 456.5878],
48 |                 [405.3632, 196.3221, 493.3953, 270.7942]])
49 |     """
50 |     rng = ensure_rng(rng)
51 |     # Draw four random float32 values per box.
52 |     tlbr = rng.rand(num, 4).astype(np.float32)
53 |     # Order each coordinate pair so the first corner holds the smaller value.
54 |     tl_x = np.minimum(tlbr[:, 0], tlbr[:, 2])
55 |     tl_y = np.minimum(tlbr[:, 1], tlbr[:, 3])
56 |     br_x = np.maximum(tlbr[:, 0], tlbr[:, 2])
57 |     br_y = np.maximum(tlbr[:, 1], tlbr[:, 3])
58 |     # Write the ordered corners back in place, multiplied by ``scale``.
59 |     tlbr[:, 0] = tl_x * scale
60 |     tlbr[:, 1] = tl_y * scale
61 |     tlbr[:, 2] = br_x * scale
62 |     tlbr[:, 3] = br_y * scale
63 |     # Hand the numpy array over to torch.
64 |     boxes = torch.from_numpy(tlbr)
65 |     return boxes
66 | 


--------------------------------------------------------------------------------
/mmdet/ops/carafe/src/carafe_naive_ext.cpp:
--------------------------------------------------------------------------------
 1 | #include <ATen/ATen.h>
 2 | #include <torch/torch.h>
 3 | 
 4 | #include <cmath>
 5 | #include <vector>
 6 | 
 7 | #ifdef WITH_CUDA
 8 | int carafe_naive_forward_cuda(at::Tensor features, at::Tensor masks,
 9 |                               int kernel_size, int group_size, int scale_factor,
10 |                               at::Tensor output);
11 | 
12 | int carafe_naive_backward_cuda(at::Tensor top_grad, at::Tensor features,
13 |                                at::Tensor masks, int kernel_size,
14 |                                int group_size, int scale_factor,
15 |                                at::Tensor bottom_grad, at::Tensor mask_grad);
16 | #endif
17 | 
18 | int carafe_naive_forward(at::Tensor features, at::Tensor masks,
19 |                          int kernel_size, int group_size, int scale_factor,
20 |                          at::Tensor output) {
21 |   if (features.device().is_cuda()) {  // dispatch on the device of `features`
22 | #ifdef WITH_CUDA
23 |     return carafe_naive_forward_cuda(features, masks, kernel_size,
24 |         group_size, scale_factor, output);
25 | #else
26 |     AT_ERROR("carafe naive is not compiled with GPU support");
27 | #endif
28 |   }
29 |   AT_ERROR("carafe naive is not implemented on CPU");  // reached for non-CUDA `features`
30 | }
31 | 
32 | int carafe_naive_backward(at::Tensor top_grad, at::Tensor features,
33 |                                at::Tensor masks, int kernel_size,
34 |                                int group_size, int scale_factor,
35 |                                at::Tensor bottom_grad, at::Tensor mask_grad) {
36 |   if (top_grad.device().is_cuda()) {  // dispatch on the device of `top_grad`
37 | #ifdef WITH_CUDA
38 |     return carafe_naive_backward_cuda(top_grad, features, masks, kernel_size,
39 |         group_size, scale_factor, bottom_grad, mask_grad);
40 | #else
41 |     AT_ERROR("carafe naive is not compiled with GPU support");
42 | #endif
43 |   }
44 |   AT_ERROR("carafe naive is not implemented on CPU");  // reached for non-CUDA `top_grad`
45 | 
46 | }
47 | 
48 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
49 |   m.def("forward", &carafe_naive_forward, "carafe_naive forward");
50 |   m.def("backward", &carafe_naive_backward, "carafe_naive backward");
51 | }
52 | 


--------------------------------------------------------------------------------
/configs/_base_/datasets/deepfashion.py:
--------------------------------------------------------------------------------
 1 | # dataset settings
 2 | dataset_type = 'DeepFashionDataset'
 3 | data_root = 'data/DeepFashion/In-shop/'
 4 | img_norm_cfg = dict(
 5 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 6 | train_pipeline = [
 7 |     dict(type='LoadImageFromFile'),
 8 |     dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
 9 |     dict(type='Resize', img_scale=(750, 1101), keep_ratio=True),
10 |     dict(type='RandomFlip', flip_ratio=0.5),
11 |     dict(type='Normalize', **img_norm_cfg),
12 |     dict(type='Pad', size_divisor=32),
13 |     dict(type='DefaultFormatBundle'),
14 |     dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
15 | ]
16 | test_pipeline = [
17 |     dict(type='LoadImageFromFile'),
18 |     dict(
19 |         type='MultiScaleFlipAug',
20 |         img_scale=(750, 1101),
21 |         flip=False,
22 |         transforms=[
23 |             dict(type='Resize', keep_ratio=True),
24 |             dict(type='RandomFlip'),
25 |             dict(type='Normalize', **img_norm_cfg),
26 |             dict(type='Pad', size_divisor=32),
27 |             dict(type='ImageToTensor', keys=['img']),
28 |             dict(type='Collect', keys=['img']),
29 |         ])
30 | ]
31 | data = dict(
32 |     imgs_per_gpu=2,
33 |     workers_per_gpu=1,
34 |     train=dict(
35 |         type=dataset_type,
36 |         ann_file=data_root + 'annotations/DeepFashion_segmentation_query.json',
37 |         img_prefix=data_root + 'Img/',
38 |         pipeline=train_pipeline,
39 |         data_root=data_root),
40 |     val=dict(
41 |         type=dataset_type,
42 |         ann_file=data_root + 'annotations/DeepFashion_segmentation_query.json',
43 |         img_prefix=data_root + 'Img/',
44 |         pipeline=test_pipeline,
45 |         data_root=data_root),
46 |     test=dict(
47 |         type=dataset_type,
48 |         ann_file=data_root +
49 |         'annotations/DeepFashion_segmentation_gallery.json',
50 |         img_prefix=data_root + 'Img/',
51 |         pipeline=test_pipeline,
52 |         data_root=data_root))
53 | evaluation = dict(interval=5, metric=['bbox', 'segm'])
54 | 


--------------------------------------------------------------------------------
/configs/_base_/datasets/voc0712.py:
--------------------------------------------------------------------------------
 1 | # dataset settings
 2 | dataset_type = 'VOCDataset'
 3 | data_root = 'data/VOCdevkit/'
 4 | img_norm_cfg = dict(
 5 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 6 | train_pipeline = [
 7 |     dict(type='LoadImageFromFile'),
 8 |     dict(type='LoadAnnotations', with_bbox=True),
 9 |     dict(type='Resize', img_scale=(1000, 600), keep_ratio=True),
10 |     dict(type='RandomFlip', flip_ratio=0.5),
11 |     dict(type='Normalize', **img_norm_cfg),
12 |     dict(type='Pad', size_divisor=32),
13 |     dict(type='DefaultFormatBundle'),
14 |     dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
15 | ]
16 | test_pipeline = [
17 |     dict(type='LoadImageFromFile'),
18 |     dict(
19 |         type='MultiScaleFlipAug',
20 |         img_scale=(1000, 600),
21 |         flip=False,
22 |         transforms=[
23 |             dict(type='Resize', keep_ratio=True),
24 |             dict(type='RandomFlip'),
25 |             dict(type='Normalize', **img_norm_cfg),
26 |             dict(type='Pad', size_divisor=32),
27 |             dict(type='ImageToTensor', keys=['img']),
28 |             dict(type='Collect', keys=['img']),
29 |         ])
30 | ]
31 | data = dict(
32 |     samples_per_gpu=2,
33 |     workers_per_gpu=2,
34 |     train=dict(
35 |         type='RepeatDataset',
36 |         times=3,
37 |         dataset=dict(
38 |             type=dataset_type,
39 |             ann_file=[
40 |                 data_root + 'VOC2007/ImageSets/Main/trainval.txt',
41 |                 data_root + 'VOC2012/ImageSets/Main/trainval.txt'
42 |             ],
43 |             img_prefix=[data_root + 'VOC2007/', data_root + 'VOC2012/'],
44 |             pipeline=train_pipeline)),
45 |     val=dict(
46 |         type=dataset_type,
47 |         ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
48 |         img_prefix=data_root + 'VOC2007/',
49 |         pipeline=test_pipeline),
50 |     test=dict(
51 |         type=dataset_type,
52 |         ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
53 |         img_prefix=data_root + 'VOC2007/',
54 |         pipeline=test_pipeline))
55 | evaluation = dict(interval=1, metric='mAP')
56 | 


--------------------------------------------------------------------------------
/configs/_base_/datasets/cityscapes_detection.py:
--------------------------------------------------------------------------------
 1 | dataset_type = 'CityscapesDataset'
 2 | data_root = 'data/cityscapes/'
 3 | img_norm_cfg = dict(
 4 |     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 5 | train_pipeline = [
 6 |     dict(type='LoadImageFromFile'),
 7 |     dict(type='LoadAnnotations', with_bbox=True),
 8 |     dict(
 9 |         type='Resize', img_scale=[(2048, 800), (2048, 1024)], keep_ratio=True),
10 |     dict(type='RandomFlip', flip_ratio=0.5),
11 |     dict(type='Normalize', **img_norm_cfg),
12 |     dict(type='Pad', size_divisor=32),
13 |     dict(type='DefaultFormatBundle'),
14 |     dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
15 | ]
16 | test_pipeline = [
17 |     dict(type='LoadImageFromFile'),
18 |     dict(
19 |         type='MultiScaleFlipAug',
20 |         img_scale=(2048, 1024),
21 |         flip=False,
22 |         transforms=[
23 |             dict(type='Resize', keep_ratio=True),
24 |             dict(type='RandomFlip'),
25 |             dict(type='Normalize', **img_norm_cfg),
26 |             dict(type='Pad', size_divisor=32),
27 |             dict(type='ImageToTensor', keys=['img']),
28 |             dict(type='Collect', keys=['img']),
29 |         ])
30 | ]
31 | data = dict(
32 |     samples_per_gpu=1,
33 |     workers_per_gpu=2,
34 |     train=dict(
35 |         type='RepeatDataset',
36 |         times=8,
37 |         dataset=dict(
38 |             type=dataset_type,
39 |             ann_file=data_root +
40 |             'annotations/instancesonly_filtered_gtFine_train.json',
41 |             img_prefix=data_root + 'leftImg8bit/train/',
42 |             pipeline=train_pipeline)),
43 |     val=dict(
44 |         type=dataset_type,
45 |         ann_file=data_root +
46 |         'annotations/instancesonly_filtered_gtFine_val.json',
47 |         img_prefix=data_root + 'leftImg8bit/val/',
48 |         pipeline=test_pipeline),
49 |     test=dict(
50 |         type=dataset_type,
51 |         ann_file=data_root +
52 |         'annotations/instancesonly_filtered_gtFine_test.json',
53 |         img_prefix=data_root + 'leftImg8bit/test/',
54 |         pipeline=test_pipeline))
55 | evaluation = dict(interval=1, metric='bbox')
56 | 


--------------------------------------------------------------------------------
/configs/_base_/models/fast_rcnn_r50_fpn.py:
--------------------------------------------------------------------------------
 1 | # model settings
 2 | model = dict(
 3 |     type='FastRCNN',
 4 |     pretrained='torchvision://resnet50',
 5 |     backbone=dict(
 6 |         type='ResNet',
 7 |         depth=50,
 8 |         num_stages=4,
 9 |         out_indices=(0, 1, 2, 3),
10 |         frozen_stages=1,
11 |         norm_cfg=dict(type='BN', requires_grad=True),
12 |         norm_eval=True,
13 |         style='pytorch'),
14 |     neck=dict(
15 |         type='FPN',
16 |         in_channels=[256, 512, 1024, 2048],
17 |         out_channels=256,
18 |         num_outs=5),
19 |     roi_head=dict(
20 |         type='StandardRoIHead',
21 |         bbox_roi_extractor=dict(
22 |             type='SingleRoIExtractor',
23 |             roi_layer=dict(type='RoIAlign', out_size=7, sample_num=0),
24 |             out_channels=256,
25 |             featmap_strides=[4, 8, 16, 32]),
26 |         bbox_head=dict(
27 |             type='Shared2FCBBoxHead',
28 |             in_channels=256,
29 |             fc_out_channels=1024,
30 |             roi_feat_size=7,
31 |             num_classes=80,
32 |             bbox_coder=dict(
33 |                 type='DeltaXYWHBBoxCoder',
34 |                 target_means=[0., 0., 0., 0.],
35 |                 target_stds=[0.1, 0.1, 0.2, 0.2]),
36 |             reg_class_agnostic=False,
37 |             loss_cls=dict(
38 |                 type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
39 |             loss_bbox=dict(type='L1Loss', loss_weight=1.0))))
40 | # model training and testing settings
41 | train_cfg = dict(
42 |     rcnn=dict(
43 |         assigner=dict(
44 |             type='MaxIoUAssigner',
45 |             pos_iou_thr=0.5,
46 |             neg_iou_thr=0.5,
47 |             min_pos_iou=0.5,
48 |             match_low_quality=False,
49 |             ignore_iof_thr=-1),
50 |         sampler=dict(
51 |             type='RandomSampler',
52 |             num=512,
53 |             pos_fraction=0.25,
54 |             neg_pos_ub=-1,
55 |             add_gt_as_proposals=True),
56 |         pos_weight=-1,
57 |         debug=False))
58 | test_cfg = dict(
59 |     rcnn=dict(
60 |         score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100))
61 | 


--------------------------------------------------------------------------------
/tools/browse_dataset.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import os
 3 | from pathlib import Path
 4 | 
 5 | import mmcv
 6 | from mmcv import Config
 7 | 
 8 | from mmdet.datasets.builder import build_dataset
 9 | 
10 | 
11 | def parse_args():
12 |     parser = argparse.ArgumentParser(description='Browse a dataset')
13 |     parser.add_argument('config', help='train config file path')
14 |     parser.add_argument(
15 |         '--skip-type',
16 |         type=str,
17 |         nargs='+',
18 |         default=['DefaultFormatBundle', 'Normalize', 'Collect'],
19 |         help='skip some useless pipeline')
20 |     parser.add_argument(
21 |         '--output-dir',
22 |         default=None,
23 |         type=str,
24 |         help='If there is no display interface, you can save it')
25 |     parser.add_argument('--not-show', default=False, action='store_true')
26 |     parser.add_argument(
27 |         '--show-interval',
28 |         type=int,
29 |         default=999,
30 |         help='the interval of show (ms)')
31 |     args = parser.parse_args()
32 |     return args
33 | 
34 | 
35 | def retrieve_data_cfg(config_path, skip_type):
36 |     cfg = Config.fromfile(config_path)
37 |     train_data_cfg = cfg.data.train
38 |     train_data_cfg['pipeline'] = [
39 |         x for x in train_data_cfg.pipeline if x['type'] not in skip_type
40 |     ]
41 |     # Only cfg.data.train is filtered; the whole config is returned.
42 |     return cfg
43 | 
44 | 
45 | def main():
46 |     args = parse_args()
47 |     cfg = retrieve_data_cfg(args.config, args.skip_type)
48 |     # Build the training split using the filtered pipeline.
49 |     dataset = build_dataset(cfg.data.train)
50 |     # Draw each sample's ground-truth boxes; save them if --output-dir is set.
51 |     progress_bar = mmcv.ProgressBar(len(dataset))
52 |     for item in dataset:
53 |         filename = os.path.join(args.output_dir,
54 |                                 Path(item['filename']).name
55 |                                 ) if args.output_dir is not None else None
56 |         mmcv.imshow_det_bboxes(
57 |             item['img'],
58 |             item['gt_bboxes'],
59 |             item['gt_labels'] - 1,  # NOTE(review): assumes 1-based labels - confirm
60 |             class_names=dataset.CLASSES,
61 |             show=not args.not_show,
62 |             out_file=filename,
63 |             wait_time=args.show_interval)
64 |         progress_bar.update()
65 | 
66 | 
67 | if __name__ == '__main__':
68 |     main()
69 | 


--------------------------------------------------------------------------------
/mmdet/models/losses/accuracy.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | 
 3 | 
 4 | def accuracy(pred, target, topk=1):
 5 |     """Calculate accuracy according to the prediction and target
 6 | 
 7 |     Args:
 8 |         pred (torch.Tensor): The model prediction.
 9 |         target (torch.Tensor): The target of each prediction
10 |         topk (int | tuple[int], optional): If the predictions in ``topk``
11 |             matches the target, the predictions will be regarded as
12 |             correct ones. Defaults to 1.
13 | 
14 |     Returns:
15 |         float | tuple[float]: If the input ``topk`` is a single integer,
16 |             the function will return a single float as accuracy. If
17 |             ``topk`` is a tuple containing multiple integers, the
18 |             function will return a tuple containing accuracies of
19 |             each ``topk`` number.
20 |     """
21 |     assert isinstance(topk, (int, tuple))
22 |     if isinstance(topk, int):
23 |         topk = (topk, )
24 |         return_single = True
25 |     else:
26 |         return_single = False
27 | 
28 |     maxk = max(topk)
29 |     _, pred_label = pred.topk(maxk, dim=1)
30 |     pred_label = pred_label.t()
31 |     correct = pred_label.eq(target.view(1, -1).expand_as(pred_label))
32 | 
33 |     res = []
34 |     for k in topk:
35 |         correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)
36 |         res.append(correct_k.mul_(100.0 / pred.size(0)))
37 |     return res[0] if return_single else res
38 | 
39 | 
40 | class Accuracy(nn.Module):
41 |     """Module wrapper that applies ``accuracy`` with a fixed ``topk``."""
42 |     def __init__(self, topk=(1, )):
43 |         """Module to calculate the accuracy
44 | 
45 |         Args:
46 |             topk (tuple, optional): The criterion used to calculate the
47 |                 accuracy. Defaults to (1,).
48 |         """
49 |         super().__init__()
50 |         self.topk = topk
51 | 
52 |     def forward(self, pred, target):
53 |         """Forward function to calculate accuracy
54 | 
55 |         Args:
56 |             pred (torch.Tensor): Prediction of models.
57 |             target (torch.Tensor): Target for each prediction.
58 | 
59 |         Returns:
60 |             The result of ``accuracy(pred, target, self.topk)``.
61 |         """
62 |         return accuracy(pred, target, self.topk)
63 | 


--------------------------------------------------------------------------------
/mmdet/ops/masked_conv/src/masked_conv2d_ext.cpp:
--------------------------------------------------------------------------------
 1 | #include <torch/extension.h>
 2 | 
 3 | #include <cmath>
 4 | #include <vector>
 5 | 
 6 | #ifdef WITH_CUDA
 7 | int masked_im2col_forward_cuda(const at::Tensor im, const at::Tensor mask_h_idx,
 8 |                                const at::Tensor mask_w_idx, const int kernel_h,
 9 |                                const int kernel_w, const int pad_h,
10 |                                const int pad_w, at::Tensor col);
11 | 
12 | int masked_col2im_forward_cuda(const at::Tensor col,
13 |                                const at::Tensor mask_h_idx,
14 |                                const at::Tensor mask_w_idx, int height,
15 |                                int width, int channels, at::Tensor im);
16 | #endif
17 | 
18 | int masked_im2col_forward(const at::Tensor im, const at::Tensor mask_h_idx,
19 |                                const at::Tensor mask_w_idx, const int kernel_h,
20 |                                const int kernel_w, const int pad_h,
21 |                                const int pad_w, at::Tensor col) {
22 |   if (im.device().is_cuda()) {
23 | #ifdef WITH_CUDA
24 |     return masked_im2col_forward_cuda(im, mask_h_idx, mask_w_idx, kernel_h,
25 |       kernel_w, pad_h, pad_w, col);
26 | #else
27 |     AT_ERROR("masked_im2col is not compiled with GPU support");
28 | #endif
29 |   }
30 |   AT_ERROR("masked_im2col is not implemented on CPU");
31 | }
32 | 
33 | int masked_col2im_forward(const at::Tensor col,
34 |                                const at::Tensor mask_h_idx,
35 |                                const at::Tensor mask_w_idx, int height,
36 |                                int width, int channels, at::Tensor im) {
37 |   if (col.device().is_cuda()) {
38 | #ifdef WITH_CUDA
39 |     return masked_col2im_forward_cuda(col, mask_h_idx, mask_w_idx, height,
40 |       width, channels, im);
41 | #else
42 |     AT_ERROR("masked_col2im is not compiled with GPU support");
43 | #endif
44 |   }
45 |   AT_ERROR("masked_col2im is not implemented on CPU");
46 | }
47 | 
48 | 
// Python bindings: expose the two device dispatchers defined above under
// their C++ names.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  // Dispatcher for the masked im2col forward pass.
  m.def("masked_im2col_forward", &masked_im2col_forward,
        "masked_im2col forward");
  // Dispatcher for the masked col2im forward pass.
  m.def("masked_col2im_forward", &masked_col2im_forward,
        "masked_col2im forward");
}
55 | 


--------------------------------------------------------------------------------
/mmdet/utils/collect_env.py:
--------------------------------------------------------------------------------
 1 | import os.path as osp
 2 | import subprocess
 3 | import sys
 4 | from collections import defaultdict
 5 | 
 6 | import cv2
 7 | import mmcv
 8 | import torch
 9 | import torchvision
10 | 
11 | import mmdet
12 | 
13 | 
def collect_env():
    """Collect the information of the running environments.

    Returns:
        dict: Maps a human-readable label to its value. It covers the
            platform and Python version, CUDA availability (plus
            ``CUDA_HOME``, the NVCC version line and GPU names when CUDA is
            available), the GCC version line, and the versions of PyTorch,
            TorchVision, OpenCV, MMCV and MMDetection together with the
            compiler / CUDA compiler MMDetection reports for its ops.
    """
    not_available = 'Not Available'

    env_info = {}
    env_info['sys.platform'] = sys.platform
    env_info['Python'] = sys.version.replace('\n', '')

    cuda_available = torch.cuda.is_available()
    env_info['CUDA available'] = cuda_available

    if cuda_available:
        from torch.utils.cpp_extension import CUDA_HOME
        env_info['CUDA_HOME'] = CUDA_HOME

        if CUDA_HOME is not None and osp.isdir(CUDA_HOME):
            try:
                nvcc = osp.join(CUDA_HOME, 'bin/nvcc')
                nvcc = subprocess.check_output(
                    f'"{nvcc}" -V | tail -n1', shell=True)
                nvcc = nvcc.decode('utf-8').strip()
            except subprocess.SubprocessError:
                nvcc = not_available
            # The pipeline's exit status is that of ``tail``, so a missing
            # nvcc shows up as empty output rather than as an exception.
            env_info['NVCC'] = nvcc or not_available

        # Group device indices by GPU name so identical cards share a line.
        devices = defaultdict(list)
        for k in range(torch.cuda.device_count()):
            devices[torch.cuda.get_device_name(k)].append(str(k))
        for name, devids in devices.items():
            env_info['GPU ' + ','.join(devids)] = name

    # A missing compiler (or a platform without ``head``) must not abort the
    # whole report, so treat failures the same way as the NVCC query above.
    try:
        gcc = subprocess.check_output('gcc --version | head -n1', shell=True)
        gcc = gcc.decode('utf-8').strip()
    except subprocess.SubprocessError:
        gcc = not_available
    env_info['GCC'] = gcc or not_available

    env_info['PyTorch'] = torch.__version__
    env_info['PyTorch compiling details'] = torch.__config__.show()

    env_info['TorchVision'] = torchvision.__version__

    env_info['OpenCV'] = cv2.__version__

    env_info['MMCV'] = mmcv.__version__
    env_info['MMDetection'] = mmdet.__version__
    # Imported lazily, as in the original code.
    from mmdet.ops import get_compiler_version, get_compiling_cuda_version
    env_info['MMDetection Compiler'] = get_compiler_version()
    env_info['MMDetection CUDA Compiler'] = get_compiling_cuda_version()
    return env_info
60 | 
61 | 
# When run as a script, print every collected entry as "label: value".
if __name__ == '__main__':
    for name, val in collect_env().items():
        print(f'{name}: {val}')
65 | 


--------------------------------------------------------------------------------
/configs/_base_/datasets/wider_face.py:
--------------------------------------------------------------------------------
# dataset settings
dataset_type = 'WIDERFaceDataset'
data_root = 'data/WIDERFace/'
# std is all ones, so normalization only subtracts the per-channel mean.
img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True)
# Training pipeline: load -> photometric / geometric augmentation ->
# fixed 300x300 resize -> normalize -> random flip -> format and collect.
train_pipeline = [
    dict(type='LoadImageFromFile', to_float32=True),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='PhotoMetricDistortion',
        brightness_delta=32,
        contrast_range=(0.5, 1.5),
        saturation_range=(0.5, 1.5),
        hue_delta=18),
    # Expand reuses the normalization mean / channel order defined above.
    dict(
        type='Expand',
        mean=img_norm_cfg['mean'],
        to_rgb=img_norm_cfg['to_rgb'],
        ratio_range=(1, 4)),
    dict(
        type='MinIoURandomCrop',
        min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),
        min_crop_size=0.3),
    # keep_ratio=False: images are resized to exactly 300x300.
    dict(type='Resize', img_scale=(300, 300), keep_ratio=False),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
# Test pipeline: a single 300x300 scale with flipping disabled.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(300, 300),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=False),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=60,
    workers_per_gpu=2,
    # The training set is wrapped in RepeatDataset with times=2.
    train=dict(
        type='RepeatDataset',
        times=2,
        dataset=dict(
            type=dataset_type,
            ann_file=data_root + 'train.txt',
            img_prefix=data_root + 'WIDER_train/',
            min_size=17,
            pipeline=train_pipeline)),
    # val and test both read val.txt and the WIDER_val images.
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'val.txt',
        img_prefix=data_root + 'WIDER_val/',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'val.txt',
        img_prefix=data_root + 'WIDER_val/',
        pipeline=test_pipeline))
64 | 


--------------------------------------------------------------------------------
/mmdet/core/utils/dist_utils.py:
--------------------------------------------------------------------------------
 1 | import warnings
 2 | from collections import OrderedDict
 3 | 
 4 | import torch.distributed as dist
 5 | from mmcv.runner import OptimizerHook
 6 | from torch._utils import (_flatten_dense_tensors, _take_tensors,
 7 |                           _unflatten_dense_tensors)
 8 | 
 9 | 
10 | def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1):
11 |     if bucket_size_mb > 0:
12 |         bucket_size_bytes = bucket_size_mb * 1024 * 1024
13 |         buckets = _take_tensors(tensors, bucket_size_bytes)
14 |     else:
15 |         buckets = OrderedDict()
16 |         for tensor in tensors:
17 |             tp = tensor.type()
18 |             if tp not in buckets:
19 |                 buckets[tp] = []
20 |             buckets[tp].append(tensor)
21 |         buckets = buckets.values()
22 | 
23 |     for bucket in buckets:
24 |         flat_tensors = _flatten_dense_tensors(bucket)
25 |         dist.all_reduce(flat_tensors)
26 |         flat_tensors.div_(world_size)
27 |         for tensor, synced in zip(
28 |                 bucket, _unflatten_dense_tensors(flat_tensors, bucket)):
29 |             tensor.copy_(synced)
30 | 
31 | 
def allreduce_grads(params, coalesce=True, bucket_size_mb=-1):
    """Average the gradients of ``params`` across all processes.

    Only parameters that require grad and currently hold a gradient take
    part.

    Args:
        params (list[torch.Parameters]): List of parameters of a model
        coalesce (bool, optional): Whether to all-reduce the gradients in
            flattened buckets instead of one tensor at a time.
            Defaults to True.
        bucket_size_mb (int, optional): Size of bucket, the unit is MB.
            Defaults to -1.
    """
    grads = []
    for p in params:
        if p.requires_grad and p.grad is not None:
            grads.append(p.grad.data)

    world_size = dist.get_world_size()
    if not coalesce:
        # Pre-divide each gradient so the summed result is the average.
        for grad in grads:
            dist.all_reduce(grad.div_(world_size))
        return
    _allreduce_coalesced(grads, world_size, bucket_size_mb)
52 | 
53 | 
class DistOptimizerHook(OptimizerHook):
    """Deprecated optimizer hook for distributed training.

    Behaves exactly like ``OptimizerHook``; constructing it only adds a
    deprecation warning.
    """

    def __init__(self, *args, **kwargs):
        # The adjacent literals are concatenated, so the separating space has
        # to be part of the first one.
        warnings.warn('"DistOptimizerHook" is deprecated, please switch to '
                      '"mmcv.runner.OptimizerHook".')
        super().__init__(*args, **kwargs)
61 | 


--------------------------------------------------------------------------------
/mmdet/models/dense_heads/base_dense_head.py:
--------------------------------------------------------------------------------
 1 | from abc import ABCMeta, abstractmethod
 2 | 
 3 | import torch.nn as nn
 4 | 
 5 | 
 6 | class BaseDenseHead(nn.Module, metaclass=ABCMeta):
 7 |     """Base class for DenseHeads"""
 8 | 
 9 |     def __init__(self):
10 |         super(BaseDenseHead, self).__init__()
11 | 
12 |     @abstractmethod
13 |     def loss(self, **kwargs):
14 |         """Compute losses of the head."""
15 |         pass
16 | 
17 |     @abstractmethod
18 |     def get_bboxes(self, **kwargs):
19 |         """Transform network output for a batch into bbox predictions."""
20 |         pass
21 | 
22 |     def forward_train(self,
23 |                       x,
24 |                       img_metas,
25 |                       gt_bboxes,
26 |                       gt_labels=None,
27 |                       gt_bboxes_ignore=None,
28 |                       proposal_cfg=None,
29 |                       **kwargs):
30 |         """
31 |         Args:
32 |             x (list[Tensor]): Features from FPN.
33 |             img_metas (list[dict]): Meta information of each image, e.g.,
34 |                 image size, scaling factor, etc.
35 |             gt_bboxes (Tensor): Ground truth bboxes of the image,
36 |                 shape (num_gts, 4).
37 |             gt_labels (Tensor): Ground truth labels of each box,
38 |                 shape (num_gts,).
39 |             gt_bboxes_ignore (Tensor): Ground truth bboxes to be
40 |                 ignored, shape (num_ignored_gts, 4).
41 |             proposal_cfg (mmcv.Config): Test / postprocessing configuration,
42 |                 if None, test_cfg would be used
43 | 
44 |         Returns:
45 |             tuple:
46 |                 losses: (dict[str, Tensor]): A dictionary of loss components.
47 |                 proposal_list (list[Tensor]): Proposals of each image.
48 |         """
49 |         outs = self(x)
50 |         if gt_labels is None:
51 |             loss_inputs = outs + (gt_bboxes, img_metas)
52 |         else:
53 |             loss_inputs = outs + (gt_bboxes, gt_labels, img_metas)
54 |         losses = self.loss(*loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
55 |         if proposal_cfg is None:
56 |             return losses
57 |         else:
58 |             proposal_list = self.get_bboxes(*outs, img_metas, cfg=proposal_cfg)
59 |             return losses, proposal_list
60 | 


--------------------------------------------------------------------------------
/mmdet/core/post_processing/bbox_nms.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | from mmdet.ops.nms import batched_nms
 4 | 
 5 | 
 6 | def multiclass_nms(multi_bboxes,
 7 |                    multi_scores,
 8 |                    score_thr,
 9 |                    nms_cfg,
10 |                    max_num=-1,
11 |                    score_factors=None):
12 |     """NMS for multi-class bboxes.
13 | 
14 |     Args:
15 |         multi_bboxes (Tensor): shape (n, #class*4) or (n, 4)
16 |         multi_scores (Tensor): shape (n, #class), where the last column
17 |             contains scores of the background class, but this will be ignored.
18 |         score_thr (float): bbox threshold, bboxes with scores lower than it
19 |             will not be considered.
20 |         nms_thr (float): NMS IoU threshold
21 |         max_num (int): if there are more than max_num bboxes after NMS,
22 |             only top max_num will be kept.
23 |         score_factors (Tensor): The factors multiplied to scores before
24 |             applying NMS
25 | 
26 |     Returns:
27 |         tuple: (bboxes, labels), tensors of shape (k, 5) and (k, 1). Labels
28 |             are 0-based.
29 |     """
30 |     num_classes = multi_scores.size(1) - 1
31 |     # exclude background category
32 |     if multi_bboxes.shape[1] > 4:
33 |         bboxes = multi_bboxes.view(multi_scores.size(0), -1, 4)
34 |     else:
35 |         bboxes = multi_bboxes[:, None].expand(-1, num_classes, 4)
36 |     scores = multi_scores[:, :-1]
37 | 
38 |     # filter out boxes with low scores
39 |     valid_mask = scores > score_thr
40 |     #valid_mask = scores > 0.5
41 |     #print(valid_mask.shape)
42 |     bboxes = bboxes[valid_mask]
43 |     if score_factors is not None:
44 |         scores = scores * score_factors[:, None]
45 |     scores = scores[valid_mask]
46 |     rois_inds = valid_mask.nonzero()[:, 0]
47 |     labels = valid_mask.nonzero()[:, 1]
48 | 
49 |     if bboxes.numel() == 0:
50 |         bboxes = multi_bboxes.new_zeros((0, 5))
51 |         labels = multi_bboxes.new_zeros((0, ), dtype=torch.long)
52 |         rois_inds = multi_bboxes.new_zeros((0, ), dtype=torch.long)
53 |         return bboxes, labels, rois_inds
54 | 
55 |     dets, keep = batched_nms(bboxes, scores, labels, nms_cfg)
56 | 
57 |     if max_num > 0:
58 |         dets = dets[:max_num]
59 |         keep = keep[:max_num]
60 | 
61 |     return dets, labels[keep], rois_inds[keep]
62 | 


--------------------------------------------------------------------------------
/mmdet/models/dense_heads/rpn_test_mixin.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | from mmdet.core import merge_aug_proposals
 4 | 
 5 | if sys.version_info >= (3, 7):
 6 |     from mmdet.utils.contextmanagers import completed
 7 | 
 8 | 
 9 | class RPNTestMixin(object):
10 |     """Test methods of RPN."""
11 | 
12 |     if sys.version_info >= (3, 7):
13 | 
14 |         async def async_simple_test_rpn(self, x, img_metas):
15 |             sleep_interval = self.rpn_head.test_cfg.pop(
16 |                 'async_sleep_interval', 0.025)
17 |             async with completed(
18 |                     __name__, 'rpn_head_forward',
19 |                     sleep_interval=sleep_interval):
20 |                 rpn_outs = self(x)
21 | 
22 |             proposal_list = self.get_bboxes(*rpn_outs, img_metas)
23 |             return proposal_list
24 | 
25 |     def simple_test_rpn(self, x, img_metas):
26 |         """Test without augmentation.
27 | 
28 |         Args:
29 |             x (tuple[Tensor]): Features from the upstream network, each is
30 |                 a 4D-tensor.
31 |             img_metas (list[dict]): Meta info of each image.
32 | 
33 |         Returns:
34 |             list[Tensor]: Proposals of each image.
35 |         """
36 |         rpn_outs = self(x)
37 |         proposal_list = self.get_bboxes(*rpn_outs, img_metas)
38 |         return proposal_list
39 | 
40 |     def aug_test_rpn(self, feats, img_metas):
41 |         samples_per_gpu = len(img_metas[0])
42 |         aug_proposals = [[] for _ in range(samples_per_gpu)]
43 |         for x, img_meta in zip(feats, img_metas):
44 |             proposal_list = self.simple_test_rpn(x, img_meta)
45 |             for i, proposals in enumerate(proposal_list):
46 |                 aug_proposals[i].append(proposals)
47 |         # reorganize the order of 'img_metas' to match the dimensions
48 |         # of 'aug_proposals'
49 |         aug_img_metas = []
50 |         for i in range(samples_per_gpu):
51 |             aug_img_meta = []
52 |             for j in range(len(img_metas)):
53 |                 aug_img_meta.append(img_metas[j][i])
54 |             aug_img_metas.append(aug_img_meta)
55 |         # after merging, proposals will be rescaled to the original image size
56 |         merged_proposals = [
57 |             merge_aug_proposals(proposals, aug_img_meta, self.test_cfg)
58 |             for proposals, aug_img_meta in zip(aug_proposals, aug_img_metas)
59 |         ]
60 |         return merged_proposals
61 | 


--------------------------------------------------------------------------------
/mmdet/ops/carafe/grad_check.py:
--------------------------------------------------------------------------------
 1 | import os.path as osp
 2 | import sys
 3 | 
 4 | import mmcv
 5 | import torch
 6 | from torch.autograd import gradcheck
 7 | 
 8 | sys.path.append(osp.abspath(osp.join(__file__, '../../')))
 9 | from mmdet.ops.carafe import CARAFE, CARAFENaive  # noqa: E402, isort:skip
10 | from mmdet.ops.carafe import carafe, carafe_naive  # noqa: E402, isort:skip
11 | 
# Small double-precision inputs on cuda:0 for the numerical gradient check.
# NOTE(review): the 100 mask channels and 6x6 mask size presumably correspond
# to kernel 5 * 5 * group 4 and 3 * scale 2 for CARAFE(5, 4, 2) - confirm.
feat = torch.randn(2, 64, 3, 3, requires_grad=True, device='cuda:0').double()
mask = torch.randn(
    2, 100, 6, 6, requires_grad=True, device='cuda:0').sigmoid().double()

print('Gradcheck for carafe...')
test = gradcheck(CARAFE(5, 4, 2), (feat, mask), atol=1e-4, eps=1e-4)
print(test)

print('Gradcheck for carafe naive...')
test = gradcheck(CARAFENaive(5, 4, 2), (feat, mask), atol=1e-4, eps=1e-4)
print(test)

# Larger single-precision inputs for the timing comparison below.
feat = torch.randn(
    2, 1024, 100, 100, requires_grad=True, device='cuda:0').float()
mask = torch.randn(
    2, 25, 200, 200, requires_grad=True, device='cuda:0').sigmoid().float()
loop_num = 500

# Time carafe: accumulate forward and backward wall time separately.
time_forward = 0
time_backward = 0
bar = mmcv.ProgressBar(loop_num)
timer = mmcv.Timer()
for i in range(loop_num):
    x = carafe(feat.clone(), mask.clone(), 5, 1, 2)
    # Synchronize before reading the timer so queued GPU work is included.
    torch.cuda.synchronize()
    time_forward += timer.since_last_check()
    x.sum().backward(retain_graph=True)
    torch.cuda.synchronize()
    time_backward += timer.since_last_check()
    bar.update()
# Convert the accumulated times to ms per iteration (a 1e-3 offset is added
# to each total before scaling).
forward_speed = (time_forward + 1e-3) * 1e3 / loop_num
backward_speed = (time_backward + 1e-3) * 1e3 / loop_num
print(f'\nCARAFE time forward: {forward_speed} '
      f'ms/iter | time backward: {backward_speed} ms/iter')

# Same measurement for carafe_naive.
time_naive_forward = 0
time_naive_backward = 0
bar = mmcv.ProgressBar(loop_num)
timer = mmcv.Timer()
for i in range(loop_num):
    x = carafe_naive(feat.clone(), mask.clone(), 5, 1, 2)
    torch.cuda.synchronize()
    time_naive_forward += timer.since_last_check()
    x.sum().backward(retain_graph=True)
    torch.cuda.synchronize()
    time_naive_backward += timer.since_last_check()
    bar.update()
forward_speed = (time_naive_forward + 1e-3) * 1e3 / loop_num
backward_speed = (time_naive_backward + 1e-3) * 1e3 / loop_num
print('\nCARAFE naive time forward: '
      f'{forward_speed} ms/iter | time backward: {backward_speed} ms/iter')
63 | 


--------------------------------------------------------------------------------
/mmdet/core/utils/misc.py:
--------------------------------------------------------------------------------
 1 | from functools import partial
 2 | 
 3 | import mmcv
 4 | import numpy as np
 5 | import torch
 6 | from six.moves import map, zip
 7 | 
 8 | 
 9 | def tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True):
10 |     """Convert tensor to images
11 | 
12 |     Args:
13 |         tensor (torch.Tensor): Tensor that contains multiple images
14 |         mean (tuple[float], optional): Mean of images. Defaults to (0, 0, 0).
15 |         std (tuple[float], optional): Standard deviation of images.
16 |             Defaults to (1, 1, 1).
17 |         to_rgb (bool, optional): Whether convert the images to RGB format.
18 |             Defaults to True.
19 | 
20 |     Returns:
21 |         list[np.ndarray]: A list that contains multiple images.
22 |     """
23 |     num_imgs = tensor.size(0)
24 |     mean = np.array(mean, dtype=np.float32)
25 |     std = np.array(std, dtype=np.float32)
26 |     imgs = []
27 |     for img_id in range(num_imgs):
28 |         img = tensor[img_id, ...].cpu().numpy().transpose(1, 2, 0)
29 |         img = mmcv.imdenormalize(
30 |             img, mean, std, to_bgr=to_rgb).astype(np.uint8)
31 |         imgs.append(np.ascontiguousarray(img))
32 |     return imgs
33 | 
34 | 
def multi_apply(func, *args, **kwargs):
    """Map ``func`` over parallel argument lists and transpose the results.

    ``func`` is called once per position with one element taken from each
    sequence in ``args`` (plus ``kwargs`` on every call). The k-th outputs
    of all calls are gathered into the k-th returned list.

    Args:
        func (Function): A function that will be applied to a list of
            arguments

    Returns:
        tuple(list): A tuple containing multiple list, each list contains
            a kind of returned results by the function
    """
    if kwargs:
        func = partial(func, **kwargs)
    per_call_outputs = map(func, *args)
    return tuple(list(column) for column in zip(*per_call_outputs))
55 | 
56 | 
def unmap(data, count, inds, fill=0):
    """Scatter a subset of items back into a ``fill``-initialized full set.

    Args:
        data (torch.Tensor): Values of the selected items.
        count (int): Size of the original set (first dimension of the
            result).
        inds (torch.Tensor): Per-item flags, cast to ``torch.bool``; rows
            where the flag is true receive ``data``.
        fill: Value used for the rows that are not selected. Defaults to 0.

    Returns:
        torch.Tensor: Tensor with first dimension ``count`` and the
            trailing dimensions of ``data``.
    """
    is_vector = data.dim() == 1
    full_shape = (count, ) if is_vector else (count, ) + data.size()[1:]
    restored = data.new_full(full_shape, fill)
    selected = inds.type(torch.bool)
    if is_vector:
        restored[selected] = data
    else:
        restored[selected, :] = data
    return restored
68 | 


--------------------------------------------------------------------------------
/tools/fuse_conv_bn.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | 
 3 | import torch
 4 | import torch.nn as nn
 5 | from mmcv.runner import save_checkpoint
 6 | 
 7 | from mmdet.apis import init_detector
 8 | 
 9 | 
def fuse_conv_bn(conv, bn):
    """Fold the running statistics of ``bn`` into ``conv``.

    The batch norm's running mean / variance and its affine parameters are
    merged into the convolution's weight and bias, so the conv alone
    reproduces ``bn(conv(x))`` as computed from the running statistics.

    Args:
        conv (nn.Conv2d): Convolution preceding the batch norm; its
            ``weight`` and ``bias`` are replaced in place.
        bn: Batch norm layer providing ``running_mean``, ``running_var``,
            ``eps``, ``weight`` and ``bias``.

    Returns:
        nn.Conv2d: The same ``conv`` object, now fused.
    """
    weight = conv.weight
    if conv.bias is not None:
        bias = conv.bias
    else:
        # A bias-free conv behaves like one with an all-zero bias.
        bias = torch.zeros_like(bn.running_mean)

    # Per-output-channel scale applied by the batch norm.
    scale = bn.weight / torch.sqrt(bn.running_var + bn.eps)
    scaled_weight = weight * scale.reshape([conv.out_channels, 1, 1, 1])
    shifted_bias = (bias - bn.running_mean) * scale + bn.bias

    conv.weight = nn.Parameter(scaled_weight)
    conv.bias = nn.Parameter(shifted_bias)
    return conv
25 | 
26 | 
def fuse_module(m):
    """Recursively fuse each Conv2d with the batch norm registered after it.

    Children are visited in registration order. A ``BatchNorm2d`` /
    ``SyncBatchNorm`` child is folded into the most recently seen
    ``Conv2d`` child and replaced by ``nn.Identity``; every other child is
    processed recursively.

    Args:
        m (nn.Module): Module to fuse in place.

    Returns:
        nn.Module: The same module ``m``.
    """
    pending_conv = None
    pending_name = None

    for child_name, child in m.named_children():
        if isinstance(child, nn.Conv2d):
            pending_conv, pending_name = child, child_name
            continue
        if not isinstance(child, (nn.BatchNorm2d, nn.SyncBatchNorm)):
            fuse_module(child)
            continue
        # only fuse BN that is after Conv
        if pending_conv is None:
            continue
        m._modules[pending_name] = fuse_conv_bn(pending_conv, child)
        # To reduce changes, set BN as Identity instead of deleting it.
        m._modules[child_name] = nn.Identity()
        pending_conv = None
    return m
46 | 
47 | 
def parse_args():
    """Parse the command line of the conv/BN fusing tool.

    Returns:
        argparse.Namespace: Holds the positional arguments ``config``,
            ``checkpoint`` and ``out``.
    """
    parser = argparse.ArgumentParser(
        description='fuse Conv and BN layers in a model')
    positionals = (
        ('config', 'config file path'),
        ('checkpoint', 'checkpoint file path'),
        ('out', 'output path of the converted model'),
    )
    for arg_name, help_text in positionals:
        parser.add_argument(arg_name, help=help_text)
    return parser.parse_args()
56 | 
57 | 
def main():
    """Build the detector, fuse its conv/BN layers and save the result."""
    cli = parse_args()
    # build the model from a config file and a checkpoint file
    detector = init_detector(cli.config, cli.checkpoint)
    # fuse conv and bn layers of the model, then write the checkpoint
    save_checkpoint(fuse_module(detector), cli.out)
65 | 
66 | 
# Run the conversion only when executed as a script.
if __name__ == '__main__':
    main()
69 | 


--------------------------------------------------------------------------------
/mmdet/core/bbox/samplers/instance_balanced_pos_sampler.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import torch
 3 | 
 4 | from ..builder import BBOX_SAMPLERS
 5 | from .random_sampler import RandomSampler
 6 | 
 7 | 
 8 | @BBOX_SAMPLERS.register_module()
 9 | class InstanceBalancedPosSampler(RandomSampler):
10 |     """Instance balanced sampler that samples equal number of positive samples
11 |     for each instance."""
12 | 
13 |     def _sample_pos(self, assign_result, num_expected, **kwargs):
14 |         """Sample positive boxes
15 | 
16 |         Args:
17 |             assign_result (:obj:`AssignResult`): The assigned results of boxes.
18 |             num_expected (int): The number of expected positive samples
19 | 
20 |         Returns:
21 |             Tensor or ndarray: sampled indices.
22 |         """
23 |         pos_inds = torch.nonzero(assign_result.gt_inds > 0, as_tuple=False)
24 |         if pos_inds.numel() != 0:
25 |             pos_inds = pos_inds.squeeze(1)
26 |         if pos_inds.numel() <= num_expected:
27 |             return pos_inds
28 |         else:
29 |             unique_gt_inds = assign_result.gt_inds[pos_inds].unique()
30 |             num_gts = len(unique_gt_inds)
31 |             num_per_gt = int(round(num_expected / float(num_gts)) + 1)
32 |             sampled_inds = []
33 |             for i in unique_gt_inds:
34 |                 inds = torch.nonzero(
35 |                     assign_result.gt_inds == i.item(), as_tuple=False)
36 |                 if inds.numel() != 0:
37 |                     inds = inds.squeeze(1)
38 |                 else:
39 |                     continue
40 |                 if len(inds) > num_per_gt:
41 |                     inds = self.random_choice(inds, num_per_gt)
42 |                 sampled_inds.append(inds)
43 |             sampled_inds = torch.cat(sampled_inds)
44 |             if len(sampled_inds) < num_expected:
45 |                 num_extra = num_expected - len(sampled_inds)
46 |                 extra_inds = np.array(
47 |                     list(set(pos_inds.cpu()) - set(sampled_inds.cpu())))
48 |                 if len(extra_inds) > num_extra:
49 |                     extra_inds = self.random_choice(extra_inds, num_extra)
50 |                 extra_inds = torch.from_numpy(extra_inds).to(
51 |                     assign_result.gt_inds.device).long()
52 |                 sampled_inds = torch.cat([sampled_inds, extra_inds])
53 |             elif len(sampled_inds) > num_expected:
54 |                 sampled_inds = self.random_choice(sampled_inds, num_expected)
55 |             return sampled_inds
56 | 


--------------------------------------------------------------------------------
/mmdet/ops/carafe/src/carafe_ext.cpp:
--------------------------------------------------------------------------------
 1 | #include <ATen/ATen.h>
 2 | #include <torch/extension.h>
 3 | 
 4 | #include <cmath>
 5 | #include <vector>
 6 | 
 7 | #ifdef WITH_CUDA
 8 | int carafe_forward_cuda(at::Tensor features, at::Tensor rfeatures,
 9 |                         at::Tensor masks, at::Tensor rmasks, int kernel_size,
10 |                         int group_size, int scale_factor, at::Tensor routput,
11 |                         at::Tensor output);
12 | 
13 | int carafe_backward_cuda(at::Tensor top_grad, at::Tensor rfeatures,
14 |                          at::Tensor masks, int kernel_size, int group_size,
15 |                          int scale_factor, at::Tensor rtop_grad,
16 |                          at::Tensor rbottom_grad_hs, at::Tensor rbottom_grad,
17 |                          at::Tensor rmask_grad, at::Tensor bottom_grad,
18 |                          at::Tensor mask_grad);
19 | #endif
20 | 
21 | int carafe_forward(at::Tensor features, at::Tensor rfeatures,
22 |                    at::Tensor masks, at::Tensor rmasks, int kernel_size,
23 |                    int group_size, int scale_factor, at::Tensor routput,
24 |                    at::Tensor output) {
25 |   if (features.device().is_cuda()) {
26 | #ifdef WITH_CUDA
27 |     return carafe_forward_cuda(features, rfeatures, masks, rmasks, kernel_size,
28 |                                group_size, scale_factor, routput, output);
29 | #else
30 |     AT_ERROR("carafe is not compiled with GPU support");
31 | #endif
32 |   }
33 |   AT_ERROR("carafe is not implemented on CPU");
34 | }
35 | 
36 | int carafe_backward(at::Tensor top_grad, at::Tensor rfeatures,
37 |                     at::Tensor masks, int kernel_size, int group_size,
38 |                     int scale_factor, at::Tensor rtop_grad,
39 |                     at::Tensor rbottom_grad_hs, at::Tensor rbottom_grad,
40 |                     at::Tensor rmask_grad, at::Tensor bottom_grad,
41 |                     at::Tensor mask_grad) {
42 |   if (top_grad.device().is_cuda()) {
43 | #ifdef WITH_CUDA
44 |     return carafe_backward_cuda(top_grad, rfeatures, masks, kernel_size,
45 |         group_size, scale_factor, rtop_grad, rbottom_grad_hs, rbottom_grad,
46 |         rmask_grad, bottom_grad, mask_grad);
47 | #else
48 |     AT_ERROR("carafe is not compiled with GPU support");
49 | #endif
50 |   }
51 |   AT_ERROR("carafe is not implemented on CPU");
52 | }
53 | 
// Python bindings: expose the two device dispatchers defined above as
// `forward` and `backward`.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("forward", &carafe_forward, "carafe forward");
  m.def("backward", &carafe_backward, "carafe backward");
}
58 | 


--------------------------------------------------------------------------------
/mmdet/core/mask/utils.py:
--------------------------------------------------------------------------------
 1 | import mmcv
 2 | import numpy as np
 3 | import pycocotools.mask as mask_util
 4 | 
 5 | 
 6 | def split_combined_polys(polys, poly_lens, polys_per_mask):
 7 |     """Split the combined 1-D polys into masks.
 8 | 
 9 |     A mask is represented as a list of polys, and a poly is represented as
10 |     a 1-D array. In dataset, all masks are concatenated into a single 1-D
11 |     tensor. Here we need to split the tensor into original representations.
12 | 
13 |     Args:
14 |         polys (list): a list (length = image num) of 1-D tensors
15 |         poly_lens (list): a list (length = image num) of poly length
16 |         polys_per_mask (list): a list (length = image num) of poly number
17 |             of each mask
18 | 
19 |     Returns:
20 |         list: a list (length = image num) of list (length = mask num) of
21 |             list (length = poly num) of numpy array
22 |     """
23 |     mask_polys_list = []
24 |     for img_id in range(len(polys)):
25 |         polys_single = polys[img_id]
26 |         polys_lens_single = poly_lens[img_id].tolist()
27 |         polys_per_mask_single = polys_per_mask[img_id].tolist()
28 | 
29 |         split_polys = mmcv.slice_list(polys_single, polys_lens_single)
30 |         mask_polys = mmcv.slice_list(split_polys, polys_per_mask_single)
31 |         mask_polys_list.append(mask_polys)
32 |     return mask_polys_list
33 | 
34 | 
35 | # TODO: move this function to more proper place
def encode_mask_results(mask_results):
    """Convert bitmap masks into RLE-encoded masks.

    Args:
        mask_results (list | tuple[list]): bitmap mask results, indexed by
            class. For mask scoring rcnn this is a tuple of
            (segm_results, segm_cls_score); the scores are passed through
            untouched.

    Returns:
        list | tuple: the RLE-encoded masks, in the same list/tuple layout
            as the input.
    """
    with_scores = isinstance(mask_results, tuple)  # mask scoring
    if with_scores:
        cls_segms, cls_mask_scores = mask_results
    else:
        cls_segms = mask_results

    encoded = []
    for segms_of_class in cls_segms:
        rles = []
        for bitmap in segms_of_class:
            # pycocotools expects a Fortran-ordered uint8 array of shape
            # (h, w, n); we encode one mask at a time and keep its RLE.
            fortran_mask = np.array(
                bitmap[:, :, np.newaxis], order='F', dtype='uint8')
            rles.append(mask_util.encode(fortran_mask)[0])
        encoded.append(rles)

    if with_scores:
        return encoded, cls_mask_scores
    return encoded
64 | 


--------------------------------------------------------------------------------
/mmdet/core/mask/mask_target.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import torch
 3 | from torch.nn.modules.utils import _pair
 4 | 
 5 | 
 6 | def mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_masks_list,
 7 |                 cfg):
 8 |     """ Compute mask target for positive proposals in multiple images.
 9 | 
10 |     Args:
11 |         pos_proposals_list (list[Tensor]): Positive proposals in multiple
12 |             images.
13 |         pos_assigned_gt_inds_list (list[Tensor]): Assigned GT indices for each
14 |             positive proposals.
15 |         gt_masks_list (list[:obj:`BaseInstanceMasks`]): Ground truth masks of
16 |             each image.
17 |         cfg (dict): Config dict that specifies the mask size.
18 | 
19 |     Returns:
20 |         list[Tensor]: Mask target of each image.
21 |     """
22 |     cfg_list = [cfg for _ in range(len(pos_proposals_list))]
23 |     mask_targets = map(mask_target_single, pos_proposals_list,
24 |                        pos_assigned_gt_inds_list, gt_masks_list, cfg_list)
25 |     mask_targets = list(mask_targets)
26 |     if len(mask_targets) > 0:
27 |         mask_targets = torch.cat(mask_targets)
28 |     return mask_targets
29 | 
30 | 
def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_masks, cfg):
    """Compute mask target for each positive proposal in the image.

    The proposals are clipped to the extent of the GT masks and the assigned
    GT masks are then cropped and resized to ``cfg.mask_size``.

    Args:
        pos_proposals (Tensor): Positive proposals. Not modified.
        pos_assigned_gt_inds (Tensor): Assigned GT inds of positive proposals.
        gt_masks (:obj:`BaseInstanceMasks`): GT masks in the format of Bitmap
            or Polygon.
        cfg (dict): Config dict that indicate the mask size.

    Returns:
        Tensor: Float mask target of each positive proposal, located on the
            same device as ``pos_proposals``. When there is no positive
            proposal an empty tensor of shape ``(0, h, w)`` is returned.
    """
    device = pos_proposals.device
    mask_size = _pair(cfg.mask_size)
    num_pos = pos_proposals.size(0)
    if num_pos == 0:
        return pos_proposals.new_zeros((0, ) + mask_size)

    # For a CPU tensor ``.cpu().numpy()`` returns a view that shares memory
    # with ``pos_proposals``; copy so the in-place clipping below never
    # alters the caller's proposals.
    proposals_np = pos_proposals.cpu().numpy().copy()
    maxh, maxw = gt_masks.height, gt_masks.width
    proposals_np[:, [0, 2]] = np.clip(proposals_np[:, [0, 2]], 0, maxw)
    proposals_np[:, [1, 3]] = np.clip(proposals_np[:, [1, 3]], 0, maxh)
    pos_assigned_gt_inds = pos_assigned_gt_inds.cpu().numpy()

    mask_targets = gt_masks.crop_and_resize(
        proposals_np, mask_size, device=device,
        inds=pos_assigned_gt_inds).to_ndarray()

    return torch.from_numpy(mask_targets).float().to(device)
63 | 


--------------------------------------------------------------------------------
/mmdet/ops/sigmoid_focal_loss/src/sigmoid_focal_loss_ext.cpp:
--------------------------------------------------------------------------------
 1 | // modify from
 2 | // https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/csrc/SigmoidFocalLoss.h
 3 | #include <torch/extension.h>
 4 | 
#ifdef WITH_CUDA
// CUDA entry points used by the dispatchers below. They are only declared
// when the extension is built with WITH_CUDA; their definitions are not in
// this file.

// Forward pass of the sigmoid focal loss on CUDA tensors.
at::Tensor SigmoidFocalLoss_forward_cuda(const at::Tensor &logits,
                                         const at::Tensor &targets,
                                         const int num_classes,
                                         const float gamma, const float alpha);

// Backward pass; d_losses is the incoming gradient w.r.t. the losses.
at::Tensor SigmoidFocalLoss_backward_cuda(const at::Tensor &logits,
                                          const at::Tensor &targets,
                                          const at::Tensor &d_losses,
                                          const int num_classes,
                                          const float gamma, const float alpha);
#endif
17 | 
18 | // Interface for Python
19 | at::Tensor SigmoidFocalLoss_forward(const at::Tensor &logits,
20 |                                     const at::Tensor &targets,
21 |                                     const int num_classes, const float gamma,
22 |                                     const float alpha) {
23 |   if (logits.device().is_cuda()) {
24 | #ifdef WITH_CUDA
25 |     at::DeviceGuard guard(logits.device());
26 |     return SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma,
27 |                                          alpha);
28 | #else
29 |       AT_ERROR("SigmoidFocalLoss is not compiled with GPU support");
30 | #endif
31 |   }
32 |   AT_ERROR("SigmoidFocalLoss is not implemented on the CPU");
33 | }
34 | 
35 | at::Tensor SigmoidFocalLoss_backward(const at::Tensor &logits,
36 |                                      const at::Tensor &targets,
37 |                                      const at::Tensor &d_losses,
38 |                                      const int num_classes, const float gamma,
39 |                                      const float alpha) {
40 |   if (logits.device().is_cuda()) {
41 | #ifdef WITH_CUDA
42 |     at::DeviceGuard guard(logits.device());
43 |     return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses,
44 |                                           num_classes, gamma, alpha);
45 | #else
46 |       AT_ERROR("SigmoidFocalLoss is not compiled with GPU support");
47 | #endif
48 |   }
49 |   AT_ERROR("SigmoidFocalLoss is not implemented on the CPU");
50 | }
51 | 
// Python bindings for this extension: registers the two dispatchers above
// under the module-level names "forward" and "backward".
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("forward", &SigmoidFocalLoss_forward,
        "SigmoidFocalLoss forward");
  m.def("backward", &SigmoidFocalLoss_backward,
        "SigmoidFocalLoss backward");
}
58 | 


--------------------------------------------------------------------------------
/mmdet/datasets/voc.py:
--------------------------------------------------------------------------------
 1 | from mmdet.core import eval_map, eval_recalls
 2 | from .builder import DATASETS
 3 | from .xml_style import XMLDataset
 4 | 
 5 | 
 6 | @DATASETS.register_module()
 7 | class VOCDataset(XMLDataset):
 8 | 
 9 |     CLASSES = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car',
10 |                'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
11 |                'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train',
12 |                'tvmonitor')
13 | 
14 |     def __init__(self, **kwargs):
15 |         super(VOCDataset, self).__init__(**kwargs)
16 |         if 'VOC2007' in self.img_prefix:
17 |             self.year = 2007
18 |         elif 'VOC2012' in self.img_prefix:
19 |             self.year = 2012
20 |         else:
21 |             raise ValueError('Cannot infer dataset year from img_prefix')
22 | 
23 |     def evaluate(self,
24 |                  results,
25 |                  metric='mAP',
26 |                  logger=None,
27 |                  proposal_nums=(100, 300, 1000),
28 |                  iou_thr=0.5,
29 |                  scale_ranges=None):
30 |         if not isinstance(metric, str):
31 |             assert len(metric) == 1
32 |             metric = metric[0]
33 |         allowed_metrics = ['mAP', 'recall']
34 |         if metric not in allowed_metrics:
35 |             raise KeyError(f'metric {metric} is not supported')
36 |         annotations = [self.get_ann_info(i) for i in range(len(self))]
37 |         eval_results = {}
38 |         if metric == 'mAP':
39 |             assert isinstance(iou_thr, float)
40 |             if self.year == 2007:
41 |                 ds_name = 'voc07'
42 |             else:
43 |                 ds_name = self.dataset.CLASSES
44 |             mean_ap, _ = eval_map(
45 |                 results,
46 |                 annotations,
47 |                 scale_ranges=None,
48 |                 iou_thr=iou_thr,
49 |                 dataset=ds_name,
50 |                 logger=logger)
51 |             eval_results['mAP'] = mean_ap
52 |         elif metric == 'recall':
53 |             gt_bboxes = [ann['bboxes'] for ann in annotations]
54 |             if isinstance(iou_thr, float):
55 |                 iou_thr = [iou_thr]
56 |             recalls = eval_recalls(
57 |                 gt_bboxes, results, proposal_nums, iou_thr, logger=logger)
58 |             for i, num in enumerate(proposal_nums):
59 |                 for j, iou in enumerate(iou_thr):
60 |                     eval_results[f'recall@{num}@{iou}'] = recalls[i, j]
61 |             if recalls.shape[1] > 1:
62 |                 ar = recalls.mean(axis=1)
63 |                 for i, num in enumerate(proposal_nums):
64 |                     eval_results[f'AR@{num}'] = ar[i]
65 |         return eval_results
66 | 


--------------------------------------------------------------------------------
/mmdet/core/anchor/utils.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | 
 4 | def images_to_levels(target, num_levels):
 5 |     """Convert targets by image to targets by feature level.
 6 | 
 7 |     [target_img0, target_img1] -> [target_level0, target_level1, ...]
 8 |     """
 9 |     target = torch.stack(target, 0)
10 |     level_targets = []
11 |     start = 0
12 |     for n in num_levels:
13 |         end = start + n
14 |         # level_targets.append(target[:, start:end].squeeze(0))
15 |         level_targets.append(target[:, start:end])
16 |         start = end
17 |     return level_targets
18 | 
19 | 
def anchor_inside_flags(flat_anchors,
                        valid_flags,
                        img_shape,
                        allowed_border=0):
    """Flag the anchors that lie inside the image, up to a tolerated border.

    Args:
        flat_anchors (torch.Tensor): Flatten anchors, shape (n, 4).
        valid_flags (torch.Tensor): An existing valid flags of anchors.
        img_shape (tuple(int)): Shape of current image; the first two
            entries are read as height and width.
        allowed_border (int, optional): How far an anchor may exceed the
            image on every side. A negative value disables the check.
            Defaults to 0.

    Returns:
        torch.Tensor: ``valid_flags`` combined with the inside-image test,
            or ``valid_flags`` itself when ``allowed_border`` is negative.
    """
    img_h, img_w = img_shape[:2]
    if not allowed_border >= 0:
        return valid_flags

    x1_ok = flat_anchors[:, 0] >= -allowed_border
    y1_ok = flat_anchors[:, 1] >= -allowed_border
    x2_ok = flat_anchors[:, 2] < img_w + allowed_border
    y2_ok = flat_anchors[:, 3] < img_h + allowed_border
    return valid_flags & x1_ok & y1_ok & x2_ok & y2_ok
47 | 
48 | 
def calc_region(bbox, ratio, featmap_size=None):
    """Compute a region that is proportional to a bbox.

    Each output coordinate is a rounded linear blend of the two opposite
    bbox coordinates weighted by ``ratio`` and ``1 - ratio``, so the box
    center is kept.

    Args:
        bbox (Tensor): Bbox to calculate the region for. The code reads
            ``bbox[0]`` .. ``bbox[3]`` as x1, y1, x2, y2.
            NOTE(review): this indexing suggests a single box of shape
            (4,) rather than (n, 4) - confirm against callers.
        ratio (float): Ratio of the output region.
        featmap_size (tuple): Feature map size (h, w) used for clipping the
            boundary; no clipping when None.

    Returns:
        tuple: x1, y1, x2, y2
    """
    keep = 1 - ratio
    left, top, right, bottom = bbox[0], bbox[1], bbox[2], bbox[3]

    x1 = torch.round(keep * left + ratio * right).long()
    y1 = torch.round(keep * top + ratio * bottom).long()
    x2 = torch.round(ratio * left + keep * right).long()
    y2 = torch.round(ratio * top + keep * bottom).long()

    if featmap_size is None:
        return (x1, y1, x2, y2)

    max_y, max_x = featmap_size[0], featmap_size[1]
    return (
        x1.clamp(min=0, max=max_x),
        y1.clamp(min=0, max=max_y),
        x2.clamp(min=0, max=max_x),
        y2.clamp(min=0, max=max_y),
    )
72 | 


--------------------------------------------------------------------------------
/mmdet/models/roi_heads/shared_heads/res_layer.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | from mmcv.cnn import constant_init, kaiming_init
 3 | from mmcv.runner import load_checkpoint
 4 | 
 5 | from mmdet.core import auto_fp16
 6 | from mmdet.models.backbones import ResNet
 7 | from mmdet.models.builder import SHARED_HEADS
 8 | from mmdet.models.utils import ResLayer as _ResLayer
 9 | from mmdet.utils import get_root_logger
10 | 
11 | 
@SHARED_HEADS.register_module()
class ResLayer(nn.Module):
    """Shared head made of a single residual stage.

    The block type and the number of blocks are looked up in
    ``ResNet.arch_settings[depth]``; the stage is registered as the
    submodule ``layer{stage + 1}``.
    """

    def __init__(self,
                 depth,
                 stage=3,
                 stride=2,
                 dilation=1,
                 style='pytorch',
                 norm_cfg=dict(type='BN', requires_grad=True),
                 norm_eval=True,
                 with_cp=False,
                 dcn=None):
        super().__init__()
        self.norm_eval = norm_eval
        self.norm_cfg = norm_cfg
        self.stage = stage
        self.fp16_enabled = False

        block, blocks_per_stage = ResNet.arch_settings[depth]
        num_blocks = blocks_per_stage[stage]
        # Channel widths double with every stage; the input width is the
        # (expanded) output width of the previous stage.
        out_planes = 64 * 2**stage
        in_planes = 64 * 2**(stage - 1) * block.expansion

        self.add_module(
            f'layer{stage + 1}',
            _ResLayer(
                block,
                in_planes,
                out_planes,
                num_blocks,
                stride=stride,
                dilation=dilation,
                style=style,
                with_cp=with_cp,
                norm_cfg=self.norm_cfg,
                dcn=dcn))

    def init_weights(self, pretrained=None):
        """Initialize the weights in the module

        Args:
            pretrained (str, optional): Path to pre-trained weights. When
                None, conv layers get Kaiming init and batch norm layers
                are set to the constant 1. Defaults to None.

        Raises:
            TypeError: If ``pretrained`` is neither a str nor None.
        """
        if pretrained is None:
            for module in self.modules():
                if isinstance(module, nn.Conv2d):
                    kaiming_init(module)
                elif isinstance(module, nn.BatchNorm2d):
                    constant_init(module, 1)
            return
        if not isinstance(pretrained, str):
            raise TypeError('pretrained must be a str or None')
        load_checkpoint(
            self, pretrained, strict=False, logger=get_root_logger())

    @auto_fp16()
    def forward(self, x):
        """Run ``x`` through the registered residual stage."""
        return getattr(self, f'layer{self.stage + 1}')(x)

    def train(self, mode=True):
        """Switch the training mode; batch norm layers are put back into
        eval mode when ``norm_eval`` is set."""
        super().train(mode)
        if not self.norm_eval:
            return
        for module in self.modules():
            if isinstance(module, nn.BatchNorm2d):
                module.eval()
79 | 


--------------------------------------------------------------------------------
/mmdet/ops/roi_pool/roi_pool.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | from torch.autograd import Function
 4 | from torch.autograd.function import once_differentiable
 5 | from torch.nn.modules.utils import _pair
 6 | 
 7 | from . import roi_pool_ext
 8 | 
 9 | 
class RoIPoolFunction(Function):
    """Autograd function around the ``roi_pool_ext`` forward/backward ops.

    Only CUDA tensors are accepted. A gradient is produced for the features
    only; RoIs, output size and spatial scale receive ``None``.
    """

    @staticmethod
    def forward(ctx, features, rois, out_size, spatial_scale):
        assert features.is_cuda
        out_h, out_w = _pair(out_size)
        assert isinstance(out_h, int) and isinstance(out_w, int)
        ctx.save_for_backward(rois)

        # One pooled (out_h, out_w) map per RoI and per channel; argmax has
        # the same layout and is filled by the extension.
        pooled_shape = (rois.size(0), features.size(1), out_h, out_w)
        output = features.new_zeros(pooled_shape)
        argmax = features.new_zeros(pooled_shape, dtype=torch.int)
        roi_pool_ext.forward(features, rois, out_h, out_w, spatial_scale,
                             output, argmax)

        # State needed by backward.
        ctx.spatial_scale = spatial_scale
        ctx.feature_size = features.size()
        ctx.argmax = argmax

        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        assert grad_output.is_cuda
        scale = ctx.spatial_scale
        input_shape = ctx.feature_size
        argmax = ctx.argmax
        rois = ctx.saved_tensors[0]
        assert input_shape is not None

        grad_input = None
        if ctx.needs_input_grad[0]:
            grad_input = grad_output.new_zeros(input_shape)
            roi_pool_ext.backward(grad_output.contiguous(), rois, argmax,
                                  scale, grad_input)

        # Gradients for (features, rois, out_size, spatial_scale).
        return grad_input, None, None, None
48 | 
49 | 
# Functional alias: calling ``roi_pool(...)`` invokes ``RoIPoolFunction.apply``.
roi_pool = RoIPoolFunction.apply
51 | 
52 | 
class RoIPool(nn.Module):
    """Module wrapper for RoI pooling.

    Args:
        out_size (int | tuple): Output size; a single int is expanded to a
            pair.
        spatial_scale (float): Spatial scale handed to the pooling op.
        use_torchvision (bool): Use ``torchvision.ops.roi_pool`` instead of
            the ``roi_pool`` function of this module.
    """

    def __init__(self, out_size, spatial_scale, use_torchvision=False):
        super().__init__()

        self.out_size = _pair(out_size)
        self.spatial_scale = float(spatial_scale)
        self.use_torchvision = use_torchvision

    def forward(self, features, rois):
        """Pool ``features`` inside ``rois`` with the selected backend."""
        if not self.use_torchvision:
            return roi_pool(features, rois, self.out_size, self.spatial_scale)

        # Imported lazily so torchvision is only needed when requested.
        from torchvision.ops import roi_pool as tv_roi_pool
        return tv_roi_pool(features, rois, self.out_size, self.spatial_scale)

    def __repr__(self):
        fields = (f'out_size={self.out_size}, '
                  f'spatial_scale={self.spatial_scale}, '
                  f'use_torchvision={self.use_torchvision}')
        return f'{self.__class__.__name__}({fields})'
76 | 


--------------------------------------------------------------------------------
/mmdet/models/roi_heads/roi_extractors/relative_roi_extractor.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | from mmdet.core import force_fp32
 4 | from mmdet.models.builder import ROI_EXTRACTORS
 5 | from .base_roi_extractor import BaseRoIExtractor
 6 | 
 7 | 
 8 | @ROI_EXTRACTORS.register_module()
 9 | class RelativeRoIExtractor(BaseRoIExtractor):
10 |     """Extract RoI features from a single level feature map.
11 | 
12 |     If there are multiple input feature levels, each RoI is mapped to a level
13 |     according to its scale. The mapping rule is proposed in
14 |     `FPN <https://arxiv.org/abs/1612.03144>`_.
15 | 
16 |     Args:
17 |         roi_layer (dict): Specify RoI layer type and arguments.
18 |         out_channels (int): Output channels of RoI layers.
19 |         featmap_strides (int): Strides of input feature maps.
20 |         finest_scale (int): Scale threshold of mapping to level 0. Default: 56.
21 |     """
22 | 
23 |     def __init__(self,
24 |                  roi_layer,
25 |                  out_channels,
26 |                  featmap_strides=[1.0]):
27 |         super(RelativeRoIExtractor, self).__init__(roi_layer, out_channels,
28 |                                                  featmap_strides)
29 |     def compute_relative_rois(self, rois, base_rois, feature_shape):
30 |         #rois[:,1][rois[:,1] < base_rois[:,1]] = base_rois[:,1]
31 |         #rois[:,2][rois[:,2] < base_rois[:,2]] = base_rois[:,2]
32 |         #rois[:,3][rois[:,3] > base_rois[:,3]] = base_rois[:,3]
33 |         #rois[:,4][rois[:,4] > base_rois[:,4]] = base_rois[:,4]
34 |         base_w = base_rois[:,3] - base_rois[:,1]
35 |         base_h = base_rois[:,4] - base_rois[:,2]
36 |         relative_rois = torch.zeros_like(rois)
37 |         relative_rois[:,0] = torch.arange(len(relative_rois)).to(dtype=relative_rois.dtype)
38 |         relative_rois[:,1] = (rois[:,1] - base_rois[:,1]) / base_w
39 |         relative_rois[:,3] = (rois[:,3] - base_rois[:,1]) / base_w
40 |         relative_rois[:,2] = (rois[:,2] - base_rois[:,2]) / base_h
41 |         relative_rois[:,4] = (rois[:,4] - base_rois[:,2]) / base_h
42 |         relative_rois[:,1] = relative_rois[:,1]*feature_shape[1]
43 |         relative_rois[:,3] = relative_rois[:,3]*feature_shape[1]
44 |         relative_rois[:,2] = relative_rois[:,2]*feature_shape[0]
45 |         relative_rois[:,4] = relative_rois[:,4]*feature_shape[0]
46 |         return relative_rois
47 | 
48 |     @force_fp32(apply_to=('feats', ), out_fp16=True)
49 |     def forward(self, feats, rois, base_rois):
50 |         """Forward function"""
51 |         out_size = self.roi_layers[0].out_size
52 |         feature_shape = feats.shape[-2:]
53 |         relative_rois = self.compute_relative_rois(rois,base_rois,feature_shape)
54 |         if len(rois) == 0:
55 |             return feats.new_zeros(
56 |                 rois.size(0), self.out_channels, *out_size)
57 |         return self.roi_layers[0](feats, relative_rois)
58 |     
59 | 


--------------------------------------------------------------------------------
/mmdet/core/evaluation/eval_hooks.py:
--------------------------------------------------------------------------------
 1 | import os.path as osp
 2 | 
 3 | from mmcv.runner import Hook
 4 | from torch.utils.data import DataLoader
 5 | 
 6 | 
 7 | class EvalHook(Hook):
 8 |     """Evaluation hook.
 9 | 
10 |     Attributes:
11 |         dataloader (DataLoader): A PyTorch dataloader.
12 |         interval (int): Evaluation interval (by epochs). Default: 1.
13 |     """
14 | 
15 |     def __init__(self, dataloader, interval=1, **eval_kwargs):
16 |         if not isinstance(dataloader, DataLoader):
17 |             raise TypeError('dataloader must be a pytorch DataLoader, but got'
18 |                             f' {type(dataloader)}')
19 |         self.dataloader = dataloader
20 |         self.interval = interval
21 |         self.eval_kwargs = eval_kwargs
22 | 
23 |     def after_train_epoch(self, runner):
24 |         if not self.every_n_epochs(runner, self.interval):
25 |             return
26 |         from mmdet.apis import single_gpu_test
27 |         results = single_gpu_test(runner.model, self.dataloader, show=False)
28 |         self.evaluate(runner, results)
29 | 
30 |     def evaluate(self, runner, results):
31 |         eval_res = self.dataloader.dataset.evaluate(
32 |             results, logger=runner.logger, **self.eval_kwargs)
33 |         for name, val in eval_res.items():
34 |             runner.log_buffer.output[name] = val
35 |         runner.log_buffer.ready = True
36 | 
37 | 
class DistEvalHook(EvalHook):
    """Evaluation hook for distributed training.

    Results are gathered with ``multi_gpu_test``, using
    ``<runner.work_dir>/.eval_hook`` as temporary directory, and evaluated
    on rank 0 only.

    Attributes:
        dataloader (DataLoader): A PyTorch dataloader.
        interval (int): Evaluation interval (by epochs). Default: 1.
        gpu_collect (bool): Whether to use gpu or cpu to collect results.
            Default: False.
        eval_kwargs (dict): Extra keyword arguments forwarded to the
            dataset's ``evaluate`` method.
    """

    def __init__(self,
                 dataloader,
                 interval=1,
                 gpu_collect=False,
                 **eval_kwargs):
        if not isinstance(dataloader, DataLoader):
            raise TypeError(
                f'dataloader must be a pytorch DataLoader, but got '
                f'{type(dataloader)}')
        self.dataloader = dataloader
        self.interval = interval
        self.gpu_collect = gpu_collect
        self.eval_kwargs = eval_kwargs

    def after_train_epoch(self, runner):
        """Run multi-GPU testing every ``interval`` epochs."""
        if not self.every_n_epochs(runner, self.interval):
            return
        # Imported here rather than at module level.
        from mmdet.apis import multi_gpu_test
        collect_dir = osp.join(runner.work_dir, '.eval_hook')
        outputs = multi_gpu_test(
            runner.model,
            self.dataloader,
            tmpdir=collect_dir,
            gpu_collect=self.gpu_collect)
        # Every rank takes part in testing; only rank 0 evaluates.
        if runner.rank != 0:
            return
        print('\n')
        self.evaluate(runner, outputs)
75 | 


--------------------------------------------------------------------------------
/mmdet/core/bbox/samplers/pseudo_sampler.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | from ..builder import BBOX_SAMPLERS
 4 | from .base_sampler import BaseSampler
 5 | from .sampling_result import SamplingResult
 6 | 
 7 | 
 8 | @BBOX_SAMPLERS.register_module()
 9 | class PseudoSampler(BaseSampler):
10 |     """A pseudo sampler that does not do sampling actually."""
11 | 
12 |     def __init__(self, add_gt_as_proposals=True, **kwargs):
13 |         self.add_gt_as_proposals = add_gt_as_proposals
14 | 
15 |     def _sample_pos(self, **kwargs):
16 |         """Sample positive samples"""
17 |         raise NotImplementedError
18 | 
19 |     def _sample_neg(self, **kwargs):
20 |         """Sample negative samples"""
21 |         raise NotImplementedError
22 | 
23 |     def sample(self, assign_result, bboxes, gt_bboxes, gt_labels,**kwargs):
24 |         """Directly returns the positive and negative indices  of samples
25 | 
26 |         Args:
27 |             assign_result (:obj:`AssignResult`): Assigned results
28 |             bboxes (torch.Tensor): Bounding boxes
29 |             gt_bboxes (torch.Tensor): Ground truth boxes
30 | 
31 |         Returns:
32 |             :obj:`SamplingResult`: sampler results
33 | 
34 |         """
35 |         
36 |         if len(bboxes.shape) < 2:
37 |             bboxes = bboxes[None, :]
38 | 
39 |         bboxes = bboxes[:, :4]
40 |         pos_inds = torch.nonzero(
41 |             assign_result.gt_inds > 0, as_tuple=False).squeeze(-1).unique()
42 |         neg_inds = torch.nonzero(
43 |             assign_result.gt_inds == 0, as_tuple=False).squeeze(-1).unique()
44 |         #print("before:",bboxes.shape[0],pos_inds.shape[0], neg_inds.shape[0], gt_bboxes.shape[0])
45 |         gt_flags = bboxes.new_zeros((bboxes.shape[0], ), dtype=torch.uint8)
46 |         if self.add_gt_as_proposals and len(gt_bboxes) > 0:
47 |             if gt_labels is None:
48 |                 raise ValueError(
49 |                     'gt_labels must be given when add_gt_as_proposals is True')
50 |             bboxes = torch.cat([gt_bboxes, bboxes], dim=0)
51 |             assign_result.add_gt_(gt_labels)
52 |             gt_ones = bboxes.new_ones(gt_bboxes.shape[0], dtype=torch.uint8)
53 |             gt_flags = torch.cat([gt_ones, gt_flags])
54 | 
55 |         #print("in sample",gt_flags.sum())
56 |         #print( torch.nonzero(assign_result.gt_inds > 0, as_tuple=False).squeeze(-1))
57 |         pos_inds = torch.nonzero(
58 |             assign_result.gt_inds > 0, as_tuple=False).squeeze(-1).unique()
59 |         #print("sample",pos_inds)
60 |         neg_inds = torch.nonzero(
61 |             assign_result.gt_inds == 0, as_tuple=False).squeeze(-1).unique()
62 |         #print("after:",bboxes.shape[0],pos_inds.shape[0], neg_inds.shape[0], gt_bboxes.shape[0])
63 |         sampling_result = SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes,
64 |                                          assign_result, gt_flags)
65 |         return sampling_result
66 | 


--------------------------------------------------------------------------------
/mmdet/ops/masked_conv/src/cuda/masked_conv2d_cuda.cpp:
--------------------------------------------------------------------------------
 1 | #include <torch/extension.h>
 2 | 
 3 | #include <cmath>
 4 | #include <vector>
 5 | 
// Kernel launchers used by the wrappers below. They are only declared
// here; their definitions are not in this file (presumably in the
// accompanying CUDA source - confirm).

// Launches the masked im2col kernel: reads `im` at the positions given by
// mask_h_idx / mask_w_idx (mask_cnt entries) and writes into `col`.
int MaskedIm2colForwardLaucher(const at::Tensor im, const int height,
                               const int width, const int channels,
                               const int kernel_h, const int kernel_w,
                               const int pad_h, const int pad_w,
                               const at::Tensor mask_h_idx,
                               const at::Tensor mask_w_idx, const int mask_cnt,
                               at::Tensor col);

// Launches the masked col2im kernel: the inverse direction, reading `col`
// and writing into `im`.
int MaskedCol2imForwardLaucher(const at::Tensor col, const int height,
                               const int width, const int channels,
                               const at::Tensor mask_h_idx,
                               const at::Tensor mask_w_idx, const int mask_cnt,
                               at::Tensor im);
19 | 
// Input validation helpers. Each raises (via TORCH_CHECK) when the tensor
// does not satisfy the requirement. The macro argument is parenthesised so
// that arbitrary expressions can be passed.
#define CHECK_CUDA(x) \
  TORCH_CHECK((x).device().is_cuda(), #x, " must be a CUDAtensor ")
#define CHECK_CONTIGUOUS(x) \
  TORCH_CHECK((x).is_contiguous(), #x, " must be contiguous ")
// Wrapped in do { } while (0) so the two checks form a single statement:
// `if (cond) CHECK_INPUT(t);` then guards both checks, not only the first.
#define CHECK_INPUT(x)   \
  do {                   \
    CHECK_CUDA(x);       \
    CHECK_CONTIGUOUS(x); \
  } while (0)
26 | 
27 | int masked_im2col_forward_cuda(const at::Tensor im, const at::Tensor mask_h_idx,
28 |                                const at::Tensor mask_w_idx, const int kernel_h,
29 |                                const int kernel_w, const int pad_h,
30 |                                const int pad_w, at::Tensor col) {
31 |   CHECK_INPUT(im);
32 |   CHECK_INPUT(mask_h_idx);
33 |   CHECK_INPUT(mask_w_idx);
34 |   CHECK_INPUT(col);
35 |   // im: (n, ic, h, w), kernel size (kh, kw)
36 |   // kernel: (oc, ic * kh * kw), col: (kh * kw * ic, ow * oh)
37 |   at::DeviceGuard guard(im.device());
38 | 
39 |   int channels = im.size(1);
40 |   int height = im.size(2);
41 |   int width = im.size(3);
42 |   int mask_cnt = mask_h_idx.size(0);
43 | 
44 |   MaskedIm2colForwardLaucher(im, height, width, channels, kernel_h, kernel_w,
45 |                              pad_h, pad_w, mask_h_idx, mask_w_idx, mask_cnt,
46 |                              col);
47 | 
48 |   return 1;
49 | }
50 | 
51 | int masked_col2im_forward_cuda(const at::Tensor col,
52 |                                const at::Tensor mask_h_idx,
53 |                                const at::Tensor mask_w_idx, int height,
54 |                                int width, int channels, at::Tensor im) {
55 |   CHECK_INPUT(col);
56 |   CHECK_INPUT(mask_h_idx);
57 |   CHECK_INPUT(mask_w_idx);
58 |   CHECK_INPUT(im);
59 |   // im: (n, ic, h, w), kernel size (kh, kw)
60 |   // kernel: (oc, ic * kh * kh), col: (kh * kw * ic, ow * oh)
61 |   at::DeviceGuard guard(col.device());
62 | 
63 |   int mask_cnt = mask_h_idx.size(0);
64 | 
65 |   MaskedCol2imForwardLaucher(col, height, width, channels, mask_h_idx,
66 |                              mask_w_idx, mask_cnt, im);
67 | 
68 |   return 1;
69 | }
70 | 


--------------------------------------------------------------------------------
/mmdet/ops/carafe/src/cuda/carafe_naive_cuda.cpp:
--------------------------------------------------------------------------------
 1 | #include <ATen/ATen.h>
 2 | #include <torch/torch.h>
 3 | 
 4 | #include <cmath>
 5 | #include <vector>
 6 | 
 7 | int CARAFENAIVEForwardLaucher(const at::Tensor features, const at::Tensor masks,
 8 |                               const int kernel_size, const int group_size,
 9 |                               const int scale_factor, const int batch_size,
10 |                               const int channels, const int height,
11 |                               const int width, at::Tensor output);
12 | 
13 | int CARAFENAIVEBackwardLaucher(const at::Tensor top_grad,
14 |                                const at::Tensor features,
15 |                                const at::Tensor masks, const int kernel_size,
16 |                                const int group_size, const int scale_factor,
17 |                                const int batch_size, const int channels,
18 |                                const int height, const int width,
19 |                                at::Tensor bottom_grad, at::Tensor mask_grad);
20 | 
// Input validation helpers for the host-side wrappers below.
// CHECK_CUDA / CHECK_CONTIGUOUS raise through TORCH_CHECK with the offending
// argument's name (stringified via #x) in the message.
#define CHECK_CUDA(x) \
  TORCH_CHECK(x.device().is_cuda(), #x, " must be a CUDA tensor")
#define CHECK_CONTIGUOUS(x) \
  TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous")
// Wrapped in do { } while (0) so the two checks behave as a single statement,
// e.g. when the macro is used as the body of an unbraced `if`.
#define CHECK_INPUT(x)   \
  do {                   \
    CHECK_CUDA(x);       \
    CHECK_CONTIGUOUS(x); \
  } while (0)
27 | 
28 | int carafe_naive_forward_cuda(at::Tensor features, at::Tensor masks,
29 |                               int kernel_size, int group_size, int scale_factor,
30 |                               at::Tensor output) {
31 |   CHECK_INPUT(features);
32 |   CHECK_INPUT(masks);
33 |   CHECK_INPUT(output);
34 |   at::DeviceGuard guard(features.device());
35 | 
36 |   int batch_size = output.size(0);
37 |   int num_channels = output.size(1);
38 |   int data_height = output.size(2);
39 |   int data_width = output.size(3);
40 | 
41 |   CARAFENAIVEForwardLaucher(features, masks, kernel_size, group_size,
42 |                             scale_factor, batch_size, num_channels, data_height,
43 |                             data_width, output);
44 | 
45 |   return 1;
46 | }
47 | 
48 | int carafe_naive_backward_cuda(at::Tensor top_grad, at::Tensor features,
49 |                                at::Tensor masks, int kernel_size,
50 |                                int group_size, int scale_factor,
51 |                                at::Tensor bottom_grad, at::Tensor mask_grad) {
52 |   CHECK_INPUT(top_grad);
53 |   CHECK_INPUT(features);
54 |   CHECK_INPUT(masks);
55 |   CHECK_INPUT(bottom_grad);
56 |   CHECK_INPUT(mask_grad);
57 |   at::DeviceGuard guard(top_grad.device());
58 | 
59 |   int batch_size = top_grad.size(0);
60 |   int num_channels = top_grad.size(1);
61 |   int data_height = top_grad.size(2);
62 |   int data_width = top_grad.size(3);
63 | 
64 |   CARAFENAIVEBackwardLaucher(top_grad, features, masks, kernel_size, group_size,
65 |                              scale_factor, batch_size, num_channels,
66 |                              data_height, data_width, bottom_grad, mask_grad);
67 | 
68 |   return 1;
69 | }
70 | 


--------------------------------------------------------------------------------
/mmdet/datasets/pipelines/test_time_aug.py:
--------------------------------------------------------------------------------
 1 | import warnings
 2 | 
 3 | import mmcv
 4 | 
 5 | from ..builder import PIPELINES
 6 | from .compose import Compose
 7 | 
 8 | 
 9 | @PIPELINES.register_module()
10 | class MultiScaleFlipAug(object):
11 |     """Test-time augmentation with multiple scales and flipping
12 | 
13 |     Args:
14 |         transforms (list[dict]): Transforms to apply in each augmentation.
15 |         img_scale (tuple | list[tuple]: Images scales for resizing.
16 |         flip (bool): Whether apply flip augmentation. Default: False.
17 |         flip_direction (str | list[str]): Flip augmentation directions,
18 |             options are "horizontal" and "vertical". If flip_direction is list,
19 |             multiple flip augmentations will be applied.
20 |             It has no effect when flip == False. Default: "horizontal".
21 |     """
22 | 
23 |     def __init__(self,
24 |                  transforms,
25 |                  img_scale,
26 |                  flip=False,
27 |                  flip_direction='horizontal'):
28 |         self.transforms = Compose(transforms)
29 |         self.img_scale = img_scale if isinstance(img_scale,
30 |                                                  list) else [img_scale]
31 |         assert mmcv.is_list_of(self.img_scale, tuple)
32 |         self.flip = flip
33 |         self.flip_direction = flip_direction if isinstance(
34 |             flip_direction, list) else [flip_direction]
35 |         assert mmcv.is_list_of(self.flip_direction, str)
36 |         if not self.flip and self.flip_direction != ['horizontal']:
37 |             warnings.warn(
38 |                 'flip_direction has no effect when flip is set to False')
39 |         if (self.flip
40 |                 and not any([t['type'] == 'RandomFlip' for t in transforms])):
41 |             warnings.warn(
42 |                 'flip has no effect when RandomFlip is not in transforms')
43 | 
44 |     def __call__(self, results):
45 |         aug_data = []
46 |         flip_aug = [False, True] if self.flip else [False]
47 |         for scale in self.img_scale:
48 |             for flip in flip_aug:
49 |                 for direction in self.flip_direction:
50 |                     _results = results.copy()
51 |                     _results['scale'] = scale
52 |                     _results['flip'] = flip
53 |                     _results['flip_direction'] = direction
54 |                     data = self.transforms(_results)
55 |                     aug_data.append(data)
56 |         # list of dict to dict of list
57 |         aug_data_dict = {key: [] for key in aug_data[0]}
58 |         for data in aug_data:
59 |             for key, val in data.items():
60 |                 aug_data_dict[key].append(val)
61 |         return aug_data_dict
62 | 
63 |     def __repr__(self):
64 |         repr_str = self.__class__.__name__
65 |         repr_str += f'(transforms={self.transforms}, '
66 |         repr_str += f'img_scale={self.img_scale}, flip={self.flip})'
67 |         repr_str += f'flip_direction={self.flip_direction}'
68 |         return repr_str
69 | 


--------------------------------------------------------------------------------
/mmdet/core/bbox/samplers/random_sampler.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | from ..builder import BBOX_SAMPLERS
 4 | from .base_sampler import BaseSampler
 5 | 
 6 | 
 7 | @BBOX_SAMPLERS.register_module()
 8 | class RandomSampler(BaseSampler):
 9 |     """Random sampler
10 | 
11 |     Args:
12 |         num (int): Number of samples
13 |         pos_fraction (float): Fraction of positive samples
14 |         neg_pos_up (int, optional): Upper bound number of negative and
15 |             positive samples. Defaults to -1.
16 |         add_gt_as_proposals (bool, optional): Whether to add ground truth
17 |             boxes as proposals. Defaults to True.
18 |     """
19 | 
20 |     def __init__(self,
21 |                  num,
22 |                  pos_fraction,
23 |                  neg_pos_ub=-1,
24 |                  add_gt_as_proposals=True,
25 |                  **kwargs):
26 |         from mmdet.core.bbox import demodata
27 |         super(RandomSampler, self).__init__(num, pos_fraction, neg_pos_ub,
28 |                                             add_gt_as_proposals)
29 |         self.rng = demodata.ensure_rng(kwargs.get('rng', None))
30 | 
31 |     def random_choice(self, gallery, num):
32 |         """Random select some elements from the gallery.
33 | 
34 |         If `gallery` is a Tensor, the returned indices will be a Tensor;
35 |         If `gallery` is a ndarray or list, the returned indices will be a
36 |         ndarray.
37 | 
38 |         Args:
39 |             gallery (Tensor | ndarray | list): indices pool.
40 |             num (int): expected sample num.
41 | 
42 |         Returns:
43 |             Tensor or ndarray: sampled indices.
44 |         """
45 |         assert len(gallery) >= num
46 | 
47 |         is_tensor = isinstance(gallery, torch.Tensor)
48 |         if not is_tensor:
49 |             gallery = torch.tensor(
50 |                 gallery, dtype=torch.long, device=torch.cuda.current_device())
51 |         perm = torch.randperm(gallery.numel(), device=gallery.device)[:num]
52 |         rand_inds = gallery[perm]
53 |         if not is_tensor:
54 |             rand_inds = rand_inds.cpu().numpy()
55 |         return rand_inds
56 | 
57 |     def _sample_pos(self, assign_result, num_expected, **kwargs):
58 |         """Randomly sample some positive samples."""
59 |         pos_inds = torch.nonzero(assign_result.gt_inds > 0, as_tuple=False)
60 |         if pos_inds.numel() != 0:
61 |             pos_inds = pos_inds.squeeze(1)
62 |         if pos_inds.numel() <= num_expected:
63 |             return pos_inds
64 |         else:
65 |             return self.random_choice(pos_inds, num_expected)
66 | 
67 |     def _sample_neg(self, assign_result, num_expected, **kwargs):
68 |         """Randomly sample some negative samples."""
69 |         neg_inds = torch.nonzero(assign_result.gt_inds == 0, as_tuple=False)
70 |         if neg_inds.numel() != 0:
71 |             neg_inds = neg_inds.squeeze(1)
72 |         if len(neg_inds) <= num_expected:
73 |             return neg_inds
74 |         else:
75 |             return self.random_choice(neg_inds, num_expected)
76 | 


--------------------------------------------------------------------------------
/mmdet/models/roi_heads/roi_extractors/base_roi_extractor.py:
--------------------------------------------------------------------------------
 1 | from abc import ABCMeta, abstractmethod
 2 | 
 3 | import torch
 4 | import torch.nn as nn
 5 | 
 6 | from mmdet import ops
 7 | 
 8 | 
 9 | class BaseRoIExtractor(nn.Module, metaclass=ABCMeta):
10 |     """Base class for RoI extractor.
11 | 
12 |     Args:
13 |         roi_layer (dict): Specify RoI layer type and arguments.
14 |         out_channels (int): Output channels of RoI layers.
15 |         featmap_strides (int): Strides of input feature maps.
16 |     """
17 | 
18 |     def __init__(self, roi_layer, out_channels, featmap_strides):
19 |         super(BaseRoIExtractor, self).__init__()
20 |         self.roi_layers = self.build_roi_layers(roi_layer, featmap_strides)
21 |         self.out_channels = out_channels
22 |         self.featmap_strides = featmap_strides
23 |         self.fp16_enabled = False
24 | 
25 |     @property
26 |     def num_inputs(self):
27 |         """int: Number of input feature maps."""
28 |         return len(self.featmap_strides)
29 | 
30 |     def init_weights(self):
31 |         pass
32 | 
33 |     def build_roi_layers(self, layer_cfg, featmap_strides):
34 |         """Build RoI operator to extract feature from each level feature map.
35 | 
36 |         Args:
37 |             layer_cfg (dict): Dictionary to construct and config RoI layer
38 |                 operation. Options are modules under ``mmdet/ops`` such as
39 |                 ``RoIAlign``.
40 |             featmap_strides (int): The stride of input feature map w.r.t to the
41 |                 original image size, which would be used to scale RoI
42 |                 coordinate (original image coordinate system) to feature
43 |                 coordinate system.
44 | 
45 |         Returns:
46 |             nn.ModuleList: The RoI extractor modules for each level feature
47 |                 map.
48 |         """
49 | 
50 |         cfg = layer_cfg.copy()
51 |         layer_type = cfg.pop('type')
52 |         assert hasattr(ops, layer_type)
53 |         layer_cls = getattr(ops, layer_type)
54 |         roi_layers = nn.ModuleList(
55 |             [layer_cls(spatial_scale=1 / s, **cfg) for s in featmap_strides])
56 |         return roi_layers
57 | 
58 |     def roi_rescale(self, rois, scale_factor):
59 |         """Scale RoI coordinates by scale factor.
60 | 
61 |         Args:
62 |             rois (torch.Tensor): RoI (Region of Interest), shape (n, 5)
63 |             scale_factor (float): Scale factor that RoI will be multiplied by.
64 | 
65 |         Returns:
66 |             torch.Tensor: Scaled RoI.
67 |         """
68 | 
69 |         cx = (rois[:, 1] + rois[:, 3]) * 0.5
70 |         cy = (rois[:, 2] + rois[:, 4]) * 0.5
71 |         w = rois[:, 3] - rois[:, 1]
72 |         h = rois[:, 4] - rois[:, 2]
73 |         new_w = w * scale_factor
74 |         new_h = h * scale_factor
75 |         x1 = cx - new_w * 0.5
76 |         x2 = cx + new_w * 0.5
77 |         y1 = cy - new_h * 0.5
78 |         y2 = cy + new_h * 0.5
79 |         new_rois = torch.stack((rois[:, 0], x1, y1, x2, y2), dim=-1)
80 |         return new_rois
81 | 
82 |     @abstractmethod
83 |     def forward(self, feats, rois, roi_scale_factor=None):
84 |         pass
85 | 


--------------------------------------------------------------------------------
/tools/benchmark.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import time
 3 | 
 4 | import torch
 5 | from mmcv import Config
 6 | from mmcv.parallel import MMDataParallel
 7 | from mmcv.runner import load_checkpoint
 8 | from tools.fuse_conv_bn import fuse_module
 9 | 
10 | from mmdet.core import wrap_fp16_model
11 | from mmdet.datasets import build_dataloader, build_dataset
12 | from mmdet.models import build_detector
13 | 
14 | 
def parse_args():
    """Parse the command-line arguments of the benchmark script.

    Returns:
        argparse.Namespace: Holds ``config``, ``checkpoint``,
            ``log_interval`` (int) and ``fuse_conv_bn`` (bool).
    """
    parser = argparse.ArgumentParser(description='MMDet benchmark a model')
    parser.add_argument('config', help='test config file path')
    parser.add_argument('checkpoint', help='checkpoint file')
    # type=int is required: without it a value given on the command line
    # arrives as a string and cannot be used as a modulus by the caller.
    parser.add_argument(
        '--log-interval', type=int, default=50, help='interval of logging')
    parser.add_argument(
        '--fuse-conv-bn',
        action='store_true',
        help='Whether to fuse conv and bn, this will slightly increase '
        'the inference speed')
    args = parser.parse_args()
    return args
28 | 
29 | 
def main():
    """Measure the inference speed of a detector on the test dataset.

    Builds the test dataloader and the detector from the config, loads the
    checkpoint, then times single-image inference. The first few iterations
    are treated as warm-up and excluded from the reported fps.
    """
    args = parse_args()

    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.model.pretrained = None
    cfg.data.test.test_mode = True

    # build the dataloader
    # TODO: support multiple images per gpu (only minor changes are needed)
    dataset = build_dataset(cfg.data.test)
    data_loader = build_dataloader(
        dataset,
        samples_per_gpu=1,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=False,
        shuffle=False)

    # build the model and load checkpoint
    model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    load_checkpoint(model, args.checkpoint, map_location='cpu')
    if args.fuse_conv_bn:
        model = fuse_module(model)

    model = MMDataParallel(model, device_ids=[0])

    model.eval()

    # the first several iterations may be very slow so skip them
    num_warmup = 5
    pure_inf_time = 0

    # benchmark with at most this many images and take the average
    max_images = 2000
    for i, data in enumerate(data_loader):

        # synchronize before and after so the timer covers the GPU work
        torch.cuda.synchronize()
        start_time = time.perf_counter()

        with torch.no_grad():
            model(return_loss=False, rescale=True, **data)

        torch.cuda.synchronize()
        elapsed = time.perf_counter() - start_time

        if i >= num_warmup:
            pure_inf_time += elapsed
            if (i + 1) % args.log_interval == 0:
                fps = (i + 1 - num_warmup) / pure_inf_time
                print(f'Done image [{i + 1:<3}/ {max_images}], '
                      f'fps: {fps:.1f} img / s')

        if (i + 1) == max_images:
            # ``elapsed`` of this iteration has already been accumulated
            # above; adding it again would count the last image twice.
            fps = (i + 1 - num_warmup) / pure_inf_time
            print(f'Overall fps: {fps:.1f} img / s')
            break
91 | 
92 | if __name__ == '__main__':
93 |     main()
94 | 


--------------------------------------------------------------------------------
/mmdet/ops/corner_pool/corner_pool.py:
--------------------------------------------------------------------------------
  1 | from torch import nn
  2 | from torch.autograd import Function
  3 | 
  4 | from . import corner_pool_ext
  5 | 
  6 | 
  7 | class TopPoolFunction(Function):
  8 | 
  9 |     @staticmethod
 10 |     def forward(ctx, input):
 11 |         output = corner_pool_ext.top_pool_forward(input)
 12 |         ctx.save_for_backward(input)
 13 |         return output
 14 | 
 15 |     @staticmethod
 16 |     def backward(ctx, grad_output):
 17 |         input = ctx.saved_variables[0]
 18 |         output = corner_pool_ext.top_pool_backward(input, grad_output)
 19 |         return output
 20 | 
 21 | 
 22 | class BottomPoolFunction(Function):
 23 | 
 24 |     @staticmethod
 25 |     def forward(ctx, input):
 26 |         output = corner_pool_ext.bottom_pool_forward(input)
 27 |         ctx.save_for_backward(input)
 28 |         return output
 29 | 
 30 |     @staticmethod
 31 |     def backward(ctx, grad_output):
 32 |         input = ctx.saved_variables[0]
 33 |         output = corner_pool_ext.bottom_pool_backward(input, grad_output)
 34 |         return output
 35 | 
 36 | 
 37 | class LeftPoolFunction(Function):
 38 | 
 39 |     @staticmethod
 40 |     def forward(ctx, input):
 41 |         output = corner_pool_ext.left_pool_forward(input)
 42 |         ctx.save_for_backward(input)
 43 |         return output
 44 | 
 45 |     @staticmethod
 46 |     def backward(ctx, grad_output):
 47 |         input = ctx.saved_variables[0]
 48 |         output = corner_pool_ext.left_pool_backward(input, grad_output)
 49 |         return output
 50 | 
 51 | 
 52 | class RightPoolFunction(Function):
 53 | 
 54 |     @staticmethod
 55 |     def forward(ctx, input):
 56 |         output = corner_pool_ext.right_pool_forward(input)
 57 |         ctx.save_for_backward(input)
 58 |         return output
 59 | 
 60 |     @staticmethod
 61 |     def backward(ctx, grad_output):
 62 |         input = ctx.saved_variables[0]
 63 |         output = corner_pool_ext.right_pool_backward(input, grad_output)
 64 |         return output
 65 | 
 66 | 
 67 | class CornerPool(nn.Module):
 68 |     """Corner Pooling.
 69 | 
 70 |     Corner Pooling is a new type of pooling layer that helps a
 71 |     convolutional network better localize corners of bounding boxes.
 72 | 
 73 |     Please refer to https://arxiv.org/abs/1808.01244 for more details.
 74 |     Code is modified from https://github.com/princeton-vl/CornerNet-Lite.
 75 | 
 76 |     Args:
 77 |         mode(str): Pooling orientation for the pooling layer
 78 | 
 79 |             - 'bottom': Bottom Pooling
 80 |             - 'left': Left Pooling
 81 |             - 'right': Right Pooling
 82 |             - 'top': Top Pooling
 83 | 
 84 |     Returns:
 85 |         Feature map after pooling.
 86 |     """
 87 | 
 88 |     pool_functions = {
 89 |         'bottom': BottomPoolFunction,
 90 |         'left': LeftPoolFunction,
 91 |         'right': RightPoolFunction,
 92 |         'top': TopPoolFunction,
 93 |     }
 94 | 
 95 |     def __init__(self, mode):
 96 |         super(CornerPool, self).__init__()
 97 |         assert mode in self.pool_functions
 98 |         self.corner_pool = self.pool_functions[mode]
 99 | 
100 |     def forward(self, x):
101 |         return self.corner_pool.apply(x)
102 | 


--------------------------------------------------------------------------------
/mmdet/ops/dcn/src/deform_pool_ext.cpp:
--------------------------------------------------------------------------------
 1 | // modify from
 2 | // https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/modulated_dcn_cuda.c
 3 | 
 4 | // based on
 5 | // author: Charles Shang
 6 | // https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu
 7 | 
 8 | #include <torch/extension.h>
 9 | #include <ATen/DeviceGuard.h>
10 | 
11 | #include <cmath>
12 | #include <vector>
13 | 
14 | #ifdef WITH_CUDA
15 | void deform_psroi_pooling_cuda_forward(
16 |     at::Tensor input, at::Tensor bbox, at::Tensor trans, at::Tensor out,
17 |     at::Tensor top_count, const int no_trans, const float spatial_scale,
18 |     const int output_dim, const int group_size, const int pooled_size,
19 |     const int part_size, const int sample_per_part, const float trans_std);
20 | 
21 | void deform_psroi_pooling_cuda_backward(
22 |     at::Tensor out_grad, at::Tensor input, at::Tensor bbox, at::Tensor trans,
23 |     at::Tensor top_count, at::Tensor input_grad, at::Tensor trans_grad,
24 |     const int no_trans, const float spatial_scale, const int output_dim,
25 |     const int group_size, const int pooled_size, const int part_size,
26 |     const int sample_per_part, const float trans_std);
27 | #endif
28 | 
29 | void deform_psroi_pooling_forward(
30 |     at::Tensor input, at::Tensor bbox, at::Tensor trans, at::Tensor out,
31 |     at::Tensor top_count, const int no_trans, const float spatial_scale,
32 |     const int output_dim, const int group_size, const int pooled_size,
33 |     const int part_size, const int sample_per_part, const float trans_std) {
34 |   if (input.device().is_cuda()) {
35 | #ifdef WITH_CUDA
36 |     return deform_psroi_pooling_cuda_forward(input, bbox, trans, out, top_count,
37 |         no_trans, spatial_scale, output_dim, group_size, pooled_size,
38 |         part_size, sample_per_part, trans_std);
39 | #else
40 |     AT_ERROR("deform psroi pooling is not compiled with GPU support");
41 | #endif
42 |   }
43 |   AT_ERROR("deform psroi pooling is not implemented on CPU");
44 | }
45 | 
46 | void deform_psroi_pooling_backward(
47 |     at::Tensor out_grad, at::Tensor input, at::Tensor bbox, at::Tensor trans,
48 |     at::Tensor top_count, at::Tensor input_grad, at::Tensor trans_grad,
49 |     const int no_trans, const float spatial_scale, const int output_dim,
50 |     const int group_size, const int pooled_size, const int part_size,
51 |     const int sample_per_part, const float trans_std) {
52 |   if (input.device().is_cuda()) {
53 | #ifdef WITH_CUDA
54 |     return deform_psroi_pooling_cuda_backward(out_grad, input, bbox, trans,
55 |         top_count, input_grad, trans_grad, no_trans, spatial_scale,
56 |         output_dim, group_size, pooled_size, part_size, sample_per_part,
57 |         trans_std);
58 | #else
59 |     AT_ERROR("deform psroi pooling is not compiled with GPU support");
60 | #endif
61 |   }
62 |   AT_ERROR("deform psroi pooling is not implemented on CPU");
63 | }
64 | 
65 | 
// Python bindings: registers the forward and backward dispatchers of this
// file in the extension module under their C++ names, each with a short
// docstring.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("deform_psroi_pooling_forward", &deform_psroi_pooling_forward,
        "deform psroi pooling forward");
  m.def("deform_psroi_pooling_backward", &deform_psroi_pooling_backward,
        "deform psroi pooling backward");
}
72 | 


--------------------------------------------------------------------------------
/mmdet/models/roi_heads/roi_extractors/single_level_roi_extractor.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | from mmdet.core import force_fp32
 4 | from mmdet.models.builder import ROI_EXTRACTORS
 5 | from .base_roi_extractor import BaseRoIExtractor
 6 | 
 7 | 
 8 | @ROI_EXTRACTORS.register_module()
 9 | class SingleRoIExtractor(BaseRoIExtractor):
10 |     """Extract RoI features from a single level feature map.
11 | 
12 |     If there are multiple input feature levels, each RoI is mapped to a level
13 |     according to its scale. The mapping rule is proposed in
14 |     `FPN <https://arxiv.org/abs/1612.03144>`_.
15 | 
16 |     Args:
17 |         roi_layer (dict): Specify RoI layer type and arguments.
18 |         out_channels (int): Output channels of RoI layers.
19 |         featmap_strides (int): Strides of input feature maps.
20 |         finest_scale (int): Scale threshold of mapping to level 0. Default: 56.
21 |     """
22 | 
23 |     def __init__(self,
24 |                  roi_layer,
25 |                  out_channels,
26 |                  featmap_strides,
27 |                  finest_scale=56):
28 |         super(SingleRoIExtractor, self).__init__(roi_layer, out_channels,
29 |                                                  featmap_strides)
30 |         self.finest_scale = finest_scale
31 | 
32 |     def map_roi_levels(self, rois, num_levels):
33 |         """Map rois to corresponding feature levels by scales.
34 | 
35 |         - scale < finest_scale * 2: level 0
36 |         - finest_scale * 2 <= scale < finest_scale * 4: level 1
37 |         - finest_scale * 4 <= scale < finest_scale * 8: level 2
38 |         - scale >= finest_scale * 8: level 3
39 | 
40 |         Args:
41 |             rois (Tensor): Input RoIs, shape (k, 5).
42 |             num_levels (int): Total level number.
43 | 
44 |         Returns:
45 |             Tensor: Level index (0-based) of each RoI, shape (k, )
46 |         """
47 |         scale = torch.sqrt(
48 |             (rois[:, 3] - rois[:, 1]) * (rois[:, 4] - rois[:, 2]))
49 |         target_lvls = torch.floor(torch.log2(scale / self.finest_scale + 1e-6))
50 |         target_lvls = target_lvls.clamp(min=0, max=num_levels - 1).long()
51 |         return target_lvls
52 | 
53 |     @force_fp32(apply_to=('feats', ), out_fp16=True)
54 |     def forward(self, feats, rois, roi_scale_factor=None):
55 |         """Forward function"""
56 |         out_size = self.roi_layers[0].out_size
57 |         num_levels = len(feats)
58 |         roi_feats = feats[0].new_zeros(
59 |             rois.size(0), self.out_channels, *out_size)
60 | 
61 |         if num_levels == 1:
62 |             if len(rois) == 0:
63 |                 return roi_feats
64 |             return self.roi_layers[0](feats[0], rois)
65 | 
66 |         target_lvls = self.map_roi_levels(rois, num_levels)
67 |         if roi_scale_factor is not None:
68 |             rois = self.roi_rescale(rois, roi_scale_factor)
69 |         for i in range(num_levels):
70 |             inds = target_lvls == i
71 |             if inds.any():
72 |                 rois_ = rois[inds, :]
73 |                 roi_feats_t = self.roi_layers[i](feats[i], rois_)
74 |                 #print(feats[i].shape, rois_.shape, rois_[0])
75 |                 roi_feats[inds] = roi_feats_t
76 |             else:
77 |                 roi_feats += sum(x.view(-1)[0] for x in self.parameters()) * 0.
78 |         return roi_feats
79 | 


--------------------------------------------------------------------------------
/tools/regnet2mmdet.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | from collections import OrderedDict
 3 | 
 4 | import torch
 5 | 
 6 | 
 7 | def convert_stem(model_key, model_weight, state_dict, converted_names):
 8 |     new_key = model_key.replace('stem.conv', 'conv1')
 9 |     new_key = new_key.replace('stem.bn', 'bn1')
10 |     state_dict[new_key] = model_weight
11 |     converted_names.add(model_key)
12 |     print(f'Convert {model_key} to {new_key}')
13 | 
14 | 
def convert_head(model_key, model_weight, state_dict, converted_names):
    """Store a head weight under its renamed key.

    'head.fc' becomes 'fc'. The weight is written into ``state_dict`` and
    the source key is recorded in ``converted_names``.
    """
    renamed = model_key.replace('head.fc', 'fc')
    converted_names.add(model_key)
    state_dict[renamed] = model_weight
    print(f'Convert {model_key} to {renamed}')
20 | 
21 | 
def convert_reslayer(model_key, model_weight, state_dict, converted_names):
    """Store a residual-stage weight under its renamed key.

    Source keys look like ``s<layer>.b<block>.<module>...``. The layer index
    is kept, the block index becomes 0-based, and the module is renamed:

    - ``proj`` / ``bn`` of the first block -> ``downsample.0`` /
      ``downsample.1``
    - ``f.a`` / ``f.b`` / ``f.c`` -> ``conv1`` / ``conv2`` / ``conv3``
    - ``f.a_bn`` / ``f.b_bn`` / ``f.c_bn`` -> ``bn1`` / ``bn2`` / ``bn3``

    Args:
        model_key (str): Source key.
        model_weight: Value stored under the new key.
        state_dict (dict): Destination mapping, updated in place.
        converted_names (set): Receives ``model_key`` after conversion.

    Raises:
        ValueError: If the key does not match any supported pattern.
    """
    f_module_names = {
        'a_bn': 'bn1',
        'b_bn': 'bn2',
        'c_bn': 'bn3',
        'a': 'conv1',
        'b': 'conv2',
        'c': 'conv3',
    }

    split_keys = model_key.split('.')
    layer, block, module = split_keys[:3]
    block_id = int(block[1:])
    layer_name = f'layer{int(layer[1:])}'
    block_name = f'{block_id - 1}'

    if block_id == 1 and module == 'bn':
        new_key = f'{layer_name}.{block_name}.downsample.1.{split_keys[-1]}'
    elif block_id == 1 and module == 'proj':
        new_key = f'{layer_name}.{block_name}.downsample.0.{split_keys[-1]}'
    elif module == 'f':
        # An unknown or missing sub-module is reported as an unsupported key
        # rather than surfacing as an unbound-variable or index error.
        sub_module = split_keys[3] if len(split_keys) > 3 else None
        module_name = f_module_names.get(sub_module)
        if module_name is None:
            raise ValueError(f'Unsupported conversion of key {model_key}')
        new_key = f'{layer_name}.{block_name}.{module_name}.{split_keys[-1]}'
    else:
        raise ValueError(f'Unsupported conversion of key {model_key}')
    print(f'Convert {model_key} to {new_key}')
    state_dict[new_key] = model_weight
    converted_names.add(model_key)
52 | 
53 | 
def convert(src, dst):
    """Convert keys in pycls pretrained RegNet models to mmdet style."""
    # The weights live under the 'model_state' entry of the source file.
    blobs = torch.load(src)['model_state']

    state_dict = OrderedDict()
    converted_names = set()
    for key, weight in blobs.items():
        # Choose the converter from the key; other keys are left alone.
        if 'stem' in key:
            handler = convert_stem
        elif 'head' in key:
            handler = convert_head
        elif key.startswith('s'):
            handler = convert_reslayer
        else:
            continue
        handler(key, weight, state_dict, converted_names)

    # Report every source key that no converter handled.
    for key in blobs:
        if key not in converted_names:
            print(f'not converted: {key}')

    # Write the renamed weights under 'state_dict'.
    torch.save(dict(state_dict=state_dict), dst)
78 | 
79 | 
def main():
    """Read the source and destination paths and run the conversion."""
    parser = argparse.ArgumentParser(description='Convert model keys')
    for arg_name, arg_help in (('src', 'src detectron model path'),
                               ('dst', 'save path')):
        parser.add_argument(arg_name, help=arg_help)
    cli = parser.parse_args()
    convert(cli.src, cli.dst)
86 | 
87 | 
88 | if __name__ == '__main__':
89 |     main()
90 | 


--------------------------------------------------------------------------------
/mmdet/models/losses/utils.py:
--------------------------------------------------------------------------------
 1 | import functools
 2 | 
 3 | import torch.nn.functional as F
 4 | 
 5 | 
 6 | def reduce_loss(loss, reduction):
 7 |     """Reduce loss as specified.
 8 | 
 9 |     Args:
10 |         loss (Tensor): Elementwise loss tensor.
11 |         reduction (str): Options are "none", "mean" and "sum".
12 | 
13 |     Return:
14 |         Tensor: Reduced loss tensor.
15 |     """
16 |     reduction_enum = F._Reduction.get_enum(reduction)
17 |     # none: 0, elementwise_mean:1, sum: 2
18 |     if reduction_enum == 0:
19 |         return loss
20 |     elif reduction_enum == 1:
21 |         return loss.mean()
22 |     elif reduction_enum == 2:
23 |         return loss.sum()
24 | 
25 | 
def weight_reduce_loss(loss, weight=None, reduction='mean', avg_factor=None):
    """Weight an element-wise loss, then reduce it.

    Args:
        loss (Tensor): Element-wise loss.
        weight (Tensor): Element-wise weights multiplied into ``loss``.
            Skipped when None.
        reduction (str): Same as built-in losses of PyTorch.
        avg_factor (float): Average factor used instead of the element
            count when ``reduction`` is "mean".

    Returns:
        Tensor: Processed loss values.

    Raises:
        ValueError: If ``avg_factor`` is given together with a reduction
            other than "mean" or "none".
    """
    if weight is not None:
        loss = loss * weight

    # without an averaging factor this is a plain reduction
    if avg_factor is None:
        return reduce_loss(loss, reduction)

    # "mean" divides the summed loss by the caller-supplied factor
    if reduction == 'mean':
        return loss.sum() / avg_factor
    # "none" leaves the weighted element-wise loss untouched
    if reduction == 'none':
        return loss
    raise ValueError('avg_factor can not be used with reduction="sum"')
53 | 
54 | 
def weighted_loss(loss_func):
    """Decorate an element-wise loss with weighting and reduction.

    ``loss_func`` must accept ``loss_func(pred, target, **kwargs)`` and
    return the unreduced, element-wise loss. The returned function has the
    signature ``loss_func(pred, target, weight=None, reduction='mean',
    avg_factor=None, **kwargs)``; it calls ``loss_func`` and passes the
    result through ``weight_reduce_loss``.

    :Example:

    >>> import torch
    >>> @weighted_loss
    ... def l1_loss(pred, target):
    ...     return (pred - target).abs()

    >>> pred = torch.Tensor([0, 2, 3])
    >>> target = torch.Tensor([1, 1, 1])
    >>> weight = torch.Tensor([1, 0, 1])

    >>> l1_loss(pred, target)
    tensor(1.3333)
    >>> l1_loss(pred, target, weight)
    tensor(1.)
    >>> l1_loss(pred, target, reduction='none')
    tensor([1., 1., 2.])
    >>> l1_loss(pred, target, weight, avg_factor=2)
    tensor(1.5000)
    """

    @functools.wraps(loss_func)
    def wrapper(pred,
                target,
                weight=None,
                reduction='mean',
                avg_factor=None,
                **kwargs):
        # element-wise loss first, then weighting and reduction
        elementwise = loss_func(pred, target, **kwargs)
        return weight_reduce_loss(elementwise, weight, reduction, avg_factor)

    return wrapper
99 | 


--------------------------------------------------------------------------------
/mmdet/models/roi_heads/roi_extractors/generic_roi_extractor.py:
--------------------------------------------------------------------------------
 1 | from mmdet.core import force_fp32
 2 | from mmdet.models.builder import ROI_EXTRACTORS
 3 | from mmdet.ops.plugin import build_plugin_layer
 4 | from .base_roi_extractor import BaseRoIExtractor
 5 | 
 6 | 
 7 | @ROI_EXTRACTORS.register_module()
 8 | class GenericRoIExtractor(BaseRoIExtractor):
 9 |     """Extract RoI features from all level feature maps levels.
10 | 
11 |     This is the implementation of `A novel Region of Interest Extraction Layer
12 |     for Instance Segmentation <https://arxiv.org/abs/2004.13665>`_.
13 | 
14 |     Args:
15 |         aggregation (str): The method to aggregate multiple feature maps.
16 |             Options are 'sum', 'concat'. Default: 'sum'.
17 |         pre_cfg (dict|None): Specify pre-processing modules. Default: None.
18 |         post_cfg (dict|None): Specify post-processing modules. Default: None.
19 |         kwargs (keyword arguments): Arguments that are the same
20 |             as :class:`BaseRoIExtractor`.
21 |     """
22 | 
23 |     def __init__(self,
24 |                  aggregation='sum',
25 |                  pre_cfg=None,
26 |                  post_cfg=None,
27 |                  **kwargs):
28 |         super(GenericRoIExtractor, self).__init__(**kwargs)
29 | 
30 |         assert aggregation in ['sum', 'concat']
31 | 
32 |         self.aggregation = aggregation
33 |         self.with_post = post_cfg is not None
34 |         self.with_pre = pre_cfg is not None
35 |         # build pre/post processing modules
36 |         if self.with_post:
37 |             self.post_module = build_plugin_layer(post_cfg, '_post_module')[1]
38 |         if self.with_pre:
39 |             self.pre_module = build_plugin_layer(pre_cfg, '_pre_module')[1]
40 | 
41 |     @force_fp32(apply_to=('feats', ), out_fp16=True)
42 |     def forward(self, feats, rois, roi_scale_factor=None):
43 |         """Forward function"""
44 |         if len(feats) == 1:
45 |             return self.roi_layers[0](feats[0], rois)
46 | 
47 |         out_size = self.roi_layers[0].out_size
48 |         num_levels = len(feats)
49 |         roi_feats = feats[0].new_zeros(
50 |             rois.size(0), self.out_channels, *out_size)
51 | 
52 |         # some times rois is an empty tensor
53 |         if roi_feats.shape[0] == 0:
54 |             return roi_feats
55 | 
56 |         if roi_scale_factor is not None:
57 |             rois = self.roi_rescale(rois, roi_scale_factor)
58 | 
59 |         # mark the starting channels for concat mode
60 |         start_channels = 0
61 |         for i in range(num_levels):
62 |             roi_feats_t = self.roi_layers[i](feats[i], rois)
63 |             end_channels = start_channels + roi_feats_t.size(1)
64 |             if self.with_pre:
65 |                 # apply pre-processing to a RoI extracted from each layer
66 |                 roi_feats_t = self.pre_module(roi_feats_t)
67 |             if self.aggregation == 'sum':
68 |                 # and sum them all
69 |                 roi_feats += roi_feats_t
70 |             else:
71 |                 # and concat them along channel dimension
72 |                 roi_feats[:, start_channels:end_channels] = roi_feats_t
73 |             # update channels starting position
74 |             start_channels = end_channels
75 |         # check if concat channels match at the end
76 |         if self.aggregation == 'concat':
77 |             assert start_channels == self.out_channels
78 | 
79 |         if self.with_post:
80 |             # apply post-processing before return the result
81 |             roi_feats = self.post_module(roi_feats)
82 |         return roi_feats
83 | 


--------------------------------------------------------------------------------
/mmdet/models/losses/gaussian_focal_loss.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | 
 3 | from ..builder import LOSSES
 4 | from .utils import weighted_loss
 5 | 
 6 | 
 7 | @weighted_loss
 8 | def gaussian_focal_loss(pred, gaussian_target, alpha=2.0, gamma=4.0):
 9 |     """`Focal Loss <https://arxiv.org/abs/1708.02002>`_ for targets in
10 |     gaussian distribution.
11 | 
12 |     Args:
13 |         pred (torch.Tensor): The prediction.
14 |         gaussian_target (torch.Tensor): The learning target of the prediction
15 |             in gaussian distribution.
16 |         alpha (float, optional): A balanced form for Focal Loss.
17 |             Defaults to 2.0.
18 |         gamma (float, optional): The gamma for calculating the modulating
19 |             factor. Defaults to 4.0.
20 |     """
21 |     eps = 1e-12
22 |     pos_weights = gaussian_target.eq(1)
23 |     neg_weights = (1 - gaussian_target).pow(gamma)
24 |     pos_loss = -(pred + eps).log() * (1 - pred).pow(alpha) * pos_weights
25 |     neg_loss = -(1 - pred + eps).log() * pred.pow(alpha) * neg_weights
26 |     return pos_loss + neg_loss
27 | 
28 | 
@LOSSES.register_module()
class GaussianFocalLoss(nn.Module):
    """Module wrapper around :func:`gaussian_focal_loss`.

    More details can be found in the `paper
    <https://arxiv.org/abs/1808.01244>`_
    Code is modified from `kp_utils.py
    <https://github.com/princeton-vl/CornerNet/blob/master/models/py_utils/kp_utils.py#L152>`_  # noqa: E501
    Please notice that the target in GaussianFocalLoss is a gaussian heatmap,
    not 0/1 binary target.

    Args:
        alpha (float): Power of prediction.
        gamma (float): Power of target for negative samples.
        reduction (str): Options are "none", "mean" and "sum".
        loss_weight (float): Loss weight of current loss.
    """

    def __init__(self,
                 alpha=2.0,
                 gamma=4.0,
                 reduction='mean',
                 loss_weight=1.0):
        super().__init__()
        self.alpha, self.gamma = alpha, gamma
        self.reduction = reduction
        self.loss_weight = loss_weight

    def forward(self,
                pred,
                target,
                weight=None,
                avg_factor=None,
                reduction_override=None):
        """Forward function.

        Args:
            pred (torch.Tensor): The prediction.
            target (torch.Tensor): The learning target of the prediction
                in gaussian distribution.
            weight (torch.Tensor, optional): The weight of loss for each
                prediction. Defaults to None.
            avg_factor (int, optional): Average factor that is used to average
                the loss. Defaults to None.
            reduction_override (str, optional): The reduction method used to
                override the original reduction method of the loss.
                Defaults to None.

        Returns:
            torch.Tensor: The reduced loss scaled by ``loss_weight``.
        """
        assert reduction_override in (None, 'none', 'mean', 'sum')
        # an explicit override wins over the configured reduction
        reduction = reduction_override or self.reduction
        focal = gaussian_focal_loss(
            pred,
            target,
            weight,
            alpha=self.alpha,
            gamma=self.gamma,
            reduction=reduction,
            avg_factor=avg_factor)
        return self.loss_weight * focal
90 | 


--------------------------------------------------------------------------------
/conda_list.txt:
--------------------------------------------------------------------------------
 1 | # Name                    Version                   Build  Channel
 2 | _libgcc_mutex             0.1                        main  
 3 | addict                    2.2.1                    pypi_0    pypi
 4 | blas                      1.0                         mkl  
 5 | ca-certificates           2020.1.1                      0  
 6 | certifi                   2020.6.20                py37_0  
 7 | cudatoolkit               10.1.243             h6bb024c_0  
 8 | cycler                    0.10.0                   pypi_0    pypi
 9 | cython                    3.0a5                    pypi_0    pypi
10 | freetype                  2.10.2               h5ab3b9f_0  
11 | future                    0.18.2                   pypi_0    pypi
12 | intel-openmp              2020.1                      217  
13 | jpeg                      9b                   h024ee3a_2  
14 | kiwisolver                1.2.0                    pypi_0    pypi
15 | ld_impl_linux-64          2.33.1               h53a641e_7  
16 | libedit                   3.1.20191231         h7b6447c_0  
17 | libffi                    3.3                  he6710b0_1  
18 | libgcc-ng                 9.1.0                hdf63c60_0  
19 | libgfortran-ng            7.3.0                hdf63c60_0  
20 | libpng                    1.6.37               hbc83047_0  
21 | libstdcxx-ng              9.1.0                hdf63c60_0  
22 | libtiff                   4.1.0                h2733197_1  
23 | lz4-c                     1.9.2                he6710b0_0  
24 | matplotlib                3.3.0rc1                 pypi_0    pypi
25 | mkl                       2020.1                      217  
26 | mkl-service               2.3.0            py37he904b0f_0  
27 | mkl_fft                   1.1.0            py37h23d657b_0  
28 | mkl_random                1.1.1            py37h0573a6f_0  
29 | mmcv                      0.6.2                    pypi_0    pypi
30 | mmdet                     2.1.0+4a8f083             dev_0    <develop>
31 | ncurses                   6.2                  he6710b0_1  
32 | ninja                     1.9.0            py37hfd86e86_0  
33 | numpy                     1.18.5           py37ha1c710e_0  
34 | numpy-base                1.18.5           py37hde5b4d6_0  
35 | olefile                   0.46                     py37_0  
36 | opencv-python             4.2.0.34                 pypi_0    pypi
37 | openssl                   1.1.1g               h7b6447c_0  
38 | pillow                    6.2.2                    pypi_0    pypi
39 | pip                       20.1.1                   py37_1  
40 | pycocotools               12.0                     pypi_0    pypi
41 | pyparsing                 3.0.0a1                  pypi_0    pypi
42 | python                    3.7.7                hcff3b4d_5  
43 | python-dateutil           2.8.1                    pypi_0    pypi
44 | pytorch                   1.5.1           py3.7_cuda10.1.243_cudnn7.6.3_0    pytorch
45 | readline                  8.0                  h7b6447c_0  
46 | setuptools                47.3.1                   py37_0  
47 | six                       1.15.0                     py_0  
48 | sqlite                    3.32.3               h62c20be_0  
49 | tk                        8.6.10               hbc83047_0  
50 | torchvision               0.6.1                py37_cu101    pytorch
51 | wheel                     0.34.2                   py37_0  
52 | xz                        5.2.5                h7b6447c_0  
53 | yapf                      0.30.0                   pypi_0    pypi
54 | zlib                      1.2.11               h7b6447c_3  
55 | zstd                      1.4.4                h0b5b093_3


--------------------------------------------------------------------------------
/mmdet/models/roi_heads/base_roi_head.py:
--------------------------------------------------------------------------------
  1 | from abc import ABCMeta, abstractmethod
  2 | 
  3 | import torch.nn as nn
  4 | 
  5 | from ..builder import build_shared_head
  6 | 
  7 | 
  8 | class BaseRoIHead(nn.Module, metaclass=ABCMeta):
  9 |     """Base class for RoIHeads"""
 10 | 
 11 |     def __init__(self,
 12 |                  bbox_roi_extractor=None,
 13 |                  bbox_head=None,
 14 |                  mask_roi_extractor=None,
 15 |                  mask_head=None,
 16 |                  shared_head=None,
 17 |                  train_cfg=None,
 18 |                  test_cfg=None):
 19 |         super(BaseRoIHead, self).__init__()
 20 |         self.train_cfg = train_cfg
 21 |         self.test_cfg = test_cfg
 22 |         if shared_head is not None:
 23 |             self.shared_head = build_shared_head(shared_head)
 24 | 
 25 |         if bbox_head is not None:
 26 |             self.init_bbox_head(bbox_roi_extractor, bbox_head)
 27 | 
 28 |         if mask_head is not None:
 29 |             self.init_mask_head(mask_roi_extractor, mask_head)
 30 | 
 31 |         self.init_assigner_sampler()
 32 | 
 33 |     @property
 34 |     def with_bbox(self):
 35 |         """bool: whether the RoI head contains a `bbox_head`"""
 36 |         return hasattr(self, 'bbox_head') and self.bbox_head is not None
 37 | 
 38 |     @property
 39 |     def with_mask(self):
 40 |         """bool: whether the RoI head contains a `mask_head`"""
 41 |         return hasattr(self, 'mask_head') and self.mask_head is not None
 42 | 
 43 |     @property
 44 |     def with_shared_head(self):
 45 |         """bool: whether the RoI head contains a `shared_head`"""
 46 |         return hasattr(self, 'shared_head') and self.shared_head is not None
 47 | 
 48 |     @abstractmethod
 49 |     def init_weights(self, pretrained):
 50 |         """Initialize the weights in head
 51 | 
 52 |         Args:
 53 |             pretrained (str, optional): Path to pre-trained weights.
 54 |                 Defaults to None.
 55 |         """
 56 |         pass
 57 | 
 58 |     @abstractmethod
 59 |     def init_bbox_head(self):
 60 |         """Initialize ``bbox_head``"""
 61 |         pass
 62 | 
 63 |     @abstractmethod
 64 |     def init_mask_head(self):
 65 |         """Initialize ``mask_head``"""
 66 |         pass
 67 | 
 68 |     @abstractmethod
 69 |     def init_assigner_sampler(self):
 70 |         """Initialize assigner and sampler"""
 71 |         pass
 72 | 
 73 |     @abstractmethod
 74 |     def forward_train(self,
 75 |                       x,
 76 |                       img_meta,
 77 |                       proposal_list,
 78 |                       gt_bboxes,
 79 |                       gt_labels,
 80 |                       gt_bboxes_ignore=None,
 81 |                       gt_masks=None,
 82 |                       **kwargs):
 83 |         """Forward function during training"""
 84 |         pass
 85 | 
 86 |     async def async_simple_test(self, x, img_meta, **kwargs):
 87 |         """Asynchronized test function"""
 88 |         raise NotImplementedError
 89 | 
 90 |     def simple_test(self,
 91 |                     x,
 92 |                     proposal_list,
 93 |                     img_meta,
 94 |                     proposals=None,
 95 |                     rescale=False,
 96 |                     **kwargs):
 97 |         """Test without augmentation."""
 98 |         pass
 99 | 
100 |     def aug_test(self, x, proposal_list, img_metas, rescale=False, **kwargs):
101 |         """Test with augmentations.
102 | 
103 |         If rescale is False, then returned bboxes and masks will fit the scale
104 |         of imgs[0].
105 |         """
106 |         pass
107 | 


--------------------------------------------------------------------------------
/mmdet/models/roi_heads/mask_heads/coarse_mask_head.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | from mmcv.cnn import ConvModule, constant_init, xavier_init
 3 | 
 4 | from mmdet.core import auto_fp16
 5 | from mmdet.models.builder import HEADS
 6 | from .fcn_mask_head import FCNMaskHead
 7 | 
 8 | 
 9 | @HEADS.register_module
10 | class CoarseMaskHead(FCNMaskHead):
11 |     """Coarse mask head used in PointRend.
12 | 
13 |     Compared with standard ``FCNMaskHead``, ``CoarseMaskHead`` will downsample
14 |     the input feature map instead of upsample it.
15 | 
16 |     Args:
17 |         num_convs (int): Number of conv layers in the head. Default: 0.
18 |         num_fcs (int): Number of fc layers in the head. Default: 2.
19 |         fc_out_channels (int): Number of output channels of fc layer.
20 |             Default: 1024.
21 |         downsample_factor (int): The factor that feature map is downsampled by.
22 |             Default: 2.
23 |     """
24 | 
25 |     def __init__(self,
26 |                  num_convs=0,
27 |                  num_fcs=2,
28 |                  fc_out_channels=1024,
29 |                  downsample_factor=2,
30 |                  *arg,
31 |                  **kwarg):
32 |         super(CoarseMaskHead, self).__init__(
33 |             *arg, num_convs=num_convs, upsample_cfg=dict(type=None), **kwarg)
34 |         self.num_fcs = num_fcs
35 |         assert self.num_fcs > 0
36 |         self.fc_out_channels = fc_out_channels
37 |         self.downsample_factor = downsample_factor
38 |         assert self.downsample_factor >= 1
39 |         # remove conv_logit
40 |         delattr(self, 'conv_logits')
41 | 
42 |         if downsample_factor > 1:
43 |             downsample_in_channels = (
44 |                 self.conv_out_channels
45 |                 if self.num_convs > 0 else self.in_channels)
46 |             self.downsample_conv = ConvModule(
47 |                 downsample_in_channels,
48 |                 self.conv_out_channels,
49 |                 kernel_size=downsample_factor,
50 |                 stride=downsample_factor,
51 |                 padding=0,
52 |                 conv_cfg=self.conv_cfg,
53 |                 norm_cfg=self.norm_cfg)
54 |         else:
55 |             self.downsample_conv = None
56 | 
57 |         self.output_size = (self.roi_feat_size[0] // downsample_factor,
58 |                             self.roi_feat_size[1] // downsample_factor)
59 |         self.output_area = self.output_size[0] * self.output_size[1]
60 | 
61 |         last_layer_dim = self.conv_out_channels * self.output_area
62 | 
63 |         self.fcs = nn.ModuleList()
64 |         for i in range(num_fcs):
65 |             fc_in_channels = (
66 |                 last_layer_dim if i == 0 else self.fc_out_channels)
67 |             self.fcs.append(nn.Linear(fc_in_channels, self.fc_out_channels))
68 |         last_layer_dim = self.fc_out_channels
69 |         output_channels = self.num_classes * self.output_area
70 |         self.fc_logits = nn.Linear(last_layer_dim, output_channels)
71 | 
72 |     def init_weights(self):
73 |         for m in self.fcs.modules():
74 |             if isinstance(m, nn.Linear):
75 |                 xavier_init(m)
76 |         constant_init(self.fc_logits, 0.001)
77 | 
78 |     @auto_fp16()
79 |     def forward(self, x):
80 |         for conv in self.convs:
81 |             x = conv(x)
82 | 
83 |         if self.downsample_conv is not None:
84 |             x = self.downsample_conv(x)
85 | 
86 |         x = x.flatten(1)
87 |         for fc in self.fcs:
88 |             x = self.relu(fc(x))
89 |         mask_pred = self.fc_logits(x).view(
90 |             x.size(0), self.num_classes, *self.output_size)
91 |         return mask_pred
92 | 


--------------------------------------------------------------------------------
/mmdet/ops/masked_conv/masked_conv.py:
--------------------------------------------------------------------------------
 1 | import math
 2 | 
 3 | import torch
 4 | import torch.nn as nn
 5 | from torch.autograd import Function
 6 | from torch.autograd.function import once_differentiable
 7 | from torch.nn.modules.utils import _pair
 8 | 
 9 | from . import masked_conv2d_ext
10 | 
11 | 
class MaskedConv2dFunction(Function):
    """Autograd function computing a convolution only at masked locations.

    Only the forward pass is implemented; ``backward`` propagates no
    gradient to any input.
    """

    @staticmethod
    def forward(ctx, features, mask, weight, bias, padding=0, stride=1):
        """Convolve ``features`` at the positions where ``mask`` is positive.

        Args:
            ctx: Autograd context (unused).
            features (Tensor): 4-dim CUDA tensor whose first dim is 1.
            mask (Tensor): 3-dim tensor whose first dim is 1 and whose last
                two dims equal the last two dims of ``features``.
            weight (Tensor): 4-dim kernel, unpacked as
                (out_channel, in_channel, kernel_h, kernel_w).
            bias (Tensor | None): Per-output-channel bias, or None.
            padding (int | tuple[int]): Padding, expanded with ``_pair``.
            stride (int | tuple[int]): Stride; only 1 is supported.

        Returns:
            Tensor: Zero-initialized output of shape
                (1, out_channel, out_h, out_w), filled at the masked
                positions.

        Raises:
            ValueError: If the stride is not 1.
            NotImplementedError: If ``features`` is not a CUDA tensor.
        """
        assert mask.dim() == 3 and mask.size(0) == 1
        assert features.dim() == 4 and features.size(0) == 1
        assert features.size()[2:] == mask.size()[1:]
        pad_h, pad_w = _pair(padding)
        stride_h, stride_w = _pair(stride)
        if stride_h != 1 or stride_w != 1:
            raise ValueError(
                'Only stride 1 is supported in masked_conv2d currently.')
        if not features.is_cuda:
            raise NotImplementedError(
                'masked_conv2d is only implemented for CUDA tensors.')

        out_channel, in_channel, kernel_h, kernel_w = weight.size()

        batch_size = features.size(0)
        # standard conv output size; the width must use kernel_w so that
        # non-square kernels get the right shape
        out_h = (features.size(2) + 2 * pad_h -
                 (kernel_h - 1) - 1) // stride_h + 1
        out_w = (features.size(3) + 2 * pad_w -
                 (kernel_w - 1) - 1) // stride_w + 1
        mask_inds = torch.nonzero(mask[0] > 0, as_tuple=False)
        output = features.new_zeros(batch_size, out_channel, out_h, out_w)
        if mask_inds.numel() > 0:
            mask_h_idx = mask_inds[:, 0].contiguous()
            mask_w_idx = mask_inds[:, 1].contiguous()
            # one column of unfolded input patch per masked location
            data_col = features.new_zeros(in_channel * kernel_h * kernel_w,
                                          mask_inds.size(0))
            masked_conv2d_ext.masked_im2col_forward(features, mask_h_idx,
                                                    mask_w_idx, kernel_h,
                                                    kernel_w, pad_h, pad_w,
                                                    data_col)

            flat_weight = weight.view(out_channel, -1)
            if bias is None:
                masked_output = torch.mm(flat_weight, data_col)
            else:
                # bias + flat_weight @ data_col (beta = alpha = 1 defaults)
                masked_output = torch.addmm(bias[:, None], flat_weight,
                                            data_col)
            masked_conv2d_ext.masked_col2im_forward(masked_output, mask_h_idx,
                                                    mask_w_idx, out_h, out_w,
                                                    out_channel, output)
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        """Return no gradient for any of the six forward inputs."""
        # one entry per forward input:
        # features, mask, weight, bias, padding, stride
        return (None, ) * 6
59 | 
60 | 
# functional entry point: forwards its arguments to MaskedConv2dFunction
masked_conv2d = MaskedConv2dFunction.apply
62 | 
63 | 
class MaskedConv2d(nn.Conv2d):
    """A Conv2d that can restrict its computation to masked locations.

    Without a mask it behaves like the official Conv2d. The masked forward
    doesn't implement the backward function and only supports the stride
    parameter to be 1 currently.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias=True):
        super().__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias=bias)

    def forward(self, input, mask=None):
        """Run the masked convolution, or a normal one if ``mask`` is None.

        Only the layer's weight, bias and padding are handed to the masked
        path.
        """
        if mask is not None:
            return masked_conv2d(input, mask, self.weight, self.bias,
                                 self.padding)
        # fallback to the normal Conv2d
        return super().forward(input)
90 | 


--------------------------------------------------------------------------------
/mmdet/models/roi_heads/roi_extractors/sg_single_level_roi_extractor.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | from mmdet.core import force_fp32
 4 | from mmdet.models.builder import ROI_EXTRACTORS
 5 | from .base_roi_extractor import BaseRoIExtractor
 6 | 
 7 | 
 8 | @ROI_EXTRACTORS.register_module()
 9 | class SgSingleRoIExtractor(BaseRoIExtractor):
10 |     """Extract RoI features from a single level feature map.
11 | 
12 |     If there are multiple input feature levels, each RoI is mapped to a level
13 |     according to its scale. The mapping rule is proposed in
14 |     `FPN <https://arxiv.org/abs/1612.03144>`_.
15 | 
16 |     Args:
17 |         roi_layer (dict): Specify RoI layer type and arguments.
18 |         out_channels (int): Output channels of RoI layers.
19 |         featmap_strides (int): Strides of input feature maps.
20 |         finest_scale (int): Scale threshold of mapping to level 0. Default: 56.
21 |     """
22 | 
23 |     def __init__(self,
24 |                  roi_layer,
25 |                  out_channels,
26 |                  featmap_strides,
27 |                  finest_scale=56):
28 |         super(SgSingleRoIExtractor, self).__init__(roi_layer, out_channels,
29 |                                                  featmap_strides)
30 |         self.finest_scale = finest_scale
31 | 
32 |     def map_roi_levels(self, rois, num_levels):
33 |         """Map rois to corresponding feature levels by scales.
34 | 
35 |         - scale < finest_scale * 2: level 0
36 |         - finest_scale * 2 <= scale < finest_scale * 4: level 1
37 |         - finest_scale * 4 <= scale < finest_scale * 8: level 2
38 |         - scale >= finest_scale * 8: level 3
39 | 
40 |         Args:
41 |             rois (Tensor): Input RoIs, shape (k, 5).
42 |             num_levels (int): Total level number.
43 | 
44 |         Returns:
45 |             Tensor: Level index (0-based) of each RoI, shape (k, )
46 |         """
47 |         scale = torch.sqrt(
48 |             (rois[:, 3] - rois[:, 1]) * (rois[:, 4] - rois[:, 2]))
49 |         target_lvls = torch.floor(torch.log2(scale / self.finest_scale + 1e-6))
50 |         target_lvls = target_lvls.clamp(min=0, max=num_levels - 1).long()
51 |         return target_lvls
52 | 
53 |     @force_fp32(apply_to=('feats', ), out_fp16=True)
54 |     def forward(self, feats, rois, roi_scale_factor=None, masks=None):
55 |         """Forward function"""
56 |         out_size = self.roi_layers[0].out_size
57 |         num_levels = len(feats)
58 |         roi_feats = feats[0].new_zeros(
59 |             rois.size(0), self.out_channels, *out_size)
60 | 
61 |         if masks is not None:
62 |             resized_masks = nn.functional.adaptive_avg_pool2d(masks, roi_feats.shape[-2:])
63 | 
64 | 
65 |         if num_levels == 1:
66 |             if len(rois) == 0:
67 |                 return roi_feats
68 |             if masks is None:
69 |                 return self.roi_layers[0](feats[0], rois)
70 |             else:
71 |                 return self.roi_layers[0](feats[0], rois) * (resized_masks + 1.0)
72 | 
73 |         target_lvls = self.map_roi_levels(rois, num_levels)
74 |         if roi_scale_factor is not None:
75 |             rois = self.roi_rescale(rois, roi_scale_factor)
76 |         for i in range(num_levels):
77 |             inds = target_lvls == i
78 |             if inds.any():
79 |                 rois_ = rois[inds, :]
80 |                 roi_feats_t = self.roi_layers[i](feats[i], rois_)
81 |                 if masks is not None:
82 |                     resized_masks_t = resized_masks[inds]
83 |                     roi_feats_t = roi_feats_t * (resized_masks_t + 1.0)
84 |                 roi_feats[inds] = roi_feats_t
85 |             else:
86 |                 roi_feats += sum(x.view(-1)[0] for x in self.parameters()) * 0.
87 |         return roi_feats
88 | 


--------------------------------------------------------------------------------