├── cls_head_models ├── __init__.py ├── 2fc.py ├── simple2fc.py └── simple3fc.py ├── instaboost ├── .svn │ ├── entries │ ├── format │ ├── wc.db-journal │ ├── wc.db │ └── pristine │ │ ├── c7 │ │ └── c701008e17212a7a6aa964551bf957bba1c3fb95.svn-base │ │ ├── aa │ │ └── aa94ef40fd3d99558c5026f2eaf05a380206b80e.svn-base │ │ └── 9a │ │ └── 9a5e7d16f998e55c36ca225a6b41887910f19387.svn-base ├── __init__.py ├── exceptions.py └── config.py ├── htc.png ├── demo ├── demo.jpg ├── 000000125100.jpg ├── 000000125106.jpg ├── 000000125107.jpg ├── 000000125109.jpg ├── 000000125110.jpg ├── 000000412510.jpg ├── coco_test_12510.jpg ├── corruptions_sev_3.png ├── demo.py └── webcam_demo.py ├── mrcnn-lvis.png ├── mrcnn_cocolt.png ├── is_crowd_id_val.pt ├── lvis_val_cats_info.pt ├── lvis_train_cate_info.pt ├── lvis_train_cats_info.pt ├── mmdet ├── datasets │ ├── registry.py │ ├── loader │ │ ├── __init__.py │ │ └── build_loader.py │ ├── cityscapes.py │ ├── voc.py │ ├── __init__.py │ ├── builder.py │ ├── wider_face.py │ ├── dataset_wrappers.py │ └── utils.py ├── models │ ├── shared_heads │ │ ├── __init__.py │ │ └── res_layer.py │ ├── roi_extractors │ │ └── __init__.py │ ├── necks │ │ └── __init__.py │ ├── plugins │ │ └── __init__.py │ ├── backbones │ │ └── __init__.py │ ├── bbox_heads │ │ └── __init__.py │ ├── registry.py │ ├── utils │ │ ├── scale.py │ │ ├── __init__.py │ │ ├── conv_ws.py │ │ ├── weight_init.py │ │ └── norm.py │ ├── mask_heads │ │ ├── __init__.py │ │ └── htc_mask_head.py │ ├── anchor_heads │ │ ├── __init__.py │ │ └── retina_head.py │ ├── detectors │ │ ├── fcos.py │ │ ├── retinanet.py │ │ ├── faster_rcnn.py │ │ ├── __init__.py │ │ ├── fast_rcnn.py │ │ ├── mask_rcnn.py │ │ └── single_stage.py │ ├── losses │ │ ├── mse_loss.py │ │ ├── accuracy.py │ │ ├── __init__.py │ │ ├── smooth_l1_loss.py │ │ ├── balanced_l1_loss.py │ │ └── focal_loss.py │ ├── __init__.py │ └── builder.py ├── ops │ ├── nms │ │ ├── __init__.py │ │ ├── src │ │ │ ├── nms_cuda.cpp │ │ │ └── nms_cpu.cpp │ │ └── nms_wrapper.py │ ├── roi_pool │ │ ├── __init__.py │ │ ├── gradcheck.py │ │ └── roi_pool.py │ ├── roi_align │ │ ├── __init__.py │ │ └── gradcheck.py │ ├── masked_conv │ │ ├── __init__.py │ │ └── src │ │ │ └── masked_conv2d_cuda.cpp │ ├── sigmoid_focal_loss │ │ ├── __init__.py │ │ ├── sigmoid_focal_loss.py │ │ └── src │ │ │ └── sigmoid_focal_loss.cpp │ ├── dcn │ │ └── __init__.py │ └── __init__.py ├── __init__.py ├── utils │ ├── __init__.py │ └── registry.py ├── core │ ├── mask │ │ ├── __init__.py │ │ ├── utils.py │ │ └── mask_target.py │ ├── fp16 │ │ ├── __init__.py │ │ └── utils.py │ ├── utils │ │ ├── __init__.py │ │ ├── misc.py │ │ └── dist_utils.py │ ├── bbox │ │ ├── assigners │ │ │ ├── base_assigner.py │ │ │ ├── __init__.py │ │ │ └── assign_result.py │ │ ├── samplers │ │ │ ├── combined_sampler.py │ │ │ ├── __init__.py │ │ │ ├── sampling_result.py │ │ │ ├── pseudo_sampler.py │ │ │ ├── instance_balanced_pos_sampler.py │ │ │ ├── random_sampler.py │ │ │ ├── ohem_sampler.py │ │ │ └── base_sampler.py │ │ ├── __init__.py │ │ ├── assign_sampling.py │ │ └── geometry.py │ ├── __init__.py │ ├── anchor │ │ └── __init__.py │ ├── post_processing │ │ ├── __init__.py │ │ └── bbox_nms.py │ └── evaluation │ │ ├── __init__.py.bk │ │ ├── __init__.py │ │ └── bbox_overlaps.py └── apis │ ├── __init__.py │ └── env.py ├── cls_id_683_top_sim_classes.pt ├── class_to_imageid_and_inscount.pt ├── lvis_api ├── .gitignore ├── requirements.txt ├── lvis │ └── __init__.py ├── test.py ├── setup.py ├── LICENSE └── README.md ├── class_to_imageid_and_inscount_new.pt 
├── class_to_imageid_and_inscount_val.pt ├── tools ├── stop_para_test.sh ├── dist_train.sh ├── dist_test.sh ├── dist_test_htc.sh ├── start_para_test_gpu4.sh ├── start_para_test_gpu8.sh ├── slurm_test.sh ├── slurm_train.sh ├── draw_cls_dist_coco.py ├── coco_eval.py ├── configs │ ├── empirical_attention │ │ └── README.md │ ├── scratch │ │ └── README.md │ ├── wider_face │ │ └── README.md │ ├── ghm │ │ └── README.md │ ├── grid_rcnn │ │ └── README.md │ ├── libra_rcnn │ │ └── README.md │ ├── gn │ │ └── README.md │ ├── ms_rcnn │ │ └── README.md │ ├── fcos │ │ └── README.md │ └── hrnet │ │ └── README.md ├── publish_model.py ├── draw_cls_dist_lvis.py ├── draw_pr_recall_bar_lviscoco.py ├── upgrade_model_version.py ├── draw_eAP_sensitivity_binnum.py ├── draw_ft_epoch_ablation.py ├── voc_eval.py ├── draw_comparison_head_design_choices.py └── draw_eAP_sensitivity_eap.py ├── Transparent_Martini_PNG_Clipart-621.png ├── zero_ap_classes_mrcnnr50_boxmask_ag.pt ├── class_to_imageid_and_inscount_val_new.pt ├── lvis_maskrcnn_r50fpn.pkl_per_cat_recall.pt ├── class_to_imageid_and_inscount_coco_sampled.pt ├── .style.yapf ├── exist_categories_in_val_ap_sorted_mrcnn_r101fpn.pt ├── exist_categories_in_val_ap_sorted_mrcnn_r50fpn_props_gt_label.pt ├── .github └── ISSUE_TEMPLATE │ ├── general_questions.md │ ├── feature_request.md │ └── error-report.md ├── .isort.cfg ├── .travis.yml ├── docker └── Dockerfile ├── install.sh ├── configs ├── empirical_attention │ └── README.md ├── pascal_voc │ └── README.md ├── scratch │ └── README.md ├── wider_face │ └── README.md ├── ghm │ └── README.md ├── grid_rcnn │ └── README.md ├── cityscapes │ └── README.md ├── libra_rcnn │ └── README.md ├── gn │ └── README.md ├── ms_rcnn │ └── README.md ├── fcos │ └── README.md └── hrnet │ └── README.md ├── CONTRIBUTING.md ├── ft_cal_epoch_ablation_for_drawing_compose.txt ├── ft_cal_epoch_ablation_for_drawing.txt ├── ft_cat_epoch_ablation_for_drawing.txt └── .gitignore /cls_head_models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /instaboost/.svn/entries: -------------------------------------------------------------------------------- 1 | 12 2 | -------------------------------------------------------------------------------- /instaboost/.svn/format: -------------------------------------------------------------------------------- 1 | 12 2 | -------------------------------------------------------------------------------- /instaboost/.svn/wc.db-journal: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /htc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/htc.png -------------------------------------------------------------------------------- /demo/demo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/demo/demo.jpg -------------------------------------------------------------------------------- /mrcnn-lvis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/mrcnn-lvis.png -------------------------------------------------------------------------------- /mrcnn_cocolt.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/mrcnn_cocolt.png -------------------------------------------------------------------------------- /is_crowd_id_val.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/is_crowd_id_val.pt -------------------------------------------------------------------------------- /demo/000000125100.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/demo/000000125100.jpg -------------------------------------------------------------------------------- /demo/000000125106.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/demo/000000125106.jpg -------------------------------------------------------------------------------- /demo/000000125107.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/demo/000000125107.jpg -------------------------------------------------------------------------------- /demo/000000125109.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/demo/000000125109.jpg -------------------------------------------------------------------------------- /demo/000000125110.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/demo/000000125110.jpg -------------------------------------------------------------------------------- /demo/000000412510.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/demo/000000412510.jpg -------------------------------------------------------------------------------- /instaboost/.svn/wc.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/instaboost/.svn/wc.db -------------------------------------------------------------------------------- /lvis_val_cats_info.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/lvis_val_cats_info.pt -------------------------------------------------------------------------------- /demo/coco_test_12510.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/demo/coco_test_12510.jpg -------------------------------------------------------------------------------- /lvis_train_cate_info.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/lvis_train_cate_info.pt -------------------------------------------------------------------------------- /lvis_train_cats_info.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/lvis_train_cats_info.pt -------------------------------------------------------------------------------- /demo/corruptions_sev_3.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/twangnh/SimCal/HEAD/demo/corruptions_sev_3.png -------------------------------------------------------------------------------- /mmdet/datasets/registry.py: -------------------------------------------------------------------------------- 1 | from mmdet.utils import Registry 2 | 3 | DATASETS = Registry('dataset') 4 | -------------------------------------------------------------------------------- /cls_id_683_top_sim_classes.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/cls_id_683_top_sim_classes.pt -------------------------------------------------------------------------------- /mmdet/models/shared_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .res_layer import ResLayer 2 | 3 | __all__ = ['ResLayer'] 4 | -------------------------------------------------------------------------------- /mmdet/ops/nms/__init__.py: -------------------------------------------------------------------------------- 1 | from .nms_wrapper import nms, soft_nms 2 | 3 | __all__ = ['nms', 'soft_nms'] 4 | -------------------------------------------------------------------------------- /class_to_imageid_and_inscount.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/class_to_imageid_and_inscount.pt -------------------------------------------------------------------------------- /lvis_api/.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints 2 | __pycache__ 3 | .DS_Store 4 | dist/* 5 | lvis.egg-info/ 6 | build/* 7 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/__init__.py: -------------------------------------------------------------------------------- 1 | from .roi_pool import RoIPool, roi_pool 2 | 3 | __all__ = ['roi_pool', 'RoIPool'] 4 | -------------------------------------------------------------------------------- /class_to_imageid_and_inscount_new.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/class_to_imageid_and_inscount_new.pt -------------------------------------------------------------------------------- /class_to_imageid_and_inscount_val.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/class_to_imageid_and_inscount_val.pt -------------------------------------------------------------------------------- /mmdet/__init__.py: -------------------------------------------------------------------------------- 1 | from .version import __version__, short_version 2 | 3 | __all__ = ['__version__', 'short_version'] 4 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/__init__.py: -------------------------------------------------------------------------------- 1 | from .roi_align import RoIAlign, roi_align 2 | 3 | __all__ = ['roi_align', 'RoIAlign'] 4 | -------------------------------------------------------------------------------- /tools/stop_para_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | for idx in 0 1 2 3 4 5 6 7 3 | do 4 | tmux kill-session -t "set$idx" 5 | done 6 | 
-------------------------------------------------------------------------------- /Transparent_Martini_PNG_Clipart-621.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/Transparent_Martini_PNG_Clipart-621.png -------------------------------------------------------------------------------- /mmdet/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .registry import Registry, build_from_cfg 2 | 3 | __all__ = ['Registry', 'build_from_cfg'] 4 | -------------------------------------------------------------------------------- /zero_ap_classes_mrcnnr50_boxmask_ag.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/zero_ap_classes_mrcnnr50_boxmask_ag.pt -------------------------------------------------------------------------------- /class_to_imageid_and_inscount_val_new.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/class_to_imageid_and_inscount_val_new.pt -------------------------------------------------------------------------------- /mmdet/models/roi_extractors/__init__.py: -------------------------------------------------------------------------------- 1 | from .single_level import SingleRoIExtractor 2 | 3 | __all__ = ['SingleRoIExtractor'] 4 | -------------------------------------------------------------------------------- /instaboost/__init__.py: -------------------------------------------------------------------------------- 1 | from .InstaBoost import * 2 | from .config import * 3 | 4 | __all__ = ['get_new_data', 'InstaBoostConfig'] 5 | -------------------------------------------------------------------------------- /lvis_maskrcnn_r50fpn.pkl_per_cat_recall.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/lvis_maskrcnn_r50fpn.pkl_per_cat_recall.pt -------------------------------------------------------------------------------- /class_to_imageid_and_inscount_coco_sampled.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/class_to_imageid_and_inscount_coco_sampled.pt -------------------------------------------------------------------------------- /mmdet/ops/masked_conv/__init__.py: -------------------------------------------------------------------------------- 1 | from .masked_conv import MaskedConv2d, masked_conv2d 2 | 3 | __all__ = ['masked_conv2d', 'MaskedConv2d'] 4 | -------------------------------------------------------------------------------- /.style.yapf: -------------------------------------------------------------------------------- 1 | [style] 2 | BASED_ON_STYLE = pep8 3 | BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true 4 | SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true 5 | -------------------------------------------------------------------------------- /exist_categories_in_val_ap_sorted_mrcnn_r101fpn.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/exist_categories_in_val_ap_sorted_mrcnn_r101fpn.pt -------------------------------------------------------------------------------- /mmdet/models/necks/__init__.py: -------------------------------------------------------------------------------- 1 | 
from .bfp import BFP 2 | from .fpn import FPN 3 | from .hrfpn import HRFPN 4 | 5 | __all__ = ['FPN', 'BFP', 'HRFPN'] 6 | -------------------------------------------------------------------------------- /mmdet/core/mask/__init__.py: -------------------------------------------------------------------------------- 1 | from .mask_target import mask_target 2 | from .utils import split_combined_polys 3 | 4 | __all__ = ['split_combined_polys', 'mask_target'] 5 | -------------------------------------------------------------------------------- /exist_categories_in_val_ap_sorted_mrcnn_r50fpn_props_gt_label.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/exist_categories_in_val_ap_sorted_mrcnn_r50fpn_props_gt_label.pt -------------------------------------------------------------------------------- /mmdet/ops/sigmoid_focal_loss/__init__.py: -------------------------------------------------------------------------------- 1 | from .sigmoid_focal_loss import SigmoidFocalLoss, sigmoid_focal_loss 2 | 3 | __all__ = ['SigmoidFocalLoss', 'sigmoid_focal_loss'] 4 | -------------------------------------------------------------------------------- /mmdet/models/plugins/__init__.py: -------------------------------------------------------------------------------- 1 | from .generalized_attention import GeneralizedAttention 2 | from .non_local import NonLocal2D 3 | 4 | __all__ = ['NonLocal2D', 'GeneralizedAttention'] 5 | -------------------------------------------------------------------------------- /instaboost/.svn/pristine/c7/c701008e17212a7a6aa964551bf957bba1c3fb95.svn-base: -------------------------------------------------------------------------------- 1 | from .InstaBoost import * 2 | from .config import * 3 | 4 | __all__ = ['get_new_data', 'InstaBoostConfig'] 5 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/general_questions.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: General questions 3 | about: Ask general questions to get help 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | 11 | -------------------------------------------------------------------------------- /mmdet/core/fp16/__init__.py: -------------------------------------------------------------------------------- 1 | from .decorators import auto_fp16, force_fp32 2 | from .hooks import Fp16OptimizerHook, wrap_fp16_model 3 | 4 | __all__ = ['auto_fp16', 'force_fp32', 'Fp16OptimizerHook', 'wrap_fp16_model'] 5 | -------------------------------------------------------------------------------- /lvis_api/requirements.txt: -------------------------------------------------------------------------------- 1 | cycler==0.10.0 2 | Cython==0.29.12 3 | kiwisolver==1.1.0 4 | matplotlib==3.1.1 5 | numpy==1.16.4 6 | opencv-python==4.1.0.25 7 | pyparsing==2.4.0 8 | python-dateutil==2.8.0 9 | six==1.12.0 10 | -------------------------------------------------------------------------------- /mmdet/datasets/loader/__init__.py: -------------------------------------------------------------------------------- 1 | from .build_loader import build_dataloader 2 | from .sampler import GroupSampler, DistributedGroupSampler 3 | 4 | __all__ = ['GroupSampler', 'DistributedGroupSampler', 'build_dataloader'] 5 | -------------------------------------------------------------------------------- /mmdet/models/backbones/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .hrnet import HRNet 2 | from .resnet import ResNet, make_res_layer 3 | from .resnext import ResNeXt 4 | from .ssd_vgg import SSDVGG 5 | 6 | __all__ = ['ResNet', 'make_res_layer', 'ResNeXt', 'SSDVGG', 'HRNet'] 7 | -------------------------------------------------------------------------------- /tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | PYTHON=${PYTHON:-"python"} 4 | 5 | CONFIG=$1 6 | GPUS=$2 7 | 8 | $PYTHON -m torch.distributed.launch --nproc_per_node=$GPUS \ 9 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} 10 | -------------------------------------------------------------------------------- /mmdet/core/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .dist_utils import DistOptimizerHook, allreduce_grads 2 | from .misc import multi_apply, tensor2imgs, unmap 3 | 4 | __all__ = [ 5 | 'allreduce_grads', 'DistOptimizerHook', 'tensor2imgs', 'unmap', 6 | 'multi_apply' 7 | ] 8 | -------------------------------------------------------------------------------- /.isort.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | line_length = 79 3 | multi_line_output = 0 4 | known_first_party = mmdet 5 | known_third_party = mmcv,numpy,matplotlib,pycocotools,six,seaborn,terminaltables,torch,torchvision 6 | no_lines_before = STDLIB,LOCALFOLDER 7 | default_section = THIRDPARTY -------------------------------------------------------------------------------- /mmdet/core/bbox/assigners/base_assigner.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | 4 | class BaseAssigner(metaclass=ABCMeta): 5 | 6 | @abstractmethod 7 | def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None): 8 | pass 9 | -------------------------------------------------------------------------------- /mmdet/models/bbox_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .bbox_head import BBoxHead 2 | from .convfc_bbox_head import ConvFCBBoxHead, SharedFCBBoxHead 3 | from .double_bbox_head import DoubleConvFCBBoxHead 4 | 5 | __all__ = [ 6 | 'BBoxHead', 'ConvFCBBoxHead', 'SharedFCBBoxHead', 'DoubleConvFCBBoxHead' 7 | ] 8 | -------------------------------------------------------------------------------- /tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | PYTHON=${PYTHON:-"python"} 4 | 5 | CONFIG=$1 6 | GPUS=$2 7 | PORT=${PORT:-29500} 8 | 9 | $PYTHON -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 10 | $(dirname "$0")/test_lvis.py $CONFIG --launcher pytorch ${@:3} -------------------------------------------------------------------------------- /mmdet/datasets/cityscapes.py: -------------------------------------------------------------------------------- 1 | from .coco import CocoDataset 2 | from .registry import DATASETS 3 | 4 | 5 | @DATASETS.register_module 6 | class CityscapesDataset(CocoDataset): 7 | 8 | CLASSES = ('person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle', 9 | 'bicycle') 10 | -------------------------------------------------------------------------------- /tools/dist_test_htc.sh: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/env bash 2 | 3 | PYTHON=${PYTHON:-"python"} 4 | 5 | CONFIG=$1 6 | GPUS=$2 7 | PORT=${PORT:-29500} 8 | 9 | $PYTHON -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 10 | $(dirname "$0")/test_lvis_htc.py $CONFIG --launcher pytorch ${@:3} -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: xenial 2 | language: python 3 | 4 | install: 5 | - pip install isort flake8 yapf 6 | 7 | python: 8 | - "3.5" 9 | - "3.6" 10 | - "3.7" 11 | 12 | script: 13 | - flake8 14 | - isort -rc --check-only --diff mmdet/ tools/ 15 | - yapf -r -d --style .style.yapf mmdet/ tools/ -------------------------------------------------------------------------------- /mmdet/models/registry.py: -------------------------------------------------------------------------------- 1 | from mmdet.utils import Registry 2 | 3 | BACKBONES = Registry('backbone') 4 | NECKS = Registry('neck') 5 | ROI_EXTRACTORS = Registry('roi_extractor') 6 | SHARED_HEADS = Registry('shared_head') 7 | HEADS = Registry('head') 8 | LOSSES = Registry('loss') 9 | DETECTORS = Registry('detector') 10 | -------------------------------------------------------------------------------- /mmdet/core/bbox/assigners/__init__.py: -------------------------------------------------------------------------------- 1 | from .approx_max_iou_assigner import ApproxMaxIoUAssigner 2 | from .assign_result import AssignResult 3 | from .base_assigner import BaseAssigner 4 | from .max_iou_assigner import MaxIoUAssigner 5 | 6 | __all__ = [ 7 | 'BaseAssigner', 'MaxIoUAssigner', 'ApproxMaxIoUAssigner', 'AssignResult' 8 | ] 9 | -------------------------------------------------------------------------------- /mmdet/core/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor import * # noqa: F401, F403 2 | from .bbox import * # noqa: F401, F403 3 | from .evaluation import * # noqa: F401, F403 4 | from .fp16 import * # noqa: F401, F403 5 | from .mask import * # noqa: F401, F403 6 | from .post_processing import * # noqa: F401, F403 7 | from .utils import * # noqa: F401, F403 8 | -------------------------------------------------------------------------------- /mmdet/core/anchor/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor_generator import AnchorGenerator 2 | from .anchor_target import anchor_inside_flags, anchor_target 3 | from .guided_anchor_target import ga_loc_target, ga_shape_target 4 | 5 | __all__ = [ 6 | 'AnchorGenerator', 'anchor_target', 'anchor_inside_flags', 'ga_loc_target', 7 | 'ga_shape_target' 8 | ] 9 | -------------------------------------------------------------------------------- /mmdet/core/post_processing/__init__.py: -------------------------------------------------------------------------------- 1 | from .bbox_nms import multiclass_nms 2 | from .merge_augs import (merge_aug_bboxes, merge_aug_masks, 3 | merge_aug_proposals, merge_aug_scores) 4 | 5 | __all__ = [ 6 | 'multiclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes', 7 | 'merge_aug_scores', 'merge_aug_masks' 8 | ] 9 | -------------------------------------------------------------------------------- /mmdet/models/utils/scale.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class Scale(nn.Module): 6 | 7 | def __init__(self, scale=1.0): 8 | 
super(Scale, self).__init__() 9 | self.scale = nn.Parameter(torch.tensor(scale, dtype=torch.float)) 10 | 11 | def forward(self, x): 12 | return x * self.scale 13 | -------------------------------------------------------------------------------- /mmdet/models/mask_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .fcn_mask_head import FCNMaskHead 2 | from .fused_semantic_head import FusedSemanticHead 3 | from .grid_head import GridHead 4 | from .htc_mask_head import HTCMaskHead 5 | from .maskiou_head import MaskIoUHead 6 | 7 | __all__ = [ 8 | 'FCNMaskHead', 'HTCMaskHead', 'FusedSemanticHead', 'GridHead', 9 | 'MaskIoUHead' 10 | ] 11 | -------------------------------------------------------------------------------- /instaboost/exceptions.py: -------------------------------------------------------------------------------- 1 | class TrimapError(Exception): 2 | """ 3 | Error when creating matting trimap. 4 | """ 5 | def __init__(self, err): 6 | super(TrimapError, self).__init__(err) 7 | 8 | 9 | class AnnError(Exception): 10 | """ 11 | Error with Input annotation. 12 | """ 13 | def __init__(self, err): 14 | super(AnnError, self).__init__(err) 15 | -------------------------------------------------------------------------------- /lvis_api/lvis/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from .lvis import LVIS 3 | from .results import LVISResults 4 | from .eval import LVISEval 5 | from .vis import LVISVis 6 | 7 | logging.basicConfig( 8 | format="[%(asctime)s] %(name)s %(levelname)s: %(message)s", datefmt="%m/%d %H:%M:%S", 9 | level=logging.WARN, 10 | ) 11 | 12 | __all__ = ["LVIS", "LVISResults", "LVISEval", "LVISVis"] 13 | -------------------------------------------------------------------------------- /tools/start_para_test_gpu4.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | for idx in 0 1 2 3 3 | do 4 | tmux new-session -d -s "set$idx" \; 5 | send-keys "mmd" Enter \; 6 | send-keys "python ./tools/test_lvis.py configs/mask_rcnn_r50_fpn_1x_lvis.py /home/wangtao/prj/liyu_mmdet/work_dirs/mask_rcnn_r50_fpn_1x_lvis_liyu_finetune_imglevelsampler/epoch_12.pth --out ./set$idx.pkl --eval segm --set $idx" Enter \; 7 | done 8 | 9 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG PYTORCH="1.1.0" 2 | ARG CUDA="10.0" 3 | ARG CUDNN="7.5" 4 | 5 | FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel 6 | 7 | RUN apt-get update && apt-get install -y libglib2.0-0 libsm6 libxrender-dev libxext6 8 | 9 | # Install mmdetection 10 | RUN conda install cython -y 11 | RUN git clone https://github.com/open-mmlab/mmdetection.git /mmdetection 12 | WORKDIR /mmdetection 13 | RUN pip install -e . 14 | -------------------------------------------------------------------------------- /instaboost/.svn/pristine/aa/aa94ef40fd3d99558c5026f2eaf05a380206b80e.svn-base: -------------------------------------------------------------------------------- 1 | class TrimapError(Exception): 2 | """ 3 | Error when creating matting trimap. 4 | """ 5 | def __init__(self, err): 6 | super(TrimapError, self).__init__(err) 7 | 8 | 9 | class AnnError(Exception): 10 | """ 11 | Error with Input annotation. 
12 | """ 13 | def __init__(self, err): 14 | super(AnnError, self).__init__(err) 15 | -------------------------------------------------------------------------------- /tools/start_para_test_gpu8.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | config=$1 3 | model=$2 4 | out=$3 5 | 6 | for idx in 0 1 2 3 4 5 6 7 7 | do 8 | tmux kill-session -t "set$idx" 9 | done 10 | 11 | for idx in 0 1 2 3 4 5 6 7 12 | do 13 | tmux new-session -d -s "set$idx" \; send-keys "mmd" Enter \; send-keys "CUDA_VISIBLE_DEVICES=$idx python ./tools/test_lvis_split_parallel.py $config $model --out ./$out"_set"$idx.pkl --eval segm --set $idx --total_set_num 8" Enter \; 14 | done 15 | 16 | -------------------------------------------------------------------------------- /mmdet/models/anchor_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor_head import AnchorHead 2 | from .fcos_head import FCOSHead 3 | from .ga_retina_head import GARetinaHead 4 | from .ga_rpn_head import GARPNHead 5 | from .guided_anchor_head import FeatureAdaption, GuidedAnchorHead 6 | from .retina_head import RetinaHead 7 | from .rpn_head import RPNHead 8 | from .ssd_head import SSDHead 9 | 10 | __all__ = [ 11 | 'AnchorHead', 'GuidedAnchorHead', 'FeatureAdaption', 'RPNHead', 12 | 'GARPNHead', 'RetinaHead', 'GARetinaHead', 'SSDHead', 'FCOSHead' 13 | ] 14 | -------------------------------------------------------------------------------- /mmdet/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .conv_module import ConvModule, build_conv_layer 2 | from .conv_ws import ConvWS2d, conv_ws_2d 3 | from .norm import build_norm_layer 4 | from .scale import Scale 5 | from .weight_init import (bias_init_with_prob, kaiming_init, normal_init, 6 | uniform_init, xavier_init) 7 | 8 | __all__ = [ 9 | 'conv_ws_2d', 'ConvWS2d', 'build_conv_layer', 'ConvModule', 10 | 'build_norm_layer', 'xavier_init', 'normal_init', 'uniform_init', 11 | 'kaiming_init', 'bias_init_with_prob', 'Scale' 12 | ] 13 | -------------------------------------------------------------------------------- /lvis_api/test.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from lvis import LVIS, LVISResults, LVISEval 3 | 4 | # result and val files for 100 randomly sampled images. 
5 | # ANNOTATION_PATH = "./data/lvis_val_100.json" 6 | # RESULT_PATH = "./data/lvis_results_100.json" 7 | ANNOTATION_PATH = "./data/lvis/lvis_v0.5_val.json" 8 | RESULT_PATH = './debug_file.pkl.segm.json' 9 | # RESULT_PATH = './mask_rcnn_r101_fpn_1x_lvis.pkl.segm.json' 10 | ANN_TYPE = 'segm' 11 | 12 | lvis_eval = LVISEval(ANNOTATION_PATH, RESULT_PATH, ANN_TYPE) 13 | lvis_eval.run() 14 | lvis_eval.print_results(True) 15 | -------------------------------------------------------------------------------- /mmdet/models/detectors/fcos.py: -------------------------------------------------------------------------------- 1 | from ..registry import DETECTORS 2 | from .single_stage import SingleStageDetector 3 | 4 | 5 | @DETECTORS.register_module 6 | class FCOS(SingleStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | neck, 11 | bbox_head, 12 | train_cfg=None, 13 | test_cfg=None, 14 | pretrained=None): 15 | super(FCOS, self).__init__(backbone, neck, bbox_head, train_cfg, 16 | test_cfg, pretrained) 17 | -------------------------------------------------------------------------------- /mmdet/models/detectors/retinanet.py: -------------------------------------------------------------------------------- 1 | from ..registry import DETECTORS 2 | from .single_stage import SingleStageDetector 3 | 4 | 5 | @DETECTORS.register_module 6 | class RetinaNet(SingleStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | neck, 11 | bbox_head, 12 | train_cfg=None, 13 | test_cfg=None, 14 | pretrained=None): 15 | super(RetinaNet, self).__init__(backbone, neck, bbox_head, train_cfg, 16 | test_cfg, pretrained) 17 | -------------------------------------------------------------------------------- /mmdet/apis/__init__.py: -------------------------------------------------------------------------------- 1 | from .env import get_root_logger, init_dist, set_random_seed 2 | from .inference import (inference_detector, init_detector, show_result, 3 | show_result_pyplot) 4 | # from .train import train_detector 5 | from .train_new import train_detector as train_detector_calibration 6 | from .train_orig import train_detector as train_detector_normal 7 | __all__ = [ 8 | 'init_dist', 'get_root_logger', 'set_random_seed', 'train_detector_calibration', 'train_detector_normal', 9 | 'init_detector', 'inference_detector', 'show_result', 'show_result_pyplot' 10 | ] 11 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/gradcheck.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | import torch 5 | from torch.autograd import gradcheck 6 | 7 | sys.path.append(osp.abspath(osp.join(__file__, '../../'))) 8 | from roi_pool import RoIPool # noqa: E402, isort:skip 9 | 10 | feat = torch.randn(4, 16, 15, 15, requires_grad=True).cuda() 11 | rois = torch.Tensor([[0, 0, 0, 50, 50], [0, 10, 30, 43, 55], 12 | [1, 67, 40, 110, 120]]).cuda() 13 | inputs = (feat, rois) 14 | print('Gradcheck for roi pooling...') 15 | test = gradcheck(RoIPool(4, 1.0 / 8), inputs, eps=1e-5, atol=1e-3) 16 | print(test) 17 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/combined_sampler.py: -------------------------------------------------------------------------------- 1 | from ..assign_sampling import build_sampler 2 | from .base_sampler import BaseSampler 3 | 4 | 5 | class CombinedSampler(BaseSampler): 6 | 7 | def __init__(self, pos_sampler, neg_sampler, **kwargs): 8 | 
super(CombinedSampler, self).__init__(**kwargs) 9 | self.pos_sampler = build_sampler(pos_sampler, **kwargs) 10 | self.neg_sampler = build_sampler(neg_sampler, **kwargs) 11 | 12 | def _sample_pos(self, **kwargs): 13 | raise NotImplementedError 14 | 15 | def _sample_neg(self, **kwargs): 16 | raise NotImplementedError 17 | -------------------------------------------------------------------------------- /install.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | conda create -n simcal_mmdet python=3.7 3 | source ~/anaconda3/etc/profile.d/conda.sh 4 | conda init bash 5 | conda activate simcal_mmdet 6 | echo "python path" 7 | which python 8 | conda install pytorch==1.2.0 torchvision==0.4.0 cudatoolkit=9.2 -c pytorch 9 | pip install cython==0.29.12 mmcv==0.2.16 matplotlib terminaltables 10 | pip install "git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI" 11 | pip install opencv-python-headless 12 | pip install Pillow==6.1 13 | pip install numpy==1.17.1 --no-deps 14 | git clone https://github.com/twangnh/SimCal 15 | cd SimCal 16 | pip install -v -e . -------------------------------------------------------------------------------- /tools/slurm_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | CHECKPOINT=$4 9 | GPUS=${GPUS:-8} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | PY_ARGS=${@:5} 13 | SRUN_ARGS=${SRUN_ARGS:-""} 14 | 15 | srun -p ${PARTITION} \ 16 | --job-name=${JOB_NAME} \ 17 | --gres=gpu:${GPUS_PER_NODE} \ 18 | --ntasks=${GPUS} \ 19 | --ntasks-per-node=${GPUS_PER_NODE} \ 20 | --cpus-per-task=${CPUS_PER_TASK} \ 21 | --kill-on-bad-exit=1 \ 22 | ${SRUN_ARGS} \ 23 | python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS} 24 | -------------------------------------------------------------------------------- /mmdet/ops/dcn/__init__.py: -------------------------------------------------------------------------------- 1 | from .deform_conv import (DeformConv, DeformConvPack, ModulatedDeformConv, 2 | ModulatedDeformConvPack, deform_conv, 3 | modulated_deform_conv) 4 | from .deform_pool import (DeformRoIPooling, DeformRoIPoolingPack, 5 | ModulatedDeformRoIPoolingPack, deform_roi_pooling) 6 | 7 | __all__ = [ 8 | 'DeformConv', 'DeformConvPack', 'ModulatedDeformConv', 9 | 'ModulatedDeformConvPack', 'DeformRoIPooling', 'DeformRoIPoolingPack', 10 | 'ModulatedDeformRoIPoolingPack', 'deform_conv', 'modulated_deform_conv', 11 | 'deform_roi_pooling' 12 | ] 13 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_sampler import BaseSampler 2 | from .combined_sampler import CombinedSampler 3 | from .instance_balanced_pos_sampler import InstanceBalancedPosSampler 4 | from .iou_balanced_neg_sampler import IoUBalancedNegSampler 5 | from .ohem_sampler import OHEMSampler 6 | from .pseudo_sampler import PseudoSampler 7 | from .random_sampler import RandomSampler 8 | from .sampling_result import SamplingResult 9 | 10 | __all__ = [ 11 | 'BaseSampler', 'PseudoSampler', 'RandomSampler', 12 | 'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler', 13 | 'OHEMSampler', 'SamplingResult' 14 | ] 15 | -------------------------------------------------------------------------------- 
/tools/slurm_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | WORK_DIR=$4 9 | GPUS=${5:-8} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | SRUN_ARGS=${SRUN_ARGS:-""} 13 | PY_ARGS=${PY_ARGS:-"--validate"} 14 | 15 | srun -p ${PARTITION} \ 16 | --job-name=${JOB_NAME} \ 17 | --gres=gpu:${GPUS_PER_NODE} \ 18 | --ntasks=${GPUS} \ 19 | --ntasks-per-node=${GPUS_PER_NODE} \ 20 | --cpus-per-task=${CPUS_PER_TASK} \ 21 | --kill-on-bad-exit=1 \ 22 | ${SRUN_ARGS} \ 23 | python -u tools/train.py ${CONFIG} --work_dir=${WORK_DIR} --launcher="slurm" ${PY_ARGS} 24 | -------------------------------------------------------------------------------- /mmdet/ops/nms/src/nms_cuda.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #include 3 | 4 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 5 | 6 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh); 7 | 8 | at::Tensor nms(const at::Tensor& dets, const float threshold) { 9 | CHECK_CUDA(dets); 10 | if (dets.numel() == 0) 11 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 12 | return nms_cuda(dets, threshold); 13 | } 14 | 15 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 16 | m.def("nms", &nms, "non-maximum suppression"); 17 | } -------------------------------------------------------------------------------- /mmdet/models/losses/mse_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | from ..registry import LOSSES 5 | from .utils import weighted_loss 6 | 7 | mse_loss = weighted_loss(F.mse_loss) 8 | 9 | 10 | @LOSSES.register_module 11 | class MSELoss(nn.Module): 12 | 13 | def __init__(self, reduction='mean', loss_weight=1.0): 14 | super().__init__() 15 | self.reduction = reduction 16 | self.loss_weight = loss_weight 17 | 18 | def forward(self, pred, target, weight=None, avg_factor=None): 19 | loss = self.loss_weight * mse_loss( 20 | pred, 21 | target, 22 | weight, 23 | reduction=self.reduction, 24 | avg_factor=avg_factor) 25 | return loss 26 | -------------------------------------------------------------------------------- /mmdet/core/bbox/assigners/assign_result.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class AssignResult(object): 5 | 6 | def __init__(self, num_gts, gt_inds, max_overlaps, labels=None): 7 | self.num_gts = num_gts 8 | self.gt_inds = gt_inds 9 | self.max_overlaps = max_overlaps 10 | self.labels = labels 11 | 12 | def add_gt_(self, gt_labels): 13 | self_inds = torch.arange( 14 | 1, len(gt_labels) + 1, dtype=torch.long, device=gt_labels.device) 15 | self.gt_inds = torch.cat([self_inds, self.gt_inds]) 16 | self.max_overlaps = torch.cat( 17 | [self.max_overlaps.new_ones(self.num_gts), self.max_overlaps]) 18 | if self.labels is not None: 19 | self.labels = torch.cat([gt_labels, self.labels]) 20 | -------------------------------------------------------------------------------- /mmdet/core/fp16/utils.py: -------------------------------------------------------------------------------- 1 | from collections import abc 2 | 3 | import numpy as np 4 | import torch 5 | 6 | 7 | def cast_tensor_type(inputs, src_type, dst_type): 8 | if 
isinstance(inputs, torch.Tensor): 9 | return inputs.to(dst_type) 10 | elif isinstance(inputs, str): 11 | return inputs 12 | elif isinstance(inputs, np.ndarray): 13 | return inputs 14 | elif isinstance(inputs, abc.Mapping): 15 | return type(inputs)({ 16 | k: cast_tensor_type(v, src_type, dst_type) 17 | for k, v in inputs.items() 18 | }) 19 | elif isinstance(inputs, abc.Iterable): 20 | return type(inputs)( 21 | cast_tensor_type(item, src_type, dst_type) for item in inputs) 22 | else: 23 | return inputs 24 | -------------------------------------------------------------------------------- /mmdet/datasets/voc.py: -------------------------------------------------------------------------------- 1 | from .registry import DATASETS 2 | from .xml_style import XMLDataset 3 | 4 | 5 | @DATASETS.register_module 6 | class VOCDataset(XMLDataset): 7 | 8 | CLASSES = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 9 | 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 10 | 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 11 | 'tvmonitor') 12 | 13 | def __init__(self, **kwargs): 14 | super(VOCDataset, self).__init__(**kwargs) 15 | if 'VOC2007' in self.img_prefix: 16 | self.year = 2007 17 | elif 'VOC2012' in self.img_prefix: 18 | self.year = 2012 19 | else: 20 | raise ValueError('Cannot infer dataset year from img_prefix') 21 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the feature** 11 | 12 | **Motivation** 13 | A clear and concise description of the motivation of the feature. 14 | Ex1. It is inconvenient when [....]. 15 | Ex2. There is a recent paper [....], which is very helpful for [....]. 16 | 17 | **Related resources** 18 | If there is an official code release or third-party implementations, please also provide the information here, which would be very helpful. 19 | 20 | **Additional context** 21 | Add any other context or screenshots about the feature request here. 22 | If you would like to implement the feature and create a PR, please leave a comment here and that would be much appreciated. 
23 | -------------------------------------------------------------------------------- /tools/draw_cls_dist_coco.py: -------------------------------------------------------------------------------- 1 | from matplotlib import pyplot as plt 2 | import mmcv 3 | import numpy as np 4 | import pickle 5 | 6 | x_name = mmcv.load('./x_name.pkl') 7 | y = mmcv.load('./y.pkl') 8 | 9 | y_coco_sampled = pickle.load(open('./class_to_imageid_and_inscount_coco_sampled.pt', 'rb')) 10 | y_coco_lt = sorted([y_coco_sampled[i]['isntance_count'] for i in range(1,81)])[::-1] 11 | # plt.figure(figsize=(90, 50)) 12 | plt.bar(range(1, 81), y, align='center', alpha=0.5, width=0.8) 13 | plt.grid(color='#95a5a6', linestyle='--', linewidth=2, axis='y', alpha=0.7) 14 | plt.yscale('log') 15 | plt.ylabel('Number of instances') 16 | plt.xlabel('Sorted category index') 17 | # plt.title('') 18 | # plt.xticks(np.arange(len(x_name)), x_name, rotation=45) 19 | plt.savefig('coco_orig_cls_dist.eps', format='eps') 20 | # plt.savefig('coco_sample_cls_dist_1.eps', format='eps') 21 | plt.show() 22 | -------------------------------------------------------------------------------- /mmdet/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .custom import CustomDataset 2 | from .xml_style import XMLDataset 3 | from .coco import CocoDataset 4 | from .lvis import LvisDataset 5 | from .voc import VOCDataset 6 | from .wider_face import WIDERFaceDataset 7 | from .loader import GroupSampler, DistributedGroupSampler, build_dataloader 8 | from .utils import to_tensor, random_scale, show_ann 9 | from .dataset_wrappers import ConcatDataset, RepeatDataset 10 | from .extra_aug import ExtraAugmentation 11 | from .registry import DATASETS 12 | from .builder import build_dataset 13 | 14 | __all__ = [ 15 | 'CustomDataset', 'XMLDataset', 'CocoDataset', 'VOCDataset', 'GroupSampler', 16 | 'DistributedGroupSampler', 'build_dataloader', 'to_tensor', 'random_scale', 17 | 'show_ann', 'ConcatDataset', 'RepeatDataset', 'ExtraAugmentation', 18 | 'WIDERFaceDataset', 'DATASETS', 'build_dataset' 19 | ] 20 | -------------------------------------------------------------------------------- /tools/coco_eval.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser 2 | 3 | from mmdet.core import coco_eval 4 | 5 | 6 | def main(): 7 | parser = ArgumentParser(description='COCO Evaluation') 8 | parser.add_argument('result', help='result file path') 9 | parser.add_argument('--ann', help='annotation file path') 10 | parser.add_argument( 11 | '--types', 12 | type=str, 13 | nargs='+', 14 | choices=['proposal_fast', 'proposal', 'bbox', 'segm', 'keypoint'], 15 | default=['bbox'], 16 | help='result types') 17 | parser.add_argument( 18 | '--max-dets', 19 | type=int, 20 | nargs='+', 21 | default=[100, 300, 1000], 22 | help='proposal numbers, only used for recall evaluation') 23 | args = parser.parse_args() 24 | coco_eval(args.result, args.types, args.ann, args.max_dets) 25 | 26 | 27 | if __name__ == '__main__': 28 | main() 29 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/sampling_result.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class SamplingResult(object): 5 | 6 | def __init__(self, pos_inds, neg_inds, bboxes, gt_bboxes, assign_result, 7 | gt_flags): 8 | self.pos_inds = pos_inds 9 | self.neg_inds = neg_inds 10 | self.pos_bboxes 
= bboxes[pos_inds] 11 | self.neg_bboxes = bboxes[neg_inds] 12 | self.pos_is_gt = gt_flags[pos_inds] 13 | 14 | self.num_gts = gt_bboxes.shape[0] 15 | self.pos_assigned_gt_inds = assign_result.gt_inds[pos_inds] - 1 16 | self.pos_gt_bboxes = gt_bboxes[self.pos_assigned_gt_inds, :] 17 | if assign_result.labels is not None: 18 | self.pos_gt_labels = assign_result.labels[pos_inds] 19 | else: 20 | self.pos_gt_labels = None 21 | 22 | @property 23 | def bboxes(self): 24 | return torch.cat([self.pos_bboxes, self.neg_bboxes]) 25 | -------------------------------------------------------------------------------- /mmdet/models/detectors/faster_rcnn.py: -------------------------------------------------------------------------------- 1 | from ..registry import DETECTORS 2 | from .two_stage import TwoStageDetector 3 | 4 | 5 | @DETECTORS.register_module 6 | class FasterRCNN(TwoStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | rpn_head, 11 | bbox_roi_extractor, 12 | bbox_head, 13 | train_cfg, 14 | test_cfg, 15 | neck=None, 16 | shared_head=None, 17 | pretrained=None): 18 | super(FasterRCNN, self).__init__( 19 | backbone=backbone, 20 | neck=neck, 21 | shared_head=shared_head, 22 | rpn_head=rpn_head, 23 | bbox_roi_extractor=bbox_roi_extractor, 24 | bbox_head=bbox_head, 25 | train_cfg=train_cfg, 26 | test_cfg=test_cfg, 27 | pretrained=pretrained) 28 | -------------------------------------------------------------------------------- /configs/empirical_attention/README.md: -------------------------------------------------------------------------------- 1 | # An Empirical Study of Spatial Attention Mechanisms in Deep Networks 2 | 3 | ## Introduction 4 | 5 | ``` 6 | @article{zhu2019empirical, 7 | title={An Empirical Study of Spatial Attention Mechanisms in Deep Networks}, 8 | author={Zhu, Xizhou and Cheng, Dazhi and Zhang, Zheng and Lin, Stephen and Dai, Jifeng}, 9 | journal={arXiv preprint arXiv:1904.05873}, 10 | year={2019} 11 | } 12 | ``` 13 | 14 | 15 | ## Results and Models 16 | 17 | | Backbone | Attention Component | DCN | Lr schd | box AP | Download | 18 | |:---------:|:-------------------:|:----:|:-------:|:------:|:--------:| 19 | | R-50 | 1111 | N | 1x | 38.6 | - | 20 | | R-50 | 0010 | N | 1x | 38.2 | - | 21 | | R-50 | 1111 | Y | 1x | 41.0 | - | 22 | | R-50 | 0010 | Y | 1x | 40.8 | - | 23 | 24 | -------------------------------------------------------------------------------- /configs/pascal_voc/README.md: -------------------------------------------------------------------------------- 1 | ### SSD 2 | 3 | | Backbone | Size | Style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download | 4 | | :------: | :---: | :---: | :-----: | :------: | :-----------------: | :------------: | :----: | :------------------------------------------------------------------------------------------------------------------------------: | 5 | | VGG16 | 300 | caffe | 240e | 2.5 | 0.159 | 35.7 / 53.6 | 77.5 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ssd300_voc_vgg16_caffe_240e_20190501-7160d09a.pth) | 6 | | VGG16 | 512 | caffe | 240e | 4.3 | 0.214 | 27.5 / 35.9 | 80.0 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ssd512_voc_vgg16_caffe_240e_20190501-ff194be1.pth) | -------------------------------------------------------------------------------- /tools/configs/empirical_attention/README.md: -------------------------------------------------------------------------------- 1 | # An Empirical Study of Spatial Attention Mechanisms in 
Deep Networks 2 | 3 | ## Introduction 4 | 5 | ``` 6 | @article{zhu2019empirical, 7 | title={An Empirical Study of Spatial Attention Mechanisms in Deep Networks}, 8 | author={Zhu, Xizhou and Cheng, Dazhi and Zhang, Zheng and Lin, Stephen and Dai, Jifeng}, 9 | journal={arXiv preprint arXiv:1904.05873}, 10 | year={2019} 11 | } 12 | ``` 13 | 14 | 15 | ## Results and Models 16 | 17 | | Backbone | Attention Component | DCN | Lr schd | box AP | Download | 18 | |:---------:|:-------------------:|:----:|:-------:|:------:|:--------:| 19 | | R-50 | 1111 | N | 1x | 38.6 | - | 20 | | R-50 | 0010 | N | 1x | 38.2 | - | 21 | | R-50 | 1111 | Y | 1x | 41.0 | - | 22 | | R-50 | 0010 | Y | 1x | 40.8 | - | 23 | 24 | -------------------------------------------------------------------------------- /mmdet/models/losses/accuracy.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | def accuracy(pred, target, topk=1): 5 | assert isinstance(topk, (int, tuple)) 6 | if isinstance(topk, int): 7 | topk = (topk, ) 8 | return_single = True 9 | else: 10 | return_single = False 11 | 12 | maxk = max(topk) 13 | _, pred_label = pred.topk(maxk, dim=1) 14 | pred_label = pred_label.t() 15 | correct = pred_label.eq(target.view(1, -1).expand_as(pred_label)) 16 | 17 | res = [] 18 | for k in topk: 19 | correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) 20 | res.append(correct_k.mul_(100.0 / pred.size(0))) 21 | return res[0] if return_single else res 22 | 23 | 24 | class Accuracy(nn.Module): 25 | 26 | def __init__(self, topk=(1, )): 27 | super().__init__() 28 | self.topk = topk 29 | 30 | def forward(self, pred, target): 31 | return accuracy(pred, target, self.topk) 32 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/pseudo_sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .base_sampler import BaseSampler 4 | from .sampling_result import SamplingResult 5 | 6 | 7 | class PseudoSampler(BaseSampler): 8 | 9 | def __init__(self, **kwargs): 10 | pass 11 | 12 | def _sample_pos(self, **kwargs): 13 | raise NotImplementedError 14 | 15 | def _sample_neg(self, **kwargs): 16 | raise NotImplementedError 17 | 18 | def sample(self, assign_result, bboxes, gt_bboxes, **kwargs): 19 | pos_inds = torch.nonzero( 20 | assign_result.gt_inds > 0).squeeze(-1).unique() 21 | neg_inds = torch.nonzero( 22 | assign_result.gt_inds == 0).squeeze(-1).unique() 23 | gt_flags = bboxes.new_zeros(bboxes.shape[0], dtype=torch.uint8) 24 | sampling_result = SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes, 25 | assign_result, gt_flags) 26 | return sampling_result 27 | -------------------------------------------------------------------------------- /mmdet/models/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import BaseDetector 2 | from .cascade_rcnn import CascadeRCNN 3 | from .double_head_rcnn import DoubleHeadRCNN 4 | from .fast_rcnn import FastRCNN 5 | from .faster_rcnn import FasterRCNN 6 | from .fcos import FCOS 7 | from .grid_rcnn import GridRCNN 8 | from .htc import HybridTaskCascade 9 | from .mask_rcnn import MaskRCNN_calibration 10 | from .mask_rcnn import MaskRCNN_normal 11 | from .mask_scoring_rcnn import MaskScoringRCNN 12 | from .retinanet import RetinaNet 13 | from .rpn import RPN 14 | from .single_stage import SingleStageDetector 15 | # from .two_stage import TwoStageDetector 16 
| from .two_stage_calibration import TwoStageDetector 17 | 18 | __all__ = [ 19 | 'BaseDetector', 'SingleStageDetector', 'TwoStageDetector', 'RPN', 20 | 'FastRCNN', 'FasterRCNN', 'MaskRCNN_normal', 'MaskRCNN_calibration', 'CascadeRCNN', 'HybridTaskCascade', 21 | 'DoubleHeadRCNN', 'RetinaNet', 'FCOS', 'GridRCNN', 'MaskScoringRCNN' 22 | ] 23 | -------------------------------------------------------------------------------- /mmdet/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor_heads import * # noqa: F401,F403 2 | from .backbones import * # noqa: F401,F403 3 | from .bbox_heads import * # noqa: F401,F403 4 | from .builder import (build_backbone, build_detector, build_head, build_loss, 5 | build_neck, build_roi_extractor, build_shared_head) 6 | from .detectors import * # noqa: F401,F403 7 | from .losses import * # noqa: F401,F403 8 | from .mask_heads import * # noqa: F401,F403 9 | from .necks import * # noqa: F401,F403 10 | from .registry import (BACKBONES, DETECTORS, HEADS, LOSSES, NECKS, 11 | ROI_EXTRACTORS, SHARED_HEADS) 12 | from .roi_extractors import * # noqa: F401,F403 13 | from .shared_heads import * # noqa: F401,F403 14 | 15 | __all__ = [ 16 | 'BACKBONES', 'NECKS', 'ROI_EXTRACTORS', 'SHARED_HEADS', 'HEADS', 'LOSSES', 17 | 'DETECTORS', 'build_backbone', 'build_neck', 'build_roi_extractor', 18 | 'build_shared_head', 'build_head', 'build_loss', 'build_detector' 19 | ] 20 | -------------------------------------------------------------------------------- /configs/scratch/README.md: -------------------------------------------------------------------------------- 1 | # Rethinking ImageNet Pre-training 2 | 3 | ## Introduction 4 | 5 | ``` 6 | @article{he2018rethinking, 7 | title={Rethinking imagenet pre-training}, 8 | author={He, Kaiming and Girshick, Ross and Doll{\'a}r, Piotr}, 9 | journal={arXiv preprint arXiv:1811.08883}, 10 | year={2018} 11 | } 12 | ``` 13 | 14 | ## Results and Models 15 | 16 | | Model | Backbone | Style | Lr schd | box AP | mask AP | Download | 17 | |:------------:|:---------:|:-------:|:-------:|:------:|:-------:|:--------:| 18 | | Faster R-CNN | R-50-FPN | pytorch | 6x | 40.1 | - | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/scratch/scratch_faster_rcnn_r50_fpn_gn_6x_20190515-ff554978.pth) | 19 | | Mask R-CNN | R-50-FPN | pytorch | 6x | 41.0 | 37.4 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/scratch/scratch_mask_rcnn_r50_fpn_gn_6x_20190515-96743f5e.pth) | 20 | 21 | Note: 22 | - The above models are trained with 16 GPUs. 
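The checkpoints linked in the table can be used directly for inference with the high-level API shown in `demo/demo.py`. The snippet below is only a sketch: the config and checkpoint paths are assumptions and should be replaced with the files you actually download.

```python
# Hedged example: run one of the released scratch-trained models on the demo image.
# The config/checkpoint filenames below are placeholders, not files shipped with this table.
from mmdet.apis import init_detector, inference_detector, show_result

config_file = 'configs/scratch/scratch_mask_rcnn_r50_fpn_gn_6x.py'   # assumed config name
checkpoint_file = 'checkpoints/scratch_mask_rcnn_r50_fpn_gn_6x.pth'  # downloaded from the link above

model = init_detector(config_file, checkpoint_file, device='cuda:0')
result = inference_detector(model, 'demo/demo.jpg')
show_result('demo/demo.jpg', result, model.CLASSES, out_file='scratch_mask_rcnn_result.jpg')
```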
-------------------------------------------------------------------------------- /mmdet/ops/__init__.py: -------------------------------------------------------------------------------- 1 | from .context_block import ContextBlock 2 | from .dcn import (DeformConv, DeformConvPack, DeformRoIPooling, 3 | DeformRoIPoolingPack, ModulatedDeformConv, 4 | ModulatedDeformConvPack, ModulatedDeformRoIPoolingPack, 5 | deform_conv, deform_roi_pooling, modulated_deform_conv) 6 | from .masked_conv import MaskedConv2d 7 | from .nms import nms, soft_nms 8 | from .roi_align import RoIAlign, roi_align 9 | from .roi_pool import RoIPool, roi_pool 10 | from .sigmoid_focal_loss import SigmoidFocalLoss, sigmoid_focal_loss 11 | 12 | __all__ = [ 13 | 'nms', 'soft_nms', 'RoIAlign', 'roi_align', 'RoIPool', 'roi_pool', 14 | 'DeformConv', 'DeformConvPack', 'DeformRoIPooling', 'DeformRoIPoolingPack', 15 | 'ModulatedDeformRoIPoolingPack', 'ModulatedDeformConv', 16 | 'ModulatedDeformConvPack', 'deform_conv', 'modulated_deform_conv', 17 | 'deform_roi_pooling', 'SigmoidFocalLoss', 'sigmoid_focal_loss', 18 | 'MaskedConv2d', 'ContextBlock' 19 | ] 20 | -------------------------------------------------------------------------------- /tools/configs/scratch/README.md: -------------------------------------------------------------------------------- 1 | # Rethinking ImageNet Pre-training 2 | 3 | ## Introduction 4 | 5 | ``` 6 | @article{he2018rethinking, 7 | title={Rethinking imagenet pre-training}, 8 | author={He, Kaiming and Girshick, Ross and Doll{\'a}r, Piotr}, 9 | journal={arXiv preprint arXiv:1811.08883}, 10 | year={2018} 11 | } 12 | ``` 13 | 14 | ## Results and Models 15 | 16 | | Model | Backbone | Style | Lr schd | box AP | mask AP | Download | 17 | |:------------:|:---------:|:-------:|:-------:|:------:|:-------:|:--------:| 18 | | Faster R-CNN | R-50-FPN | pytorch | 6x | 40.1 | - | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/scratch/scratch_faster_rcnn_r50_fpn_gn_6x-20190515-ff554978.pth) | 19 | | Mask R-CNN | R-50-FPN | pytorch | 6x | 41.0 | 37.4 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/scratch/scratch_mask_rcnn_r50_fpn_gn_6x_20190515-96743f5e.pth) | 20 | 21 | Note: 22 | - The above models are trained with 16 GPUs. 
-------------------------------------------------------------------------------- /mmdet/ops/roi_align/gradcheck.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | import numpy as np 5 | import torch 6 | from torch.autograd import gradcheck 7 | 8 | sys.path.append(osp.abspath(osp.join(__file__, '../../'))) 9 | from roi_align import RoIAlign # noqa: E402, isort:skip 10 | 11 | feat_size = 15 12 | spatial_scale = 1.0 / 8 13 | img_size = feat_size / spatial_scale 14 | num_imgs = 2 15 | num_rois = 20 16 | 17 | batch_ind = np.random.randint(num_imgs, size=(num_rois, 1)) 18 | rois = np.random.rand(num_rois, 4) * img_size * 0.5 19 | rois[:, 2:] += img_size * 0.5 20 | rois = np.hstack((batch_ind, rois)) 21 | 22 | feat = torch.randn( 23 | num_imgs, 16, feat_size, feat_size, requires_grad=True, device='cuda:0') 24 | rois = torch.from_numpy(rois).float().cuda() 25 | inputs = (feat, rois) 26 | print('Gradcheck for roi align...') 27 | test = gradcheck(RoIAlign(3, spatial_scale), inputs, atol=1e-3, eps=1e-3) 28 | print(test) 29 | test = gradcheck(RoIAlign(3, spatial_scale, 2), inputs, atol=1e-3, eps=1e-3) 30 | print(test) 31 | -------------------------------------------------------------------------------- /mmdet/core/evaluation/__init__.py.bk: -------------------------------------------------------------------------------- 1 | from .class_names import (coco_classes, dataset_aliases, get_classes, 2 | imagenet_det_classes, imagenet_vid_classes, 3 | voc_classes) 4 | from .coco_utils import coco_eval, fast_eval_recall, results2json 5 | from .eval_hooks import (CocoDistEvalmAPHook, CocoDistEvalRecallHook, 6 | DistEvalHook, DistEvalmAPHook) 7 | from .mean_ap import average_precision, eval_map, print_map_summary 8 | from .recall import (eval_recalls, plot_iou_recall, plot_num_recall, 9 | print_recall_summary) 10 | 11 | __all__ = [ 12 | 'voc_classes', 'imagenet_det_classes', 'imagenet_vid_classes', 13 | 'coco_classes', 'dataset_aliases', 'get_classes', 'coco_eval', 14 | 'fast_eval_recall', 'results2json', 'DistEvalHook', 'DistEvalmAPHook', 15 | 'CocoDistEvalRecallHook', 'CocoDistEvalmAPHook', 'average_precision', 16 | 'eval_map', 'print_map_summary', 'eval_recalls', 'print_recall_summary', 17 | 'plot_num_recall', 'plot_iou_recall' 18 | ] 19 | -------------------------------------------------------------------------------- /mmdet/models/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .accuracy import Accuracy, accuracy 2 | from .balanced_l1_loss import BalancedL1Loss, balanced_l1_loss 3 | from .cross_entropy_loss import (CrossEntropyLoss, binary_cross_entropy, 4 | cross_entropy, mask_cross_entropy) 5 | from .focal_loss import FocalLoss, sigmoid_focal_loss 6 | from .ghm_loss import GHMC, GHMR 7 | from .iou_loss import BoundedIoULoss, IoULoss, bounded_iou_loss, iou_loss 8 | from .mse_loss import MSELoss, mse_loss 9 | from .smooth_l1_loss import SmoothL1Loss, smooth_l1_loss 10 | from .utils import reduce_loss, weight_reduce_loss, weighted_loss 11 | 12 | __all__ = [ 13 | 'accuracy', 'Accuracy', 'cross_entropy', 'binary_cross_entropy', 14 | 'mask_cross_entropy', 'CrossEntropyLoss', 'sigmoid_focal_loss', 15 | 'FocalLoss', 'smooth_l1_loss', 'SmoothL1Loss', 'balanced_l1_loss', 16 | 'BalancedL1Loss', 'mse_loss', 'MSELoss', 'iou_loss', 'bounded_iou_loss', 17 | 'IoULoss', 'BoundedIoULoss', 'GHMC', 'GHMR', 'reduce_loss', 18 | 'weight_reduce_loss', 'weighted_loss' 19 | ] 20 | 
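The classes and functions exported above are registered in the `LOSSES` registry, so configs never import them directly; they are built from plain dicts via `build_loss` (see `mmdet/models/builder.py` below). A minimal sketch, assuming the package is installed and using common RetinaNet-style parameter values rather than values taken from a config in this repo:

```python
# Sketch only: building registered losses from config dicts, the way detector configs do.
import torch
from mmdet.models import build_loss

loss_bbox = build_loss(dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
loss_cls = build_loss(dict(type='FocalLoss', use_sigmoid=True, gamma=2.0,
                           alpha=0.25, loss_weight=1.0))

pred = torch.rand(8, 4)
target = torch.rand(8, 4)
print(loss_bbox(pred, target))  # scalar tensor, mean-reduced by default
```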
-------------------------------------------------------------------------------- /mmdet/core/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .class_names import (coco_classes, dataset_aliases, get_classes, 2 | imagenet_det_classes, imagenet_vid_classes, 3 | voc_classes) 4 | from .coco_utils import coco_eval, fast_eval_recall, results2json 5 | from .lvis_utils import lvis_eval, lvis_fast_eval_recall 6 | from .eval_hooks import (CocoDistEvalmAPHook, CocoDistEvalRecallHook, 7 | DistEvalHook, DistEvalmAPHook) 8 | from .mean_ap import average_precision, eval_map, print_map_summary 9 | from .recall import (eval_recalls, plot_iou_recall, plot_num_recall, 10 | print_recall_summary) 11 | 12 | __all__ = [ 13 | 'voc_classes', 'imagenet_det_classes', 'imagenet_vid_classes', 14 | 'coco_classes', 'dataset_aliases', 'get_classes', 'lvis_eval', 'coco_eval', 15 | 'fast_eval_recall','lvis_fast_eval_recall', 'results2json', 'DistEvalHook', 'DistEvalmAPHook', 16 | 'CocoDistEvalRecallHook', 'CocoDistEvalmAPHook', 'average_precision', 17 | 'eval_map', 'print_map_summary', 'eval_recalls', 'print_recall_summary', 18 | 'plot_num_recall', 'plot_iou_recall' 19 | ] 20 | -------------------------------------------------------------------------------- /mmdet/models/builder.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from mmdet.utils import build_from_cfg 4 | from .registry import (BACKBONES, DETECTORS, HEADS, LOSSES, NECKS, 5 | ROI_EXTRACTORS, SHARED_HEADS) 6 | 7 | 8 | def build(cfg, registry, default_args=None): 9 | if isinstance(cfg, list): 10 | modules = [ 11 | build_from_cfg(cfg_, registry, default_args) for cfg_ in cfg 12 | ] 13 | return nn.Sequential(*modules) 14 | else: 15 | return build_from_cfg(cfg, registry, default_args) 16 | 17 | 18 | def build_backbone(cfg): 19 | return build(cfg, BACKBONES) 20 | 21 | 22 | def build_neck(cfg): 23 | return build(cfg, NECKS) 24 | 25 | 26 | def build_roi_extractor(cfg): 27 | return build(cfg, ROI_EXTRACTORS) 28 | 29 | 30 | def build_shared_head(cfg): 31 | return build(cfg, SHARED_HEADS) 32 | 33 | 34 | def build_head(cfg): 35 | return build(cfg, HEADS) 36 | 37 | 38 | def build_loss(cfg): 39 | return build(cfg, LOSSES) 40 | 41 | 42 | def build_detector(cfg, train_cfg=None, test_cfg=None): 43 | return build(cfg, DETECTORS, dict(train_cfg=train_cfg, test_cfg=test_cfg)) 44 | -------------------------------------------------------------------------------- /lvis_api/setup.py: -------------------------------------------------------------------------------- 1 | """LVIS (pronounced ‘el-vis’): is a new dataset for Large Vocabulary Instance Segmentation. 2 | We collect over 2 million high-quality instance segmentation masks for over 1200 entry-level object categories in 164k images. LVIS API enables reading and interacting with annotation files, 3 | visualizing annotations, and evaluating results. 4 | 5 | """ 6 | DOCLINES = (__doc__ or '') 7 | 8 | import os.path 9 | import sys 10 | import pip 11 | 12 | import setuptools 13 | 14 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "lvis")) 15 | 16 | with open("requirements.txt") as f: 17 | reqs = f.read() 18 | 19 | DISTNAME = "lvis" 20 | DESCRIPTION = "Python API for LVIS dataset." 
21 | AUTHOR = "Agrim Gupta" 22 | REQUIREMENTS = (reqs.strip().split("\n"),) 23 | 24 | 25 | if __name__ == "__main__": 26 | setuptools.setup( 27 | name=DISTNAME, 28 | install_requires=REQUIREMENTS, 29 | packages=setuptools.find_packages(), 30 | version="0.5", 31 | description=DESCRIPTION, 32 | long_description=DOCLINES, 33 | long_description_content_type='text/markdown', 34 | author=AUTHOR 35 | ) 36 | -------------------------------------------------------------------------------- /tools/publish_model.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import subprocess 3 | 4 | import torch 5 | 6 | 7 | def parse_args(): 8 | parser = argparse.ArgumentParser( 9 | description='Process a checkpoint to be published') 10 | parser.add_argument('in_file', help='input checkpoint filename') 11 | parser.add_argument('out_file', help='output checkpoint filename') 12 | args = parser.parse_args() 13 | return args 14 | 15 | 16 | def process_checkpoint(in_file, out_file): 17 | checkpoint = torch.load(in_file, map_location='cpu') 18 | # remove optimizer for smaller file size 19 | if 'optimizer' in checkpoint: 20 | del checkpoint['optimizer'] 21 | # if it is necessary to remove some sensitive data in checkpoint['meta'], 22 | # add the code here. 23 | torch.save(checkpoint, out_file) 24 | sha = subprocess.check_output(['sha256sum', out_file]).decode() 25 | final_file = out_file.rstrip('.pth') + '-{}.pth'.format(sha[:8]) 26 | subprocess.Popen(['mv', out_file, final_file]) 27 | 28 | 29 | def main(): 30 | args = parse_args() 31 | process_checkpoint(args.in_file, args.out_file) 32 | 33 | 34 | if __name__ == '__main__': 35 | main() 36 | -------------------------------------------------------------------------------- /mmdet/core/bbox/__init__.py: -------------------------------------------------------------------------------- 1 | from .assigners import AssignResult, BaseAssigner, MaxIoUAssigner 2 | from .bbox_target import bbox_target 3 | from .geometry import bbox_overlaps 4 | from .samplers import (BaseSampler, CombinedSampler, 5 | InstanceBalancedPosSampler, IoUBalancedNegSampler, 6 | PseudoSampler, RandomSampler, SamplingResult) 7 | from .transforms import (bbox2delta, bbox2result, bbox2roi, bbox_flip, 8 | bbox_mapping, bbox_mapping_back, delta2bbox, 9 | distance2bbox, roi2bbox) 10 | 11 | from .assign_sampling import ( # isort:skip, avoid recursive imports 12 | assign_and_sample, build_assigner, build_sampler) 13 | 14 | __all__ = [ 15 | 'bbox_overlaps', 'BaseAssigner', 'MaxIoUAssigner', 'AssignResult', 16 | 'BaseSampler', 'PseudoSampler', 'RandomSampler', 17 | 'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler', 18 | 'SamplingResult', 'build_assigner', 'build_sampler', 'assign_and_sample', 19 | 'bbox2delta', 'delta2bbox', 'bbox_flip', 'bbox_mapping', 20 | 'bbox_mapping_back', 'bbox2roi', 'roi2bbox', 'bbox2result', 21 | 'distance2bbox', 'bbox_target' 22 | ] 23 | -------------------------------------------------------------------------------- /tools/draw_cls_dist_lvis.py: -------------------------------------------------------------------------------- 1 | from matplotlib import pyplot as plt 2 | import mmcv 3 | import numpy as np 4 | import pickle 5 | 6 | train_info = pickle.load(open('./lvis_train_cats_info.pt', 'rb')) 7 | 8 | y_lvis = pickle.load(open('./class_to_imageid_and_inscount.pt', 'rb')) 9 | y_lvis = sorted([y_lvis[i]['isntance_count'] for i in range(1,1231)])[::-1] 10 | # plt.figure(figsize=(90, 50)) 11 | # plt.bar(range(1, 
1231), y_lvis, align='center', alpha=0.5, width=0.8) 12 | plt.plot(range(1, 1231), y_lvis, color='black') 13 | plt.fill_between(range(1, 480), 0, y_lvis[1:480], facecolor='green', interpolate=True) 14 | plt.fill_between(range(481, 1230), 0, y_lvis[481:1230], facecolor='red') 15 | # plt.fill_between(range(401, 500), 0, y_lvis[401:500]) 16 | # plt.fill_between(range(501, 1230), 0, y_lvis[501:1230]) 17 | plt.grid(color='#95a5a6', linestyle='--', linewidth=2, axis='y', alpha=0.7) 18 | plt.yscale('log') 19 | plt.ylabel('Number of training instances') 20 | plt.xlabel('Sorted category index') 21 | # plt.title('') 22 | # plt.xticks(np.arange(len(x_name)), x_name, rotation=45) 23 | plt.savefig('lvis_cls_dist.eps', format='eps', dpi=1000) 24 | # plt.savefig('coco_sample_cls_dist_1.eps', format='eps') 25 | plt.show() 26 | -------------------------------------------------------------------------------- /configs/wider_face/README.md: -------------------------------------------------------------------------------- 1 | ## WIDER Face Dataset 2 | 3 | To use the WIDER Face dataset you need to download it 4 | and extract to the `data/WIDERFace` folder. Annotation in the VOC format 5 | can be found in this [repo](https://github.com/sovrasov/wider-face-pascal-voc-annotations.git). 6 | You should move the annotation files from `WIDER_train_annotations` and `WIDER_val_annotations` folders 7 | to the `Annotation` folders inside the corresponding directories `WIDER_train` and `WIDER_val`. 8 | Also annotation lists `val.txt` and `train.txt` should be copied to `data/WIDERFace` from `WIDER_train_annotations` and `WIDER_val_annotations`. 9 | The directory should be like this: 10 | 11 | ``` 12 | mmdetection 13 | ├── mmdet 14 | ├── tools 15 | ├── configs 16 | ├── data 17 | │ ├── WIDERFace 18 | │ │ ├── WIDER_train 19 | │ | │ ├──0--Parade 20 | │ | │ ├── ... 21 | │ | │ ├── Annotations 22 | │ │ ├── WIDER_val 23 | │ | │ ├──0--Parade 24 | │ | │ ├── ... 25 | │ | │ ├── Annotations 26 | │ │ ├── val.txt 27 | │ │ ├── train.txt 28 | 29 | ``` 30 | 31 | After that you can train the SSD300 on WIDER by launching training with the `ssd300_wider_face.py` config or 32 | create your own config based on the presented one. 33 | -------------------------------------------------------------------------------- /tools/configs/wider_face/README.md: -------------------------------------------------------------------------------- 1 | ## WIDER Face Dataset 2 | 3 | To use the WIDER Face dataset you need to download it 4 | and extract to the `data/WIDERFace` folder. Annotation in the VOC format 5 | can be found in this [repo](https://github.com/sovrasov/wider-face-pascal-voc-annotations.git). 6 | You should move the annotation files from `WIDER_train_annotations` and `WIDER_val_annotations` folders 7 | to the `Annotation` folders inside the corresponding directories `WIDER_train` and `WIDER_val`. 8 | Also annotation lists `val.txt` and `train.txt` should be copied to `data/WIDERFace` from `WIDER_train_annotations` and `WIDER_val_annotations`. 9 | The directory should be like this: 10 | 11 | ``` 12 | mmdetection 13 | ├── mmdet 14 | ├── tools 15 | ├── configs 16 | ├── data 17 | │ ├── WIDERFace 18 | │ │ ├── WIDER_train 19 | │ | │ ├──0--Parade 20 | │ | │ ├── ... 21 | │ | │ ├── Annotations 22 | │ │ ├── WIDER_val 23 | │ | │ ├──0--Parade 24 | │ | │ ├── ... 
25 | │ | │ ├── Annotations 26 | │ │ ├── val.txt 27 | │ │ ├── train.txt 28 | 29 | ``` 30 | 31 | After that you can train the SSD300 on WIDER by launching training with the `ssd300_wider_face.py` config or 32 | create your own config based on the presented one. 33 | -------------------------------------------------------------------------------- /mmdet/core/utils/misc.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | 3 | import mmcv 4 | import numpy as np 5 | from six.moves import map, zip 6 | 7 | 8 | def tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True): 9 | num_imgs = tensor.size(0) 10 | mean = np.array(mean, dtype=np.float32) 11 | std = np.array(std, dtype=np.float32) 12 | imgs = [] 13 | for img_id in range(num_imgs): 14 | img = tensor[img_id, ...].cpu().numpy().transpose(1, 2, 0) 15 | img = mmcv.imdenormalize( 16 | img, mean, std, to_bgr=to_rgb).astype(np.uint8) 17 | imgs.append(np.ascontiguousarray(img)) 18 | return imgs 19 | 20 | 21 | def multi_apply(func, *args, **kwargs): 22 | pfunc = partial(func, **kwargs) if kwargs else func 23 | map_results = map(pfunc, *args) 24 | return tuple(map(list, zip(*map_results))) 25 | 26 | 27 | def unmap(data, count, inds, fill=0): 28 | """ Unmap a subset of item (data) back to the original set of items (of 29 | size count) """ 30 | if data.dim() == 1: 31 | ret = data.new_full((count, ), fill) 32 | ret[inds] = data 33 | else: 34 | new_size = (count, ) + data.size()[1:] 35 | ret = data.new_full(new_size, fill) 36 | ret[inds, :] = data 37 | return ret 38 | -------------------------------------------------------------------------------- /mmdet/core/mask/utils.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | 4 | def split_combined_polys(polys, poly_lens, polys_per_mask): 5 | """Split the combined 1-D polys into masks. 6 | 7 | A mask is represented as a list of polys, and a poly is represented as 8 | a 1-D array. In dataset, all masks are concatenated into a single 1-D 9 | tensor. Here we need to split the tensor into original representations. 10 | 11 | Args: 12 | polys (list): a list (length = image num) of 1-D tensors 13 | poly_lens (list): a list (length = image num) of poly length 14 | polys_per_mask (list): a list (length = image num) of poly number 15 | of each mask 16 | 17 | Returns: 18 | list: a list (length = image num) of list (length = mask num) of 19 | list (length = poly num) of numpy array 20 | """ 21 | mask_polys_list = [] 22 | for img_id in range(len(polys)): 23 | polys_single = polys[img_id] 24 | polys_lens_single = poly_lens[img_id].tolist() 25 | polys_per_mask_single = polys_per_mask[img_id].tolist() 26 | 27 | split_polys = mmcv.slice_list(polys_single, polys_lens_single) 28 | mask_polys = mmcv.slice_list(split_polys, polys_per_mask_single) 29 | mask_polys_list.append(mask_polys) 30 | return mask_polys_list 31 | -------------------------------------------------------------------------------- /mmdet/core/bbox/assign_sampling.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | from . 
import assigners, samplers 4 | 5 | 6 | def build_assigner(cfg, **kwargs): 7 | if isinstance(cfg, assigners.BaseAssigner): 8 | return cfg 9 | elif isinstance(cfg, dict): 10 | return mmcv.runner.obj_from_dict(cfg, assigners, default_args=kwargs) 11 | else: 12 | raise TypeError('Invalid type {} for building a sampler'.format( 13 | type(cfg))) 14 | 15 | 16 | def build_sampler(cfg, **kwargs): 17 | if isinstance(cfg, samplers.BaseSampler): 18 | return cfg 19 | elif isinstance(cfg, dict): 20 | return mmcv.runner.obj_from_dict(cfg, samplers, default_args=kwargs) 21 | else: 22 | raise TypeError('Invalid type {} for building a sampler'.format( 23 | type(cfg))) 24 | 25 | 26 | def assign_and_sample(bboxes, gt_bboxes, gt_bboxes_ignore, gt_labels, cfg): 27 | bbox_assigner = build_assigner(cfg.assigner) 28 | bbox_sampler = build_sampler(cfg.sampler) 29 | assign_result = bbox_assigner.assign(bboxes, gt_bboxes, gt_bboxes_ignore, 30 | gt_labels) 31 | sampling_result = bbox_sampler.sample(assign_result, bboxes, gt_bboxes, 32 | gt_labels) 33 | return assign_result, sampling_result 34 | -------------------------------------------------------------------------------- /demo/demo.py: -------------------------------------------------------------------------------- 1 | from mmdet.apis import init_detector, inference_detector, show_result 2 | import mmcv 3 | import os 4 | 5 | demopath = os.path.dirname(os.path.realpath(__file__)) 6 | # config_file = 'configs/faster_rcnn_r50_fpn_1x.py' 7 | # checkpoint_file = 'checkpoints/faster_rcnn_r50_fpn_1x_20181010-3d1b3351.pth' 8 | config_file = 'configs/mask_rcnn_r50_fpn_1x.py' 9 | checkpoint_file = 'checkpoints/mask_rcnn_r50_fpn_1x_20181010-069fa190.pth' 10 | 11 | # build the model from a config file and a checkpoint file 12 | model = init_detector(config_file, checkpoint_file, device='cuda:0') 13 | 14 | # test a single image and show the results 15 | img = os.path.join(demopath, '000000125100.jpg') # or img = mmcv.imread(img), which will only load it once 16 | result = inference_detector(model, img) 17 | show_result(img, result, model.CLASSES) 18 | 19 | # test a list of images and write the results to image files 20 | # imgs = ['test1.jpg', 'test2.jpg'] 21 | # for i, result in enumerate(inference_detector(model, imgs)): 22 | # show_result(imgs[i], result, model.CLASSES, out_file='result_{}.jpg'.format(i)) 23 | 24 | # test a video and show the results 25 | # video = mmcv.VideoReader('video.mp4') 26 | # for frame in video: 27 | # result = inference_detector(model, frame) 28 | # show_result(frame, result, model.CLASSES, wait_time=1) 29 | 30 | -------------------------------------------------------------------------------- /mmdet/models/mask_heads/htc_mask_head.py: -------------------------------------------------------------------------------- 1 | from ..registry import HEADS 2 | from ..utils import ConvModule 3 | from .fcn_mask_head import FCNMaskHead 4 | 5 | 6 | @HEADS.register_module 7 | class HTCMaskHead(FCNMaskHead): 8 | 9 | def __init__(self, *args, **kwargs): 10 | super(HTCMaskHead, self).__init__(*args, **kwargs) 11 | self.conv_res = ConvModule( 12 | self.conv_out_channels, 13 | self.conv_out_channels, 14 | 1, 15 | conv_cfg=self.conv_cfg, 16 | norm_cfg=self.norm_cfg) 17 | 18 | def init_weights(self): 19 | super(HTCMaskHead, self).init_weights() 20 | self.conv_res.init_weights() 21 | 22 | def forward(self, x, res_feat=None, return_logits=True, return_feat=True): 23 | if res_feat is not None: 24 | res_feat = self.conv_res(res_feat) 25 | x = x + res_feat 26 | for conv in 
self.convs: 27 | x = conv(x) 28 | res_feat = x 29 | outs = [] 30 | if return_logits: 31 | x = self.upsample(x) 32 | if self.upsample_method == 'deconv': 33 | x = self.relu(x) 34 | mask_pred = self.conv_logits(x) 35 | outs.append(mask_pred) 36 | if return_feat: 37 | outs.append(res_feat) 38 | return outs if len(outs) > 1 else outs[0] 39 | -------------------------------------------------------------------------------- /mmdet/datasets/builder.py: -------------------------------------------------------------------------------- 1 | import copy 2 | 3 | from mmdet.utils import build_from_cfg 4 | from .dataset_wrappers import ConcatDataset, RepeatDataset 5 | from .registry import DATASETS 6 | 7 | 8 | def _concat_dataset(cfg): 9 | ann_files = cfg['ann_file'] 10 | img_prefixes = cfg.get('img_prefix', None) 11 | seg_prefixes = cfg.get('seg_prefixes', None) 12 | proposal_files = cfg.get('proposal_file', None) 13 | 14 | datasets = [] 15 | num_dset = len(ann_files) 16 | for i in range(num_dset): 17 | data_cfg = copy.deepcopy(cfg) 18 | data_cfg['ann_file'] = ann_files[i] 19 | if isinstance(img_prefixes, (list, tuple)): 20 | data_cfg['img_prefix'] = img_prefixes[i] 21 | if isinstance(seg_prefixes, (list, tuple)): 22 | data_cfg['seg_prefix'] = seg_prefixes[i] 23 | if isinstance(proposal_files, (list, tuple)): 24 | data_cfg['proposal_file'] = proposal_files[i] 25 | datasets.append(build_dataset(data_cfg)) 26 | 27 | return ConcatDataset(datasets) 28 | 29 | 30 | def build_dataset(cfg): 31 | if cfg['type'] == 'RepeatDataset': 32 | dataset = RepeatDataset(build_dataset(cfg['dataset']), cfg['times']) 33 | elif isinstance(cfg['ann_file'], (list, tuple)): 34 | dataset = _concat_dataset(cfg) 35 | else: 36 | dataset = build_from_cfg(cfg, DATASETS) 37 | 38 | return dataset 39 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to mmdetection 2 | 3 | All kinds of contributions are welcome, including but not limited to the following. 4 | 5 | - Fixes (typo, bugs) 6 | - New features and components 7 | 8 | ## Workflow 9 | 10 | 1. fork and pull the latest mmdetection 11 | 2. checkout a new branch (do not use master branch for PRs) 12 | 3. commit your changes 13 | 4. create a PR 14 | 15 | Note 16 | - If you plan to add some new features that involve large changes, it is encouraged to open an issue for discussion first. 17 | - If you are the author of some papers and would like to include your method to mmdetection, 18 | please contact Kai Chen (chenkaidev[at]gmail[dot]com). We will much appreciate your contribution. 19 | 20 | ## Code style 21 | 22 | ### Python 23 | We adopt [PEP8](https://www.python.org/dev/peps/pep-0008/) as the preferred code style. 24 | 25 | We use the following tools for linting and formatting: 26 | - [flake8](http://flake8.pycqa.org/en/latest/): linter 27 | - [yapf](https://github.com/google/yapf): formatter 28 | - [isort](https://github.com/timothycrosley/isort): sort imports 29 | 30 | Style configurations of yapf and isort can be found in [.style.yapf](.style.yapf) and [.isort.cfg](.isort.cfg). 31 | 32 | >Before you create a PR, make sure that your code lints and is formatted by yapf. 33 | 34 | ### C++ and CUDA 35 | We follow the [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html). 
-------------------------------------------------------------------------------- /demo/webcam_demo.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import cv2 4 | import torch 5 | 6 | from mmdet.apis import inference_detector, init_detector, show_result 7 | 8 | 9 | def parse_args(): 10 | parser = argparse.ArgumentParser(description='MMDetection webcam demo') 11 | parser.add_argument('config', help='test config file path') 12 | parser.add_argument('checkpoint', help='checkpoint file') 13 | parser.add_argument('--device', type=int, default=0, help='CUDA device id') 14 | parser.add_argument( 15 | '--camera-id', type=int, default=0, help='camera device id') 16 | parser.add_argument( 17 | '--score-thr', type=float, default=0.5, help='bbox score threshold') 18 | args = parser.parse_args() 19 | return args 20 | 21 | 22 | def main(): 23 | args = parse_args() 24 | 25 | model = init_detector( 26 | args.config, args.checkpoint, device=torch.device('cuda', args.device)) 27 | 28 | camera = cv2.VideoCapture(args.camera_id) 29 | 30 | print('Press "Esc", "q" or "Q" to exit.') 31 | while True: 32 | ret_val, img = camera.read() 33 | result = inference_detector(model, img) 34 | 35 | ch = cv2.waitKey(1) 36 | if ch == 27 or ch == ord('q') or ch == ord('Q'): 37 | break 38 | 39 | show_result( 40 | img, result, model.CLASSES, score_thr=args.score_thr, wait_time=1) 41 | 42 | 43 | if __name__ == '__main__': 44 | main() 45 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/error-report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Error report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | Thanks for your error report and we appreciate it a lot. 11 | 12 | **Checklist** 13 | 1. I have searched related issues but cannot get the expected help. 14 | 2. The bug has not been fixed in the latest version. 15 | 16 | **Describe the bug** 17 | A clear and concise description of what the bug is. 18 | 19 | **Reproduction** 20 | 1. What command or script did you run? 21 | ``` 22 | A placeholder for the command. 23 | ``` 24 | 2. Did you make any modifications on the code or config? Did you understand what you have modified? 25 | 3. What dataset did you use? 26 | 27 | **Environment** 28 | - OS: [e.g., Ubuntu 16.04.6] 29 | - GCC [e.g., 5.4.0] 30 | - PyTorch version [e.g., 1.1.0] 31 | - How you installed PyTorch [e.g., pip, conda, source] 32 | - GPU model [e.g., 1080Ti, V100] 33 | - CUDA and CUDNN version 34 | - [optional] Other information that may be related (such as `$PATH`, `$LD_LIBRARY_PATH`, `$PYTHONPATH`, etc.) 35 | 36 | **Error traceback** 37 | If applicable, paste the error trackback here. 38 | ``` 39 | A placeholder for trackback. 40 | ``` 41 | 42 | **Bug fix** 43 | If you have already identified the reason, you can provide the information here. If you are willing to create a PR to fix it, please also leave a comment here and that would be much appreciated! 
44 | -------------------------------------------------------------------------------- /ft_cal_epoch_ablation_for_drawing_compose.txt: -------------------------------------------------------------------------------- 1 | 0.0 0.133 0.214 0.27 2 | 0.04280418980064317 0.20160173382295815 0.1988776074768606 0.22835550743102145 3 | 0.09616575450188941 0.1954467399271925 0.20311018258723598 0.23287101901797713 4 | 0.0979132273568508 0.21812343785761834 0.19653808201066472 0.2312084008481571 5 | 0.11101177281907294 0.2227236030473715 0.1906213960656182 0.22132534522222255 6 | 0.12878948298692902 0.21914006163797856 0.19461092345089875 0.22616635362890605 7 | 0.11844617291278857 0.21264685912320885 0.19239315593933407 0.22711695907796758 8 | 0.12709551283158493 0.21645399522004918 0.19448744166679882 0.21668781745355545 9 | 0.12406608517994656 0.22257170647785493 0.19644651999355758 0.2241675539773186 10 | 0.13204309939334583 0.22897236956240652 0.19264401188011646 0.22119623077589495 11 | 0.11417922262250052 0.22094165409452865 0.19157754172376854 0.2194968263538914 12 | 0.12809983556991092 0.23119536979857622 0.19368227690676004 0.22163935901414147 13 | 0.12632375300126727 0.22515345220504207 0.19510491178599484 0.22847079536768966 14 | 0.12250187918578637 0.23654185924390544 0.19835340748216393 0.22237704021269583 15 | 0.10328880702569189 0.23741552100789903 0.19897538729486794 0.22950141235625374 16 | 0.11183935930906523 0.23645881293063528 0.2007864199513981 0.22948890723346893 17 | 0.11545512563393455 0.23961815994386487 0.20013024142538005 0.22691280524591614 18 | 0.11456224300468426 0.23811144384852884 0.20343264275148323 0.22817559018420883 -------------------------------------------------------------------------------- /tools/draw_pr_recall_bar_lviscoco.py: -------------------------------------------------------------------------------- 1 | import matplotlib 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | 5 | 6 | labels = ['Proposal Recall@1k', 'AP'] 7 | men_means = [55.9, 32.8] 8 | women_means = [51.0, 18.0] 9 | 10 | x = np.arange(len(labels)) # the label locations 11 | width = 0.35 # the width of the bars 12 | 13 | matplotlib.rcParams.update({'font.size': 18}) 14 | plt.rc('ytick', labelsize=10) 15 | 16 | fig, ax = plt.subplots() 17 | rects1 = ax.bar(x - width/2, men_means, width, label='COCO') 18 | rects2 = ax.bar(x + width/2, women_means, width, label='LVIS') 19 | 20 | # Add some text for labels, title and custom x-axis tick labels, etc. 
21 | # ax.set_ylabel('Scores') 22 | # ax.set_title('Scores by group and gender') 23 | ax.set_ylim([0,65]) 24 | ax.set_xticks(x) 25 | ax.set_xticklabels(labels) 26 | ax.legend() 27 | 28 | 29 | def autolabel(rects): 30 | """Attach a text label above each bar in *rects*, displaying its height.""" 31 | for rect in rects: 32 | height = rect.get_height() 33 | ax.annotate('{}'.format(height), 34 | xy=(rect.get_x() + rect.get_width() / 2, height), 35 | xytext=(0, 3), # 3 points vertical offset 36 | textcoords="offset points", 37 | ha='center', va='bottom') 38 | 39 | 40 | autolabel(rects1) 41 | autolabel(rects2) 42 | 43 | fig.tight_layout() 44 | plt.savefig('coco_lvis_pr_recall_bar.eps', format='eps', dpi=1000) 45 | plt.show() 46 | -------------------------------------------------------------------------------- /mmdet/datasets/wider_face.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import xml.etree.ElementTree as ET 3 | 4 | import mmcv 5 | 6 | from .registry import DATASETS 7 | from .xml_style import XMLDataset 8 | 9 | 10 | @DATASETS.register_module 11 | class WIDERFaceDataset(XMLDataset): 12 | """ 13 | Reader for the WIDER Face dataset in PASCAL VOC format. 14 | Conversion scripts can be found in 15 | https://github.com/sovrasov/wider-face-pascal-voc-annotations 16 | """ 17 | CLASSES = ('face', ) 18 | 19 | def __init__(self, **kwargs): 20 | super(WIDERFaceDataset, self).__init__(**kwargs) 21 | 22 | def load_annotations(self, ann_file): 23 | img_infos = [] 24 | img_ids = mmcv.list_from_file(ann_file) 25 | for img_id in img_ids: 26 | filename = '{}.jpg'.format(img_id) 27 | xml_path = osp.join(self.img_prefix, 'Annotations', 28 | '{}.xml'.format(img_id)) 29 | tree = ET.parse(xml_path) 30 | root = tree.getroot() 31 | size = root.find('size') 32 | width = int(size.find('width').text) 33 | height = int(size.find('height').text) 34 | folder = root.find('folder').text 35 | img_infos.append( 36 | dict( 37 | id=img_id, 38 | filename=osp.join(folder, filename), 39 | width=width, 40 | height=height)) 41 | 42 | return img_infos 43 | -------------------------------------------------------------------------------- /mmdet/models/losses/smooth_l1_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from ..registry import LOSSES 5 | from .utils import weighted_loss 6 | 7 | 8 | @weighted_loss 9 | def smooth_l1_loss(pred, target, beta=1.0): 10 | assert beta > 0 11 | assert pred.size() == target.size() and target.numel() > 0 12 | diff = torch.abs(pred - target) 13 | loss = torch.where(diff < beta, 0.5 * diff * diff / beta, 14 | diff - 0.5 * beta) 15 | return loss 16 | 17 | 18 | @LOSSES.register_module 19 | class SmoothL1Loss(nn.Module): 20 | 21 | def __init__(self, beta=1.0, reduction='mean', loss_weight=1.0): 22 | super(SmoothL1Loss, self).__init__() 23 | self.beta = beta 24 | self.reduction = reduction 25 | self.loss_weight = loss_weight 26 | 27 | def forward(self, 28 | pred, 29 | target, 30 | weight=None, 31 | avg_factor=None, 32 | reduction_override=None, 33 | **kwargs): 34 | assert reduction_override in (None, 'none', 'mean', 'sum') 35 | reduction = ( 36 | reduction_override if reduction_override else self.reduction) 37 | loss_bbox = self.loss_weight * smooth_l1_loss( 38 | pred, 39 | target, 40 | weight, 41 | beta=self.beta, 42 | reduction=reduction, 43 | avg_factor=avg_factor, 44 | **kwargs) 45 | return loss_bbox 46 | 
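As a quick sanity check of the piecewise definition above (quadratic for `|diff| < beta`, linear otherwise), a small hedged example, assuming the package is importable:

```python
# Not part of the repo: verify the smooth L1 values element-wise.
# |diff| <  beta -> 0.5 * diff**2 / beta
# |diff| >= beta -> |diff| - 0.5 * beta
import torch
from mmdet.models.losses import SmoothL1Loss

pred = torch.tensor([0.0, 0.0, 0.0])
target = torch.tensor([0.5, 1.0, 3.0])
loss = SmoothL1Loss(beta=1.0, reduction='none')(pred, target)
print(loss)  # tensor([0.1250, 0.5000, 2.5000])
```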
-------------------------------------------------------------------------------- /tools/upgrade_model_version.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import re 3 | from collections import OrderedDict 4 | 5 | import torch 6 | 7 | 8 | def convert(in_file, out_file): 9 | """Convert keys in checkpoints. 10 | 11 | There can be some breaking changes during the development of mmdetection, 12 | and this tool is used for upgrading checkpoints trained with old versions 13 | to the latest one. 14 | """ 15 | checkpoint = torch.load(in_file) 16 | in_state_dict = checkpoint.pop('state_dict') 17 | out_state_dict = OrderedDict() 18 | for key, val in in_state_dict.items(): 19 | # Use ConvModule instead of nn.Conv2d in RetinaNet 20 | # cls_convs.0.weight -> cls_convs.0.conv.weight 21 | m = re.search(r'(cls_convs|reg_convs).\d.(weight|bias)', key) 22 | if m is not None: 23 | param = m.groups()[1] 24 | new_key = key.replace(param, 'conv.{}'.format(param)) 25 | out_state_dict[new_key] = val 26 | continue 27 | 28 | out_state_dict[key] = val 29 | checkpoint['state_dict'] = out_state_dict 30 | torch.save(checkpoint, out_file) 31 | 32 | 33 | def main(): 34 | parser = argparse.ArgumentParser(description='Upgrade model version') 35 | parser.add_argument('in_file', help='input checkpoint file') 36 | parser.add_argument('out_file', help='output checkpoint file') 37 | args = parser.parse_args() 38 | convert(args.in_file, args.out_file) 39 | 40 | 41 | if __name__ == '__main__': 42 | main() 43 | -------------------------------------------------------------------------------- /configs/ghm/README.md: -------------------------------------------------------------------------------- 1 | # Gradient Harmonized Single-stage Detector 2 | 3 | ## Introduction 4 | 5 | ``` 6 | @inproceedings{li2019gradient, 7 | title={Gradient Harmonized Single-stage Detector}, 8 | author={Li, Buyu and Liu, Yu and Wang, Xiaogang}, 9 | booktitle={AAAI Conference on Artificial Intelligence}, 10 | year={2019} 11 | } 12 | ``` 13 | 14 | ## Results and Models 15 | 16 | | Backbone | Style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download | 17 | | :-------------: | :-----: | :-----: | :------: | :-----------------: | :------------: | :----: | :------: | 18 | | R-50-FPN | pytorch | 1x | 3.9 | 0.500 | 9.4 | 36.9 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ghm/retinanet_ghm_r50_fpn_1x_20190608-b9aa5862.pth) | 19 | | R-101-FPN | pytorch | 1x | 5.8 | 0.625 | 8.5 | 39.0 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ghm/retinanet_ghm_r101_fpn_1x_20190608-b885b74a.pth) | 20 | | X-101-32x4d-FPN | pytorch | 1x | 7.0 | 0.818 | 7.6 | 40.5 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ghm/retinanet_ghm_x101_32x4d_fpn_1x_20190608-ed295d22.pth) | 21 | | X-101-64x4d-FPN | pytorch | 1x | 9.9 | 1.191 | 6.1 | 41.6 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ghm/retinanet_ghm_x101_64x4d_fpn_1x_20190608-7f2037ce.pth) | -------------------------------------------------------------------------------- /ft_cal_epoch_ablation_for_drawing.txt: -------------------------------------------------------------------------------- 1 | 0.0 0.133 0.214 0.27 0.18 2 | 0.030845771144278604 0.1902208185426685 0.1988776074768606 0.22835550743102145 0.203 3 | 0.10822619575390374 0.2060170475260656 0.20311018258723598 0.23287101901797713 0.202 4 | 0.12556453939637033 
0.22611426962951972 0.19653808201066472 0.2312084008481571 0.211 5 | 0.12397418846362247 0.20782131406074886 0.1906213960656182 0.22132534522222255 0.214 6 | 0.09298596704020082 0.19833452971738785 0.19461092345089875 0.22616635362890605 0.214 7 | 0.11705334712575734 0.22270016140556695 0.19239315593933407 0.22711695907796758 0.209 8 | 0.10725190918690512 0.2091746843401387 0.19448744166679882 0.21668781745355545 0.210 9 | 0.11928969016304615 0.2177085097241877 0.19644651999355758 0.2241675539773186 0.214 10 | 0.12043408881211713 0.21771775526842294 0.19264401188011646 0.22119623077589495 0.216 11 | 0.10610914582929507 0.21603875815042728 0.19157754172376854 0.2194968263538914 0.212 12 | 0.11457422927793845 0.2221024827401525 0.19368227690676004 0.22163935901414147 0.216 13 | 0.12398472309917556 0.22479705199942607 0.19510491178599484 0.22847079536768966 0.215 14 | 0.11924409072037266 0.22496923696141857 0.19835340748216393 0.22237704021269583 0.218 15 | 0.10200808193825778 0.22646670710206385 0.19897538729486794 0.22950141235625374 0.218 16 | 0.110377567607507 0.23374143119513724 0.2007864199513981 0.22948890723346893 0.219 17 | 0.11416522995583138 0.23100215512937564 0.20013024142538005 0.22691280524591614 0.220 18 | 0.09514752967834095 0.2308715098124401 0.20343264275148323 0.22817559018420883 -------------------------------------------------------------------------------- /ft_cat_epoch_ablation_for_drawing.txt: -------------------------------------------------------------------------------- 1 | 0.0 0.133 0.214 0.27 0.18 2 | 0.04280418980064317 0.20160173382295815 0.200568307268474 0.2671032643413395 0.203 3 | 0.09616575450188941 0.1954467399271925 0.1943992314872916 0.2653585561754797 0.202 4 | 0.0979132273568508 0.21812343785761834 0.19578938849837843 0.26581880358174925 0.211 5 | 0.11101177281907294 0.2227236030473715 0.19580531795819484 0.2652209941015451 0.214 6 | 0.12878948298692902 0.21914006163797856 0.19480494761560097 0.2652357706376281 0.214 7 | 0.11844617291278857 0.21264685912320885 0.19098514971537514 0.2634918945035475 0.209 8 | 0.12709551283158493 0.21645399522004918 0.19015205931533305 0.26332312509347844 0.210 9 | 0.12406608517994656 0.22257170647785493 0.19399217829271048 0.26501951230658677 0.214 10 | 0.13204309939334583 0.22897236956240652 0.1913758157437348 0.2639267886854602 0.216 11 | 0.11417922262250052 0.22094165409452865 0.1916112849432229 0.2636358997450702 0.212 12 | 0.12809983556991092 0.23119536979857622 0.1915855842657778 0.2640212324643147 0.216 13 | 0.12632375300126727 0.22515345220504207 0.1943042860285571 0.26524881489714663 0.215 14 | 0.12250187918578637 0.23654185924390544 0.19151705754236964 0.26410391351880164 0.218 15 | 0.10328880702569189 0.23741552100789903 0.19546495169655584 0.2653966386186115 0.218 16 | 0.11183935930906523 0.23645881293063528 0.1952282774372787 0.26547618736841405 0.219 17 | 0.11545512563393455 0.23961815994386487 0.19514162398787996 0.26499435229124235 0.220 18 | 0.11456224300468426 0.23811144384852884 0.19497220837245285 0.26527226129414966 0.219 -------------------------------------------------------------------------------- /tools/configs/ghm/README.md: -------------------------------------------------------------------------------- 1 | # Gradient Harmonized Single-stage Detector 2 | 3 | ## Introduction 4 | 5 | ``` 6 | @inproceedings{li2019gradient, 7 | title={Gradient Harmonized Single-stage Detector}, 8 | author={Li, Buyu and Liu, Yu and Wang, Xiaogang}, 9 | booktitle={AAAI Conference on Artificial Intelligence}, 10 | 
year={2019} 11 | } 12 | ``` 13 | 14 | ## Results and Models 15 | 16 | | Backbone | Style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download | 17 | | :-------------: | :-----: | :-----: | :------: | :-----------------: | :------------: | :----: | :------: | 18 | | R-50-FPN | pytorch | 1x | 3.9 | 0.500 | 9.4 | 36.9 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ghm/retinanet_ghm_r50_fpn_1x_20190608-b9aa5862.pth) | 19 | | R-101-FPN | pytorch | 1x | 5.8 | 0.625 | 8.5 | 39.0 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ghm/retinanet_ghm_r101_fpn_1x_20190608-b885b74a.pth) | 20 | | X-101-32x4d-FPN | pytorch | 1x | 7.0 | 0.818 | 7.6 | 40.5 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ghm/retinanet_ghm_x101_32x4d_fpn_1x_20190608-ed295d22.pth) | 21 | | X-101-64x4d-FPN | pytorch | 1x | 9.9 | 1.191 | 6.1 | 41.6 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ghm/retinanet_ghm_x101_64x4d_fpn_1x_20190608-7f2037ce.pth) | -------------------------------------------------------------------------------- /mmdet/models/utils/conv_ws.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | 5 | def conv_ws_2d(input, 6 | weight, 7 | bias=None, 8 | stride=1, 9 | padding=0, 10 | dilation=1, 11 | groups=1, 12 | eps=1e-5): 13 | c_in = weight.size(0) 14 | weight_flat = weight.view(c_in, -1) 15 | mean = weight_flat.mean(dim=1, keepdim=True).view(c_in, 1, 1, 1) 16 | std = weight_flat.std(dim=1, keepdim=True).view(c_in, 1, 1, 1) 17 | weight = (weight - mean) / (std + eps) 18 | return F.conv2d(input, weight, bias, stride, padding, dilation, groups) 19 | 20 | 21 | class ConvWS2d(nn.Conv2d): 22 | 23 | def __init__(self, 24 | in_channels, 25 | out_channels, 26 | kernel_size, 27 | stride=1, 28 | padding=0, 29 | dilation=1, 30 | groups=1, 31 | bias=True, 32 | eps=1e-5): 33 | super(ConvWS2d, self).__init__( 34 | in_channels, 35 | out_channels, 36 | kernel_size, 37 | stride=stride, 38 | padding=padding, 39 | dilation=dilation, 40 | groups=groups, 41 | bias=bias) 42 | self.eps = eps 43 | 44 | def forward(self, x): 45 | return conv_ws_2d(x, self.weight, self.bias, self.stride, self.padding, 46 | self.dilation, self.groups, self.eps) 47 | -------------------------------------------------------------------------------- /cls_head_models/2fc.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Variable 4 | import torch.nn.functional as F 5 | ############################################################## 6 | ### Code to compute batch counts and means 7 | ############################################################## 8 | 9 | class feat_extractor(torch.nn.Module): 10 | def __init__(self, input_shape = [256, 7, 7], hidden_dim=512): 11 | super(feat_extractor, self).__init__() 12 | 13 | self.cls_last_dim = input_shape[0]*input_shape[1]*input_shape[2] 14 | 15 | self.fc1 = nn.Linear(self.cls_last_dim, hidden_dim) 16 | self.fc2 = nn.Linear(hidden_dim, hidden_dim) 17 | 18 | def forward(self, x): 19 | x = x.view(x.size(0), -1) 20 | x = self.fc1(x) 21 | x = F.relu(x) 22 | x = self.fc2(x) 23 | 24 | return x 25 | 26 | class simple2fc(torch.nn.Module): 27 | 28 | def __init__(self, num_classes=1231): 29 | super(simple2fc, self).__init__() 30 | 31 | self.feat_extractor = 
feat_extractor(hidden_dim=num_classes).cuda() 32 | 33 | def forward(self, input): 34 | logits = self.feat_extractor(input) 35 | return logits 36 | 37 | 38 | 39 | # def ncm_sq_dist_bt_norm(a,b): 40 | # anorm = tf.reshape(tf.reduce_sum(tf.square(a), 1),[-1, 1]) 41 | # bnorm = tf.reshape(tf.reduce_sum(tf.square(b), 0),[1, -1]) 42 | # d = -2*tf.matmul(a,b,transpose_b=False)+anorm + bnorm 43 | # return d, anorm 44 | # 45 | # def ncm_sq_dist_bt(a,b): 46 | # d, bnorm = ncm_sq_dist_bt_norm(a,b) 47 | # return d 48 | 49 | -------------------------------------------------------------------------------- /lvis_api/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2019, Agrim Gupta and Ross Girshick 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 2. Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | 13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 14 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 15 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 16 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 17 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 18 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 19 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 20 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 21 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 22 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 23 | 24 | The views and conclusions contained in the software and documentation are those 25 | of the authors and should not be interpreted as representing official policies, 26 | either expressed or implied, of the FreeBSD Project. 
27 | -------------------------------------------------------------------------------- /cls_head_models/simple2fc.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Variable 4 | import torch.nn.functional as F 5 | ############################################################## 6 | ### Code to compute batch counts and means 7 | ############################################################## 8 | 9 | class feat_extractor(torch.nn.Module): 10 | def __init__(self, input_shape = [256, 7, 7], hidden_dim=512, num_classes=1231): 11 | super(feat_extractor, self).__init__() 12 | 13 | self.cls_last_dim = input_shape[0]*input_shape[1]*input_shape[2] 14 | 15 | self.fc1 = nn.Linear(self.cls_last_dim, hidden_dim) 16 | self.fc2 = nn.Linear(hidden_dim, num_classes) 17 | 18 | def forward(self, x): 19 | x = x.view(x.size(0), -1) 20 | x = self.fc1(x) 21 | x = F.relu(x) 22 | x = self.fc2(x) 23 | 24 | return x 25 | 26 | class simple2fc(torch.nn.Module): 27 | 28 | def __init__(self, num_classes=1231): 29 | super(simple2fc, self).__init__() 30 | 31 | self.feat_extractor = feat_extractor(hidden_dim=1024, num_classes=num_classes).cuda() 32 | 33 | def forward(self, input): 34 | logits = self.feat_extractor(input) 35 | return logits 36 | 37 | 38 | 39 | # def ncm_sq_dist_bt_norm(a,b): 40 | # anorm = tf.reshape(tf.reduce_sum(tf.square(a), 1),[-1, 1]) 41 | # bnorm = tf.reshape(tf.reduce_sum(tf.square(b), 0),[1, -1]) 42 | # d = -2*tf.matmul(a,b,transpose_b=False)+anorm + bnorm 43 | # return d, anorm 44 | # 45 | # def ncm_sq_dist_bt(a,b): 46 | # d, bnorm = ncm_sq_dist_bt_norm(a,b) 47 | # return d 48 | 49 | -------------------------------------------------------------------------------- /tools/draw_eAP_sensitivity_binnum.py: -------------------------------------------------------------------------------- 1 | 2 | import matplotlib 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | import math 6 | from matplotlib.ticker import FormatStrFormatter 7 | from matplotlib import scale as mscale 8 | from matplotlib import transforms as mtransforms 9 | 10 | 11 | epoch_results = [[51, 250, 276, 253], [62, 278, 276, 214], [67, 298, 284, 181], [71, 325, 280, 154], [77, 348, 282, 123], [81, 362, 289, 98], [85, 377, 297, 71], [90, 386, 301, 53]] 12 | 13 | epoch_results_array = np.array(epoch_results).astype(np.float) 14 | z = [8,9,10,11,12,13,14,15] 15 | # z = [0,1,2,3,4,5,6,7,8,9] 16 | 17 | eAP = epoch_results_array[:, :4].mean(axis=1).tolist() 18 | bin1 = epoch_results_array[:, 0].tolist() 19 | bin2 = epoch_results_array[:, 1].tolist() 20 | bin3 = epoch_results_array[:, 2].tolist() 21 | bin4 = epoch_results_array[:, 3].tolist() 22 | 23 | fig = plt.figure(figsize=(8,5)) 24 | ax1 = fig.add_subplot(111) 25 | 26 | matplotlib.rcParams.update({'font.size': 12}) 27 | ax1.plot(z, bin4, marker='o', linewidth=2, label='class number in bin [f^3, -)') 28 | ax1.plot(z, bin3, marker='o', linewidth=2, label='class number in bin [f^2, f^3)') 29 | ax1.plot(z, bin2, marker='o', linewidth=2, label='class number in bin [f, f^2)') 30 | ax1.plot(z, bin1, marker='o', linewidth=2, label='class number in bin (0, f)') 31 | 32 | 33 | 34 | # ax1.plot([0],[15.4], 'D', color = 'green') 35 | 36 | plt.xlabel('calibration steps (k)', size=16) 37 | plt.ylabel('AP or eAP', size=16) 38 | # ax1.set_xscale('log') 39 | 40 | plt.legend( loc='best') 41 | 42 | plt.grid() 43 | plt.savefig('eap_sensitivity_binnum.eps', format='eps', dpi=1000) 44 | plt.show() 45 | 46 
| 47 | -------------------------------------------------------------------------------- /mmdet/core/mask/mask_target.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | import numpy as np 3 | import torch 4 | from torch.nn.modules.utils import _pair 5 | 6 | 7 | def mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_masks_list, 8 | cfg): 9 | cfg_list = [cfg for _ in range(len(pos_proposals_list))] 10 | mask_targets = map(mask_target_single, pos_proposals_list, 11 | pos_assigned_gt_inds_list, gt_masks_list, cfg_list) 12 | mask_targets = torch.cat(list(mask_targets)) 13 | return mask_targets 14 | 15 | 16 | def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_masks, cfg): 17 | mask_size = _pair(cfg.mask_size) 18 | num_pos = pos_proposals.size(0) 19 | mask_targets = [] 20 | if num_pos > 0: 21 | proposals_np = pos_proposals.cpu().numpy() 22 | pos_assigned_gt_inds = pos_assigned_gt_inds.cpu().numpy() 23 | for i in range(num_pos): 24 | gt_mask = gt_masks[pos_assigned_gt_inds[i]] 25 | bbox = proposals_np[i, :].astype(np.int32) 26 | x1, y1, x2, y2 = bbox 27 | w = np.maximum(x2 - x1 + 1, 1) 28 | h = np.maximum(y2 - y1 + 1, 1) 29 | # mask is uint8 both before and after resizing 30 | # mask_size (h, w) to (w, h) 31 | target = mmcv.imresize(gt_mask[y1:y1 + h, x1:x1 + w], 32 | mask_size[::-1]) 33 | mask_targets.append(target) 34 | mask_targets = torch.from_numpy(np.stack(mask_targets)).float().to( 35 | pos_proposals.device) 36 | else: 37 | mask_targets = pos_proposals.new_zeros((0, ) + mask_size) 38 | return mask_targets 39 | -------------------------------------------------------------------------------- /mmdet/models/utils/weight_init.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch.nn as nn 3 | 4 | 5 | def xavier_init(module, gain=1, bias=0, distribution='normal'): 6 | assert distribution in ['uniform', 'normal'] 7 | if distribution == 'uniform': 8 | nn.init.xavier_uniform_(module.weight, gain=gain) 9 | else: 10 | nn.init.xavier_normal_(module.weight, gain=gain) 11 | if hasattr(module, 'bias'): 12 | nn.init.constant_(module.bias, bias) 13 | 14 | 15 | def normal_init(module, mean=0, std=1, bias=0): 16 | nn.init.normal_(module.weight, mean, std) 17 | if hasattr(module, 'bias'): 18 | nn.init.constant_(module.bias, bias) 19 | 20 | 21 | def uniform_init(module, a=0, b=1, bias=0): 22 | nn.init.uniform_(module.weight, a, b) 23 | if hasattr(module, 'bias'): 24 | nn.init.constant_(module.bias, bias) 25 | 26 | 27 | def kaiming_init(module, 28 | mode='fan_out', 29 | nonlinearity='relu', 30 | bias=0, 31 | distribution='normal'): 32 | assert distribution in ['uniform', 'normal'] 33 | if distribution == 'uniform': 34 | nn.init.kaiming_uniform_( 35 | module.weight, mode=mode, nonlinearity=nonlinearity) 36 | else: 37 | nn.init.kaiming_normal_( 38 | module.weight, mode=mode, nonlinearity=nonlinearity) 39 | if hasattr(module, 'bias'): 40 | nn.init.constant_(module.bias, bias) 41 | 42 | 43 | def bias_init_with_prob(prior_prob): 44 | """ initialize conv/fc bias value according to giving probablity""" 45 | bias_init = float(-np.log((1 - prior_prob) / prior_prob)) 46 | return bias_init 47 | -------------------------------------------------------------------------------- /cls_head_models/simple3fc.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Variable 4 | import 
torch.nn.functional as F 5 | 6 | class feat_classifier(torch.nn.Module): 7 | def __init__(self, num_classes, input_shape = [256, 7, 7], hidden_dim=1024, cls_fc_bias=True): 8 | super(feat_classifier, self).__init__() 9 | 10 | self.cls_last_dim = input_shape[0]*input_shape[1]*input_shape[2] 11 | 12 | self.fc1 = nn.Linear(self.cls_last_dim, hidden_dim) 13 | self.fc2 = nn.Linear(hidden_dim, hidden_dim) 14 | self.fc_classifier = nn.Linear(hidden_dim, num_classes, bias=cls_fc_bias) 15 | 16 | def forward(self, x): 17 | x = x.view(x.size(0), -1) 18 | x = self.fc1(x) 19 | x = F.relu(x) 20 | x = self.fc2(x) 21 | x = F.relu(x) 22 | x = self.fc_classifier(x) 23 | 24 | return x 25 | 26 | class simple3fc(torch.nn.Module): 27 | 28 | def __init__(self, num_classes=1231, cls_fc_bias=True): 29 | super(simple3fc, self).__init__() 30 | 31 | self.feat_classifier = feat_classifier(num_classes=num_classes, hidden_dim=1024, cls_fc_bias=cls_fc_bias).cuda() 32 | 33 | def forward(self, input): 34 | logits = self.feat_classifier(input) 35 | return logits 36 | 37 | 38 | 39 | # def ncm_sq_dist_bt_norm(a,b): 40 | # anorm = tf.reshape(tf.reduce_sum(tf.square(a), 1),[-1, 1]) 41 | # bnorm = tf.reshape(tf.reduce_sum(tf.square(b), 0),[1, -1]) 42 | # d = -2*tf.matmul(a,b,transpose_b=False)+anorm + bnorm 43 | # return d, anorm 44 | # 45 | # def ncm_sq_dist_bt(a,b): 46 | # d, bnorm = ncm_sq_dist_bt_norm(a,b) 47 | # return d 48 | 49 | -------------------------------------------------------------------------------- /instaboost/config.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class InstaBoostConfig: 5 | def __init__(self, action_candidate: tuple = ('normal', 'horizontal', 'skip'), 6 | action_prob: tuple = (1, 0, 0), scale: tuple = (0.8, 1.2), dx: float = 15, dy: float = 15, 7 | theta=(-1, 1), color_prob=0.5, heatmap_flag=False): 8 | """ 9 | :param action_candidate: tuple of action candidates. 'normal', 'horizontal', 'vertical', 'skip' are supported 10 | :param action_prob: tuple of corresponding action probabilities. 
Should be the same length as action_candidate 11 | :param scale: tuple of (min scale, max scale) 12 | :param dx: the maximum x-axis shift will be (instance width) / dx 13 | :param dy: the maximum y-axis shift will be (instance height) / dy 14 | :param theta: tuple of (min rotation degree, max rotation degree) 15 | :param color_prob: the probability of images for color augmentation 16 | :param heatmap_flag: whether to use heatmap guided 17 | """ 18 | assert len(action_candidate) == len(action_prob), 'Candidate & probability length mismatch' 19 | assert np.sum(action_prob) == 1, 'Probability must sum to 1' 20 | assert len(scale) == 2, 'scale should have 2 items (min scale, max scale)' 21 | assert len(theta) == 2, 'theta should have 2 items (min theta, max theta)' 22 | 23 | self.action_candidate = np.array(action_candidate) 24 | self.action_prob = np.array(action_prob) 25 | self.scale = scale 26 | self.dx = dx 27 | self.dy = dy 28 | self.theta = theta 29 | self.color_prob = color_prob 30 | self.heatmap_flag = heatmap_flag 31 | -------------------------------------------------------------------------------- /tools/draw_ft_epoch_ablation.py: -------------------------------------------------------------------------------- 1 | 2 | import matplotlib 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | import math 6 | from matplotlib.ticker import FormatStrFormatter 7 | from matplotlib import scale as mscale 8 | from matplotlib import transforms as mtransforms 9 | 10 | # z = [0,0.1,0.3,0.9,1,2,5] 11 | # z = list(range(0, 30000, 1000)) 12 | # with open('./ft_cat_epoch_ablation_for_drawing.txt', 'r') as f: 13 | with open('./ft_cal_epoch_ablation_for_drawing_compose.txt', 'r') as f: 14 | epoch_results = f.readlines() 15 | epoch_results = [i.strip().split(' ')
for i in epoch_results] 16 | epoch_results_array = np.array(epoch_results).astype(np.float) 17 | z = [0,1,2,3,4,5,6,7,8,9,10,11,13,15,20,25,30,35] 18 | # z = [0,1,2,3,4,5,6,7,8,9] 19 | 20 | eAP = epoch_results_array[:, :4].mean(axis=1).tolist() 21 | bin1 = epoch_results_array[:, 0].tolist() 22 | bin2 = epoch_results_array[:, 1].tolist() 23 | bin3 = epoch_results_array[:, 2].tolist() 24 | bin4 = epoch_results_array[:, 3].tolist() 25 | 26 | fig = plt.figure(figsize=(8,5)) 27 | ax1 = fig.add_subplot(111) 28 | 29 | matplotlib.rcParams.update({'font.size': 14}) 30 | ax1.plot(z, bin4, marker='o', linewidth=2, label='AP of class bin [1000, -)') 31 | ax1.plot(z, bin3, marker='o', linewidth=2, label='AP of class bin [100, 1000)') 32 | ax1.plot(z, bin2, marker='o', linewidth=2, label='AP of class bin [10, 100)') 33 | ax1.plot(z, bin1, marker='o', linewidth=2, label='AP of class bin (0, 10)') 34 | 35 | ax1.plot(z, eAP, linestyle='-', marker='o', linewidth=2, label='bAP') 36 | 37 | 38 | # ax1.plot([0],[15.4], 'D', color = 'green') 39 | 40 | plt.xlabel('calibration steps (k)', size=16) 41 | plt.ylabel('AP or bAP', size=16) 42 | # ax1.set_xscale('log') 43 | 44 | plt.legend( loc='best') 45 | 46 | plt.grid() 47 | plt.savefig('ablation_cal_steps.eps', format='eps', dpi=1000) 48 | plt.show() 49 | 50 | 51 | -------------------------------------------------------------------------------- /mmdet/core/evaluation/bbox_overlaps.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def bbox_overlaps(bboxes1, bboxes2, mode='iou'): 5 | """Calculate the ious between each bbox of bboxes1 and bboxes2. 6 | 7 | Args: 8 | bboxes1(ndarray): shape (n, 4) 9 | bboxes2(ndarray): shape (k, 4) 10 | mode(str): iou (intersection over union) or iof (intersection 11 | over foreground) 12 | 13 | Returns: 14 | ious(ndarray): shape (n, k) 15 | """ 16 | 17 | assert mode in ['iou', 'iof'] 18 | 19 | bboxes1 = bboxes1.astype(np.float32) 20 | bboxes2 = bboxes2.astype(np.float32) 21 | rows = bboxes1.shape[0] 22 | cols = bboxes2.shape[0] 23 | ious = np.zeros((rows, cols), dtype=np.float32) 24 | if rows * cols == 0: 25 | return ious 26 | exchange = False 27 | if bboxes1.shape[0] > bboxes2.shape[0]: 28 | bboxes1, bboxes2 = bboxes2, bboxes1 29 | ious = np.zeros((cols, rows), dtype=np.float32) 30 | exchange = True 31 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * ( 32 | bboxes1[:, 3] - bboxes1[:, 1] + 1) 33 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * ( 34 | bboxes2[:, 3] - bboxes2[:, 1] + 1) 35 | for i in range(bboxes1.shape[0]): 36 | x_start = np.maximum(bboxes1[i, 0], bboxes2[:, 0]) 37 | y_start = np.maximum(bboxes1[i, 1], bboxes2[:, 1]) 38 | x_end = np.minimum(bboxes1[i, 2], bboxes2[:, 2]) 39 | y_end = np.minimum(bboxes1[i, 3], bboxes2[:, 3]) 40 | overlap = np.maximum(x_end - x_start + 1, 0) * np.maximum( 41 | y_end - y_start + 1, 0) 42 | if mode == 'iou': 43 | union = area1[i] + area2 - overlap 44 | else: 45 | union = area1[i] if not exchange else area2 46 | ious[i, :] = overlap / union 47 | if exchange: 48 | ious = ious.T 49 | return ious 50 | -------------------------------------------------------------------------------- /mmdet/datasets/dataset_wrappers.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset 3 | 4 | from .registry import DATASETS 5 | 6 | 7 | @DATASETS.register_module 8 | class ConcatDataset(_ConcatDataset): 9 | """A wrapper of 
concatenated dataset. 10 | 11 | Same as :obj:`torch.utils.data.dataset.ConcatDataset`, but 12 | concat the group flag for image aspect ratio. 13 | 14 | Args: 15 | datasets (list[:obj:`Dataset`]): A list of datasets. 16 | """ 17 | 18 | def __init__(self, datasets): 19 | super(ConcatDataset, self).__init__(datasets) 20 | self.CLASSES = datasets[0].CLASSES 21 | if hasattr(datasets[0], 'flag'): 22 | flags = [] 23 | for i in range(0, len(datasets)): 24 | flags.append(datasets[i].flag) 25 | self.flag = np.concatenate(flags) 26 | 27 | 28 | @DATASETS.register_module 29 | class RepeatDataset(object): 30 | """A wrapper of repeated dataset. 31 | 32 | The length of repeated dataset will be `times` larger than the original 33 | dataset. This is useful when the data loading time is long but the dataset 34 | is small. Using RepeatDataset can reduce the data loading time between 35 | epochs. 36 | 37 | Args: 38 | dataset (:obj:`Dataset`): The dataset to be repeated. 39 | times (int): Repeat times. 40 | """ 41 | 42 | def __init__(self, dataset, times): 43 | self.dataset = dataset 44 | self.times = times 45 | self.CLASSES = dataset.CLASSES 46 | if hasattr(self.dataset, 'flag'): 47 | self.flag = np.tile(self.dataset.flag, times) 48 | 49 | self._ori_len = len(self.dataset) 50 | 51 | def __getitem__(self, idx): 52 | return self.dataset[idx % self._ori_len] 53 | 54 | def __len__(self): 55 | return self.times * self._ori_len 56 | -------------------------------------------------------------------------------- /mmdet/ops/sigmoid_focal_loss/sigmoid_focal_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from torch.autograd import Function 3 | from torch.autograd.function import once_differentiable 4 | 5 | from . 
import sigmoid_focal_loss_cuda 6 | 7 | 8 | class SigmoidFocalLossFunction(Function): 9 | 10 | @staticmethod 11 | def forward(ctx, input, target, gamma=2.0, alpha=0.25): 12 | ctx.save_for_backward(input, target) 13 | num_classes = input.shape[1] 14 | ctx.num_classes = num_classes 15 | ctx.gamma = gamma 16 | ctx.alpha = alpha 17 | 18 | loss = sigmoid_focal_loss_cuda.forward(input, target, num_classes, 19 | gamma, alpha) 20 | return loss 21 | 22 | @staticmethod 23 | @once_differentiable 24 | def backward(ctx, d_loss): 25 | input, target = ctx.saved_tensors 26 | num_classes = ctx.num_classes 27 | gamma = ctx.gamma 28 | alpha = ctx.alpha 29 | d_loss = d_loss.contiguous() 30 | d_input = sigmoid_focal_loss_cuda.backward(input, target, d_loss, 31 | num_classes, gamma, alpha) 32 | return d_input, None, None, None, None 33 | 34 | 35 | sigmoid_focal_loss = SigmoidFocalLossFunction.apply 36 | 37 | 38 | # TODO: remove this module 39 | class SigmoidFocalLoss(nn.Module): 40 | 41 | def __init__(self, gamma, alpha): 42 | super(SigmoidFocalLoss, self).__init__() 43 | self.gamma = gamma 44 | self.alpha = alpha 45 | 46 | def forward(self, logits, targets): 47 | assert logits.is_cuda 48 | loss = sigmoid_focal_loss(logits, targets, self.gamma, self.alpha) 49 | return loss.sum() 50 | 51 | def __repr__(self): 52 | tmpstr = self.__class__.__name__ + '(gamma={}, alpha={})'.format( 53 | self.gamma, self.alpha) 54 | return tmpstr 55 | -------------------------------------------------------------------------------- /configs/grid_rcnn/README.md: -------------------------------------------------------------------------------- 1 | # Grid R-CNN 2 | 3 | ## Introduction 4 | 5 | ``` 6 | @inproceedings{lu2019grid, 7 | title={Grid r-cnn}, 8 | author={Lu, Xin and Li, Buyu and Yue, Yuxin and Li, Quanquan and Yan, Junjie}, 9 | booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition}, 10 | year={2019} 11 | } 12 | 13 | @article{lu2019grid, 14 | title={Grid R-CNN Plus: Faster and Better}, 15 | author={Lu, Xin and Li, Buyu and Yue, Yuxin and Li, Quanquan and Yan, Junjie}, 16 | journal={arXiv preprint arXiv:1906.05688}, 17 | year={2019} 18 | } 19 | ``` 20 | 21 | ## Results and Models 22 | 23 | | Backbone | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download | 24 | |:-----------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:--------:| 25 | | R-50 | 2x | 4.8 | 1.172 | 10.9 | 40.3 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/grid_rcnn/grid_rcnn_gn_head_r50_fpn_2x_20190619-5b29cf9d.pth) | 26 | | R-101 | 2x | 6.7 | 1.214 | 10.0 | 41.7 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/grid_rcnn/grid_rcnn_gn_head_r101_fpn_2x_20190619-a4b61645.pth) | 27 | | X-101-32x4d | 2x | 8.0 | 1.335 | 8.5 | 43.0 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/grid_rcnn/grid_rcnn_gn_head_x101_32x4d_fpn_2x_20190619-0bbfd87a.pth) | 28 | | X-101-64x4d | 2x | 10.9 | 1.753 | 6.4 | 43.1 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/grid_rcnn/grid_rcnn_gn_head_x101_64x4d_fpn_2x_20190619-8f4e20bb.pth) | 29 | 30 | **Notes:** 31 | - All models are trained with 8 GPUs instead of 32 GPUs in the original paper. 32 | - The warming up lasts for 1 epoch and `2x` here indicates 25 epochs. 
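
As an illustrative sanity check (not part of the upstream README), a checkpoint downloaded from the table above can typically be loaded through the mmdet 1.x high-level inference API. The paths below are placeholders, and the snippet assumes this fork keeps the upstream `mmdet.apis` inference helpers:

```python
from mmdet.apis import init_detector, inference_detector

# Placeholder paths -- point these at the config and checkpoint you actually downloaded.
config_file = 'configs/grid_rcnn/grid_rcnn_gn_head_r50_fpn_2x.py'
checkpoint_file = 'checkpoints/grid_rcnn_gn_head_r50_fpn_2x_20190619-5b29cf9d.pth'

model = init_detector(config_file, checkpoint_file, device='cuda:0')
result = inference_detector(model, 'demo/demo.jpg')
# For a box-only detector such as Grid R-CNN, `result` is a list with one
# (n, 5) array of [x1, y1, x2, y2, score] detections per class.
print(len(result), result[0].shape)
```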
33 | -------------------------------------------------------------------------------- /tools/configs/grid_rcnn/README.md: -------------------------------------------------------------------------------- 1 | # Grid R-CNN 2 | 3 | ## Introduction 4 | 5 | ``` 6 | @inproceedings{lu2019grid, 7 | title={Grid r-cnn}, 8 | author={Lu, Xin and Li, Buyu and Yue, Yuxin and Li, Quanquan and Yan, Junjie}, 9 | booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition}, 10 | year={2019} 11 | } 12 | 13 | @article{lu2019grid, 14 | title={Grid R-CNN Plus: Faster and Better}, 15 | author={Lu, Xin and Li, Buyu and Yue, Yuxin and Li, Quanquan and Yan, Junjie}, 16 | journal={arXiv preprint arXiv:1906.05688}, 17 | year={2019} 18 | } 19 | ``` 20 | 21 | ## Results and Models 22 | 23 | | Backbone | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download | 24 | |:-----------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:--------:| 25 | | R-50 | 2x | 4.8 | 1.172 | 10.9 | 40.3 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/grid_rcnn/grid_rcnn_gn_head_r50_fpn_2x_20190619-5b29cf9d.pth) | 26 | | R-101 | 2x | 6.7 | 1.214 | 10.0 | 41.7 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/grid_rcnn/grid_rcnn_gn_head_r101_fpn_2x_20190619-a4b61645.pth) | 27 | | X-101-32x4d | 2x | 8.0 | 1.335 | 8.5 | 43.0 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/grid_rcnn/grid_rcnn_gn_head_x101_32x4d_fpn_2x_20190619-0bbfd87a.pth) | 28 | | X-101-64x4d | 2x | 10.9 | 1.753 | 6.4 | 43.1 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/grid_rcnn/grid_rcnn_gn_head_x101_64x4d_fpn_2x_20190619-8f4e20bb.pth) | 29 | 30 | **Notes:** 31 | - All models are trained with 8 GPUs instead of 32 GPUs in the original paper. 32 | - The warming up lasts for 1 epoch and `2x` here indicates 25 epochs. 33 | -------------------------------------------------------------------------------- /configs/cityscapes/README.md: -------------------------------------------------------------------------------- 1 | ## Common settings 2 | 3 | - All baselines were trained using 8 GPU with a batch size of 8 (1 images per GPU) using the [linear scaling rule](https://arxiv.org/abs/1706.02677) to scale the learning rate. 4 | - All models were trained on `cityscapes_train`, and tested on `cityscapes_val`. 5 | - 1x training schedule indicates 64 epochs which corresponds to slightly less than the 24k iterations reported in the original schedule from the [Mask R-CNN paper](https://arxiv.org/abs/1703.06870) 6 | - All pytorch-style pretrained backbones on ImageNet are from PyTorch model zoo. 7 | 8 | 9 | ## Baselines 10 | 11 | Download links and more models with different backbones and training schemes will be added to the model zoo. 
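
The linear scaling rule mentioned above simply rescales the base learning rate in proportion to the total batch size. A minimal sketch with illustrative numbers (the reference values are common mmdetection defaults, not necessarily those in the released configs):

```python
# Linear scaling rule: lr = base_lr * (total_batch_size / reference_batch_size)
reference_batch_size = 16        # e.g. 8 GPUs x 2 images per GPU
base_lr = 0.02                   # learning rate tuned for the reference batch size
total_batch_size = 8 * 1         # these baselines: 8 GPUs x 1 image per GPU
lr = base_lr * total_batch_size / reference_batch_size   # -> 0.01
```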
12 | 13 | 14 | ### Faster R-CNN 15 | 16 | | Backbone | Style | Lr schd | Scale | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download | 17 | | :-------------: | :-----: | :-----: | :---: | :------: | :-----------------: | :------------: | :----: | :------: | 18 | | R-50-FPN | pytorch | 1x | 800-1024 | 4.9 | 0.345 | 8.8 | 36.0 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/cityscapes/faster_rcnn_r50_fpn_1x_city_20190727-7b9c0534.pth) | 19 | 20 | ### Mask R-CNN 21 | 22 | | Backbone | Style | Lr schd | Scale | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP | Download | 23 | | :-------------: | :-----: | :-----: | :------: | :------: | :-----------------: | :------------: | :----: | :-----: | :------: | 24 | | R-50-FPN | pytorch | 1x | 800-1024 | 4.9 | 0.609 | 2.5 | 37.4 | 32.5 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/cityscapes/mask_rcnn_r50_fpn_1x_city_20190727-9b3c56a5.pth) | 25 | 26 | **Notes:** 27 | - In the original paper, the mask AP of Mask R-CNN R-50-FPN is 31.5. 28 | 29 | -------------------------------------------------------------------------------- /mmdet/models/utils/norm.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | norm_cfg = { 4 | # format: layer_type: (abbreviation, module) 5 | 'BN': ('bn', nn.BatchNorm2d), 6 | 'SyncBN': ('bn', nn.SyncBatchNorm), 7 | 'GN': ('gn', nn.GroupNorm), 8 | # and potentially 'SN' 9 | } 10 | 11 | 12 | def build_norm_layer(cfg, num_features, postfix=''): 13 | """ Build normalization layer 14 | 15 | Args: 16 | cfg (dict): cfg should contain: 17 | type (str): identify norm layer type. 18 | layer args: args needed to instantiate a norm layer. 19 | requires_grad (bool): [optional] whether stop gradient updates 20 | num_features (int): number of channels from input. 21 | postfix (int, str): appended into norm abbreviation to 22 | create named layer. 
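Example (illustrative usage only; the cfg dicts mirror the norm_cfg dicts used in the detector configs):

    >>> # 'BN' with postfix 1 -> name 'bn1', a BatchNorm2d over 64 channels
    >>> name, bn = build_norm_layer(dict(type='BN', requires_grad=True), 64, postfix=1)
    >>> # 'GN' requires num_groups; with no postfix the name is just 'gn'
    >>> name, gn = build_norm_layer(dict(type='GN', num_groups=32), 256)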
23 | 24 | Returns: 25 | name (str): abbreviation + postfix 26 | layer (nn.Module): created norm layer 27 | """ 28 | assert isinstance(cfg, dict) and 'type' in cfg 29 | cfg_ = cfg.copy() 30 | 31 | layer_type = cfg_.pop('type') 32 | if layer_type not in norm_cfg: 33 | raise KeyError('Unrecognized norm type {}'.format(layer_type)) 34 | else: 35 | abbr, norm_layer = norm_cfg[layer_type] 36 | if norm_layer is None: 37 | raise NotImplementedError 38 | 39 | assert isinstance(postfix, (int, str)) 40 | name = abbr + str(postfix) 41 | 42 | requires_grad = cfg_.pop('requires_grad', True) 43 | cfg_.setdefault('eps', 1e-5) 44 | if layer_type != 'GN': 45 | layer = norm_layer(num_features, **cfg_) 46 | if layer_type == 'SyncBN': 47 | layer._specify_ddp_gpu_num(1) 48 | else: 49 | assert 'num_groups' in cfg_ 50 | layer = norm_layer(num_channels=num_features, **cfg_) 51 | 52 | for param in layer.parameters(): 53 | param.requires_grad = requires_grad 54 | 55 | return name, layer 56 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/instance_balanced_pos_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from .random_sampler import RandomSampler 5 | 6 | 7 | class InstanceBalancedPosSampler(RandomSampler): 8 | 9 | def _sample_pos(self, assign_result, num_expected, **kwargs): 10 | pos_inds = torch.nonzero(assign_result.gt_inds > 0) 11 | if pos_inds.numel() != 0: 12 | pos_inds = pos_inds.squeeze(1) 13 | if pos_inds.numel() <= num_expected: 14 | return pos_inds 15 | else: 16 | unique_gt_inds = assign_result.gt_inds[pos_inds].unique() 17 | num_gts = len(unique_gt_inds) 18 | num_per_gt = int(round(num_expected / float(num_gts)) + 1) 19 | sampled_inds = [] 20 | for i in unique_gt_inds: 21 | inds = torch.nonzero(assign_result.gt_inds == i.item()) 22 | if inds.numel() != 0: 23 | inds = inds.squeeze(1) 24 | else: 25 | continue 26 | if len(inds) > num_per_gt: 27 | inds = self.random_choice(inds, num_per_gt) 28 | sampled_inds.append(inds) 29 | sampled_inds = torch.cat(sampled_inds) 30 | if len(sampled_inds) < num_expected: 31 | num_extra = num_expected - len(sampled_inds) 32 | extra_inds = np.array( 33 | list(set(pos_inds.cpu()) - set(sampled_inds.cpu()))) 34 | if len(extra_inds) > num_extra: 35 | extra_inds = self.random_choice(extra_inds, num_extra) 36 | extra_inds = torch.from_numpy(extra_inds).to( 37 | assign_result.gt_inds.device).long() 38 | sampled_inds = torch.cat([sampled_inds, extra_inds]) 39 | elif len(sampled_inds) > num_expected: 40 | sampled_inds = self.random_choice(sampled_inds, num_expected) 41 | return sampled_inds 42 | -------------------------------------------------------------------------------- /mmdet/models/detectors/fast_rcnn.py: -------------------------------------------------------------------------------- 1 | from ..registry import DETECTORS 2 | from .two_stage import TwoStageDetector 3 | 4 | 5 | @DETECTORS.register_module 6 | class FastRCNN(TwoStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | bbox_roi_extractor, 11 | bbox_head, 12 | train_cfg, 13 | test_cfg, 14 | neck=None, 15 | shared_head=None, 16 | mask_roi_extractor=None, 17 | mask_head=None, 18 | pretrained=None): 19 | super(FastRCNN, self).__init__( 20 | backbone=backbone, 21 | neck=neck, 22 | shared_head=shared_head, 23 | bbox_roi_extractor=bbox_roi_extractor, 24 | bbox_head=bbox_head, 25 | train_cfg=train_cfg, 26 | test_cfg=test_cfg, 27 | mask_roi_extractor=mask_roi_extractor, 
28 | mask_head=mask_head, 29 | pretrained=pretrained) 30 | 31 | def forward_test(self, imgs, img_metas, proposals, **kwargs): 32 | for var, name in [(imgs, 'imgs'), (img_metas, 'img_metas')]: 33 | if not isinstance(var, list): 34 | raise TypeError('{} must be a list, but got {}'.format( 35 | name, type(var))) 36 | 37 | num_augs = len(imgs) 38 | if num_augs != len(img_metas): 39 | raise ValueError( 40 | 'num of augmentations ({}) != num of image meta ({})'.format( 41 | len(imgs), len(img_metas))) 42 | # TODO: remove the restriction of imgs_per_gpu == 1 when prepared 43 | imgs_per_gpu = imgs[0].size(0) 44 | assert imgs_per_gpu == 1 45 | 46 | if num_augs == 1: 47 | return self.simple_test(imgs[0], img_metas[0], proposals[0], 48 | **kwargs) 49 | else: 50 | return self.aug_test(imgs, img_metas, proposals, **kwargs) 51 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | # cython generated cpp 107 | mmdet/ops/nms/src/soft_nms_cpu.cpp 108 | mmdet/version.py 109 | data 110 | .vscode 111 | .idea 112 | 113 | # custom 114 | *.pkl 115 | *.pkl.json 116 | *.log.json 117 | work_dirs/ 118 | 119 | # Pytorch 120 | *.pth 121 | -------------------------------------------------------------------------------- /tools/voc_eval.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser 2 | 3 | import mmcv 4 | import numpy as np 5 | 6 | from mmdet import datasets 7 | from mmdet.core import eval_map 8 | 9 | 10 | def voc_eval(result_file, dataset, iou_thr=0.5): 11 | det_results = mmcv.load(result_file) 12 | gt_bboxes = [] 13 | gt_labels = [] 14 | gt_ignore = [] 15 | for i in range(len(dataset)): 16 | ann = dataset.get_ann_info(i) 17 | bboxes = 
ann['bboxes'] 18 | labels = ann['labels'] 19 | if 'bboxes_ignore' in ann: 20 | ignore = np.concatenate([ 21 | np.zeros(bboxes.shape[0], dtype=np.bool), 22 | np.ones(ann['bboxes_ignore'].shape[0], dtype=np.bool) 23 | ]) 24 | gt_ignore.append(ignore) 25 | bboxes = np.vstack([bboxes, ann['bboxes_ignore']]) 26 | labels = np.concatenate([labels, ann['labels_ignore']]) 27 | gt_bboxes.append(bboxes) 28 | gt_labels.append(labels) 29 | if not gt_ignore: 30 | gt_ignore = None 31 | if hasattr(dataset, 'year') and dataset.year == 2007: 32 | dataset_name = 'voc07' 33 | else: 34 | dataset_name = dataset.CLASSES 35 | eval_map( 36 | det_results, 37 | gt_bboxes, 38 | gt_labels, 39 | gt_ignore=gt_ignore, 40 | scale_ranges=None, 41 | iou_thr=iou_thr, 42 | dataset=dataset_name, 43 | print_summary=True) 44 | 45 | 46 | def main(): 47 | parser = ArgumentParser(description='VOC Evaluation') 48 | parser.add_argument('result', help='result file path') 49 | parser.add_argument('config', help='config file path') 50 | parser.add_argument( 51 | '--iou-thr', 52 | type=float, 53 | default=0.5, 54 | help='IoU threshold for evaluation') 55 | args = parser.parse_args() 56 | cfg = mmcv.Config.fromfile(args.config) 57 | test_dataset = mmcv.runner.obj_from_dict(cfg.data.test, datasets) 58 | voc_eval(args.result, test_dataset, args.iou_thr) 59 | 60 | 61 | if __name__ == '__main__': 62 | main() 63 | -------------------------------------------------------------------------------- /configs/libra_rcnn/README.md: -------------------------------------------------------------------------------- 1 | # Libra R-CNN: Towards Balanced Learning for Object Detection 2 | 3 | ## Introduction 4 | 5 | We provide config files to reproduce the results in the CVPR 2019 paper [Libra R-CNN](https://arxiv.org/pdf/1904.02701.pdf). 6 | 7 | ``` 8 | @inproceedings{pang2019libra, 9 | title={Libra R-CNN: Towards Balanced Learning for Object Detection}, 10 | author={Pang, Jiangmiao and Chen, Kai and Shi, Jianping and Feng, Huajun and Ouyang, Wanli and Dahua Lin}, 11 | booktitle={IEEE Conference on Computer Vision and Pattern Recognition}, 12 | year={2019} 13 | } 14 | ``` 15 | 16 | ## Results and models 17 | 18 | The results on COCO 2017val are shown in the below table.
(results on test-dev are usually slightly higher than val) 19 | 20 | | Architecture | Backbone | Style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download | 21 | |:---------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------:| 22 | | Faster R-CNN | R-50-FPN | pytorch | 1x | 4.2 | 0.375 | 12.0 | 38.5 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/libra_rcnn/libra_faster_rcnn_r50_fpn_1x_20190610-bf0ea559.pth) | 23 | | Fast R-CNN | R-50-FPN | pytorch | 1x | 3.7 | 0.272 | 16.3 | 38.5 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/libra_rcnn/libra_fast_rcnn_r50_fpn_1x_20190525-a43f88b5.pth) | 24 | | Faster R-CNN | R-101-FPN | pytorch | 1x | 6.0 | 0.495 | 10.4 | 40.3 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/libra_rcnn/libra_faster_rcnn_r101_fpn_1x_20190525-94e94051.pth) | 25 | | Faster R-CNN | X-101-64x4d-FPN | pytorch | 1x | 10.1 | 1.050 | 6.8 | 42.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/libra_rcnn/libra_faster_rcnn_x101_64x4d_fpn_1x_20190525-359c134a.pth) | 26 | | RetinaNet | R-50-FPN | pytorch | 1x | 3.7 | 0.328 | 11.8 | 37.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/libra_rcnn/libra_retinanet_r50_fpn_1x_20190525-ead2a6bb.pth) | 27 | -------------------------------------------------------------------------------- /tools/configs/libra_rcnn/README.md: -------------------------------------------------------------------------------- 1 | # Libra R-CNN: Towards Balanced Learning for Object Detection 2 | 3 | ## Introduction 4 | 5 | We provide config files to reproduce the results in the CVPR 2019 paper [Libra R-CNN](https://arxiv.org/pdf/1904.02701.pdf). 6 | 7 | ``` 8 | @inproceedings{pang2019libra, 9 | title={Libra R-CNN: Towards Balanced Learning for Object Detection}, 10 | author={Pang, Jiangmiao and Chen, Kai and Shi, Jianping and Feng, Huajun and Ouyang, Wanli and Dahua Lin}, 11 | booktitle={IEEE Conference on Computer Vision and Pattern Recognition}, 12 | year={2019} 13 | } 14 | ``` 15 | 16 | ## Results and models 17 | 18 | The results on COCO 2017val are shown in the below table. 
(results on test-dev are usually slightly higher than val) 19 | 20 | | Architecture | Backbone | Style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download | 21 | |:---------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------:| 22 | | Faster R-CNN | R-50-FPN | pytorch | 1x | 4.2 | 0.375 | 12.0 | 38.5 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/libra_rcnn/libra_faster_rcnn_r50_fpn_1x_20190610-bf0ea559.pth) | 23 | | Fast R-CNN | R-50-FPN | pytorch | 1x | 3.7 | 0.272 | 16.3 | 38.5 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/libra_rcnn/libra_fast_rcnn_r50_fpn_1x_20190525-a43f88b5.pth) | 24 | | Faster R-CNN | R-101-FPN | pytorch | 1x | 6.0 | 0.495 | 10.4 | 40.3 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/libra_rcnn/libra_faster_rcnn_r101_fpn_1x_20190525-94e94051.pth) | 25 | | Faster R-CNN | X-101-64x4d-FPN | pytorch | 1x | 10.1 | 1.050 | 6.8 | 42.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/libra_rcnn/libra_faster_rcnn_x101_64x4d_fpn_1x_20190525-359c134a.pth) | 26 | | RetinaNet | R-50-FPN | pytorch | 1x | 3.7 | 0.328 | 11.8 | 37.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/libra_rcnn/libra_retinanet_r50_fpn_1x_20190525-ead2a6bb.pth) | 27 | -------------------------------------------------------------------------------- /mmdet/models/losses/balanced_l1_loss.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | 5 | from ..registry import LOSSES 6 | from .utils import weighted_loss 7 | 8 | 9 | @weighted_loss 10 | def balanced_l1_loss(pred, 11 | target, 12 | beta=1.0, 13 | alpha=0.5, 14 | gamma=1.5, 15 | reduction='mean'): 16 | assert beta > 0 17 | assert pred.size() == target.size() and target.numel() > 0 18 | 19 | diff = torch.abs(pred - target) 20 | b = np.e**(gamma / alpha) - 1 21 | loss = torch.where( 22 | diff < beta, alpha / b * 23 | (b * diff + 1) * torch.log(b * diff / beta + 1) - alpha * diff, 24 | gamma * diff + gamma / b - alpha * beta) 25 | 26 | return loss 27 | 28 | 29 | @LOSSES.register_module 30 | class BalancedL1Loss(nn.Module): 31 | """Balanced L1 Loss 32 | 33 | arXiv: https://arxiv.org/pdf/1904.02701.pdf (CVPR 2019) 34 | """ 35 | 36 | def __init__(self, 37 | alpha=0.5, 38 | gamma=1.5, 39 | beta=1.0, 40 | reduction='mean', 41 | loss_weight=1.0): 42 | super(BalancedL1Loss, self).__init__() 43 | self.alpha = alpha 44 | self.gamma = gamma 45 | self.beta = beta 46 | self.reduction = reduction 47 | self.loss_weight = loss_weight 48 | 49 | def forward(self, 50 | pred, 51 | target, 52 | weight=None, 53 | avg_factor=None, 54 | reduction_override=None, 55 | **kwargs): 56 | assert reduction_override in (None, 'none', 'mean', 'sum') 57 | reduction = ( 58 | reduction_override if reduction_override else self.reduction) 59 | loss_bbox = self.loss_weight * balanced_l1_loss( 60 | pred, 61 | target, 62 | weight, 63 | alpha=self.alpha, 64 | gamma=self.gamma, 65 | beta=self.beta, 66 | reduction=reduction, 67 | avg_factor=avg_factor, 68 | **kwargs) 69 | return loss_bbox 70 | -------------------------------------------------------------------------------- /mmdet/models/detectors/mask_rcnn.py: -------------------------------------------------------------------------------- 1 | from ..registry import DETECTORS 2 | from .two_stage import 
TwoStageDetector as TwoStageDetector_normal 3 | from .two_stage_calibration import TwoStageDetector as TwoStageDetector_calibration 4 | 5 | 6 | @DETECTORS.register_module 7 | class MaskRCNN_normal(TwoStageDetector_normal): 8 | 9 | def __init__(self, 10 | backbone, 11 | rpn_head, 12 | bbox_roi_extractor, 13 | bbox_head, 14 | mask_roi_extractor, 15 | mask_head, 16 | train_cfg, 17 | test_cfg, 18 | neck=None, 19 | shared_head=None, 20 | pretrained=None): 21 | super(MaskRCNN_normal, self).__init__( 22 | backbone=backbone, 23 | neck=neck, 24 | shared_head=shared_head, 25 | rpn_head=rpn_head, 26 | bbox_roi_extractor=bbox_roi_extractor, 27 | bbox_head=bbox_head, 28 | mask_roi_extractor=mask_roi_extractor, 29 | mask_head=mask_head, 30 | train_cfg=train_cfg, 31 | test_cfg=test_cfg, 32 | pretrained=pretrained) 33 | 34 | @DETECTORS.register_module 35 | class MaskRCNN_calibration(TwoStageDetector_calibration): 36 | 37 | def __init__(self, 38 | backbone, 39 | rpn_head, 40 | bbox_roi_extractor, 41 | bbox_head, 42 | mask_roi_extractor, 43 | mask_head, 44 | train_cfg, 45 | test_cfg, 46 | neck=None, 47 | shared_head=None, 48 | pretrained=None): 49 | super(MaskRCNN_calibration, self).__init__( 50 | backbone=backbone, 51 | neck=neck, 52 | shared_head=shared_head, 53 | rpn_head=rpn_head, 54 | bbox_roi_extractor=bbox_roi_extractor, 55 | bbox_head=bbox_head, 56 | mask_roi_extractor=mask_roi_extractor, 57 | mask_head=mask_head, 58 | train_cfg=train_cfg, 59 | test_cfg=test_cfg, 60 | pretrained=pretrained) -------------------------------------------------------------------------------- /mmdet/core/utils/dist_utils.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import torch.distributed as dist 4 | from mmcv.runner import OptimizerHook 5 | from torch._utils import (_flatten_dense_tensors, _take_tensors, 6 | _unflatten_dense_tensors) 7 | 8 | 9 | def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1): 10 | if bucket_size_mb > 0: 11 | bucket_size_bytes = bucket_size_mb * 1024 * 1024 12 | buckets = _take_tensors(tensors, bucket_size_bytes) 13 | else: 14 | buckets = OrderedDict() 15 | for tensor in tensors: 16 | tp = tensor.type() 17 | if tp not in buckets: 18 | buckets[tp] = [] 19 | buckets[tp].append(tensor) 20 | buckets = buckets.values() 21 | 22 | for bucket in buckets: 23 | flat_tensors = _flatten_dense_tensors(bucket) 24 | dist.all_reduce(flat_tensors) 25 | flat_tensors.div_(world_size) 26 | for tensor, synced in zip( 27 | bucket, _unflatten_dense_tensors(flat_tensors, bucket)): 28 | tensor.copy_(synced) 29 | 30 | 31 | def allreduce_grads(params, coalesce=True, bucket_size_mb=-1): 32 | grads = [ 33 | param.grad.data for param in params 34 | if param.requires_grad and param.grad is not None 35 | ] 36 | world_size = dist.get_world_size() 37 | if coalesce: 38 | _allreduce_coalesced(grads, world_size, bucket_size_mb) 39 | else: 40 | for tensor in grads: 41 | dist.all_reduce(tensor.div_(world_size)) 42 | 43 | 44 | class DistOptimizerHook(OptimizerHook): 45 | 46 | def __init__(self, grad_clip=None, coalesce=True, bucket_size_mb=-1): 47 | self.grad_clip = grad_clip 48 | self.coalesce = coalesce 49 | self.bucket_size_mb = bucket_size_mb 50 | 51 | def after_train_iter(self, runner): 52 | runner.optimizer.zero_grad() 53 | runner.outputs['loss'].backward() 54 | allreduce_grads(runner.model.parameters(), self.coalesce, 55 | self.bucket_size_mb) 56 | if self.grad_clip is not None: 57 | self.clip_grads(runner.model.parameters()) 58 | 
runner.optimizer.step() 59 | -------------------------------------------------------------------------------- /configs/gn/README.md: -------------------------------------------------------------------------------- 1 | # Group Normalization 2 | 3 | ## Introduction 4 | 5 | ``` 6 | @inproceedings{wu2018group, 7 | title={Group Normalization}, 8 | author={Wu, Yuxin and He, Kaiming}, 9 | booktitle={Proceedings of the European Conference on Computer Vision (ECCV)}, 10 | year={2018} 11 | } 12 | ``` 13 | 14 | ## Results and Models 15 | 16 | | Backbone | model | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP | Download | 17 | |:-------------:|:----------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------:| 18 | | R-50-FPN (d) | Mask R-CNN | 2x | 7.2 | 0.806 | 5.4 | 39.8 | 36.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r50_fpn_gn_2x_20180113-86832cf2.pth) | 19 | | R-50-FPN (d) | Mask R-CNN | 3x | 7.2 | 0.806 | 5.4 | 40.1 | 36.4 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r50_fpn_gn_3x_20180113-8e82f48d.pth) | 20 | | R-101-FPN (d) | Mask R-CNN | 2x | 9.9 | 0.970 | 4.8 | 41.5 | 37.0 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r101_fpn_gn_2x_20180113-9598649c.pth) | 21 | | R-101-FPN (d) | Mask R-CNN | 3x | 9.9 | 0.970 | 4.8 | 41.6 | 37.3 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r101_fpn_gn_3x_20180113-a14ffb96.pth) | 22 | | R-50-FPN (c) | Mask R-CNN | 2x | 7.2 | 0.806 | 5.4 | 39.7 | 35.9 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r50_fpn_gn_contrib_2x_20180113-ec93305c.pth) | 23 | | R-50-FPN (c) | Mask R-CNN | 3x | 7.2 | 0.806 | 5.4 | 40.0 | 36.2 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r50_fpn_gn_contrib_3x_20180113-9d230cab.pth) | 24 | 25 | **Notes:** 26 | - (d) means pretrained model converted from Detectron, and (c) means the contributed model pretrained by [@thangvubk](https://github.com/thangvubk). 27 | - The `3x` schedule is epoch [28, 34, 36]. 
28 | - **Memory, Train/Inf time is outdated.** -------------------------------------------------------------------------------- /tools/configs/gn/README.md: -------------------------------------------------------------------------------- 1 | # Group Normalization 2 | 3 | ## Introduction 4 | 5 | ``` 6 | @inproceedings{wu2018group, 7 | title={Group Normalization}, 8 | author={Wu, Yuxin and He, Kaiming}, 9 | booktitle={Proceedings of the European Conference on Computer Vision (ECCV)}, 10 | year={2018} 11 | } 12 | ``` 13 | 14 | ## Results and Models 15 | 16 | | Backbone | model | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP | Download | 17 | |:-------------:|:----------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------:| 18 | | R-50-FPN (d) | Mask R-CNN | 2x | 7.2 | 0.806 | 5.4 | 39.8 | 36.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r50_fpn_gn_2x_20180113-86832cf2.pth) | 19 | | R-50-FPN (d) | Mask R-CNN | 3x | 7.2 | 0.806 | 5.4 | 40.1 | 36.4 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r50_fpn_gn_3x_20180113-8e82f48d.pth) | 20 | | R-101-FPN (d) | Mask R-CNN | 2x | 9.9 | 0.970 | 4.8 | 41.5 | 37.0 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r101_fpn_gn_2x_20180113-9598649c.pth) | 21 | | R-101-FPN (d) | Mask R-CNN | 3x | 9.9 | 0.970 | 4.8 | 41.6 | 37.3 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r101_fpn_gn_3x_20180113-a14ffb96.pth) | 22 | | R-50-FPN (c) | Mask R-CNN | 2x | 7.2 | 0.806 | 5.4 | 39.7 | 35.9 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r50_fpn_gn_contrib_2x_20180113-ec93305c.pth) | 23 | | R-50-FPN (c) | Mask R-CNN | 3x | 7.2 | 0.806 | 5.4 | 40.0 | 36.2 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r50_fpn_gn_contrib_3x_20180113-9d230cab.pth) | 24 | 25 | **Notes:** 26 | - (d) means pretrained model converted from Detectron, and (c) means the contributed model pretrained by [@thangvubk](https://github.com/thangvubk). 27 | - The `3x` schedule is epoch [28, 34, 36]. 
28 | - **Memory, Train/Inf time is outdated.** -------------------------------------------------------------------------------- /mmdet/ops/sigmoid_focal_loss/src/sigmoid_focal_loss.cpp: -------------------------------------------------------------------------------- 1 | // modify from 2 | // https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/csrc/SigmoidFocalLoss.h 3 | #include 4 | 5 | at::Tensor SigmoidFocalLoss_forward_cuda(const at::Tensor &logits, 6 | const at::Tensor &targets, 7 | const int num_classes, 8 | const float gamma, const float alpha); 9 | 10 | at::Tensor SigmoidFocalLoss_backward_cuda(const at::Tensor &logits, 11 | const at::Tensor &targets, 12 | const at::Tensor &d_losses, 13 | const int num_classes, 14 | const float gamma, const float alpha); 15 | 16 | // Interface for Python 17 | at::Tensor SigmoidFocalLoss_forward(const at::Tensor &logits, 18 | const at::Tensor &targets, 19 | const int num_classes, const float gamma, 20 | const float alpha) { 21 | if (logits.type().is_cuda()) { 22 | return SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma, 23 | alpha); 24 | } 25 | AT_ERROR("SigmoidFocalLoss is not implemented on the CPU"); 26 | } 27 | 28 | at::Tensor SigmoidFocalLoss_backward(const at::Tensor &logits, 29 | const at::Tensor &targets, 30 | const at::Tensor &d_losses, 31 | const int num_classes, const float gamma, 32 | const float alpha) { 33 | if (logits.type().is_cuda()) { 34 | return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses, 35 | num_classes, gamma, alpha); 36 | } 37 | AT_ERROR("SigmoidFocalLoss is not implemented on the CPU"); 38 | } 39 | 40 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 41 | m.def("forward", &SigmoidFocalLoss_forward, 42 | "SigmoidFocalLoss forward (CUDA)"); 43 | m.def("backward", &SigmoidFocalLoss_backward, 44 | "SigmoidFocalLoss backward (CUDA)"); 45 | } 46 | -------------------------------------------------------------------------------- /mmdet/apis/env.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import random 4 | import subprocess 5 | 6 | import numpy as np 7 | import torch 8 | import torch.distributed as dist 9 | import torch.multiprocessing as mp 10 | from mmcv.runner import get_dist_info 11 | 12 | 13 | def init_dist(launcher, backend='nccl', **kwargs): 14 | if mp.get_start_method(allow_none=True) is None: 15 | mp.set_start_method('spawn') 16 | if launcher == 'pytorch': 17 | _init_dist_pytorch(backend, **kwargs) 18 | elif launcher == 'mpi': 19 | _init_dist_mpi(backend, **kwargs) 20 | elif launcher == 'slurm': 21 | _init_dist_slurm(backend, **kwargs) 22 | else: 23 | raise ValueError('Invalid launcher type: {}'.format(launcher)) 24 | 25 | 26 | def _init_dist_pytorch(backend, **kwargs): 27 | # TODO: use local_rank instead of rank % num_gpus 28 | rank = int(os.environ['RANK']) 29 | num_gpus = torch.cuda.device_count() 30 | torch.cuda.set_device(rank % num_gpus) 31 | dist.init_process_group(backend=backend, **kwargs) 32 | 33 | 34 | def _init_dist_mpi(backend, **kwargs): 35 | raise NotImplementedError 36 | 37 | 38 | def _init_dist_slurm(backend, port=29500, **kwargs): 39 | proc_id = int(os.environ['SLURM_PROCID']) 40 | ntasks = int(os.environ['SLURM_NTASKS']) 41 | node_list = os.environ['SLURM_NODELIST'] 42 | num_gpus = torch.cuda.device_count() 43 | torch.cuda.set_device(proc_id % num_gpus) 44 | addr = subprocess.getoutput( 45 | 'scontrol show hostname {} | head -n1'.format(node_list)) 46 | 
os.environ['MASTER_PORT'] = str(port) 47 | os.environ['MASTER_ADDR'] = addr 48 | os.environ['WORLD_SIZE'] = str(ntasks) 49 | os.environ['RANK'] = str(proc_id) 50 | dist.init_process_group(backend=backend) 51 | 52 | 53 | def set_random_seed(seed): 54 | random.seed(seed) 55 | np.random.seed(seed) 56 | torch.manual_seed(seed) 57 | torch.cuda.manual_seed_all(seed) 58 | 59 | 60 | def get_root_logger(log_level=logging.INFO): 61 | logger = logging.getLogger() 62 | if not logger.hasHandlers(): 63 | logging.basicConfig( 64 | format='%(asctime)s - %(levelname)s - %(message)s', 65 | level=log_level) 66 | rank, _ = get_dist_info() 67 | if rank != 0: 68 | logger.setLevel('ERROR') 69 | return logger 70 | -------------------------------------------------------------------------------- /tools/draw_comparison_head_design_choices.py: -------------------------------------------------------------------------------- 1 | import matplotlib 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | 5 | 6 | labels = ['AP on bin (0,10)', 'AP on bin (10,100)'] 7 | baseline = [0.0, 13.3] 8 | fc2_ncm = [6.0, 18.9] 9 | fc2 = [8.6, 22.0] 10 | fc3_rand = [9.1, 18.8] 11 | fc3_ft = [13.2, 23.1] 12 | 13 | x = np.arange(len(labels)) # the label locations 14 | width = 0.15 # the width of the bars 15 | 16 | matplotlib.rcParams.update({'font.size': 16}) 17 | # plt.rc('ytick', labelsize=10) 18 | 19 | fig, ax = plt.subplots() 20 | # rects1 = ax.bar(x - width, baseline, width, label='baseline') 21 | # rects2 = ax.bar(x - width/2, fc2_ncm, width, label='2fc_ncm') 22 | # rects3 = ax.bar(x , baseline, fc2, label='baseline') 23 | # rects4 = ax.bar(x + width/2, fc3_rand, width, label='2fc_ncm') 24 | # rects5 = ax.bar(x + width, fc3_ft, width, label='baseline') 25 | 26 | # Set position of bar on X axis 27 | r1 = np.arange(len(labels)) 28 | r2 = [x + width for x in r1] 29 | r3 = [x + width for x in r2] 30 | r4 = [x + width for x in r3] 31 | r5 = [x + width for x in r4] 32 | 33 | # Make the plot 34 | rects1 = ax.bar(r1, baseline, color='#7f6d5f', width=width, edgecolor='white', label='baseline') 35 | rects2 = ax.bar(r2, fc2_ncm, color='#557f2d', width=width, edgecolor='white', label='2fc_ncm') 36 | rects3 = ax.bar(r3, fc2, width=width, edgecolor='white', label='2fc_rand') 37 | rects4 = ax.bar(r4, fc3_rand, width=width, edgecolor='white', label='3fc_rand') 38 | rects5 = ax.bar(r5, fc3_ft, width=width, edgecolor='white', label='3fc_ft') 39 | 40 | ax.set_ylim([0,25]) 41 | ax.set_xticks([0.3, 1.3]) 42 | ax.set_xticklabels(labels) 43 | ax.legend() 44 | 45 | 46 | def autolabel(rects): 47 | """Attach a text label above each bar in *rects*, displaying its height.""" 48 | for rect in rects: 49 | height = rect.get_height() 50 | ax.annotate('{}'.format(height), 51 | xy=(rect.get_x() + rect.get_width() / 2, height), 52 | xytext=(0, 3), # 3 points vertical offset 53 | textcoords="offset points", 54 | ha='center', va='bottom') 55 | 56 | 57 | autolabel(rects1) 58 | autolabel(rects2) 59 | autolabel(rects3) 60 | autolabel(rects4) 61 | autolabel(rects5) 62 | 63 | fig.tight_layout() 64 | plt.savefig('head_design_choices.eps', format='eps', dpi=1000) 65 | plt.show() 66 | -------------------------------------------------------------------------------- /configs/ms_rcnn/README.md: -------------------------------------------------------------------------------- 1 | # Mask Scoring R-CNN 2 | 3 | ## Introduction 4 | 5 | ``` 6 | @inproceedings{huang2019msrcnn, 7 | title={Mask Scoring R-CNN}, 8 | author={Zhaojin Huang and Lichao Huang and Yongchao Gong and Chang 
Huang and Xinggang Wang}, 9 | booktitle={IEEE Conference on Computer Vision and Pattern Recognition}, 10 | year={2019}, 11 | } 12 | ``` 13 | 14 | ## Results and Models 15 | 16 | | Backbone | style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP | Download | 17 | |:-------------:|:----------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------:| 18 | | R-50-FPN | caffe | 1x | 4.3 | 0.537 | 10.1 | 37.4 | 35.5 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ms-rcnn/ms_rcnn_r50_caffe_fpn_1x_20190624-619934b5.pth) | 19 | | R-50-FPN | caffe | 2x | - | - | - | 38.2 | 35.9 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ms-rcnn/ms_rcnn_r50_caffe_fpn_2x_20190525-a07be31e.pth) | 20 | | R-101-FPN | caffe | 1x | 6.2 | 0.682 | 9.1 | 39.8 | 37.2 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ms-rcnn/ms_rcnn_r101_caffe_fpn_1x_20190624-677a5548.pth) | 21 | | R-101-FPN | caffe | 2x | - | - | - | 40.7 | 37.8 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ms-rcnn/ms_rcnn_r101_caffe_fpn_2x_20190525-4aee1528.pth) | 22 | | R-X101-32x4d | pytorch | 2x | 7.6 | 0.844 | 8.0 | 41.7 | 38.5 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ms-rcnn/ms_rcnn_x101_32x4d_fpn_2x_20190628-ab454d07.pth) | 23 | | R-X101-64x4d | pytorch | 1x | 10.5 | 1.214 | 6.4 | 42.0 | 39.1 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ms-rcnn/ms_rcnn_x101_64x4d_fpn_1x_20190628-dec32bda.pth) | 24 | | R-X101-64x4d | pytorch | 2x | - | - | - | 42.2 | 38.9 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ms-rcnn/ms_rcnn_x101_64x4d_fpn_2x_20190525-c044c25a.pth) | 25 | -------------------------------------------------------------------------------- /tools/configs/ms_rcnn/README.md: -------------------------------------------------------------------------------- 1 | # Mask Scoring R-CNN 2 | 3 | ## Introduction 4 | 5 | ``` 6 | @inproceedings{huang2019msrcnn, 7 | title={Mask Scoring R-CNN}, 8 | author={Zhaojin Huang and Lichao Huang and Yongchao Gong and Chang Huang and Xinggang Wang}, 9 | booktitle={IEEE Conference on Computer Vision and Pattern Recognition}, 10 | year={2019}, 11 | } 12 | ``` 13 | 14 | ## Results and Models 15 | 16 | | Backbone | style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP | Download | 17 | |:-------------:|:----------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------:| 18 | | R-50-FPN | caffe | 1x | 4.3 | 0.537 | 10.1 | 37.4 | 35.5 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ms-rcnn/ms_rcnn_r50_caffe_fpn_1x_20190624-619934b5.pth) | 19 | | R-50-FPN | caffe | 2x | - | - | - | 38.2 | 35.9 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ms-rcnn/ms_rcnn_r50_caffe_fpn_2x_20190525-a07be31e.pth) | 20 | | R-101-FPN | caffe | 1x | 6.2 | 0.682 | 9.1 | 39.8 | 37.2 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ms-rcnn/ms_rcnn_r101_caffe_fpn_1x_20190624-677a5548.pth) | 21 | | R-101-FPN | caffe | 2x | - | - | - | 40.7 | 37.8 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ms-rcnn/ms_rcnn_r101_caffe_fpn_2x_20190525-4aee1528.pth) | 22 | | R-X101-32x4d | pytorch | 2x | 7.6 | 0.844 | 8.0 | 41.7 | 38.5 | 
[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ms-rcnn/ms_rcnn_x101_32x4d_fpn_2x_20190628-ab454d07.pth) | 23 | | R-X101-64x4d | pytorch | 1x | 10.5 | 1.214 | 6.4 | 42.0 | 39.1 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ms-rcnn/ms_rcnn_x101_64x4d_fpn_1x_20190628-dec32bda.pth) | 24 | | R-X101-64x4d | pytorch | 2x | - | - | - | 42.2 | 38.9 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ms-rcnn/ms_rcnn_x101_64x4d_fpn_2x_20190525-c044c25a.pth) | 25 | -------------------------------------------------------------------------------- /tools/draw_eAP_sensitivity_eap.py: -------------------------------------------------------------------------------- 1 | 2 | import matplotlib 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | import math 6 | from matplotlib.ticker import FormatStrFormatter 7 | from matplotlib import scale as mscale 8 | from matplotlib import transforms as mtransforms 9 | 10 | 11 | epoch_results_ours = [0.2055814944259203, 0.2070789429222732, 0.21312734490544888, 0.21330987617112582, 0.21681729633603414, 0.21868618147095492, 0.22139531986572789, 0.2207302553866501] 12 | epoch_results_imgsample = [0.20085911273955764, 0.20221658896725567, 0.2056222469806897, 0.2051128644774435, 0.208011478430485, 0.2117306883053619, 0.21469247380489614, 0.21509586079595858] 13 | z = [8,9,10,11,12,13,14,15] 14 | 15 | # fig = plt.figure(figsize=(8,5)) 16 | fig = plt.figure() 17 | ax1 = fig.add_subplot(111) 18 | 19 | matplotlib.rcParams.update({'font.size': 16}) 20 | ax1.plot(z, epoch_results_ours, marker='o', linewidth=2, color='darkorange', label='r50-ours') 21 | ax1.plot(z, epoch_results_imgsample, marker='o', linewidth=2, color='blue', label='r50-IS') 22 | 23 | 24 | 25 | # ax1.plot([0],[15.4], 'D', color = 'green') 26 | 27 | plt.xlabel('bAP f value (m=3)', size=16) 28 | plt.ylabel('bAP', size=16) 29 | # ax1.set_xscale('log') 30 | 31 | plt.legend( loc='best') 32 | 33 | plt.grid() 34 | plt.savefig('eap_sensitivity_eap_f.eps', format='eps', dpi=1000) 35 | plt.show() 36 | 37 | ### eap m value 38 | # [0.19348653350208908, 0.20481586368658788, 0.20629655179889703] 39 | # [0.1960601492969575, 0.20901640943344768, 0.211437803122666] 40 | 41 | epoch_results_imgsample = [0.1905114721878586, 0.2056222469806897, 0.2249353584074827] 42 | epoch_results_ours = [0.19661301291002395, 0.21312734490544888, 0.2335877606226477] 43 | 44 | z = [2,3,4] 45 | 46 | fig = plt.figure() 47 | ax1 = fig.add_subplot(111) 48 | 49 | matplotlib.rcParams.update({'font.size': 16}) 50 | ax1.plot(z, epoch_results_ours, marker='o', linewidth=3, color='darkorange', label='r50-ours') 51 | ax1.plot(z, epoch_results_imgsample, marker='o', linewidth=3, color='blue', label='r50-IS') 52 | 53 | plt.xticks(np.arange(2, 5, step=1)) 54 | 55 | # ax1.plot([0],[15.4], 'D', color = 'green') 56 | 57 | plt.xlabel('bAP m value (f=10)', size=16) 58 | plt.ylabel('bAP', size=16) 59 | # ax1.set_xscale('log') 60 | 61 | plt.legend( loc='best') 62 | 63 | plt.grid() 64 | plt.savefig('eap_sensitivity_eap_m.eps', format='eps', dpi=1000) 65 | plt.show() -------------------------------------------------------------------------------- /mmdet/core/bbox/geometry.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False): 5 | """Calculate overlap between two set of bboxes. 
6 | 7 | If ``is_aligned`` is ``False``, then calculate the ious between each bbox 8 | of bboxes1 and bboxes2, otherwise the ious between each aligned pair of 9 | bboxes1 and bboxes2. 10 | 11 | Args: 12 | bboxes1 (Tensor): shape (m, 4) 13 | bboxes2 (Tensor): shape (n, 4), if is_aligned is ``True``, then m and n 14 | must be equal. 15 | mode (str): "iou" (intersection over union) or iof (intersection over 16 | foreground). 17 | 18 | Returns: 19 | ious(Tensor): shape (m, n) if is_aligned == False else shape (m, 1) 20 | """ 21 | 22 | assert mode in ['iou', 'iof'] 23 | 24 | rows = bboxes1.size(0) 25 | cols = bboxes2.size(0) 26 | if is_aligned: 27 | assert rows == cols 28 | 29 | if rows * cols == 0: 30 | return bboxes1.new(rows, 1) if is_aligned else bboxes1.new(rows, cols) 31 | 32 | if is_aligned: 33 | lt = torch.max(bboxes1[:, :2], bboxes2[:, :2]) # [rows, 2] 34 | rb = torch.min(bboxes1[:, 2:], bboxes2[:, 2:]) # [rows, 2] 35 | 36 | wh = (rb - lt + 1).clamp(min=0) # [rows, 2] 37 | overlap = wh[:, 0] * wh[:, 1] 38 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * ( 39 | bboxes1[:, 3] - bboxes1[:, 1] + 1) 40 | 41 | if mode == 'iou': 42 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * ( 43 | bboxes2[:, 3] - bboxes2[:, 1] + 1) 44 | ious = overlap / (area1 + area2 - overlap) 45 | else: 46 | ious = overlap / area1 47 | else: 48 | lt = torch.max(bboxes1[:, None, :2], bboxes2[:, :2]) # [rows, cols, 2] 49 | rb = torch.min(bboxes1[:, None, 2:], bboxes2[:, 2:]) # [rows, cols, 2] 50 | 51 | wh = (rb - lt + 1).clamp(min=0) # [rows, cols, 2] 52 | overlap = wh[:, :, 0] * wh[:, :, 1] 53 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * ( 54 | bboxes1[:, 3] - bboxes1[:, 1] + 1) 55 | 56 | if mode == 'iou': 57 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * ( 58 | bboxes2[:, 3] - bboxes2[:, 1] + 1) 59 | ious = overlap / (area1[:, None] + area2 - overlap) 60 | else: 61 | ious = overlap / (area1[:, None]) 62 | 63 | return ious 64 | -------------------------------------------------------------------------------- /mmdet/datasets/utils.py: -------------------------------------------------------------------------------- 1 | from collections import Sequence 2 | 3 | import matplotlib.pyplot as plt 4 | import mmcv 5 | import numpy as np 6 | import torch 7 | 8 | 9 | def to_tensor(data): 10 | """Convert objects of various python types to :obj:`torch.Tensor`. 11 | 12 | Supported types are: :class:`numpy.ndarray`, :class:`torch.Tensor`, 13 | :class:`Sequence`, :class:`int` and :class:`float`. 14 | """ 15 | if isinstance(data, torch.Tensor): 16 | return data 17 | elif isinstance(data, np.ndarray): 18 | return torch.from_numpy(data) 19 | elif isinstance(data, Sequence) and not mmcv.is_str(data): 20 | return torch.tensor(data) 21 | elif isinstance(data, int): 22 | return torch.LongTensor([data]) 23 | elif isinstance(data, float): 24 | return torch.FloatTensor([data]) 25 | else: 26 | raise TypeError('type {} cannot be converted to tensor.'.format( 27 | type(data))) 28 | 29 | 30 | def random_scale(img_scales, mode='range'): 31 | """Randomly select a scale from a list of scales or scale ranges. 32 | 33 | Args: 34 | img_scales (list[tuple]): Image scale or scale range. 35 | mode (str): "range" or "value". 36 | 37 | Returns: 38 | tuple: Sampled image scale. 
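    Example (illustrative; the scale tuples below are placeholders, not values
    taken from any config in this repo):

        >>> random_scale([(1333, 800)])                  # a single fixed scale is returned as-is
        (1333, 800)
        >>> random_scale([(1333, 800), (1333, 640)], mode='range')
        # long edge sampled from [1333, 1333], short edge from [640, 800],
        # so this returns e.g. (1333, 712)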
39 | """ 40 | num_scales = len(img_scales) 41 | if num_scales == 1: # fixed scale is specified 42 | img_scale = img_scales[0] 43 | elif num_scales == 2: # randomly sample a scale 44 | if mode == 'range': 45 | img_scale_long = [max(s) for s in img_scales] 46 | img_scale_short = [min(s) for s in img_scales] 47 | long_edge = np.random.randint( 48 | min(img_scale_long), 49 | max(img_scale_long) + 1) 50 | short_edge = np.random.randint( 51 | min(img_scale_short), 52 | max(img_scale_short) + 1) 53 | img_scale = (long_edge, short_edge) 54 | elif mode == 'value': 55 | img_scale = img_scales[np.random.randint(num_scales)] 56 | else: 57 | if mode != 'value': 58 | raise ValueError( 59 | 'Only "value" mode supports more than 2 image scales') 60 | img_scale = img_scales[np.random.randint(num_scales)] 61 | return img_scale 62 | 63 | 64 | def show_ann(coco, img, ann_info): 65 | plt.imshow(mmcv.bgr2rgb(img)) 66 | plt.axis('off') 67 | coco.showAnns(ann_info) 68 | plt.show() 69 | -------------------------------------------------------------------------------- /mmdet/datasets/loader/build_loader.py: -------------------------------------------------------------------------------- 1 | import platform 2 | from functools import partial 3 | 4 | from mmcv.runner import get_dist_info 5 | from mmcv.parallel import collate 6 | from torch.utils.data import DataLoader 7 | 8 | from .sampler import GroupSampler, DistributedGroupSampler, DistributedSampler, EpisodicSampler 9 | 10 | if platform.system() != 'Windows': 11 | # https://github.com/pytorch/pytorch/issues/973 12 | import resource 13 | rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) 14 | resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1])) 15 | 16 | 17 | def build_dataloader(dataset, 18 | imgs_per_gpu, 19 | workers_per_gpu, 20 | num_gpus=1, 21 | dist=True, 22 | cls_balanced_sampler=False, 23 | shuffle=True, 24 | **kwargs): 25 | if dist: 26 | rank, world_size = get_dist_info() 27 | if shuffle: 28 | sampler = DistributedGroupSampler(dataset, imgs_per_gpu, 29 | world_size, rank) 30 | else: 31 | sampler = DistributedSampler( 32 | dataset, world_size, rank, shuffle=False) 33 | batch_size = imgs_per_gpu 34 | num_workers = workers_per_gpu 35 | else: 36 | # sampler = GroupSampler(dataset, imgs_per_gpu) if shuffle else None 37 | # batch_size = num_gpus * imgs_per_gpu 38 | # num_workers = num_gpus * workers_per_gpu 39 | 40 | # sampler = GroupSampler_addrepeat(dataset, imgs_per_gpu) if shuffle else None 41 | # batch_size = num_gpus * imgs_per_gpu 42 | # num_workers = num_gpus * workers_per_gpu 43 | 44 | if cls_balanced_sampler==True: 45 | batch_size = num_gpus * imgs_per_gpu 46 | sampler = EpisodicSampler(dataset, batch_size, nc=16, episode=1000) if shuffle else None 47 | num_workers = num_gpus * workers_per_gpu 48 | else: 49 | sampler = GroupSampler(dataset, imgs_per_gpu) if shuffle else None 50 | batch_size = num_gpus * imgs_per_gpu 51 | num_workers = num_gpus * workers_per_gpu 52 | 53 | data_loader = DataLoader( 54 | dataset, 55 | batch_size=batch_size, 56 | sampler=sampler, 57 | num_workers=num_workers, 58 | collate_fn=partial(collate, samples_per_gpu=imgs_per_gpu), 59 | pin_memory=False, 60 | **kwargs) 61 | 62 | return data_loader 63 | 64 | 65 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/random_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from .base_sampler import BaseSampler 5 | 6 | 7 | class 
RandomSampler(BaseSampler): 8 | 9 | def __init__(self, 10 | num, 11 | pos_fraction, 12 | neg_pos_ub=-1, 13 | add_gt_as_proposals=True, 14 | **kwargs): 15 | super(RandomSampler, self).__init__(num, pos_fraction, neg_pos_ub, 16 | add_gt_as_proposals) 17 | 18 | @staticmethod 19 | def random_choice(gallery, num): 20 | """Random select some elements from the gallery. 21 | 22 | It seems that Pytorch's implementation is slower than numpy so we use 23 | numpy to randperm the indices. 24 | """ 25 | assert len(gallery) >= num 26 | if isinstance(gallery, list): 27 | gallery = np.array(gallery) 28 | cands = np.arange(len(gallery)) 29 | np.random.shuffle(cands) 30 | rand_inds = cands[:num] 31 | if not isinstance(gallery, np.ndarray): 32 | rand_inds = torch.from_numpy(rand_inds).long().to(gallery.device) 33 | return gallery[rand_inds] 34 | 35 | def _sample_pos(self, assign_result, num_expected, **kwargs): 36 | """Randomly sample some positive samples.""" 37 | pos_inds = torch.nonzero(assign_result.gt_inds > 0) 38 | if pos_inds.numel() != 0: 39 | pos_inds = pos_inds.squeeze(1) 40 | if pos_inds.numel() <= num_expected: 41 | return pos_inds 42 | else: 43 | return self.random_choice(pos_inds, num_expected) 44 | 45 | def _sample_neg(self, assign_result, num_expected, **kwargs): 46 | """Randomly sample some negative samples.""" 47 | neg_inds = torch.nonzero(assign_result.gt_inds == 0) 48 | if neg_inds.numel() != 0: 49 | neg_inds = neg_inds.squeeze(1) 50 | if len(neg_inds) <= num_expected: 51 | return neg_inds 52 | else: 53 | return self.random_choice(neg_inds, num_expected) 54 | 55 | def _sample_neg_lowthr(self, assign_result, num_expected, lowthr=0.1, **kwargs): 56 | neg_inds = torch.nonzero((assign_result.max_overlaps>=0.1)*(assign_result.gt_inds == 0)) 57 | if neg_inds.numel() != 0: 58 | neg_inds = neg_inds.squeeze(1) 59 | if len(neg_inds) <= num_expected: 60 | return neg_inds 61 | else: 62 | return self.random_choice(neg_inds, num_expected) 63 | -------------------------------------------------------------------------------- /mmdet/models/shared_heads/res_layer.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import torch.nn as nn 4 | from mmcv.cnn import constant_init, kaiming_init 5 | from mmcv.runner import load_checkpoint 6 | 7 | from mmdet.core import auto_fp16 8 | from ..backbones import ResNet, make_res_layer 9 | from ..registry import SHARED_HEADS 10 | 11 | 12 | @SHARED_HEADS.register_module 13 | class ResLayer(nn.Module): 14 | 15 | def __init__(self, 16 | depth, 17 | stage=3, 18 | stride=2, 19 | dilation=1, 20 | style='pytorch', 21 | norm_cfg=dict(type='BN', requires_grad=True), 22 | norm_eval=True, 23 | with_cp=False, 24 | dcn=None): 25 | super(ResLayer, self).__init__() 26 | self.norm_eval = norm_eval 27 | self.norm_cfg = norm_cfg 28 | self.stage = stage 29 | self.fp16_enabled = False 30 | block, stage_blocks = ResNet.arch_settings[depth] 31 | stage_block = stage_blocks[stage] 32 | planes = 64 * 2**stage 33 | inplanes = 64 * 2**(stage - 1) * block.expansion 34 | 35 | res_layer = make_res_layer( 36 | block, 37 | inplanes, 38 | planes, 39 | stage_block, 40 | stride=stride, 41 | dilation=dilation, 42 | style=style, 43 | with_cp=with_cp, 44 | norm_cfg=self.norm_cfg, 45 | dcn=dcn) 46 | self.add_module('layer{}'.format(stage + 1), res_layer) 47 | 48 | def init_weights(self, pretrained=None): 49 | if isinstance(pretrained, str): 50 | logger = logging.getLogger() 51 | load_checkpoint(self, pretrained, strict=False, logger=logger) 52 | elif 
pretrained is None: 53 | for m in self.modules(): 54 | if isinstance(m, nn.Conv2d): 55 | kaiming_init(m) 56 | elif isinstance(m, nn.BatchNorm2d): 57 | constant_init(m, 1) 58 | else: 59 | raise TypeError('pretrained must be a str or None') 60 | 61 | @auto_fp16() 62 | def forward(self, x): 63 | res_layer = getattr(self, 'layer{}'.format(self.stage + 1)) 64 | out = res_layer(x) 65 | return out 66 | 67 | def train(self, mode=True): 68 | super(ResLayer, self).train(mode) 69 | if self.norm_eval: 70 | for m in self.modules(): 71 | if isinstance(m, nn.BatchNorm2d): 72 | m.eval() 73 | -------------------------------------------------------------------------------- /mmdet/utils/registry.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | 3 | import mmcv 4 | 5 | 6 | class Registry(object): 7 | 8 | def __init__(self, name): 9 | self._name = name 10 | self._module_dict = dict() 11 | 12 | def __repr__(self): 13 | format_str = self.__class__.__name__ + '(name={}, items={})'.format( 14 | self._name, list(self._module_dict.keys())) 15 | return format_str 16 | 17 | @property 18 | def name(self): 19 | return self._name 20 | 21 | @property 22 | def module_dict(self): 23 | return self._module_dict 24 | 25 | def get(self, key): 26 | return self._module_dict.get(key, None) 27 | 28 | def _register_module(self, module_class): 29 | """Register a module. 30 | 31 | Args: 32 | module (:obj:`nn.Module`): Module to be registered. 33 | """ 34 | if not inspect.isclass(module_class): 35 | raise TypeError('module must be a class, but got {}'.format( 36 | type(module_class))) 37 | module_name = module_class.__name__ 38 | if module_name in self._module_dict: 39 | raise KeyError('{} is already registered in {}'.format( 40 | module_name, self.name)) 41 | self._module_dict[module_name] = module_class 42 | 43 | def register_module(self, cls): 44 | self._register_module(cls) 45 | return cls 46 | 47 | 48 | def build_from_cfg(cfg, registry, default_args=None): 49 | """Build a module from config dict. 50 | 51 | Args: 52 | cfg (dict): Config dict. It should at least contain the key "type". 53 | registry (:obj:`Registry`): The registry to search the type from. 54 | default_args (dict, optional): Default initialization arguments. 55 | 56 | Returns: 57 | obj: The constructed object. 58 | """ 59 | assert isinstance(cfg, dict) and 'type' in cfg 60 | assert isinstance(default_args, dict) or default_args is None 61 | args = cfg.copy() 62 | obj_type = args.pop('type') 63 | if mmcv.is_str(obj_type): 64 | obj_type = registry.get(obj_type) 65 | if obj_type is None: 66 | raise KeyError('{} is not in the {} registry'.format( 67 | obj_type, registry.name)) 68 | elif not inspect.isclass(obj_type): 69 | raise TypeError('type must be a str or valid type, but got {}'.format( 70 | type(obj_type))) 71 | if default_args is not None: 72 | for name, value in default_args.items(): 73 | args.setdefault(name, value) 74 | return obj_type(**args) 75 | -------------------------------------------------------------------------------- /mmdet/ops/nms/src/nms_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
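// CPU reference implementation of hard NMS: detections are visited in descending
// score order, and any later box whose IoU with an already-kept box reaches
// `threshold` is suppressed; the indices of the surviving boxes are returned and
// consumed by mmdet/ops/nms/nms_wrapper.py.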
2 | #include <torch/extension.h> 3 | 4 | template <typename scalar_t> 5 | at::Tensor nms_cpu_kernel(const at::Tensor& dets, const float threshold) { 6 | AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); 7 | 8 | if (dets.numel() == 0) { 9 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 10 | } 11 | 12 | auto x1_t = dets.select(1, 0).contiguous(); 13 | auto y1_t = dets.select(1, 1).contiguous(); 14 | auto x2_t = dets.select(1, 2).contiguous(); 15 | auto y2_t = dets.select(1, 3).contiguous(); 16 | auto scores = dets.select(1, 4).contiguous(); 17 | 18 | at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1); 19 | 20 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 21 | 22 | auto ndets = dets.size(0); 23 | at::Tensor suppressed_t = 24 | at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU)); 25 | 26 | auto suppressed = suppressed_t.data<uint8_t>(); 27 | auto order = order_t.data<int64_t>(); 28 | auto x1 = x1_t.data<scalar_t>(); 29 | auto y1 = y1_t.data<scalar_t>(); 30 | auto x2 = x2_t.data<scalar_t>(); 31 | auto y2 = y2_t.data<scalar_t>(); 32 | auto areas = areas_t.data<scalar_t>(); 33 | 34 | for (int64_t _i = 0; _i < ndets; _i++) { 35 | auto i = order[_i]; 36 | if (suppressed[i] == 1) continue; 37 | auto ix1 = x1[i]; 38 | auto iy1 = y1[i]; 39 | auto ix2 = x2[i]; 40 | auto iy2 = y2[i]; 41 | auto iarea = areas[i]; 42 | 43 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 44 | auto j = order[_j]; 45 | if (suppressed[j] == 1) continue; 46 | auto xx1 = std::max(ix1, x1[j]); 47 | auto yy1 = std::max(iy1, y1[j]); 48 | auto xx2 = std::min(ix2, x2[j]); 49 | auto yy2 = std::min(iy2, y2[j]); 50 | 51 | auto w = std::max(static_cast<scalar_t>(0), xx2 - xx1 + 1); 52 | auto h = std::max(static_cast<scalar_t>(0), yy2 - yy1 + 1); 53 | auto inter = w * h; 54 | auto ovr = inter / (iarea + areas[j] - inter); 55 | if (ovr >= threshold) suppressed[j] = 1; 56 | } 57 | } 58 | return at::nonzero(suppressed_t == 0).squeeze(1); 59 | } 60 | 61 | at::Tensor nms(const at::Tensor& dets, const float threshold) { 62 | at::Tensor result; 63 | AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms", [&] { 64 | result = nms_cpu_kernel<scalar_t>(dets, threshold); 65 | }); 66 | return result; 67 | } 68 | 69 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 70 | m.def("nms", &nms, "non-maximum suppression"); 71 | } -------------------------------------------------------------------------------- /mmdet/core/post_processing/bbox_nms.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from mmdet.ops.nms import nms_wrapper 4 | 5 | 6 | def multiclass_nms(multi_bboxes, 7 | multi_scores, 8 | score_thr, 9 | nms_cfg, 10 | max_num=-1, 11 | score_factors=None): 12 | """NMS for multi-class bboxes. 13 | 14 | Args: 15 | multi_bboxes (Tensor): shape (n, #class*4) or (n, 4) 16 | multi_scores (Tensor): shape (n, #class) 17 | score_thr (float): bbox threshold, bboxes with scores lower than it 18 | will not be considered. 19 | nms_cfg (dict): NMS config, e.g. dict(type='nms', iou_thr=0.5) 20 | max_num (int): if there are more than max_num bboxes after NMS, 21 | only top max_num will be kept. 22 | score_factors (Tensor): The factors multiplied to scores before 23 | applying NMS 24 | 25 | Returns: 26 | tuple: (bboxes, labels), tensors of shape (k, 5) and (k,). Labels 27 | are 0-based.
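Example (a minimal sketch with made-up tensors; index 0 of ``multi_scores`` is the background class, so a (n, 3) score input has two foreground classes):
        >>> import torch
        >>> multi_bboxes = torch.tensor([[10., 10., 50., 50.]])   # (1, 4), shared by all classes
        >>> multi_scores = torch.tensor([[0.05, 0.90, 0.05]])     # (1, 3)
        >>> dets, labels = multiclass_nms(
        ...     multi_bboxes, multi_scores, score_thr=0.3,
        ...     nms_cfg=dict(type='nms', iou_thr=0.5), max_num=100)
        >>> # dets: (k, 5) boxes with scores appended, labels: (k,) 0-based class ids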
28 | """ 29 | num_classes = multi_scores.shape[1] 30 | bboxes, labels = [], [] 31 | nms_cfg_ = nms_cfg.copy() 32 | nms_type = nms_cfg_.pop('type', 'nms') 33 | nms_op = getattr(nms_wrapper, nms_type) 34 | for i in range(1, num_classes): 35 | cls_inds = multi_scores[:, i] > score_thr 36 | if not cls_inds.any(): 37 | continue 38 | # get bboxes and scores of this class 39 | if multi_bboxes.shape[1] == 4: 40 | _bboxes = multi_bboxes[cls_inds, :] 41 | else: 42 | _bboxes = multi_bboxes[cls_inds, i * 4:(i + 1) * 4] 43 | _scores = multi_scores[cls_inds, i] 44 | if score_factors is not None: 45 | _scores *= score_factors[cls_inds] 46 | cls_dets = torch.cat([_bboxes, _scores[:, None]], dim=1) 47 | cls_dets, _ = nms_op(cls_dets, **nms_cfg_) 48 | cls_labels = multi_bboxes.new_full((cls_dets.shape[0], ), 49 | i - 1, 50 | dtype=torch.long) 51 | bboxes.append(cls_dets) 52 | labels.append(cls_labels) 53 | if bboxes: 54 | bboxes = torch.cat(bboxes) 55 | labels = torch.cat(labels) 56 | if bboxes.shape[0] > max_num: 57 | _, inds = bboxes[:, -1].sort(descending=True) 58 | inds = inds[:max_num] 59 | bboxes = bboxes[inds] 60 | labels = labels[inds] 61 | else: 62 | bboxes = multi_bboxes.new_zeros((0, 5)) 63 | labels = multi_bboxes.new_zeros((0, ), dtype=torch.long) 64 | 65 | return bboxes, labels 66 | -------------------------------------------------------------------------------- /mmdet/models/detectors/single_stage.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from mmdet.core import bbox2result 4 | from .. import builder 5 | from ..registry import DETECTORS 6 | from .base import BaseDetector 7 | 8 | 9 | @DETECTORS.register_module 10 | class SingleStageDetector(BaseDetector): 11 | 12 | def __init__(self, 13 | backbone, 14 | neck=None, 15 | bbox_head=None, 16 | train_cfg=None, 17 | test_cfg=None, 18 | pretrained=None): 19 | super(SingleStageDetector, self).__init__() 20 | self.backbone = builder.build_backbone(backbone) 21 | if neck is not None: 22 | self.neck = builder.build_neck(neck) 23 | self.bbox_head = builder.build_head(bbox_head) 24 | self.train_cfg = train_cfg 25 | self.test_cfg = test_cfg 26 | self.init_weights(pretrained=pretrained) 27 | 28 | def init_weights(self, pretrained=None): 29 | super(SingleStageDetector, self).init_weights(pretrained) 30 | self.backbone.init_weights(pretrained=pretrained) 31 | if self.with_neck: 32 | if isinstance(self.neck, nn.Sequential): 33 | for m in self.neck: 34 | m.init_weights() 35 | else: 36 | self.neck.init_weights() 37 | self.bbox_head.init_weights() 38 | 39 | def extract_feat(self, img): 40 | x = self.backbone(img) 41 | if self.with_neck: 42 | x = self.neck(x) 43 | return x 44 | 45 | def forward_train(self, 46 | img, 47 | img_metas, 48 | gt_bboxes, 49 | gt_labels, 50 | gt_bboxes_ignore=None): 51 | x = self.extract_feat(img) 52 | outs = self.bbox_head(x) 53 | loss_inputs = outs + (gt_bboxes, gt_labels, img_metas, self.train_cfg) 54 | losses = self.bbox_head.loss( 55 | *loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore) 56 | return losses 57 | 58 | def simple_test(self, img, img_meta, rescale=False): 59 | x = self.extract_feat(img) 60 | outs = self.bbox_head(x) 61 | bbox_inputs = outs + (img_meta, self.test_cfg, rescale) 62 | bbox_list = self.bbox_head.get_bboxes(*bbox_inputs) 63 | bbox_results = [ 64 | bbox2result(det_bboxes, det_labels, self.bbox_head.num_classes) 65 | for det_bboxes, det_labels in bbox_list 66 | ] 67 | return bbox_results[0] 68 | 69 | def aug_test(self, imgs, img_metas, 
rescale=False): 70 | raise NotImplementedError 71 | -------------------------------------------------------------------------------- /lvis_api/README.md: -------------------------------------------------------------------------------- 1 | # LVIS API 2 | 3 | 4 | LVIS (pronounced ‘el-vis’): is a new dataset for Large Vocabulary Instance Segmentation. 5 | When complete, it will feature more than 2 million high-quality instance segmentation masks for over 1200 entry-level object categories in 164k images. The LVIS API enables reading and interacting with annotation files, visualizing annotations, and evaluating results. 6 | 7 | 8 | 9 | ## LVIS v0.5 10 | 11 | LVIS v0.5 marks the halfway point in data collection. For this release, we have annotated an additional 82k images (57k train, 20k test, 5k val). Release v0.5 is publicly available at [LVIS website](http://www.lvisdataset.org) and will be used in the first LVIS Challenge to be held in conjunction with the COCO Workshop at ICCV 2019. 12 | 13 | ## Setup 14 | You can setup a virtual environment and then install `lvisapi` using pip: 15 | 16 | ```bash 17 | python3 -m venv env # Create a virtual environment 18 | source env/bin/activate # Activate virtual environment 19 | 20 | # install COCO API. COCO API requires numpy to install. Ensure that you installed numpy. 21 | pip install 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI' 22 | # install LVIS API 23 | pip install lvis 24 | # Work for a while ... 25 | deactivate # Exit virtual environment 26 | ``` 27 | 28 | You can also clone the repo first and then do the following steps inside the repo: 29 | ```bash 30 | python3 -m venv env # Create a virtual environment 31 | source env/bin/activate # Activate virtual environment 32 | 33 | # install COCO API. COCO API requires numpy to install. Ensure that you installed numpy. 34 | pip install 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI' 35 | # install LVIS API 36 | pip install . 37 | # test if the installation was correct 38 | python test.py 39 | # Work for a while ... 40 | deactivate # Exit virtual environment 41 | ``` 42 | ## Citing LVIS 43 | 44 | If you find this code/data useful in your research then please cite our [paper](http://www.lvisdataset.org/assets/lvis_v0.5.pdf): 45 | ``` 46 | @inproceedings{gupta2019lvis, 47 | title={{LVIS}: A Dataset for Large Vocabulary Instance Segmentation}, 48 | author={Gupta, Agrim and Dollar, Piotr and Girshick, Ross}, 49 | booktitle={Proceedings of the {IEEE} Conference on Computer Vision and Pattern Recognition}, 50 | year={2019} 51 | } 52 | ``` 53 | 54 | ## Credit 55 | 56 | The code is a re-write of PythonAPI for [COCO](https://github.com/cocodataset/cocoapi). 57 | The core functionality is the same with LVIS specific changes. 58 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/roi_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | from torch.nn.modules.utils import _pair 6 | 7 | from . 
import roi_pool_cuda 8 | 9 | 10 | class RoIPoolFunction(Function): 11 | 12 | @staticmethod 13 | def forward(ctx, features, rois, out_size, spatial_scale): 14 | assert features.is_cuda 15 | out_h, out_w = _pair(out_size) 16 | assert isinstance(out_h, int) and isinstance(out_w, int) 17 | ctx.save_for_backward(rois) 18 | num_channels = features.size(1) 19 | num_rois = rois.size(0) 20 | out_size = (num_rois, num_channels, out_h, out_w) 21 | output = features.new_zeros(out_size) 22 | argmax = features.new_zeros(out_size, dtype=torch.int) 23 | roi_pool_cuda.forward(features, rois, out_h, out_w, spatial_scale, 24 | output, argmax) 25 | ctx.spatial_scale = spatial_scale 26 | ctx.feature_size = features.size() 27 | ctx.argmax = argmax 28 | 29 | return output 30 | 31 | @staticmethod 32 | @once_differentiable 33 | def backward(ctx, grad_output): 34 | assert grad_output.is_cuda 35 | spatial_scale = ctx.spatial_scale 36 | feature_size = ctx.feature_size 37 | argmax = ctx.argmax 38 | rois = ctx.saved_tensors[0] 39 | assert feature_size is not None 40 | 41 | grad_input = grad_rois = None 42 | if ctx.needs_input_grad[0]: 43 | grad_input = grad_output.new_zeros(feature_size) 44 | roi_pool_cuda.backward(grad_output.contiguous(), rois, argmax, 45 | spatial_scale, grad_input) 46 | 47 | return grad_input, grad_rois, None, None 48 | 49 | 50 | roi_pool = RoIPoolFunction.apply 51 | 52 | 53 | class RoIPool(nn.Module): 54 | 55 | def __init__(self, out_size, spatial_scale, use_torchvision=False): 56 | super(RoIPool, self).__init__() 57 | 58 | self.out_size = _pair(out_size) 59 | self.spatial_scale = float(spatial_scale) 60 | self.use_torchvision = use_torchvision 61 | 62 | def forward(self, features, rois): 63 | if self.use_torchvision: 64 | from torchvision.ops import roi_pool as tv_roi_pool 65 | return tv_roi_pool(features, rois, self.out_size, 66 | self.spatial_scale) 67 | else: 68 | return roi_pool(features, rois, self.out_size, self.spatial_scale) 69 | 70 | def __repr__(self): 71 | format_str = self.__class__.__name__ 72 | format_str += '(out_size={}, spatial_scale={}'.format( 73 | self.out_size, self.spatial_scale) 74 | format_str += ', use_torchvision={})'.format(self.use_torchvision) 75 | return format_str 76 | -------------------------------------------------------------------------------- /mmdet/ops/nms/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from . import nms_cpu, nms_cuda 5 | from .soft_nms_cpu import soft_nms_cpu 6 | 7 | 8 | def nms(dets, iou_thr, device_id=None): 9 | """Dispatch to either CPU or GPU NMS implementations. 10 | 11 | The input can be either a torch tensor or numpy array. GPU NMS will be used 12 | if the input is a gpu tensor or device_id is specified, otherwise CPU NMS 13 | will be used. The returned type will always be the same as inputs. 14 | 15 | Arguments: 16 | dets (torch.Tensor or np.ndarray): bboxes with scores. 17 | iou_thr (float): IoU threshold for NMS. 18 | device_id (int, optional): when `dets` is a numpy array, if `device_id` 19 | is None, then cpu nms is used, otherwise gpu_nms will be used. 20 | 21 | Returns: 22 | tuple: kept bboxes and indice, which is always the same data type as 23 | the input. 
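Example (an illustrative sketch with made-up boxes; the compiled nms_cpu / nms_cuda extensions must have been built for the call to work):
        >>> import numpy as np
        >>> dets = np.array([[49.1, 32.4, 51.0, 35.9, 0.9],
        ...                  [49.3, 32.9, 51.0, 35.3, 0.6],
        ...                  [35.3, 11.5, 39.9, 14.5, 0.4]], dtype=np.float32)
        >>> kept_dets, inds = nms(dets, iou_thr=0.5)
        >>> # the two heavily overlapping boxes collapse to the higher-scoring
        >>> # one, so inds points at the first and third rows of dets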
24 | """ 25 | # convert dets (tensor or numpy array) to tensor 26 | if isinstance(dets, torch.Tensor): 27 | is_numpy = False 28 | dets_th = dets 29 | elif isinstance(dets, np.ndarray): 30 | is_numpy = True 31 | device = 'cpu' if device_id is None else 'cuda:{}'.format(device_id) 32 | dets_th = torch.from_numpy(dets).to(device) 33 | else: 34 | raise TypeError( 35 | 'dets must be either a Tensor or numpy array, but got {}'.format( 36 | type(dets))) 37 | 38 | # execute cpu or cuda nms 39 | if dets_th.shape[0] == 0: 40 | inds = dets_th.new_zeros(0, dtype=torch.long) 41 | else: 42 | if dets_th.is_cuda: 43 | inds = nms_cuda.nms(dets_th, iou_thr) 44 | else: 45 | inds = nms_cpu.nms(dets_th, iou_thr) 46 | 47 | if is_numpy: 48 | inds = inds.cpu().numpy() 49 | return dets[inds, :], inds 50 | 51 | 52 | def soft_nms(dets, iou_thr, method='linear', sigma=0.5, min_score=1e-3): 53 | if isinstance(dets, torch.Tensor): 54 | is_tensor = True 55 | dets_np = dets.detach().cpu().numpy() 56 | elif isinstance(dets, np.ndarray): 57 | is_tensor = False 58 | dets_np = dets 59 | else: 60 | raise TypeError( 61 | 'dets must be either a Tensor or numpy array, but got {}'.format( 62 | type(dets))) 63 | 64 | method_codes = {'linear': 1, 'gaussian': 2} 65 | if method not in method_codes: 66 | raise ValueError('Invalid method for SoftNMS: {}'.format(method)) 67 | new_dets, inds = soft_nms_cpu( 68 | dets_np, 69 | iou_thr, 70 | method=method_codes[method], 71 | sigma=sigma, 72 | min_score=min_score) 73 | 74 | if is_tensor: 75 | return dets.new_tensor(new_dets), dets.new_tensor( 76 | inds, dtype=torch.long) 77 | else: 78 | return new_dets.astype(np.float32), inds.astype(np.int64) 79 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/ohem_sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from ..transforms import bbox2roi 4 | from .base_sampler import BaseSampler 5 | 6 | 7 | class OHEMSampler(BaseSampler): 8 | 9 | def __init__(self, 10 | num, 11 | pos_fraction, 12 | context, 13 | neg_pos_ub=-1, 14 | add_gt_as_proposals=True, 15 | **kwargs): 16 | super(OHEMSampler, self).__init__(num, pos_fraction, neg_pos_ub, 17 | add_gt_as_proposals) 18 | if not hasattr(context, 'num_stages'): 19 | self.bbox_roi_extractor = context.bbox_roi_extractor 20 | self.bbox_head = context.bbox_head 21 | else: 22 | self.bbox_roi_extractor = context.bbox_roi_extractor[ 23 | context.current_stage] 24 | self.bbox_head = context.bbox_head[context.current_stage] 25 | 26 | def hard_mining(self, inds, num_expected, bboxes, labels, feats): 27 | with torch.no_grad(): 28 | rois = bbox2roi([bboxes]) 29 | bbox_feats = self.bbox_roi_extractor( 30 | feats[:self.bbox_roi_extractor.num_inputs], rois) 31 | cls_score, _ = self.bbox_head(bbox_feats) 32 | loss = self.bbox_head.loss( 33 | cls_score=cls_score, 34 | bbox_pred=None, 35 | labels=labels, 36 | label_weights=cls_score.new_ones(cls_score.size(0)), 37 | bbox_targets=None, 38 | bbox_weights=None, 39 | reduction_override='none')['loss_cls'] 40 | _, topk_loss_inds = loss.topk(num_expected) 41 | return inds[topk_loss_inds] 42 | 43 | def _sample_pos(self, 44 | assign_result, 45 | num_expected, 46 | bboxes=None, 47 | feats=None, 48 | **kwargs): 49 | # Sample some hard positive samples 50 | pos_inds = torch.nonzero(assign_result.gt_inds > 0) 51 | if pos_inds.numel() != 0: 52 | pos_inds = pos_inds.squeeze(1) 53 | if pos_inds.numel() <= num_expected: 54 | return pos_inds 55 | else: 56 | return 
self.hard_mining(pos_inds, num_expected, bboxes[pos_inds], 57 | assign_result.labels[pos_inds], feats) 58 | 59 | def _sample_neg(self, 60 | assign_result, 61 | num_expected, 62 | bboxes=None, 63 | feats=None, 64 | **kwargs): 65 | # Sample some hard negative samples 66 | neg_inds = torch.nonzero(assign_result.gt_inds == 0) 67 | if neg_inds.numel() != 0: 68 | neg_inds = neg_inds.squeeze(1) 69 | if len(neg_inds) <= num_expected: 70 | return neg_inds 71 | else: 72 | return self.hard_mining(neg_inds, num_expected, bboxes[neg_inds], 73 | assign_result.labels[neg_inds], feats) 74 | -------------------------------------------------------------------------------- /mmdet/models/losses/focal_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | from mmdet.ops import sigmoid_focal_loss as _sigmoid_focal_loss 5 | from ..registry import LOSSES 6 | from .utils import weight_reduce_loss 7 | 8 | 9 | # This method is only for debugging 10 | def py_sigmoid_focal_loss(pred, 11 | target, 12 | weight=None, 13 | gamma=2.0, 14 | alpha=0.25, 15 | reduction='mean', 16 | avg_factor=None): 17 | pred_sigmoid = pred.sigmoid() 18 | target = target.type_as(pred) 19 | pt = (1 - pred_sigmoid) * target + pred_sigmoid * (1 - target) 20 | focal_weight = (alpha * target + (1 - alpha) * 21 | (1 - target)) * pt.pow(gamma) 22 | loss = F.binary_cross_entropy_with_logits( 23 | pred, target, reduction='none') * focal_weight 24 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor) 25 | return loss 26 | 27 | 28 | def sigmoid_focal_loss(pred, 29 | target, 30 | weight=None, 31 | gamma=2.0, 32 | alpha=0.25, 33 | reduction='mean', 34 | avg_factor=None): 35 | # Function.apply does not accept keyword arguments, so the decorator 36 | # "weighted_loss" is not applicable 37 | loss = _sigmoid_focal_loss(pred, target, gamma, alpha) 38 | # TODO: find a proper way to handle the shape of weight 39 | if weight is not None: 40 | weight = weight.view(-1, 1) 41 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor) 42 | return loss 43 | 44 | 45 | @LOSSES.register_module 46 | class FocalLoss(nn.Module): 47 | 48 | def __init__(self, 49 | use_sigmoid=True, 50 | gamma=2.0, 51 | alpha=0.25, 52 | reduction='mean', 53 | loss_weight=1.0): 54 | super(FocalLoss, self).__init__() 55 | assert use_sigmoid is True, 'Only sigmoid focal loss supported now.' 
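# Only the sigmoid (per-class binary) formulation implemented above is supported,
# i.e. FL(p_t) = -alpha_t * (1 - p_t)**gamma * log(p_t) applied independently per
# class; passing use_sigmoid=False (a softmax focal loss) is rejected by this
# assert, and forward() raises NotImplementedError for that branch.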
56 | self.use_sigmoid = use_sigmoid 57 | self.gamma = gamma 58 | self.alpha = alpha 59 | self.reduction = reduction 60 | self.loss_weight = loss_weight 61 | 62 | def forward(self, 63 | pred, 64 | target, 65 | weight=None, 66 | avg_factor=None, 67 | reduction_override=None): 68 | assert reduction_override in (None, 'none', 'mean', 'sum') 69 | reduction = ( 70 | reduction_override if reduction_override else self.reduction) 71 | if self.use_sigmoid: 72 | loss_cls = self.loss_weight * sigmoid_focal_loss( 73 | pred, 74 | target, 75 | weight, 76 | gamma=self.gamma, 77 | alpha=self.alpha, 78 | reduction=reduction, 79 | avg_factor=avg_factor) 80 | else: 81 | raise NotImplementedError 82 | return loss_cls 83 | -------------------------------------------------------------------------------- /mmdet/ops/masked_conv/src/masked_conv2d_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | int MaskedIm2colForwardLaucher(const at::Tensor im, const int height, 7 | const int width, const int channels, 8 | const int kernel_h, const int kernel_w, 9 | const int pad_h, const int pad_w, 10 | const at::Tensor mask_h_idx, 11 | const at::Tensor mask_w_idx, const int mask_cnt, 12 | at::Tensor col); 13 | 14 | int MaskedCol2imForwardLaucher(const at::Tensor col, const int height, 15 | const int width, const int channels, 16 | const at::Tensor mask_h_idx, 17 | const at::Tensor mask_w_idx, const int mask_cnt, 18 | at::Tensor im); 19 | 20 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 21 | #define CHECK_CONTIGUOUS(x) \ 22 | AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") 23 | #define CHECK_INPUT(x) \ 24 | CHECK_CUDA(x); \ 25 | CHECK_CONTIGUOUS(x) 26 | 27 | int masked_im2col_forward_cuda(const at::Tensor im, const at::Tensor mask_h_idx, 28 | const at::Tensor mask_w_idx, const int kernel_h, 29 | const int kernel_w, const int pad_h, 30 | const int pad_w, at::Tensor col) { 31 | CHECK_INPUT(im); 32 | CHECK_INPUT(mask_h_idx); 33 | CHECK_INPUT(mask_w_idx); 34 | CHECK_INPUT(col); 35 | // im: (n, ic, h, w), kernel size (kh, kw) 36 | // kernel: (oc, ic * kh * kw), col: (kh * kw * ic, ow * oh) 37 | 38 | int channels = im.size(1); 39 | int height = im.size(2); 40 | int width = im.size(3); 41 | int mask_cnt = mask_h_idx.size(0); 42 | 43 | MaskedIm2colForwardLaucher(im, height, width, channels, kernel_h, kernel_w, 44 | pad_h, pad_w, mask_h_idx, mask_w_idx, mask_cnt, 45 | col); 46 | 47 | return 1; 48 | } 49 | 50 | int masked_col2im_forward_cuda(const at::Tensor col, 51 | const at::Tensor mask_h_idx, 52 | const at::Tensor mask_w_idx, int height, 53 | int width, int channels, at::Tensor im) { 54 | CHECK_INPUT(col); 55 | CHECK_INPUT(mask_h_idx); 56 | CHECK_INPUT(mask_w_idx); 57 | CHECK_INPUT(im); 58 | // im: (n, ic, h, w), kernel size (kh, kw) 59 | // kernel: (oc, ic * kh * kh), col: (kh * kw * ic, ow * oh) 60 | 61 | int mask_cnt = mask_h_idx.size(0); 62 | 63 | MaskedCol2imForwardLaucher(col, height, width, channels, mask_h_idx, 64 | mask_w_idx, mask_cnt, im); 65 | 66 | return 1; 67 | } 68 | 69 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 70 | m.def("masked_im2col_forward", &masked_im2col_forward_cuda, 71 | "masked_im2col forward (CUDA)"); 72 | m.def("masked_col2im_forward", &masked_col2im_forward_cuda, 73 | "masked_col2im forward (CUDA)"); 74 | } -------------------------------------------------------------------------------- /configs/fcos/README.md: 
-------------------------------------------------------------------------------- 1 | # FCOS: Fully Convolutional One-Stage Object Detection 2 | 3 | ## Introduction 4 | 5 | ``` 6 | @article{tian2019fcos, 7 | title={FCOS: Fully Convolutional One-Stage Object Detection}, 8 | author={Tian, Zhi and Shen, Chunhua and Chen, Hao and He, Tong}, 9 | journal={arXiv preprint arXiv:1904.01355}, 10 | year={2019} 11 | } 12 | ``` 13 | 14 | ## Results and Models 15 | 16 | | Backbone | Style | GN | MS train | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download | 17 | |:---------:|:-------:|:-------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:--------:| 18 | | R-50 | caffe | N | N | 1x | 5.5 | 0.373 | 13.7 | 35.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r50_caffe_fpn_1x_4gpu_20190516-a7cac5ff.pth) | 19 | | R-50 | caffe | Y | N | 1x | 6.9 | 0.396 | 13.6 | 36.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r50_caffe_fpn_gn_1x_4gpu_20190516-9f253a93.pth) | 20 | | R-50 | caffe | Y | N | 2x | - | - | - | 36.9 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r50_caffe_fpn_gn_2x_4gpu_20190516_-93484354.pth) | 21 | | R-101 | caffe | Y | N | 1x | 10.4 | 0.558 | 11.6 | 39.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r101_caffe_fpn_gn_1x_4gpu_20190516-e4889733.pth) | 22 | | R-101 | caffe | Y | N | 2x | - | - | - | 39.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r101_caffe_fpn_gn_2x_4gpu_20190516-c03af97b.pth) | 23 | 24 | 25 | | Backbone | Style | GN | MS train | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download | 26 | |:---------:|:-------:|:-------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:--------:| 27 | | R-50 | caffe | Y | Y | 2x | - | - | - | 38.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_mstrain_640_800_r50_caffe_fpn_gn_2x_4gpu_20190516-f7329d80.pth) | 28 | | R-101 | caffe | Y | Y | 2x | - | - | - | 40.8 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_mstrain_640_800_r101_caffe_fpn_gn_2x_4gpu_20190516-42e6f62d.pth) | 29 | | X-101 | caffe | Y | Y | 2x | 9.7 | 0.892 | 7.0 | 42.8 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_mstrain_640_800_x101_64x4d_fpn_gn_2x_20190516-a36c0872.pth) | 30 | 31 | **Notes:** 32 | - To be consistent with the author's implementation, we use 4 GPUs with 4 images/GPU for R-50 and R-101 models, and 8 GPUs with 2 image/GPU for X-101 models. 33 | - The X-101 backbone is X-101-64x4d. 
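To try one of the checkpoints above on a single image, the standard mmdetection v1.x high-level API should be enough. The snippet below is a sketch only: it assumes this repository keeps `init_detector` / `inference_detector` / `show_result` in `mmdet.apis`, and the config name `fcos_r50_caffe_fpn_gn_1x_4gpu.py` is assumed rather than verified against `configs/fcos/`.

```python
# Sketch: single-image inference with the R-50 (GN, 1x) FCOS checkpoint listed above.
from mmdet.apis import init_detector, inference_detector, show_result

config_file = 'configs/fcos/fcos_r50_caffe_fpn_gn_1x_4gpu.py'            # assumed config name
checkpoint_file = 'fcos_r50_caffe_fpn_gn_1x_4gpu_20190516-9f253a93.pth'  # downloaded from the table above

model = init_detector(config_file, checkpoint_file, device='cuda:0')
result = inference_detector(model, 'demo/demo.jpg')                      # demo image shipped with the repo
show_result('demo/demo.jpg', result, model.CLASSES, score_thr=0.3,
            out_file='fcos_demo_out.jpg')
```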
34 | -------------------------------------------------------------------------------- /tools/configs/fcos/README.md: -------------------------------------------------------------------------------- 1 | # FCOS: Fully Convolutional One-Stage Object Detection 2 | 3 | ## Introduction 4 | 5 | ``` 6 | @article{tian2019fcos, 7 | title={FCOS: Fully Convolutional One-Stage Object Detection}, 8 | author={Tian, Zhi and Shen, Chunhua and Chen, Hao and He, Tong}, 9 | journal={arXiv preprint arXiv:1904.01355}, 10 | year={2019} 11 | } 12 | ``` 13 | 14 | ## Results and Models 15 | 16 | | Backbone | Style | GN | MS train | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download | 17 | |:---------:|:-------:|:-------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:--------:| 18 | | R-50 | caffe | N | N | 1x | 5.5 | 0.373 | 13.7 | 35.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r50_caffe_fpn_1x_4gpu_20190516-a7cac5ff.pth) | 19 | | R-50 | caffe | Y | N | 1x | 6.9 | 0.396 | 13.6 | 36.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r50_caffe_fpn_gn_1x_4gpu_20190516-9f253a93.pth) | 20 | | R-50 | caffe | Y | N | 2x | - | - | - | 36.9 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r50_caffe_fpn_gn_2x_4gpu_20190516_-93484354.pth) | 21 | | R-101 | caffe | Y | N | 1x | 10.4 | 0.558 | 11.6 | 39.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r101_caffe_fpn_gn_1x_4gpu_20190516-e4889733.pth) | 22 | | R-101 | caffe | Y | N | 2x | - | - | - | 39.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r101_caffe_fpn_gn_2x_4gpu_20190516-c03af97b.pth) | 23 | 24 | 25 | | Backbone | Style | GN | MS train | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download | 26 | |:---------:|:-------:|:-------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:--------:| 27 | | R-50 | caffe | Y | Y | 2x | - | - | - | 38.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_mstrain_640_800_r50_caffe_fpn_gn_2x_4gpu_20190516-f7329d80.pth) | 28 | | R-101 | caffe | Y | Y | 2x | - | - | - | 40.8 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_mstrain_640_800_r101_caffe_fpn_gn_2x_4gpu_20190516-42e6f62d.pth) | 29 | | X-101 | caffe | Y | Y | 2x | 9.7 | 0.892 | 7.0 | 42.8 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_mstrain_640_800_x101_64x4d_fpn_gn_2x_20190516-a36c0872.pth) | 30 | 31 | **Notes:** 32 | - To be consistent with the author's implementation, we use 4 GPUs with 4 images/GPU for R-50 and R-101 models, and 8 GPUs with 2 image/GPU for X-101 models. 33 | - The X-101 backbone is X-101-64x4d. 
34 | -------------------------------------------------------------------------------- /mmdet/models/anchor_heads/retina_head.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch.nn as nn 3 | from mmcv.cnn import normal_init 4 | 5 | from ..registry import HEADS 6 | from ..utils import ConvModule, bias_init_with_prob 7 | from .anchor_head import AnchorHead 8 | 9 | 10 | @HEADS.register_module 11 | class RetinaHead(AnchorHead): 12 | 13 | def __init__(self, 14 | num_classes, 15 | in_channels, 16 | stacked_convs=4, 17 | octave_base_scale=4, 18 | scales_per_octave=3, 19 | conv_cfg=None, 20 | norm_cfg=None, 21 | **kwargs): 22 | self.stacked_convs = stacked_convs 23 | self.octave_base_scale = octave_base_scale 24 | self.scales_per_octave = scales_per_octave 25 | self.conv_cfg = conv_cfg 26 | self.norm_cfg = norm_cfg 27 | octave_scales = np.array( 28 | [2**(i / scales_per_octave) for i in range(scales_per_octave)]) 29 | anchor_scales = octave_scales * octave_base_scale 30 | super(RetinaHead, self).__init__( 31 | num_classes, in_channels, anchor_scales=anchor_scales, **kwargs) 32 | 33 | def _init_layers(self): 34 | self.relu = nn.ReLU(inplace=True) 35 | self.cls_convs = nn.ModuleList() 36 | self.reg_convs = nn.ModuleList() 37 | for i in range(self.stacked_convs): 38 | chn = self.in_channels if i == 0 else self.feat_channels 39 | self.cls_convs.append( 40 | ConvModule( 41 | chn, 42 | self.feat_channels, 43 | 3, 44 | stride=1, 45 | padding=1, 46 | conv_cfg=self.conv_cfg, 47 | norm_cfg=self.norm_cfg)) 48 | self.reg_convs.append( 49 | ConvModule( 50 | chn, 51 | self.feat_channels, 52 | 3, 53 | stride=1, 54 | padding=1, 55 | conv_cfg=self.conv_cfg, 56 | norm_cfg=self.norm_cfg)) 57 | self.retina_cls = nn.Conv2d( 58 | self.feat_channels, 59 | self.num_anchors * self.cls_out_channels, 60 | 3, 61 | padding=1) 62 | self.retina_reg = nn.Conv2d( 63 | self.feat_channels, self.num_anchors * 4, 3, padding=1) 64 | 65 | def init_weights(self): 66 | for m in self.cls_convs: 67 | normal_init(m.conv, std=0.01) 68 | for m in self.reg_convs: 69 | normal_init(m.conv, std=0.01) 70 | bias_cls = bias_init_with_prob(0.01) 71 | normal_init(self.retina_cls, std=0.01, bias=bias_cls) 72 | normal_init(self.retina_reg, std=0.01) 73 | 74 | def forward_single(self, x): 75 | cls_feat = x 76 | reg_feat = x 77 | for cls_conv in self.cls_convs: 78 | cls_feat = cls_conv(cls_feat) 79 | for reg_conv in self.reg_convs: 80 | reg_feat = reg_conv(reg_feat) 81 | cls_score = self.retina_cls(cls_feat) 82 | bbox_pred = self.retina_reg(reg_feat) 83 | return cls_score, bbox_pred 84 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/base_sampler.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | import torch 4 | 5 | from .sampling_result import SamplingResult 6 | 7 | 8 | class BaseSampler(metaclass=ABCMeta): 9 | 10 | def __init__(self, 11 | num, 12 | pos_fraction, 13 | neg_pos_ub=-1, 14 | add_gt_as_proposals=True, 15 | **kwargs): 16 | self.num = num 17 | self.pos_fraction = pos_fraction 18 | self.neg_pos_ub = neg_pos_ub 19 | self.add_gt_as_proposals = add_gt_as_proposals 20 | self.pos_sampler = self 21 | self.neg_sampler = self 22 | 23 | @abstractmethod 24 | def _sample_pos(self, assign_result, num_expected, **kwargs): 25 | pass 26 | 27 | @abstractmethod 28 | def _sample_neg(self, assign_result, num_expected, **kwargs): 29 | pass 30 | 31 | 
@abstractmethod 32 | def _sample_neg_lowthr(self, assign_result, num_expected, lowthr=0.1, **kwargs): 33 | pass 34 | 35 | def sample(self, 36 | assign_result, 37 | bboxes, 38 | gt_bboxes, 39 | gt_labels=None, 40 | **kwargs): 41 | """Sample positive and negative bboxes. 42 | 43 | This is a simple implementation of bbox sampling given candidates, 44 | assigning results and ground truth bboxes. 45 | 46 | Args: 47 | assign_result (:obj:`AssignResult`): Bbox assigning results. 48 | bboxes (Tensor): Boxes to be sampled from. 49 | gt_bboxes (Tensor): Ground truth bboxes. 50 | gt_labels (Tensor, optional): Class labels of ground truth bboxes. 51 | 52 | Returns: 53 | :obj:`SamplingResult`: Sampling result. 54 | """ 55 | bboxes = bboxes[:, :4] 56 | 57 | gt_flags = bboxes.new_zeros((bboxes.shape[0], ), dtype=torch.uint8) 58 | if self.add_gt_as_proposals: 59 | bboxes = torch.cat([gt_bboxes, bboxes], dim=0) 60 | assign_result.add_gt_(gt_labels) 61 | gt_ones = bboxes.new_ones(gt_bboxes.shape[0], dtype=torch.uint8) 62 | gt_flags = torch.cat([gt_ones, gt_flags]) 63 | 64 | num_expected_pos = int(self.num * self.pos_fraction) 65 | pos_inds = self.pos_sampler._sample_pos( 66 | assign_result, num_expected_pos, bboxes=bboxes, **kwargs) 67 | # We found that sampled indices have duplicated items occasionally. 68 | # (may be a bug of PyTorch) 69 | pos_inds = pos_inds.unique() 70 | num_sampled_pos = pos_inds.numel() 71 | num_expected_neg = self.num - num_sampled_pos 72 | if self.neg_pos_ub >= 0: 73 | _pos = max(1, num_sampled_pos) 74 | neg_upper_bound = int(self.neg_pos_ub * _pos) 75 | if num_expected_neg > neg_upper_bound: 76 | num_expected_neg = neg_upper_bound 77 | neg_inds = self.neg_sampler._sample_neg_lowthr( 78 | assign_result, num_expected_neg, bboxes=bboxes, **kwargs) 79 | neg_inds = neg_inds.unique() 80 | 81 | return SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes, 82 | assign_result, gt_flags) 83 | -------------------------------------------------------------------------------- /configs/hrnet/README.md: -------------------------------------------------------------------------------- 1 | # High-resolution networks (HRNets) for object detection 2 | 3 | ## Introduction 4 | 5 | ``` 6 | @inproceedings{SunXLW19, 7 | title={Deep High-Resolution Representation Learning for Human Pose Estimation}, 8 | author={Ke Sun and Bin Xiao and Dong Liu and Jingdong Wang}, 9 | booktitle={CVPR}, 10 | year={2019} 11 | } 12 | 13 | @article{SunZJCXLMWLW19, 14 | title={High-Resolution Representations for Labeling Pixels and Regions}, 15 | author={Ke Sun and Yang Zhao and Borui Jiang and Tianheng Cheng and Bin Xiao 16 | and Dong Liu and Yadong Mu and Xinggang Wang and Wenyu Liu and Jingdong Wang}, 17 | journal = {CoRR}, 18 | volume = {abs/1904.04514}, 19 | year={2019} 20 | } 21 | ``` 22 | 23 | ## Results and Models 24 | 25 | Faster R-CNN 26 | 27 | | Backbone|#Params|GFLOPs|Lr sched|mAP|Download| 28 | | :--:|:--:|:--:|:--:|:--:|:--:| 29 | | HRNetV2-W18 |26.2M|159.1| 1x | 36.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/faster_rcnn_hrnetv2_w18_fpn_1x_20190522-e368c387.pth)| 30 | | HRNetV2-W18 |26.2M|159.1| 20-23-24e | 38.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/faster_rcnn_hrnetv2_w18_fpn_20_23_24e_20190522-ed3c0293.pth)| 31 | | HRNetV2-W32 |45.0M|245.3| 1x | 39.5 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/faster_rcnn_hrnetv2_w32_fpn_1x_20190522-d22f1fef.pth)| 32 | | HRNetV2-W32 |45.0M|245.3| 
20-23-24e | 40.8 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/faster_rcnn_hrnetv2_w32_fpn_20_23_24e_20190522-2d67a5eb.pth)| 33 | | HRNetV2-W40 |60.5M|314.9| 1x | 40.4 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/faster_rcnn_hrnetv2_w40_fpn_1x_20190522-30502318.pth)| 34 | | HRNetV2-W40 |60.5M|314.9| 20-23-24e | 41.4 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/faster_rcnn_hrnetv2_w40_fpn_20_23_24e_20190522-050a7c7f.pth)| 35 | 36 | 37 | Mask R-CNN 38 | 39 | |Backbone|Lr sched|mask mAP|box mAP|Download| 40 | |:--:|:--:|:--:|:--:|:--:| 41 | | HRNetV2-W18 | 1x | 34.2 | 37.3 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/mask_rcnn_hrnetv2_w18_fpn_1x_20190522-c8ad459f.pth)| 42 | | HRNetV2-W18 | 20-23-24e | 35.7 | 39.2 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/mask_rcnn_hrnetv2_w18_fpn_20_23_24e_20190522-5c11b7f2.pth)| 43 | | HRNetV2-W32 | 1x | 36.8 | 40.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/mask_rcnn_hrnetv2_w32_fpn_1x_20190522-374aaa00.pth)| 44 | | HRNetV2-W32 | 20-23-24e | 37.6 | 42.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/mask_rcnn_hrnetv2_w32_fpn_20_23_24e_20190522-4dd02a79.pth)| 45 | 46 | Cascade R-CNN 47 | 48 | |Backbone|Lr sched|mAP|Download| 49 | |:--:|:--:|:--:|:--:| 50 | | HRNetV2-W32 | 20e | 43.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/cascade_rcnn_hrnetv2_w32_fpn_20e_20190522-55bec4ee.pth)| 51 | 52 | **Note:** 53 | 54 | - HRNetV2 ImageNet pretrained models are in [HRNets for Image Classification](https://github.com/HRNet/HRNet-Image-Classification). 
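The download links above point at plain PyTorch `.pth` files. As a quick sanity check after downloading one, the sketch below only needs `torch`; the key layout described in the comments is the usual mmdetection checkpoint format and may differ slightly per release.

```python
# Sketch: inspect the HRNetV2-W18 Faster R-CNN (1x) checkpoint linked above.
import torch

ckpt = torch.load('faster_rcnn_hrnetv2_w18_fpn_1x_20190522-e368c387.pth',
                  map_location='cpu')
print(ckpt.get('meta', {}))          # typically mmdet/mmcv versions, epoch, class names
state_dict = ckpt['state_dict']
print(len(state_dict), 'parameter tensors')
print(next(iter(state_dict)))        # first parameter name, e.g. a backbone conv weight
```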
55 | --------------------------------------------------------------------------------