├── cls_head_models ├── __init__.py ├── 2fc.py ├── simple2fc.py └── simple3fc.py ├── instaboost ├── .svn │ ├── entries │ ├── format │ ├── wc.db-journal │ ├── wc.db │ └── pristine │ │ ├── c7 │ │ └── c701008e17212a7a6aa964551bf957bba1c3fb95.svn-base │ │ ├── aa │ │ └── aa94ef40fd3d99558c5026f2eaf05a380206b80e.svn-base │ │ └── 9a │ │ └── 9a5e7d16f998e55c36ca225a6b41887910f19387.svn-base ├── __init__.py ├── exceptions.py └── config.py ├── htc.png ├── demo ├── demo.jpg ├── 000000125100.jpg ├── 000000125106.jpg ├── 000000125107.jpg ├── 000000125109.jpg ├── 000000125110.jpg ├── 000000412510.jpg ├── coco_test_12510.jpg ├── corruptions_sev_3.png ├── demo.py └── webcam_demo.py ├── mrcnn-lvis.png ├── mrcnn_cocolt.png ├── is_crowd_id_val.pt ├── lvis_val_cats_info.pt ├── lvis_train_cate_info.pt ├── lvis_train_cats_info.pt ├── mmdet ├── datasets │ ├── registry.py │ ├── loader │ │ ├── __init__.py │ │ └── build_loader.py │ ├── cityscapes.py │ ├── voc.py │ ├── __init__.py │ ├── builder.py │ ├── wider_face.py │ ├── dataset_wrappers.py │ └── utils.py ├── models │ ├── shared_heads │ │ ├── __init__.py │ │ └── res_layer.py │ ├── roi_extractors │ │ └── __init__.py │ ├── necks │ │ └── __init__.py │ ├── plugins │ │ └── __init__.py │ ├── backbones │ │ └── __init__.py │ ├── bbox_heads │ │ └── __init__.py │ ├── registry.py │ ├── utils │ │ ├── scale.py │ │ ├── __init__.py │ │ ├── conv_ws.py │ │ ├── weight_init.py │ │ └── norm.py │ ├── mask_heads │ │ ├── __init__.py │ │ └── htc_mask_head.py │ ├── anchor_heads │ │ ├── __init__.py │ │ └── retina_head.py │ ├── detectors │ │ ├── fcos.py │ │ ├── retinanet.py │ │ ├── faster_rcnn.py │ │ ├── __init__.py │ │ ├── fast_rcnn.py │ │ ├── mask_rcnn.py │ │ └── single_stage.py │ ├── losses │ │ ├── mse_loss.py │ │ ├── accuracy.py │ │ ├── __init__.py │ │ ├── smooth_l1_loss.py │ │ ├── balanced_l1_loss.py │ │ └── focal_loss.py │ ├── __init__.py │ └── builder.py ├── ops │ ├── nms │ │ ├── __init__.py │ │ ├── src │ │ │ ├── nms_cuda.cpp │ │ │ └── nms_cpu.cpp │ │ └── nms_wrapper.py │ ├── roi_pool │ │ ├── __init__.py │ │ ├── gradcheck.py │ │ └── roi_pool.py │ ├── roi_align │ │ ├── __init__.py │ │ └── gradcheck.py │ ├── masked_conv │ │ ├── __init__.py │ │ └── src │ │ │ └── masked_conv2d_cuda.cpp │ ├── sigmoid_focal_loss │ │ ├── __init__.py │ │ ├── sigmoid_focal_loss.py │ │ └── src │ │ │ └── sigmoid_focal_loss.cpp │ ├── dcn │ │ └── __init__.py │ └── __init__.py ├── __init__.py ├── utils │ ├── __init__.py │ └── registry.py ├── core │ ├── mask │ │ ├── __init__.py │ │ ├── utils.py │ │ └── mask_target.py │ ├── fp16 │ │ ├── __init__.py │ │ └── utils.py │ ├── utils │ │ ├── __init__.py │ │ ├── misc.py │ │ └── dist_utils.py │ ├── bbox │ │ ├── assigners │ │ │ ├── base_assigner.py │ │ │ ├── __init__.py │ │ │ └── assign_result.py │ │ ├── samplers │ │ │ ├── combined_sampler.py │ │ │ ├── __init__.py │ │ │ ├── sampling_result.py │ │ │ ├── pseudo_sampler.py │ │ │ ├── instance_balanced_pos_sampler.py │ │ │ ├── random_sampler.py │ │ │ ├── ohem_sampler.py │ │ │ └── base_sampler.py │ │ ├── __init__.py │ │ ├── assign_sampling.py │ │ └── geometry.py │ ├── __init__.py │ ├── anchor │ │ └── __init__.py │ ├── post_processing │ │ ├── __init__.py │ │ └── bbox_nms.py │ └── evaluation │ │ ├── __init__.py.bk │ │ ├── __init__.py │ │ └── bbox_overlaps.py └── apis │ ├── __init__.py │ └── env.py ├── cls_id_683_top_sim_classes.pt ├── class_to_imageid_and_inscount.pt ├── lvis_api ├── .gitignore ├── requirements.txt ├── lvis │ └── __init__.py ├── test.py ├── setup.py ├── LICENSE └── README.md ├── class_to_imageid_and_inscount_new.pt 
├── class_to_imageid_and_inscount_val.pt ├── tools ├── stop_para_test.sh ├── dist_train.sh ├── dist_test.sh ├── dist_test_htc.sh ├── start_para_test_gpu4.sh ├── start_para_test_gpu8.sh ├── slurm_test.sh ├── slurm_train.sh ├── draw_cls_dist_coco.py ├── coco_eval.py ├── configs │ ├── empirical_attention │ │ └── README.md │ ├── scratch │ │ └── README.md │ ├── wider_face │ │ └── README.md │ ├── ghm │ │ └── README.md │ ├── grid_rcnn │ │ └── README.md │ ├── libra_rcnn │ │ └── README.md │ ├── gn │ │ └── README.md │ ├── ms_rcnn │ │ └── README.md │ ├── fcos │ │ └── README.md │ └── hrnet │ │ └── README.md ├── publish_model.py ├── draw_cls_dist_lvis.py ├── draw_pr_recall_bar_lviscoco.py ├── upgrade_model_version.py ├── draw_eAP_sensitivity_binnum.py ├── draw_ft_epoch_ablation.py ├── voc_eval.py ├── draw_comparison_head_design_choices.py └── draw_eAP_sensitivity_eap.py ├── Transparent_Martini_PNG_Clipart-621.png ├── zero_ap_classes_mrcnnr50_boxmask_ag.pt ├── class_to_imageid_and_inscount_val_new.pt ├── lvis_maskrcnn_r50fpn.pkl_per_cat_recall.pt ├── class_to_imageid_and_inscount_coco_sampled.pt ├── .style.yapf ├── exist_categories_in_val_ap_sorted_mrcnn_r101fpn.pt ├── exist_categories_in_val_ap_sorted_mrcnn_r50fpn_props_gt_label.pt ├── .github └── ISSUE_TEMPLATE │ ├── general_questions.md │ ├── feature_request.md │ └── error-report.md ├── .isort.cfg ├── .travis.yml ├── docker └── Dockerfile ├── install.sh ├── configs ├── empirical_attention │ └── README.md ├── pascal_voc │ └── README.md ├── scratch │ └── README.md ├── wider_face │ └── README.md ├── ghm │ └── README.md ├── grid_rcnn │ └── README.md ├── cityscapes │ └── README.md ├── libra_rcnn │ └── README.md ├── gn │ └── README.md ├── ms_rcnn │ └── README.md ├── fcos │ └── README.md └── hrnet │ └── README.md ├── CONTRIBUTING.md ├── ft_cal_epoch_ablation_for_drawing_compose.txt ├── ft_cal_epoch_ablation_for_drawing.txt ├── ft_cat_epoch_ablation_for_drawing.txt └── .gitignore /cls_head_models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /instaboost/.svn/entries: -------------------------------------------------------------------------------- 1 | 12 2 | -------------------------------------------------------------------------------- /instaboost/.svn/format: -------------------------------------------------------------------------------- 1 | 12 2 | -------------------------------------------------------------------------------- /instaboost/.svn/wc.db-journal: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /htc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/htc.png -------------------------------------------------------------------------------- /demo/demo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/demo/demo.jpg -------------------------------------------------------------------------------- /mrcnn-lvis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/mrcnn-lvis.png -------------------------------------------------------------------------------- /mrcnn_cocolt.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/mrcnn_cocolt.png -------------------------------------------------------------------------------- /is_crowd_id_val.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/is_crowd_id_val.pt -------------------------------------------------------------------------------- /demo/000000125100.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/demo/000000125100.jpg -------------------------------------------------------------------------------- /demo/000000125106.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/demo/000000125106.jpg -------------------------------------------------------------------------------- /demo/000000125107.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/demo/000000125107.jpg -------------------------------------------------------------------------------- /demo/000000125109.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/demo/000000125109.jpg -------------------------------------------------------------------------------- /demo/000000125110.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/demo/000000125110.jpg -------------------------------------------------------------------------------- /demo/000000412510.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/demo/000000412510.jpg -------------------------------------------------------------------------------- /instaboost/.svn/wc.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/instaboost/.svn/wc.db -------------------------------------------------------------------------------- /lvis_val_cats_info.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/lvis_val_cats_info.pt -------------------------------------------------------------------------------- /demo/coco_test_12510.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/demo/coco_test_12510.jpg -------------------------------------------------------------------------------- /lvis_train_cate_info.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/lvis_train_cate_info.pt -------------------------------------------------------------------------------- /lvis_train_cats_info.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/lvis_train_cats_info.pt -------------------------------------------------------------------------------- /demo/corruptions_sev_3.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/twangnh/SimCal/HEAD/demo/corruptions_sev_3.png -------------------------------------------------------------------------------- /mmdet/datasets/registry.py: -------------------------------------------------------------------------------- 1 | from mmdet.utils import Registry 2 | 3 | DATASETS = Registry('dataset') 4 | -------------------------------------------------------------------------------- /cls_id_683_top_sim_classes.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/cls_id_683_top_sim_classes.pt -------------------------------------------------------------------------------- /mmdet/models/shared_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .res_layer import ResLayer 2 | 3 | __all__ = ['ResLayer'] 4 | -------------------------------------------------------------------------------- /mmdet/ops/nms/__init__.py: -------------------------------------------------------------------------------- 1 | from .nms_wrapper import nms, soft_nms 2 | 3 | __all__ = ['nms', 'soft_nms'] 4 | -------------------------------------------------------------------------------- /class_to_imageid_and_inscount.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/class_to_imageid_and_inscount.pt -------------------------------------------------------------------------------- /lvis_api/.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints 2 | __pycache__ 3 | .DS_Store 4 | dist/* 5 | lvis.egg-info/ 6 | build/* 7 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/__init__.py: -------------------------------------------------------------------------------- 1 | from .roi_pool import RoIPool, roi_pool 2 | 3 | __all__ = ['roi_pool', 'RoIPool'] 4 | -------------------------------------------------------------------------------- /class_to_imageid_and_inscount_new.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/class_to_imageid_and_inscount_new.pt -------------------------------------------------------------------------------- /class_to_imageid_and_inscount_val.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/class_to_imageid_and_inscount_val.pt -------------------------------------------------------------------------------- /mmdet/__init__.py: -------------------------------------------------------------------------------- 1 | from .version import __version__, short_version 2 | 3 | __all__ = ['__version__', 'short_version'] 4 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/__init__.py: -------------------------------------------------------------------------------- 1 | from .roi_align import RoIAlign, roi_align 2 | 3 | __all__ = ['roi_align', 'RoIAlign'] 4 | -------------------------------------------------------------------------------- /tools/stop_para_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | for idx in 0 1 2 3 4 5 6 7 3 | do 4 | tmux kill-session -t "set$idx" 5 | done 6 | 
-------------------------------------------------------------------------------- /Transparent_Martini_PNG_Clipart-621.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/Transparent_Martini_PNG_Clipart-621.png -------------------------------------------------------------------------------- /mmdet/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .registry import Registry, build_from_cfg 2 | 3 | __all__ = ['Registry', 'build_from_cfg'] 4 | -------------------------------------------------------------------------------- /zero_ap_classes_mrcnnr50_boxmask_ag.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/zero_ap_classes_mrcnnr50_boxmask_ag.pt -------------------------------------------------------------------------------- /class_to_imageid_and_inscount_val_new.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/class_to_imageid_and_inscount_val_new.pt -------------------------------------------------------------------------------- /mmdet/models/roi_extractors/__init__.py: -------------------------------------------------------------------------------- 1 | from .single_level import SingleRoIExtractor 2 | 3 | __all__ = ['SingleRoIExtractor'] 4 | -------------------------------------------------------------------------------- /instaboost/__init__.py: -------------------------------------------------------------------------------- 1 | from .InstaBoost import * 2 | from .config import * 3 | 4 | __all__ = ['get_new_data', 'InstaBoostConfig'] 5 | -------------------------------------------------------------------------------- /lvis_maskrcnn_r50fpn.pkl_per_cat_recall.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/lvis_maskrcnn_r50fpn.pkl_per_cat_recall.pt -------------------------------------------------------------------------------- /class_to_imageid_and_inscount_coco_sampled.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/class_to_imageid_and_inscount_coco_sampled.pt -------------------------------------------------------------------------------- /mmdet/ops/masked_conv/__init__.py: -------------------------------------------------------------------------------- 1 | from .masked_conv import MaskedConv2d, masked_conv2d 2 | 3 | __all__ = ['masked_conv2d', 'MaskedConv2d'] 4 | -------------------------------------------------------------------------------- /.style.yapf: -------------------------------------------------------------------------------- 1 | [style] 2 | BASED_ON_STYLE = pep8 3 | BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true 4 | SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true 5 | -------------------------------------------------------------------------------- /exist_categories_in_val_ap_sorted_mrcnn_r101fpn.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/exist_categories_in_val_ap_sorted_mrcnn_r101fpn.pt -------------------------------------------------------------------------------- /mmdet/models/necks/__init__.py: -------------------------------------------------------------------------------- 1 | 
from .bfp import BFP 2 | from .fpn import FPN 3 | from .hrfpn import HRFPN 4 | 5 | __all__ = ['FPN', 'BFP', 'HRFPN'] 6 | -------------------------------------------------------------------------------- /mmdet/core/mask/__init__.py: -------------------------------------------------------------------------------- 1 | from .mask_target import mask_target 2 | from .utils import split_combined_polys 3 | 4 | __all__ = ['split_combined_polys', 'mask_target'] 5 | -------------------------------------------------------------------------------- /exist_categories_in_val_ap_sorted_mrcnn_r50fpn_props_gt_label.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twangnh/SimCal/HEAD/exist_categories_in_val_ap_sorted_mrcnn_r50fpn_props_gt_label.pt -------------------------------------------------------------------------------- /mmdet/ops/sigmoid_focal_loss/__init__.py: -------------------------------------------------------------------------------- 1 | from .sigmoid_focal_loss import SigmoidFocalLoss, sigmoid_focal_loss 2 | 3 | __all__ = ['SigmoidFocalLoss', 'sigmoid_focal_loss'] 4 | -------------------------------------------------------------------------------- /mmdet/models/plugins/__init__.py: -------------------------------------------------------------------------------- 1 | from .generalized_attention import GeneralizedAttention 2 | from .non_local import NonLocal2D 3 | 4 | __all__ = ['NonLocal2D', 'GeneralizedAttention'] 5 | -------------------------------------------------------------------------------- /instaboost/.svn/pristine/c7/c701008e17212a7a6aa964551bf957bba1c3fb95.svn-base: -------------------------------------------------------------------------------- 1 | from .InstaBoost import * 2 | from .config import * 3 | 4 | __all__ = ['get_new_data', 'InstaBoostConfig'] 5 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/general_questions.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: General questions 3 | about: Ask general questions to get help 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | 11 | -------------------------------------------------------------------------------- /mmdet/core/fp16/__init__.py: -------------------------------------------------------------------------------- 1 | from .decorators import auto_fp16, force_fp32 2 | from .hooks import Fp16OptimizerHook, wrap_fp16_model 3 | 4 | __all__ = ['auto_fp16', 'force_fp32', 'Fp16OptimizerHook', 'wrap_fp16_model'] 5 | -------------------------------------------------------------------------------- /lvis_api/requirements.txt: -------------------------------------------------------------------------------- 1 | cycler==0.10.0 2 | Cython==0.29.12 3 | kiwisolver==1.1.0 4 | matplotlib==3.1.1 5 | numpy==1.16.4 6 | opencv-python==4.1.0.25 7 | pyparsing==2.4.0 8 | python-dateutil==2.8.0 9 | six==1.12.0 10 | -------------------------------------------------------------------------------- /mmdet/datasets/loader/__init__.py: -------------------------------------------------------------------------------- 1 | from .build_loader import build_dataloader 2 | from .sampler import GroupSampler, DistributedGroupSampler 3 | 4 | __all__ = ['GroupSampler', 'DistributedGroupSampler', 'build_dataloader'] 5 | -------------------------------------------------------------------------------- /mmdet/models/backbones/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .hrnet import HRNet 2 | from .resnet import ResNet, make_res_layer 3 | from .resnext import ResNeXt 4 | from .ssd_vgg import SSDVGG 5 | 6 | __all__ = ['ResNet', 'make_res_layer', 'ResNeXt', 'SSDVGG', 'HRNet'] 7 | -------------------------------------------------------------------------------- /tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | PYTHON=${PYTHON:-"python"} 4 | 5 | CONFIG=$1 6 | GPUS=$2 7 | 8 | $PYTHON -m torch.distributed.launch --nproc_per_node=$GPUS \ 9 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} 10 | -------------------------------------------------------------------------------- /mmdet/core/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .dist_utils import DistOptimizerHook, allreduce_grads 2 | from .misc import multi_apply, tensor2imgs, unmap 3 | 4 | __all__ = [ 5 | 'allreduce_grads', 'DistOptimizerHook', 'tensor2imgs', 'unmap', 6 | 'multi_apply' 7 | ] 8 | -------------------------------------------------------------------------------- /.isort.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | line_length = 79 3 | multi_line_output = 0 4 | known_first_party = mmdet 5 | known_third_party = mmcv,numpy,matplotlib,pycocotools,six,seaborn,terminaltables,torch,torchvision 6 | no_lines_before = STDLIB,LOCALFOLDER 7 | default_section = THIRDPARTY -------------------------------------------------------------------------------- /mmdet/core/bbox/assigners/base_assigner.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | 4 | class BaseAssigner(metaclass=ABCMeta): 5 | 6 | @abstractmethod 7 | def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None): 8 | pass 9 | -------------------------------------------------------------------------------- /mmdet/models/bbox_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .bbox_head import BBoxHead 2 | from .convfc_bbox_head import ConvFCBBoxHead, SharedFCBBoxHead 3 | from .double_bbox_head import DoubleConvFCBBoxHead 4 | 5 | __all__ = [ 6 | 'BBoxHead', 'ConvFCBBoxHead', 'SharedFCBBoxHead', 'DoubleConvFCBBoxHead' 7 | ] 8 | -------------------------------------------------------------------------------- /tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | PYTHON=${PYTHON:-"python"} 4 | 5 | CONFIG=$1 6 | GPUS=$2 7 | PORT=${PORT:-29500} 8 | 9 | $PYTHON -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 10 | $(dirname "$0")/test_lvis.py $CONFIG --launcher pytorch ${@:3} -------------------------------------------------------------------------------- /mmdet/datasets/cityscapes.py: -------------------------------------------------------------------------------- 1 | from .coco import CocoDataset 2 | from .registry import DATASETS 3 | 4 | 5 | @DATASETS.register_module 6 | class CityscapesDataset(CocoDataset): 7 | 8 | CLASSES = ('person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle', 9 | 'bicycle') 10 | -------------------------------------------------------------------------------- /tools/dist_test_htc.sh: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/env bash 2 | 3 | PYTHON=${PYTHON:-"python"} 4 | 5 | CONFIG=$1 6 | GPUS=$2 7 | PORT=${PORT:-29500} 8 | 9 | $PYTHON -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 10 | $(dirname "$0")/test_lvis_htc.py $CONFIG --launcher pytorch ${@:3} -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: xenial 2 | language: python 3 | 4 | install: 5 | - pip install isort flake8 yapf 6 | 7 | python: 8 | - "3.5" 9 | - "3.6" 10 | - "3.7" 11 | 12 | script: 13 | - flake8 14 | - isort -rc --check-only --diff mmdet/ tools/ 15 | - yapf -r -d --style .style.yapf mmdet/ tools/ -------------------------------------------------------------------------------- /mmdet/models/registry.py: -------------------------------------------------------------------------------- 1 | from mmdet.utils import Registry 2 | 3 | BACKBONES = Registry('backbone') 4 | NECKS = Registry('neck') 5 | ROI_EXTRACTORS = Registry('roi_extractor') 6 | SHARED_HEADS = Registry('shared_head') 7 | HEADS = Registry('head') 8 | LOSSES = Registry('loss') 9 | DETECTORS = Registry('detector') 10 | -------------------------------------------------------------------------------- /mmdet/core/bbox/assigners/__init__.py: -------------------------------------------------------------------------------- 1 | from .approx_max_iou_assigner import ApproxMaxIoUAssigner 2 | from .assign_result import AssignResult 3 | from .base_assigner import BaseAssigner 4 | from .max_iou_assigner import MaxIoUAssigner 5 | 6 | __all__ = [ 7 | 'BaseAssigner', 'MaxIoUAssigner', 'ApproxMaxIoUAssigner', 'AssignResult' 8 | ] 9 | -------------------------------------------------------------------------------- /mmdet/core/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor import * # noqa: F401, F403 2 | from .bbox import * # noqa: F401, F403 3 | from .evaluation import * # noqa: F401, F403 4 | from .fp16 import * # noqa: F401, F403 5 | from .mask import * # noqa: F401, F403 6 | from .post_processing import * # noqa: F401, F403 7 | from .utils import * # noqa: F401, F403 8 | -------------------------------------------------------------------------------- /mmdet/core/anchor/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor_generator import AnchorGenerator 2 | from .anchor_target import anchor_inside_flags, anchor_target 3 | from .guided_anchor_target import ga_loc_target, ga_shape_target 4 | 5 | __all__ = [ 6 | 'AnchorGenerator', 'anchor_target', 'anchor_inside_flags', 'ga_loc_target', 7 | 'ga_shape_target' 8 | ] 9 | -------------------------------------------------------------------------------- /mmdet/core/post_processing/__init__.py: -------------------------------------------------------------------------------- 1 | from .bbox_nms import multiclass_nms 2 | from .merge_augs import (merge_aug_bboxes, merge_aug_masks, 3 | merge_aug_proposals, merge_aug_scores) 4 | 5 | __all__ = [ 6 | 'multiclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes', 7 | 'merge_aug_scores', 'merge_aug_masks' 8 | ] 9 | -------------------------------------------------------------------------------- /mmdet/models/utils/scale.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class Scale(nn.Module): 6 | 7 | def __init__(self, scale=1.0): 8 | 
super(Scale, self).__init__() 9 | self.scale = nn.Parameter(torch.tensor(scale, dtype=torch.float)) 10 | 11 | def forward(self, x): 12 | return x * self.scale 13 | -------------------------------------------------------------------------------- /mmdet/models/mask_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .fcn_mask_head import FCNMaskHead 2 | from .fused_semantic_head import FusedSemanticHead 3 | from .grid_head import GridHead 4 | from .htc_mask_head import HTCMaskHead 5 | from .maskiou_head import MaskIoUHead 6 | 7 | __all__ = [ 8 | 'FCNMaskHead', 'HTCMaskHead', 'FusedSemanticHead', 'GridHead', 9 | 'MaskIoUHead' 10 | ] 11 | -------------------------------------------------------------------------------- /instaboost/exceptions.py: -------------------------------------------------------------------------------- 1 | class TrimapError(Exception): 2 | """ 3 | Error when creating matting trimap. 4 | """ 5 | def __init__(self, err): 6 | super(TrimapError, self).__init__(err) 7 | 8 | 9 | class AnnError(Exception): 10 | """ 11 | Error with Input annotation. 12 | """ 13 | def __init__(self, err): 14 | super(AnnError, self).__init__(err) 15 | -------------------------------------------------------------------------------- /lvis_api/lvis/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from .lvis import LVIS 3 | from .results import LVISResults 4 | from .eval import LVISEval 5 | from .vis import LVISVis 6 | 7 | logging.basicConfig( 8 | format="[%(asctime)s] %(name)s %(levelname)s: %(message)s", datefmt="%m/%d %H:%M:%S", 9 | level=logging.WARN, 10 | ) 11 | 12 | __all__ = ["LVIS", "LVISResults", "LVISEval", "LVISVis"] 13 | -------------------------------------------------------------------------------- /tools/start_para_test_gpu4.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | for idx in 0 1 2 3 3 | do 4 | tmux new-session -d -s "set$idx" \; 5 | send-keys "mmd" Enter \; 6 | send-keys "python ./tools/test_lvis.py configs/mask_rcnn_r50_fpn_1x_lvis.py /home/wangtao/prj/liyu_mmdet/work_dirs/mask_rcnn_r50_fpn_1x_lvis_liyu_finetune_imglevelsampler/epoch_12.pth --out ./set$idx.pkl --eval segm --set $idx" Enter \; 7 | done 8 | 9 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG PYTORCH="1.1.0" 2 | ARG CUDA="10.0" 3 | ARG CUDNN="7.5" 4 | 5 | FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel 6 | 7 | RUN apt-get update && apt-get install -y libglib2.0-0 libsm6 libxrender-dev libxext6 8 | 9 | # Install mmdetection 10 | RUN conda install cython -y 11 | RUN git clone https://github.com/open-mmlab/mmdetection.git /mmdetection 12 | WORKDIR /mmdetection 13 | RUN pip install -e . 14 | -------------------------------------------------------------------------------- /instaboost/.svn/pristine/aa/aa94ef40fd3d99558c5026f2eaf05a380206b80e.svn-base: -------------------------------------------------------------------------------- 1 | class TrimapError(Exception): 2 | """ 3 | Error when creating matting trimap. 4 | """ 5 | def __init__(self, err): 6 | super(TrimapError, self).__init__(err) 7 | 8 | 9 | class AnnError(Exception): 10 | """ 11 | Error with Input annotation. 
12 | """ 13 | def __init__(self, err): 14 | super(AnnError, self).__init__(err) 15 | -------------------------------------------------------------------------------- /tools/start_para_test_gpu8.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | config=$1 3 | model=$2 4 | out=$3 5 | 6 | for idx in 0 1 2 3 4 5 6 7 7 | do 8 | tmux kill-session -t "set$idx" 9 | done 10 | 11 | for idx in 0 1 2 3 4 5 6 7 12 | do 13 | tmux new-session -d -s "set$idx" \; send-keys "mmd" Enter \; send-keys "CUDA_VISIBLE_DEVICES=$idx python ./tools/test_lvis_split_parallel.py $config $model --out ./$out"_set"$idx.pkl --eval segm --set $idx --total_set_num 8" Enter \; 14 | done 15 | 16 | -------------------------------------------------------------------------------- /mmdet/models/anchor_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor_head import AnchorHead 2 | from .fcos_head import FCOSHead 3 | from .ga_retina_head import GARetinaHead 4 | from .ga_rpn_head import GARPNHead 5 | from .guided_anchor_head import FeatureAdaption, GuidedAnchorHead 6 | from .retina_head import RetinaHead 7 | from .rpn_head import RPNHead 8 | from .ssd_head import SSDHead 9 | 10 | __all__ = [ 11 | 'AnchorHead', 'GuidedAnchorHead', 'FeatureAdaption', 'RPNHead', 12 | 'GARPNHead', 'RetinaHead', 'GARetinaHead', 'SSDHead', 'FCOSHead' 13 | ] 14 | -------------------------------------------------------------------------------- /mmdet/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .conv_module import ConvModule, build_conv_layer 2 | from .conv_ws import ConvWS2d, conv_ws_2d 3 | from .norm import build_norm_layer 4 | from .scale import Scale 5 | from .weight_init import (bias_init_with_prob, kaiming_init, normal_init, 6 | uniform_init, xavier_init) 7 | 8 | __all__ = [ 9 | 'conv_ws_2d', 'ConvWS2d', 'build_conv_layer', 'ConvModule', 10 | 'build_norm_layer', 'xavier_init', 'normal_init', 'uniform_init', 11 | 'kaiming_init', 'bias_init_with_prob', 'Scale' 12 | ] 13 | -------------------------------------------------------------------------------- /lvis_api/test.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from lvis import LVIS, LVISResults, LVISEval 3 | 4 | # result and val files for 100 randomly sampled images. 
5 | # ANNOTATION_PATH = "./data/lvis_val_100.json" 6 | # RESULT_PATH = "./data/lvis_results_100.json" 7 | ANNOTATION_PATH = "./data/lvis/lvis_v0.5_val.json" 8 | RESULT_PATH = './debug_file.pkl.segm.json' 9 | # RESULT_PATH = './mask_rcnn_r101_fpn_1x_lvis.pkl.segm.json' 10 | ANN_TYPE = 'segm' 11 | 12 | lvis_eval = LVISEval(ANNOTATION_PATH, RESULT_PATH, ANN_TYPE) 13 | lvis_eval.run() 14 | lvis_eval.print_results(True) 15 | -------------------------------------------------------------------------------- /mmdet/models/detectors/fcos.py: -------------------------------------------------------------------------------- 1 | from ..registry import DETECTORS 2 | from .single_stage import SingleStageDetector 3 | 4 | 5 | @DETECTORS.register_module 6 | class FCOS(SingleStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | neck, 11 | bbox_head, 12 | train_cfg=None, 13 | test_cfg=None, 14 | pretrained=None): 15 | super(FCOS, self).__init__(backbone, neck, bbox_head, train_cfg, 16 | test_cfg, pretrained) 17 | -------------------------------------------------------------------------------- /mmdet/models/detectors/retinanet.py: -------------------------------------------------------------------------------- 1 | from ..registry import DETECTORS 2 | from .single_stage import SingleStageDetector 3 | 4 | 5 | @DETECTORS.register_module 6 | class RetinaNet(SingleStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | neck, 11 | bbox_head, 12 | train_cfg=None, 13 | test_cfg=None, 14 | pretrained=None): 15 | super(RetinaNet, self).__init__(backbone, neck, bbox_head, train_cfg, 16 | test_cfg, pretrained) 17 | -------------------------------------------------------------------------------- /mmdet/apis/__init__.py: -------------------------------------------------------------------------------- 1 | from .env import get_root_logger, init_dist, set_random_seed 2 | from .inference import (inference_detector, init_detector, show_result, 3 | show_result_pyplot) 4 | # from .train import train_detector 5 | from .train_new import train_detector as train_detector_calibration 6 | from .train_orig import train_detector as train_detector_normal 7 | __all__ = [ 8 | 'init_dist', 'get_root_logger', 'set_random_seed', 'train_detector_calibration', 'train_detector_normal', 9 | 'init_detector', 'inference_detector', 'show_result', 'show_result_pyplot' 10 | ] 11 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/gradcheck.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | import torch 5 | from torch.autograd import gradcheck 6 | 7 | sys.path.append(osp.abspath(osp.join(__file__, '../../'))) 8 | from roi_pool import RoIPool # noqa: E402, isort:skip 9 | 10 | feat = torch.randn(4, 16, 15, 15, requires_grad=True).cuda() 11 | rois = torch.Tensor([[0, 0, 0, 50, 50], [0, 10, 30, 43, 55], 12 | [1, 67, 40, 110, 120]]).cuda() 13 | inputs = (feat, rois) 14 | print('Gradcheck for roi pooling...') 15 | test = gradcheck(RoIPool(4, 1.0 / 8), inputs, eps=1e-5, atol=1e-3) 16 | print(test) 17 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/combined_sampler.py: -------------------------------------------------------------------------------- 1 | from ..assign_sampling import build_sampler 2 | from .base_sampler import BaseSampler 3 | 4 | 5 | class CombinedSampler(BaseSampler): 6 | 7 | def __init__(self, pos_sampler, neg_sampler, **kwargs): 8 | 
super(CombinedSampler, self).__init__(**kwargs) 9 | self.pos_sampler = build_sampler(pos_sampler, **kwargs) 10 | self.neg_sampler = build_sampler(neg_sampler, **kwargs) 11 | 12 | def _sample_pos(self, **kwargs): 13 | raise NotImplementedError 14 | 15 | def _sample_neg(self, **kwargs): 16 | raise NotImplementedError 17 | -------------------------------------------------------------------------------- /install.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | conda create -n simcal_mmdet python=3.7 3 | source ~/anaconda3/etc/profile.d/conda.sh 4 | conda init bash 5 | conda activate simcal_mmdet 6 | echo "python path" 7 | which python 8 | conda install pytorch==1.2.0 torchvision==0.4.0 cudatoolkit=9.2 -c pytorch 9 | pip install cython==0.29.12 mmcv==0.2.16 matplotlib terminaltables 10 | pip install "git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI" 11 | pip install opencv-python-headless 12 | pip install Pillow==6.1 13 | pip install numpy==1.17.1 --no-deps 14 | git clone https://github.com/twangnh/SimCal 15 | cd SimCal 16 | pip install -v -e . -------------------------------------------------------------------------------- /tools/slurm_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | CHECKPOINT=$4 9 | GPUS=${GPUS:-8} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | PY_ARGS=${@:5} 13 | SRUN_ARGS=${SRUN_ARGS:-""} 14 | 15 | srun -p ${PARTITION} \ 16 | --job-name=${JOB_NAME} \ 17 | --gres=gpu:${GPUS_PER_NODE} \ 18 | --ntasks=${GPUS} \ 19 | --ntasks-per-node=${GPUS_PER_NODE} \ 20 | --cpus-per-task=${CPUS_PER_TASK} \ 21 | --kill-on-bad-exit=1 \ 22 | ${SRUN_ARGS} \ 23 | python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS} 24 | -------------------------------------------------------------------------------- /mmdet/ops/dcn/__init__.py: -------------------------------------------------------------------------------- 1 | from .deform_conv import (DeformConv, DeformConvPack, ModulatedDeformConv, 2 | ModulatedDeformConvPack, deform_conv, 3 | modulated_deform_conv) 4 | from .deform_pool import (DeformRoIPooling, DeformRoIPoolingPack, 5 | ModulatedDeformRoIPoolingPack, deform_roi_pooling) 6 | 7 | __all__ = [ 8 | 'DeformConv', 'DeformConvPack', 'ModulatedDeformConv', 9 | 'ModulatedDeformConvPack', 'DeformRoIPooling', 'DeformRoIPoolingPack', 10 | 'ModulatedDeformRoIPoolingPack', 'deform_conv', 'modulated_deform_conv', 11 | 'deform_roi_pooling' 12 | ] 13 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_sampler import BaseSampler 2 | from .combined_sampler import CombinedSampler 3 | from .instance_balanced_pos_sampler import InstanceBalancedPosSampler 4 | from .iou_balanced_neg_sampler import IoUBalancedNegSampler 5 | from .ohem_sampler import OHEMSampler 6 | from .pseudo_sampler import PseudoSampler 7 | from .random_sampler import RandomSampler 8 | from .sampling_result import SamplingResult 9 | 10 | __all__ = [ 11 | 'BaseSampler', 'PseudoSampler', 'RandomSampler', 12 | 'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler', 13 | 'OHEMSampler', 'SamplingResult' 14 | ] 15 | -------------------------------------------------------------------------------- 
/tools/slurm_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | WORK_DIR=$4 9 | GPUS=${5:-8} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | SRUN_ARGS=${SRUN_ARGS:-""} 13 | PY_ARGS=${PY_ARGS:-"--validate"} 14 | 15 | srun -p ${PARTITION} \ 16 | --job-name=${JOB_NAME} \ 17 | --gres=gpu:${GPUS_PER_NODE} \ 18 | --ntasks=${GPUS} \ 19 | --ntasks-per-node=${GPUS_PER_NODE} \ 20 | --cpus-per-task=${CPUS_PER_TASK} \ 21 | --kill-on-bad-exit=1 \ 22 | ${SRUN_ARGS} \ 23 | python -u tools/train.py ${CONFIG} --work_dir=${WORK_DIR} --launcher="slurm" ${PY_ARGS} 24 | -------------------------------------------------------------------------------- /mmdet/ops/nms/src/nms_cuda.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #include 3 | 4 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 5 | 6 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh); 7 | 8 | at::Tensor nms(const at::Tensor& dets, const float threshold) { 9 | CHECK_CUDA(dets); 10 | if (dets.numel() == 0) 11 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 12 | return nms_cuda(dets, threshold); 13 | } 14 | 15 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 16 | m.def("nms", &nms, "non-maximum suppression"); 17 | } -------------------------------------------------------------------------------- /mmdet/models/losses/mse_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | from ..registry import LOSSES 5 | from .utils import weighted_loss 6 | 7 | mse_loss = weighted_loss(F.mse_loss) 8 | 9 | 10 | @LOSSES.register_module 11 | class MSELoss(nn.Module): 12 | 13 | def __init__(self, reduction='mean', loss_weight=1.0): 14 | super().__init__() 15 | self.reduction = reduction 16 | self.loss_weight = loss_weight 17 | 18 | def forward(self, pred, target, weight=None, avg_factor=None): 19 | loss = self.loss_weight * mse_loss( 20 | pred, 21 | target, 22 | weight, 23 | reduction=self.reduction, 24 | avg_factor=avg_factor) 25 | return loss 26 | -------------------------------------------------------------------------------- /mmdet/core/bbox/assigners/assign_result.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class AssignResult(object): 5 | 6 | def __init__(self, num_gts, gt_inds, max_overlaps, labels=None): 7 | self.num_gts = num_gts 8 | self.gt_inds = gt_inds 9 | self.max_overlaps = max_overlaps 10 | self.labels = labels 11 | 12 | def add_gt_(self, gt_labels): 13 | self_inds = torch.arange( 14 | 1, len(gt_labels) + 1, dtype=torch.long, device=gt_labels.device) 15 | self.gt_inds = torch.cat([self_inds, self.gt_inds]) 16 | self.max_overlaps = torch.cat( 17 | [self.max_overlaps.new_ones(self.num_gts), self.max_overlaps]) 18 | if self.labels is not None: 19 | self.labels = torch.cat([gt_labels, self.labels]) 20 | -------------------------------------------------------------------------------- /mmdet/core/fp16/utils.py: -------------------------------------------------------------------------------- 1 | from collections import abc 2 | 3 | import numpy as np 4 | import torch 5 | 6 | 7 | def cast_tensor_type(inputs, src_type, dst_type): 8 | if 
isinstance(inputs, torch.Tensor): 9 | return inputs.to(dst_type) 10 | elif isinstance(inputs, str): 11 | return inputs 12 | elif isinstance(inputs, np.ndarray): 13 | return inputs 14 | elif isinstance(inputs, abc.Mapping): 15 | return type(inputs)({ 16 | k: cast_tensor_type(v, src_type, dst_type) 17 | for k, v in inputs.items() 18 | }) 19 | elif isinstance(inputs, abc.Iterable): 20 | return type(inputs)( 21 | cast_tensor_type(item, src_type, dst_type) for item in inputs) 22 | else: 23 | return inputs 24 | -------------------------------------------------------------------------------- /mmdet/datasets/voc.py: -------------------------------------------------------------------------------- 1 | from .registry import DATASETS 2 | from .xml_style import XMLDataset 3 | 4 | 5 | @DATASETS.register_module 6 | class VOCDataset(XMLDataset): 7 | 8 | CLASSES = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 9 | 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 10 | 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 11 | 'tvmonitor') 12 | 13 | def __init__(self, **kwargs): 14 | super(VOCDataset, self).__init__(**kwargs) 15 | if 'VOC2007' in self.img_prefix: 16 | self.year = 2007 17 | elif 'VOC2012' in self.img_prefix: 18 | self.year = 2012 19 | else: 20 | raise ValueError('Cannot infer dataset year from img_prefix') 21 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the feature** 11 | 12 | **Motivation** 13 | A clear and concise description of the motivation of the feature. 14 | Ex1. It is inconvenient when [....]. 15 | Ex2. There is a recent paper [....], which is very helpful for [....]. 16 | 17 | **Related resources** 18 | If there is an official code release or third-party implementations, please also provide the information here, which would be very helpful. 19 | 20 | **Additional context** 21 | Add any other context or screenshots about the feature request here. 22 | If you would like to implement the feature and create a PR, please leave a comment here and that would be much appreciated. 
23 | -------------------------------------------------------------------------------- /tools/draw_cls_dist_coco.py: -------------------------------------------------------------------------------- 1 | from matplotlib import pyplot as plt 2 | import mmcv 3 | import numpy as np 4 | import pickle 5 | 6 | x_name = mmcv.load('./x_name.pkl') 7 | y = mmcv.load('./y.pkl') 8 | 9 | y_coco_sampled = pickle.load(open('./class_to_imageid_and_inscount_coco_sampled.pt', 'rb')) 10 | y_coco_lt = sorted([y_coco_sampled[i]['isntance_count'] for i in range(1,81)])[::-1] 11 | # plt.figure(figsize=(90, 50)) 12 | plt.bar(range(1, 81), y, align='center', alpha=0.5, width=0.8) 13 | plt.grid(color='#95a5a6', linestyle='--', linewidth=2, axis='y', alpha=0.7) 14 | plt.yscale('log') 15 | plt.ylabel('Number of instances') 16 | plt.xlabel('Sorted category index') 17 | # plt.title('') 18 | # plt.xticks(np.arange(len(x_name)), x_name, rotation=45) 19 | plt.savefig('coco_orig_cls_dist.eps', format='eps') 20 | # plt.savefig('coco_sample_cls_dist_1.eps', format='eps') 21 | plt.show() 22 | -------------------------------------------------------------------------------- /mmdet/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .custom import CustomDataset 2 | from .xml_style import XMLDataset 3 | from .coco import CocoDataset 4 | from .lvis import LvisDataset 5 | from .voc import VOCDataset 6 | from .wider_face import WIDERFaceDataset 7 | from .loader import GroupSampler, DistributedGroupSampler, build_dataloader 8 | from .utils import to_tensor, random_scale, show_ann 9 | from .dataset_wrappers import ConcatDataset, RepeatDataset 10 | from .extra_aug import ExtraAugmentation 11 | from .registry import DATASETS 12 | from .builder import build_dataset 13 | 14 | __all__ = [ 15 | 'CustomDataset', 'XMLDataset', 'CocoDataset', 'VOCDataset', 'GroupSampler', 16 | 'DistributedGroupSampler', 'build_dataloader', 'to_tensor', 'random_scale', 17 | 'show_ann', 'ConcatDataset', 'RepeatDataset', 'ExtraAugmentation', 18 | 'WIDERFaceDataset', 'DATASETS', 'build_dataset' 19 | ] 20 | -------------------------------------------------------------------------------- /tools/coco_eval.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser 2 | 3 | from mmdet.core import coco_eval 4 | 5 | 6 | def main(): 7 | parser = ArgumentParser(description='COCO Evaluation') 8 | parser.add_argument('result', help='result file path') 9 | parser.add_argument('--ann', help='annotation file path') 10 | parser.add_argument( 11 | '--types', 12 | type=str, 13 | nargs='+', 14 | choices=['proposal_fast', 'proposal', 'bbox', 'segm', 'keypoint'], 15 | default=['bbox'], 16 | help='result types') 17 | parser.add_argument( 18 | '--max-dets', 19 | type=int, 20 | nargs='+', 21 | default=[100, 300, 1000], 22 | help='proposal numbers, only used for recall evaluation') 23 | args = parser.parse_args() 24 | coco_eval(args.result, args.types, args.ann, args.max_dets) 25 | 26 | 27 | if __name__ == '__main__': 28 | main() 29 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/sampling_result.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class SamplingResult(object): 5 | 6 | def __init__(self, pos_inds, neg_inds, bboxes, gt_bboxes, assign_result, 7 | gt_flags): 8 | self.pos_inds = pos_inds 9 | self.neg_inds = neg_inds 10 | self.pos_bboxes 
= bboxes[pos_inds] 11 | self.neg_bboxes = bboxes[neg_inds] 12 | self.pos_is_gt = gt_flags[pos_inds] 13 | 14 | self.num_gts = gt_bboxes.shape[0] 15 | self.pos_assigned_gt_inds = assign_result.gt_inds[pos_inds] - 1 16 | self.pos_gt_bboxes = gt_bboxes[self.pos_assigned_gt_inds, :] 17 | if assign_result.labels is not None: 18 | self.pos_gt_labels = assign_result.labels[pos_inds] 19 | else: 20 | self.pos_gt_labels = None 21 | 22 | @property 23 | def bboxes(self): 24 | return torch.cat([self.pos_bboxes, self.neg_bboxes]) 25 | -------------------------------------------------------------------------------- /mmdet/models/detectors/faster_rcnn.py: -------------------------------------------------------------------------------- 1 | from ..registry import DETECTORS 2 | from .two_stage import TwoStageDetector 3 | 4 | 5 | @DETECTORS.register_module 6 | class FasterRCNN(TwoStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | rpn_head, 11 | bbox_roi_extractor, 12 | bbox_head, 13 | train_cfg, 14 | test_cfg, 15 | neck=None, 16 | shared_head=None, 17 | pretrained=None): 18 | super(FasterRCNN, self).__init__( 19 | backbone=backbone, 20 | neck=neck, 21 | shared_head=shared_head, 22 | rpn_head=rpn_head, 23 | bbox_roi_extractor=bbox_roi_extractor, 24 | bbox_head=bbox_head, 25 | train_cfg=train_cfg, 26 | test_cfg=test_cfg, 27 | pretrained=pretrained) 28 | -------------------------------------------------------------------------------- /configs/empirical_attention/README.md: -------------------------------------------------------------------------------- 1 | # An Empirical Study of Spatial Attention Mechanisms in Deep Networks 2 | 3 | ## Introduction 4 | 5 | ``` 6 | @article{zhu2019empirical, 7 | title={An Empirical Study of Spatial Attention Mechanisms in Deep Networks}, 8 | author={Zhu, Xizhou and Cheng, Dazhi and Zhang, Zheng and Lin, Stephen and Dai, Jifeng}, 9 | journal={arXiv preprint arXiv:1904.05873}, 10 | year={2019} 11 | } 12 | ``` 13 | 14 | 15 | ## Results and Models 16 | 17 | | Backbone | Attention Component | DCN | Lr schd | box AP | Download | 18 | |:---------:|:-------------------:|:----:|:-------:|:------:|:--------:| 19 | | R-50 | 1111 | N | 1x | 38.6 | - | 20 | | R-50 | 0010 | N | 1x | 38.2 | - | 21 | | R-50 | 1111 | Y | 1x | 41.0 | - | 22 | | R-50 | 0010 | Y | 1x | 40.8 | - | 23 | 24 | -------------------------------------------------------------------------------- /configs/pascal_voc/README.md: -------------------------------------------------------------------------------- 1 | ### SSD 2 | 3 | | Backbone | Size | Style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download | 4 | | :------: | :---: | :---: | :-----: | :------: | :-----------------: | :------------: | :----: | :------------------------------------------------------------------------------------------------------------------------------: | 5 | | VGG16 | 300 | caffe | 240e | 2.5 | 0.159 | 35.7 / 53.6 | 77.5 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ssd300_voc_vgg16_caffe_240e_20190501-7160d09a.pth) | 6 | | VGG16 | 512 | caffe | 240e | 4.3 | 0.214 | 27.5 / 35.9 | 80.0 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ssd512_voc_vgg16_caffe_240e_20190501-ff194be1.pth) | -------------------------------------------------------------------------------- /tools/configs/empirical_attention/README.md: -------------------------------------------------------------------------------- 1 | # An Empirical Study of Spatial Attention Mechanisms in 
Deep Networks 2 | 3 | ## Introduction 4 | 5 | ``` 6 | @article{zhu2019empirical, 7 | title={An Empirical Study of Spatial Attention Mechanisms in Deep Networks}, 8 | author={Zhu, Xizhou and Cheng, Dazhi and Zhang, Zheng and Lin, Stephen and Dai, Jifeng}, 9 | journal={arXiv preprint arXiv:1904.05873}, 10 | year={2019} 11 | } 12 | ``` 13 | 14 | 15 | ## Results and Models 16 | 17 | | Backbone | Attention Component | DCN | Lr schd | box AP | Download | 18 | |:---------:|:-------------------:|:----:|:-------:|:------:|:--------:| 19 | | R-50 | 1111 | N | 1x | 38.6 | - | 20 | | R-50 | 0010 | N | 1x | 38.2 | - | 21 | | R-50 | 1111 | Y | 1x | 41.0 | - | 22 | | R-50 | 0010 | Y | 1x | 40.8 | - | 23 | 24 | -------------------------------------------------------------------------------- /mmdet/models/losses/accuracy.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | def accuracy(pred, target, topk=1): 5 | assert isinstance(topk, (int, tuple)) 6 | if isinstance(topk, int): 7 | topk = (topk, ) 8 | return_single = True 9 | else: 10 | return_single = False 11 | 12 | maxk = max(topk) 13 | _, pred_label = pred.topk(maxk, dim=1) 14 | pred_label = pred_label.t() 15 | correct = pred_label.eq(target.view(1, -1).expand_as(pred_label)) 16 | 17 | res = [] 18 | for k in topk: 19 | correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) 20 | res.append(correct_k.mul_(100.0 / pred.size(0))) 21 | return res[0] if return_single else res 22 | 23 | 24 | class Accuracy(nn.Module): 25 | 26 | def __init__(self, topk=(1, )): 27 | super().__init__() 28 | self.topk = topk 29 | 30 | def forward(self, pred, target): 31 | return accuracy(pred, target, self.topk) 32 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/pseudo_sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .base_sampler import BaseSampler 4 | from .sampling_result import SamplingResult 5 | 6 | 7 | class PseudoSampler(BaseSampler): 8 | 9 | def __init__(self, **kwargs): 10 | pass 11 | 12 | def _sample_pos(self, **kwargs): 13 | raise NotImplementedError 14 | 15 | def _sample_neg(self, **kwargs): 16 | raise NotImplementedError 17 | 18 | def sample(self, assign_result, bboxes, gt_bboxes, **kwargs): 19 | pos_inds = torch.nonzero( 20 | assign_result.gt_inds > 0).squeeze(-1).unique() 21 | neg_inds = torch.nonzero( 22 | assign_result.gt_inds == 0).squeeze(-1).unique() 23 | gt_flags = bboxes.new_zeros(bboxes.shape[0], dtype=torch.uint8) 24 | sampling_result = SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes, 25 | assign_result, gt_flags) 26 | return sampling_result 27 | -------------------------------------------------------------------------------- /mmdet/models/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import BaseDetector 2 | from .cascade_rcnn import CascadeRCNN 3 | from .double_head_rcnn import DoubleHeadRCNN 4 | from .fast_rcnn import FastRCNN 5 | from .faster_rcnn import FasterRCNN 6 | from .fcos import FCOS 7 | from .grid_rcnn import GridRCNN 8 | from .htc import HybridTaskCascade 9 | from .mask_rcnn import MaskRCNN_calibration 10 | from .mask_rcnn import MaskRCNN_normal 11 | from .mask_scoring_rcnn import MaskScoringRCNN 12 | from .retinanet import RetinaNet 13 | from .rpn import RPN 14 | from .single_stage import SingleStageDetector 15 | # from .two_stage import TwoStageDetector 16 
| from .two_stage_calibration import TwoStageDetector 17 | 18 | __all__ = [ 19 | 'BaseDetector', 'SingleStageDetector', 'TwoStageDetector', 'RPN', 20 | 'FastRCNN', 'FasterRCNN', 'MaskRCNN_normal', 'MaskRCNN_calibration', 'CascadeRCNN', 'HybridTaskCascade', 21 | 'DoubleHeadRCNN', 'RetinaNet', 'FCOS', 'GridRCNN', 'MaskScoringRCNN' 22 | ] 23 | -------------------------------------------------------------------------------- /mmdet/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor_heads import * # noqa: F401,F403 2 | from .backbones import * # noqa: F401,F403 3 | from .bbox_heads import * # noqa: F401,F403 4 | from .builder import (build_backbone, build_detector, build_head, build_loss, 5 | build_neck, build_roi_extractor, build_shared_head) 6 | from .detectors import * # noqa: F401,F403 7 | from .losses import * # noqa: F401,F403 8 | from .mask_heads import * # noqa: F401,F403 9 | from .necks import * # noqa: F401,F403 10 | from .registry import (BACKBONES, DETECTORS, HEADS, LOSSES, NECKS, 11 | ROI_EXTRACTORS, SHARED_HEADS) 12 | from .roi_extractors import * # noqa: F401,F403 13 | from .shared_heads import * # noqa: F401,F403 14 | 15 | __all__ = [ 16 | 'BACKBONES', 'NECKS', 'ROI_EXTRACTORS', 'SHARED_HEADS', 'HEADS', 'LOSSES', 17 | 'DETECTORS', 'build_backbone', 'build_neck', 'build_roi_extractor', 18 | 'build_shared_head', 'build_head', 'build_loss', 'build_detector' 19 | ] 20 | -------------------------------------------------------------------------------- /configs/scratch/README.md: -------------------------------------------------------------------------------- 1 | # Rethinking ImageNet Pre-training 2 | 3 | ## Introduction 4 | 5 | ``` 6 | @article{he2018rethinking, 7 | title={Rethinking imagenet pre-training}, 8 | author={He, Kaiming and Girshick, Ross and Doll{\'a}r, Piotr}, 9 | journal={arXiv preprint arXiv:1811.08883}, 10 | year={2018} 11 | } 12 | ``` 13 | 14 | ## Results and Models 15 | 16 | | Model | Backbone | Style | Lr schd | box AP | mask AP | Download | 17 | |:------------:|:---------:|:-------:|:-------:|:------:|:-------:|:--------:| 18 | | Faster R-CNN | R-50-FPN | pytorch | 6x | 40.1 | - | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/scratch/scratch_faster_rcnn_r50_fpn_gn_6x_20190515-ff554978.pth) | 19 | | Mask R-CNN | R-50-FPN | pytorch | 6x | 41.0 | 37.4 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/scratch/scratch_mask_rcnn_r50_fpn_gn_6x_20190515-96743f5e.pth) | 20 | 21 | Note: 22 | - The above models are trained with 16 GPUs. 
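The checkpoints linked in the table can be used directly for inference with the high-level API shown in `demo/demo.py`. The snippet below is only a sketch: the config and checkpoint paths are assumptions and should be replaced with the files you actually download.

```python
# Hedged example: run one of the released scratch-trained models on the demo image.
# The config/checkpoint filenames below are placeholders, not files shipped with this table.
from mmdet.apis import init_detector, inference_detector, show_result

config_file = 'configs/scratch/scratch_mask_rcnn_r50_fpn_gn_6x.py'   # assumed config name
checkpoint_file = 'checkpoints/scratch_mask_rcnn_r50_fpn_gn_6x.pth'  # downloaded from the link above

model = init_detector(config_file, checkpoint_file, device='cuda:0')
result = inference_detector(model, 'demo/demo.jpg')
show_result('demo/demo.jpg', result, model.CLASSES, out_file='scratch_mask_rcnn_result.jpg')
```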
-------------------------------------------------------------------------------- /mmdet/ops/__init__.py: -------------------------------------------------------------------------------- 1 | from .context_block import ContextBlock 2 | from .dcn import (DeformConv, DeformConvPack, DeformRoIPooling, 3 | DeformRoIPoolingPack, ModulatedDeformConv, 4 | ModulatedDeformConvPack, ModulatedDeformRoIPoolingPack, 5 | deform_conv, deform_roi_pooling, modulated_deform_conv) 6 | from .masked_conv import MaskedConv2d 7 | from .nms import nms, soft_nms 8 | from .roi_align import RoIAlign, roi_align 9 | from .roi_pool import RoIPool, roi_pool 10 | from .sigmoid_focal_loss import SigmoidFocalLoss, sigmoid_focal_loss 11 | 12 | __all__ = [ 13 | 'nms', 'soft_nms', 'RoIAlign', 'roi_align', 'RoIPool', 'roi_pool', 14 | 'DeformConv', 'DeformConvPack', 'DeformRoIPooling', 'DeformRoIPoolingPack', 15 | 'ModulatedDeformRoIPoolingPack', 'ModulatedDeformConv', 16 | 'ModulatedDeformConvPack', 'deform_conv', 'modulated_deform_conv', 17 | 'deform_roi_pooling', 'SigmoidFocalLoss', 'sigmoid_focal_loss', 18 | 'MaskedConv2d', 'ContextBlock' 19 | ] 20 | -------------------------------------------------------------------------------- /tools/configs/scratch/README.md: -------------------------------------------------------------------------------- 1 | # Rethinking ImageNet Pre-training 2 | 3 | ## Introduction 4 | 5 | ``` 6 | @article{he2018rethinking, 7 | title={Rethinking imagenet pre-training}, 8 | author={He, Kaiming and Girshick, Ross and Doll{\'a}r, Piotr}, 9 | journal={arXiv preprint arXiv:1811.08883}, 10 | year={2018} 11 | } 12 | ``` 13 | 14 | ## Results and Models 15 | 16 | | Model | Backbone | Style | Lr schd | box AP | mask AP | Download | 17 | |:------------:|:---------:|:-------:|:-------:|:------:|:-------:|:--------:| 18 | | Faster R-CNN | R-50-FPN | pytorch | 6x | 40.1 | - | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/scratch/scratch_faster_rcnn_r50_fpn_gn_6x-20190515-ff554978.pth) | 19 | | Mask R-CNN | R-50-FPN | pytorch | 6x | 41.0 | 37.4 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/scratch/scratch_mask_rcnn_r50_fpn_gn_6x_20190515-96743f5e.pth) | 20 | 21 | Note: 22 | - The above models are trained with 16 GPUs. 
-------------------------------------------------------------------------------- /mmdet/ops/roi_align/gradcheck.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | import numpy as np 5 | import torch 6 | from torch.autograd import gradcheck 7 | 8 | sys.path.append(osp.abspath(osp.join(__file__, '../../'))) 9 | from roi_align import RoIAlign # noqa: E402, isort:skip 10 | 11 | feat_size = 15 12 | spatial_scale = 1.0 / 8 13 | img_size = feat_size / spatial_scale 14 | num_imgs = 2 15 | num_rois = 20 16 | 17 | batch_ind = np.random.randint(num_imgs, size=(num_rois, 1)) 18 | rois = np.random.rand(num_rois, 4) * img_size * 0.5 19 | rois[:, 2:] += img_size * 0.5 20 | rois = np.hstack((batch_ind, rois)) 21 | 22 | feat = torch.randn( 23 | num_imgs, 16, feat_size, feat_size, requires_grad=True, device='cuda:0') 24 | rois = torch.from_numpy(rois).float().cuda() 25 | inputs = (feat, rois) 26 | print('Gradcheck for roi align...') 27 | test = gradcheck(RoIAlign(3, spatial_scale), inputs, atol=1e-3, eps=1e-3) 28 | print(test) 29 | test = gradcheck(RoIAlign(3, spatial_scale, 2), inputs, atol=1e-3, eps=1e-3) 30 | print(test) 31 | -------------------------------------------------------------------------------- /mmdet/core/evaluation/__init__.py.bk: -------------------------------------------------------------------------------- 1 | from .class_names import (coco_classes, dataset_aliases, get_classes, 2 | imagenet_det_classes, imagenet_vid_classes, 3 | voc_classes) 4 | from .coco_utils import coco_eval, fast_eval_recall, results2json 5 | from .eval_hooks import (CocoDistEvalmAPHook, CocoDistEvalRecallHook, 6 | DistEvalHook, DistEvalmAPHook) 7 | from .mean_ap import average_precision, eval_map, print_map_summary 8 | from .recall import (eval_recalls, plot_iou_recall, plot_num_recall, 9 | print_recall_summary) 10 | 11 | __all__ = [ 12 | 'voc_classes', 'imagenet_det_classes', 'imagenet_vid_classes', 13 | 'coco_classes', 'dataset_aliases', 'get_classes', 'coco_eval', 14 | 'fast_eval_recall', 'results2json', 'DistEvalHook', 'DistEvalmAPHook', 15 | 'CocoDistEvalRecallHook', 'CocoDistEvalmAPHook', 'average_precision', 16 | 'eval_map', 'print_map_summary', 'eval_recalls', 'print_recall_summary', 17 | 'plot_num_recall', 'plot_iou_recall' 18 | ] 19 | -------------------------------------------------------------------------------- /mmdet/models/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .accuracy import Accuracy, accuracy 2 | from .balanced_l1_loss import BalancedL1Loss, balanced_l1_loss 3 | from .cross_entropy_loss import (CrossEntropyLoss, binary_cross_entropy, 4 | cross_entropy, mask_cross_entropy) 5 | from .focal_loss import FocalLoss, sigmoid_focal_loss 6 | from .ghm_loss import GHMC, GHMR 7 | from .iou_loss import BoundedIoULoss, IoULoss, bounded_iou_loss, iou_loss 8 | from .mse_loss import MSELoss, mse_loss 9 | from .smooth_l1_loss import SmoothL1Loss, smooth_l1_loss 10 | from .utils import reduce_loss, weight_reduce_loss, weighted_loss 11 | 12 | __all__ = [ 13 | 'accuracy', 'Accuracy', 'cross_entropy', 'binary_cross_entropy', 14 | 'mask_cross_entropy', 'CrossEntropyLoss', 'sigmoid_focal_loss', 15 | 'FocalLoss', 'smooth_l1_loss', 'SmoothL1Loss', 'balanced_l1_loss', 16 | 'BalancedL1Loss', 'mse_loss', 'MSELoss', 'iou_loss', 'bounded_iou_loss', 17 | 'IoULoss', 'BoundedIoULoss', 'GHMC', 'GHMR', 'reduce_loss', 18 | 'weight_reduce_loss', 'weighted_loss' 19 | ] 20 | 
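The classes and functions exported above are registered in the `LOSSES` registry, so configs never import them directly; they are built from plain dicts via `build_loss` (see `mmdet/models/builder.py` below). A minimal sketch, assuming the package is installed and using common RetinaNet-style parameter values rather than values taken from a config in this repo:

```python
# Sketch only: building registered losses from config dicts, the way detector configs do.
import torch
from mmdet.models import build_loss

loss_bbox = build_loss(dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
loss_cls = build_loss(dict(type='FocalLoss', use_sigmoid=True, gamma=2.0,
                           alpha=0.25, loss_weight=1.0))

pred = torch.rand(8, 4)
target = torch.rand(8, 4)
print(loss_bbox(pred, target))  # scalar tensor, mean-reduced by default
```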
-------------------------------------------------------------------------------- /mmdet/core/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .class_names import (coco_classes, dataset_aliases, get_classes, 2 | imagenet_det_classes, imagenet_vid_classes, 3 | voc_classes) 4 | from .coco_utils import coco_eval, fast_eval_recall, results2json 5 | from .lvis_utils import lvis_eval, lvis_fast_eval_recall 6 | from .eval_hooks import (CocoDistEvalmAPHook, CocoDistEvalRecallHook, 7 | DistEvalHook, DistEvalmAPHook) 8 | from .mean_ap import average_precision, eval_map, print_map_summary 9 | from .recall import (eval_recalls, plot_iou_recall, plot_num_recall, 10 | print_recall_summary) 11 | 12 | __all__ = [ 13 | 'voc_classes', 'imagenet_det_classes', 'imagenet_vid_classes', 14 | 'coco_classes', 'dataset_aliases', 'get_classes', 'lvis_eval', 'coco_eval', 15 | 'fast_eval_recall','lvis_fast_eval_recall', 'results2json', 'DistEvalHook', 'DistEvalmAPHook', 16 | 'CocoDistEvalRecallHook', 'CocoDistEvalmAPHook', 'average_precision', 17 | 'eval_map', 'print_map_summary', 'eval_recalls', 'print_recall_summary', 18 | 'plot_num_recall', 'plot_iou_recall' 19 | ] 20 | -------------------------------------------------------------------------------- /mmdet/models/builder.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from mmdet.utils import build_from_cfg 4 | from .registry import (BACKBONES, DETECTORS, HEADS, LOSSES, NECKS, 5 | ROI_EXTRACTORS, SHARED_HEADS) 6 | 7 | 8 | def build(cfg, registry, default_args=None): 9 | if isinstance(cfg, list): 10 | modules = [ 11 | build_from_cfg(cfg_, registry, default_args) for cfg_ in cfg 12 | ] 13 | return nn.Sequential(*modules) 14 | else: 15 | return build_from_cfg(cfg, registry, default_args) 16 | 17 | 18 | def build_backbone(cfg): 19 | return build(cfg, BACKBONES) 20 | 21 | 22 | def build_neck(cfg): 23 | return build(cfg, NECKS) 24 | 25 | 26 | def build_roi_extractor(cfg): 27 | return build(cfg, ROI_EXTRACTORS) 28 | 29 | 30 | def build_shared_head(cfg): 31 | return build(cfg, SHARED_HEADS) 32 | 33 | 34 | def build_head(cfg): 35 | return build(cfg, HEADS) 36 | 37 | 38 | def build_loss(cfg): 39 | return build(cfg, LOSSES) 40 | 41 | 42 | def build_detector(cfg, train_cfg=None, test_cfg=None): 43 | return build(cfg, DETECTORS, dict(train_cfg=train_cfg, test_cfg=test_cfg)) 44 | -------------------------------------------------------------------------------- /lvis_api/setup.py: -------------------------------------------------------------------------------- 1 | """LVIS (pronounced ‘el-vis’): is a new dataset for Large Vocabulary Instance Segmentation. 2 | We collect over 2 million high-quality instance segmentation masks for over 1200 entry-level object categories in 164k images. LVIS API enables reading and interacting with annotation files, 3 | visualizing annotations, and evaluating results. 4 | 5 | """ 6 | DOCLINES = (__doc__ or '') 7 | 8 | import os.path 9 | import sys 10 | import pip 11 | 12 | import setuptools 13 | 14 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "lvis")) 15 | 16 | with open("requirements.txt") as f: 17 | reqs = f.read() 18 | 19 | DISTNAME = "lvis" 20 | DESCRIPTION = "Python API for LVIS dataset." 
21 | AUTHOR = "Agrim Gupta" 22 | REQUIREMENTS = (reqs.strip().split("\n"),) 23 | 24 | 25 | if __name__ == "__main__": 26 | setuptools.setup( 27 | name=DISTNAME, 28 | install_requires=REQUIREMENTS, 29 | packages=setuptools.find_packages(), 30 | version="0.5", 31 | description=DESCRIPTION, 32 | long_description=DOCLINES, 33 | long_description_content_type='text/markdown', 34 | author=AUTHOR 35 | ) 36 | -------------------------------------------------------------------------------- /tools/publish_model.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import subprocess 3 | 4 | import torch 5 | 6 | 7 | def parse_args(): 8 | parser = argparse.ArgumentParser( 9 | description='Process a checkpoint to be published') 10 | parser.add_argument('in_file', help='input checkpoint filename') 11 | parser.add_argument('out_file', help='output checkpoint filename') 12 | args = parser.parse_args() 13 | return args 14 | 15 | 16 | def process_checkpoint(in_file, out_file): 17 | checkpoint = torch.load(in_file, map_location='cpu') 18 | # remove optimizer for smaller file size 19 | if 'optimizer' in checkpoint: 20 | del checkpoint['optimizer'] 21 | # if it is necessary to remove some sensitive data in checkpoint['meta'], 22 | # add the code here. 23 | torch.save(checkpoint, out_file) 24 | sha = subprocess.check_output(['sha256sum', out_file]).decode() 25 | final_file = out_file.rstrip('.pth') + '-{}.pth'.format(sha[:8]) 26 | subprocess.Popen(['mv', out_file, final_file]) 27 | 28 | 29 | def main(): 30 | args = parse_args() 31 | process_checkpoint(args.in_file, args.out_file) 32 | 33 | 34 | if __name__ == '__main__': 35 | main() 36 | -------------------------------------------------------------------------------- /mmdet/core/bbox/__init__.py: -------------------------------------------------------------------------------- 1 | from .assigners import AssignResult, BaseAssigner, MaxIoUAssigner 2 | from .bbox_target import bbox_target 3 | from .geometry import bbox_overlaps 4 | from .samplers import (BaseSampler, CombinedSampler, 5 | InstanceBalancedPosSampler, IoUBalancedNegSampler, 6 | PseudoSampler, RandomSampler, SamplingResult) 7 | from .transforms import (bbox2delta, bbox2result, bbox2roi, bbox_flip, 8 | bbox_mapping, bbox_mapping_back, delta2bbox, 9 | distance2bbox, roi2bbox) 10 | 11 | from .assign_sampling import ( # isort:skip, avoid recursive imports 12 | assign_and_sample, build_assigner, build_sampler) 13 | 14 | __all__ = [ 15 | 'bbox_overlaps', 'BaseAssigner', 'MaxIoUAssigner', 'AssignResult', 16 | 'BaseSampler', 'PseudoSampler', 'RandomSampler', 17 | 'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler', 18 | 'SamplingResult', 'build_assigner', 'build_sampler', 'assign_and_sample', 19 | 'bbox2delta', 'delta2bbox', 'bbox_flip', 'bbox_mapping', 20 | 'bbox_mapping_back', 'bbox2roi', 'roi2bbox', 'bbox2result', 21 | 'distance2bbox', 'bbox_target' 22 | ] 23 | -------------------------------------------------------------------------------- /tools/draw_cls_dist_lvis.py: -------------------------------------------------------------------------------- 1 | from matplotlib import pyplot as plt 2 | import mmcv 3 | import numpy as np 4 | import pickle 5 | 6 | train_info = pickle.load(open('./lvis_train_cats_info.pt', 'rb')) 7 | 8 | y_lvis = pickle.load(open('./class_to_imageid_and_inscount.pt', 'rb')) 9 | y_lvis = sorted([y_lvis[i]['isntance_count'] for i in range(1,1231)])[::-1] 10 | # plt.figure(figsize=(90, 50)) 11 | # plt.bar(range(1, 
1231), y_lvis, align='center', alpha=0.5, width=0.8) 12 | plt.plot(range(1, 1231), y_lvis, color='black') 13 | plt.fill_between(range(1, 480), 0, y_lvis[1:480], facecolor='green', interpolate=True) 14 | plt.fill_between(range(481, 1230), 0, y_lvis[481:1230], facecolor='red') 15 | # plt.fill_between(range(401, 500), 0, y_lvis[401:500]) 16 | # plt.fill_between(range(501, 1230), 0, y_lvis[501:1230]) 17 | plt.grid(color='#95a5a6', linestyle='--', linewidth=2, axis='y', alpha=0.7) 18 | plt.yscale('log') 19 | plt.ylabel('Number of training instances') 20 | plt.xlabel('Sorted category index') 21 | # plt.title('') 22 | # plt.xticks(np.arange(len(x_name)), x_name, rotation=45) 23 | plt.savefig('lvis_cls_dist.eps', format='eps', dpi=1000) 24 | # plt.savefig('coco_sample_cls_dist_1.eps', format='eps') 25 | plt.show() 26 | -------------------------------------------------------------------------------- /configs/wider_face/README.md: -------------------------------------------------------------------------------- 1 | ## WIDER Face Dataset 2 | 3 | To use the WIDER Face dataset you need to download it 4 | and extract to the `data/WIDERFace` folder. Annotation in the VOC format 5 | can be found in this [repo](https://github.com/sovrasov/wider-face-pascal-voc-annotations.git). 6 | You should move the annotation files from `WIDER_train_annotations` and `WIDER_val_annotations` folders 7 | to the `Annotation` folders inside the corresponding directories `WIDER_train` and `WIDER_val`. 8 | Also annotation lists `val.txt` and `train.txt` should be copied to `data/WIDERFace` from `WIDER_train_annotations` and `WIDER_val_annotations`. 9 | The directory should be like this: 10 | 11 | ``` 12 | mmdetection 13 | ├── mmdet 14 | ├── tools 15 | ├── configs 16 | ├── data 17 | │ ├── WIDERFace 18 | │ │ ├── WIDER_train 19 | │ | │ ├──0--Parade 20 | │ | │ ├── ... 21 | │ | │ ├── Annotations 22 | │ │ ├── WIDER_val 23 | │ | │ ├──0--Parade 24 | │ | │ ├── ... 25 | │ | │ ├── Annotations 26 | │ │ ├── val.txt 27 | │ │ ├── train.txt 28 | 29 | ``` 30 | 31 | After that you can train the SSD300 on WIDER by launching training with the `ssd300_wider_face.py` config or 32 | create your own config based on the presented one. 33 | -------------------------------------------------------------------------------- /tools/configs/wider_face/README.md: -------------------------------------------------------------------------------- 1 | ## WIDER Face Dataset 2 | 3 | To use the WIDER Face dataset you need to download it 4 | and extract to the `data/WIDERFace` folder. Annotation in the VOC format 5 | can be found in this [repo](https://github.com/sovrasov/wider-face-pascal-voc-annotations.git). 6 | You should move the annotation files from `WIDER_train_annotations` and `WIDER_val_annotations` folders 7 | to the `Annotation` folders inside the corresponding directories `WIDER_train` and `WIDER_val`. 8 | Also annotation lists `val.txt` and `train.txt` should be copied to `data/WIDERFace` from `WIDER_train_annotations` and `WIDER_val_annotations`. 9 | The directory should be like this: 10 | 11 | ``` 12 | mmdetection 13 | ├── mmdet 14 | ├── tools 15 | ├── configs 16 | ├── data 17 | │ ├── WIDERFace 18 | │ │ ├── WIDER_train 19 | │ | │ ├──0--Parade 20 | │ | │ ├── ... 21 | │ | │ ├── Annotations 22 | │ │ ├── WIDER_val 23 | │ | │ ├──0--Parade 24 | │ | │ ├── ... 
25 | │ | │ ├── Annotations 26 | │ │ ├── val.txt 27 | │ │ ├── train.txt 28 | 29 | ``` 30 | 31 | After that you can train the SSD300 on WIDER by launching training with the `ssd300_wider_face.py` config or 32 | create your own config based on the presented one. 33 | -------------------------------------------------------------------------------- /mmdet/core/utils/misc.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | 3 | import mmcv 4 | import numpy as np 5 | from six.moves import map, zip 6 | 7 | 8 | def tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True): 9 | num_imgs = tensor.size(0) 10 | mean = np.array(mean, dtype=np.float32) 11 | std = np.array(std, dtype=np.float32) 12 | imgs = [] 13 | for img_id in range(num_imgs): 14 | img = tensor[img_id, ...].cpu().numpy().transpose(1, 2, 0) 15 | img = mmcv.imdenormalize( 16 | img, mean, std, to_bgr=to_rgb).astype(np.uint8) 17 | imgs.append(np.ascontiguousarray(img)) 18 | return imgs 19 | 20 | 21 | def multi_apply(func, *args, **kwargs): 22 | pfunc = partial(func, **kwargs) if kwargs else func 23 | map_results = map(pfunc, *args) 24 | return tuple(map(list, zip(*map_results))) 25 | 26 | 27 | def unmap(data, count, inds, fill=0): 28 | """ Unmap a subset of item (data) back to the original set of items (of 29 | size count) """ 30 | if data.dim() == 1: 31 | ret = data.new_full((count, ), fill) 32 | ret[inds] = data 33 | else: 34 | new_size = (count, ) + data.size()[1:] 35 | ret = data.new_full(new_size, fill) 36 | ret[inds, :] = data 37 | return ret 38 | -------------------------------------------------------------------------------- /mmdet/core/mask/utils.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | 4 | def split_combined_polys(polys, poly_lens, polys_per_mask): 5 | """Split the combined 1-D polys into masks. 6 | 7 | A mask is represented as a list of polys, and a poly is represented as 8 | a 1-D array. In dataset, all masks are concatenated into a single 1-D 9 | tensor. Here we need to split the tensor into original representations. 10 | 11 | Args: 12 | polys (list): a list (length = image num) of 1-D tensors 13 | poly_lens (list): a list (length = image num) of poly length 14 | polys_per_mask (list): a list (length = image num) of poly number 15 | of each mask 16 | 17 | Returns: 18 | list: a list (length = image num) of list (length = mask num) of 19 | list (length = poly num) of numpy array 20 | """ 21 | mask_polys_list = [] 22 | for img_id in range(len(polys)): 23 | polys_single = polys[img_id] 24 | polys_lens_single = poly_lens[img_id].tolist() 25 | polys_per_mask_single = polys_per_mask[img_id].tolist() 26 | 27 | split_polys = mmcv.slice_list(polys_single, polys_lens_single) 28 | mask_polys = mmcv.slice_list(split_polys, polys_per_mask_single) 29 | mask_polys_list.append(mask_polys) 30 | return mask_polys_list 31 | -------------------------------------------------------------------------------- /mmdet/core/bbox/assign_sampling.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | from . 
import assigners, samplers 4 | 5 | 6 | def build_assigner(cfg, **kwargs): 7 | if isinstance(cfg, assigners.BaseAssigner): 8 | return cfg 9 | elif isinstance(cfg, dict): 10 | return mmcv.runner.obj_from_dict(cfg, assigners, default_args=kwargs) 11 | else: 12 | raise TypeError('Invalid type {} for building a sampler'.format( 13 | type(cfg))) 14 | 15 | 16 | def build_sampler(cfg, **kwargs): 17 | if isinstance(cfg, samplers.BaseSampler): 18 | return cfg 19 | elif isinstance(cfg, dict): 20 | return mmcv.runner.obj_from_dict(cfg, samplers, default_args=kwargs) 21 | else: 22 | raise TypeError('Invalid type {} for building a sampler'.format( 23 | type(cfg))) 24 | 25 | 26 | def assign_and_sample(bboxes, gt_bboxes, gt_bboxes_ignore, gt_labels, cfg): 27 | bbox_assigner = build_assigner(cfg.assigner) 28 | bbox_sampler = build_sampler(cfg.sampler) 29 | assign_result = bbox_assigner.assign(bboxes, gt_bboxes, gt_bboxes_ignore, 30 | gt_labels) 31 | sampling_result = bbox_sampler.sample(assign_result, bboxes, gt_bboxes, 32 | gt_labels) 33 | return assign_result, sampling_result 34 | -------------------------------------------------------------------------------- /demo/demo.py: -------------------------------------------------------------------------------- 1 | from mmdet.apis import init_detector, inference_detector, show_result 2 | import mmcv 3 | import os 4 | 5 | demopath = os.path.dirname(os.path.realpath(__file__)) 6 | # config_file = 'configs/faster_rcnn_r50_fpn_1x.py' 7 | # checkpoint_file = 'checkpoints/faster_rcnn_r50_fpn_1x_20181010-3d1b3351.pth' 8 | config_file = 'configs/mask_rcnn_r50_fpn_1x.py' 9 | checkpoint_file = 'checkpoints/mask_rcnn_r50_fpn_1x_20181010-069fa190.pth' 10 | 11 | # build the model from a config file and a checkpoint file 12 | model = init_detector(config_file, checkpoint_file, device='cuda:0') 13 | 14 | # test a single image and show the results 15 | img = os.path.join(demopath, '000000125100.jpg') # or img = mmcv.imread(img), which will only load it once 16 | result = inference_detector(model, img) 17 | show_result(img, result, model.CLASSES) 18 | 19 | # test a list of images and write the results to image files 20 | # imgs = ['test1.jpg', 'test2.jpg'] 21 | # for i, result in enumerate(inference_detector(model, imgs)): 22 | # show_result(imgs[i], result, model.CLASSES, out_file='result_{}.jpg'.format(i)) 23 | 24 | # test a video and show the results 25 | # video = mmcv.VideoReader('video.mp4') 26 | # for frame in video: 27 | # result = inference_detector(model, frame) 28 | # show_result(frame, result, model.CLASSES, wait_time=1) 29 | 30 | -------------------------------------------------------------------------------- /mmdet/models/mask_heads/htc_mask_head.py: -------------------------------------------------------------------------------- 1 | from ..registry import HEADS 2 | from ..utils import ConvModule 3 | from .fcn_mask_head import FCNMaskHead 4 | 5 | 6 | @HEADS.register_module 7 | class HTCMaskHead(FCNMaskHead): 8 | 9 | def __init__(self, *args, **kwargs): 10 | super(HTCMaskHead, self).__init__(*args, **kwargs) 11 | self.conv_res = ConvModule( 12 | self.conv_out_channels, 13 | self.conv_out_channels, 14 | 1, 15 | conv_cfg=self.conv_cfg, 16 | norm_cfg=self.norm_cfg) 17 | 18 | def init_weights(self): 19 | super(HTCMaskHead, self).init_weights() 20 | self.conv_res.init_weights() 21 | 22 | def forward(self, x, res_feat=None, return_logits=True, return_feat=True): 23 | if res_feat is not None: 24 | res_feat = self.conv_res(res_feat) 25 | x = x + res_feat 26 | for conv in 
self.convs: 27 | x = conv(x) 28 | res_feat = x 29 | outs = [] 30 | if return_logits: 31 | x = self.upsample(x) 32 | if self.upsample_method == 'deconv': 33 | x = self.relu(x) 34 | mask_pred = self.conv_logits(x) 35 | outs.append(mask_pred) 36 | if return_feat: 37 | outs.append(res_feat) 38 | return outs if len(outs) > 1 else outs[0] 39 | -------------------------------------------------------------------------------- /mmdet/datasets/builder.py: -------------------------------------------------------------------------------- 1 | import copy 2 | 3 | from mmdet.utils import build_from_cfg 4 | from .dataset_wrappers import ConcatDataset, RepeatDataset 5 | from .registry import DATASETS 6 | 7 | 8 | def _concat_dataset(cfg): 9 | ann_files = cfg['ann_file'] 10 | img_prefixes = cfg.get('img_prefix', None) 11 | seg_prefixes = cfg.get('seg_prefixes', None) 12 | proposal_files = cfg.get('proposal_file', None) 13 | 14 | datasets = [] 15 | num_dset = len(ann_files) 16 | for i in range(num_dset): 17 | data_cfg = copy.deepcopy(cfg) 18 | data_cfg['ann_file'] = ann_files[i] 19 | if isinstance(img_prefixes, (list, tuple)): 20 | data_cfg['img_prefix'] = img_prefixes[i] 21 | if isinstance(seg_prefixes, (list, tuple)): 22 | data_cfg['seg_prefix'] = seg_prefixes[i] 23 | if isinstance(proposal_files, (list, tuple)): 24 | data_cfg['proposal_file'] = proposal_files[i] 25 | datasets.append(build_dataset(data_cfg)) 26 | 27 | return ConcatDataset(datasets) 28 | 29 | 30 | def build_dataset(cfg): 31 | if cfg['type'] == 'RepeatDataset': 32 | dataset = RepeatDataset(build_dataset(cfg['dataset']), cfg['times']) 33 | elif isinstance(cfg['ann_file'], (list, tuple)): 34 | dataset = _concat_dataset(cfg) 35 | else: 36 | dataset = build_from_cfg(cfg, DATASETS) 37 | 38 | return dataset 39 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to mmdetection 2 | 3 | All kinds of contributions are welcome, including but not limited to the following. 4 | 5 | - Fixes (typo, bugs) 6 | - New features and components 7 | 8 | ## Workflow 9 | 10 | 1. fork and pull the latest mmdetection 11 | 2. checkout a new branch (do not use master branch for PRs) 12 | 3. commit your changes 13 | 4. create a PR 14 | 15 | Note 16 | - If you plan to add some new features that involve large changes, it is encouraged to open an issue for discussion first. 17 | - If you are the author of some papers and would like to include your method to mmdetection, 18 | please contact Kai Chen (chenkaidev[at]gmail[dot]com). We will much appreciate your contribution. 19 | 20 | ## Code style 21 | 22 | ### Python 23 | We adopt [PEP8](https://www.python.org/dev/peps/pep-0008/) as the preferred code style. 24 | 25 | We use the following tools for linting and formatting: 26 | - [flake8](http://flake8.pycqa.org/en/latest/): linter 27 | - [yapf](https://github.com/google/yapf): formatter 28 | - [isort](https://github.com/timothycrosley/isort): sort imports 29 | 30 | Style configurations of yapf and isort can be found in [.style.yapf](.style.yapf) and [.isort.cfg](.isort.cfg). 31 | 32 | >Before you create a PR, make sure that your code lints and is formatted by yapf. 33 | 34 | ### C++ and CUDA 35 | We follow the [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html). 
-------------------------------------------------------------------------------- /demo/webcam_demo.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import cv2 4 | import torch 5 | 6 | from mmdet.apis import inference_detector, init_detector, show_result 7 | 8 | 9 | def parse_args(): 10 | parser = argparse.ArgumentParser(description='MMDetection webcam demo') 11 | parser.add_argument('config', help='test config file path') 12 | parser.add_argument('checkpoint', help='checkpoint file') 13 | parser.add_argument('--device', type=int, default=0, help='CUDA device id') 14 | parser.add_argument( 15 | '--camera-id', type=int, default=0, help='camera device id') 16 | parser.add_argument( 17 | '--score-thr', type=float, default=0.5, help='bbox score threshold') 18 | args = parser.parse_args() 19 | return args 20 | 21 | 22 | def main(): 23 | args = parse_args() 24 | 25 | model = init_detector( 26 | args.config, args.checkpoint, device=torch.device('cuda', args.device)) 27 | 28 | camera = cv2.VideoCapture(args.camera_id) 29 | 30 | print('Press "Esc", "q" or "Q" to exit.') 31 | while True: 32 | ret_val, img = camera.read() 33 | result = inference_detector(model, img) 34 | 35 | ch = cv2.waitKey(1) 36 | if ch == 27 or ch == ord('q') or ch == ord('Q'): 37 | break 38 | 39 | show_result( 40 | img, result, model.CLASSES, score_thr=args.score_thr, wait_time=1) 41 | 42 | 43 | if __name__ == '__main__': 44 | main() 45 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/error-report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Error report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | Thanks for your error report and we appreciate it a lot. 11 | 12 | **Checklist** 13 | 1. I have searched related issues but cannot get the expected help. 14 | 2. The bug has not been fixed in the latest version. 15 | 16 | **Describe the bug** 17 | A clear and concise description of what the bug is. 18 | 19 | **Reproduction** 20 | 1. What command or script did you run? 21 | ``` 22 | A placeholder for the command. 23 | ``` 24 | 2. Did you make any modifications on the code or config? Did you understand what you have modified? 25 | 3. What dataset did you use? 26 | 27 | **Environment** 28 | - OS: [e.g., Ubuntu 16.04.6] 29 | - GCC [e.g., 5.4.0] 30 | - PyTorch version [e.g., 1.1.0] 31 | - How you installed PyTorch [e.g., pip, conda, source] 32 | - GPU model [e.g., 1080Ti, V100] 33 | - CUDA and CUDNN version 34 | - [optional] Other information that may be related (such as `$PATH`, `$LD_LIBRARY_PATH`, `$PYTHONPATH`, etc.) 35 | 36 | **Error traceback** 37 | If applicable, paste the error trackback here. 38 | ``` 39 | A placeholder for trackback. 40 | ``` 41 | 42 | **Bug fix** 43 | If you have already identified the reason, you can provide the information here. If you are willing to create a PR to fix it, please also leave a comment here and that would be much appreciated! 
44 | -------------------------------------------------------------------------------- /ft_cal_epoch_ablation_for_drawing_compose.txt: -------------------------------------------------------------------------------- 1 | 0.0 0.133 0.214 0.27 2 | 0.04280418980064317 0.20160173382295815 0.1988776074768606 0.22835550743102145 3 | 0.09616575450188941 0.1954467399271925 0.20311018258723598 0.23287101901797713 4 | 0.0979132273568508 0.21812343785761834 0.19653808201066472 0.2312084008481571 5 | 0.11101177281907294 0.2227236030473715 0.1906213960656182 0.22132534522222255 6 | 0.12878948298692902 0.21914006163797856 0.19461092345089875 0.22616635362890605 7 | 0.11844617291278857 0.21264685912320885 0.19239315593933407 0.22711695907796758 8 | 0.12709551283158493 0.21645399522004918 0.19448744166679882 0.21668781745355545 9 | 0.12406608517994656 0.22257170647785493 0.19644651999355758 0.2241675539773186 10 | 0.13204309939334583 0.22897236956240652 0.19264401188011646 0.22119623077589495 11 | 0.11417922262250052 0.22094165409452865 0.19157754172376854 0.2194968263538914 12 | 0.12809983556991092 0.23119536979857622 0.19368227690676004 0.22163935901414147 13 | 0.12632375300126727 0.22515345220504207 0.19510491178599484 0.22847079536768966 14 | 0.12250187918578637 0.23654185924390544 0.19835340748216393 0.22237704021269583 15 | 0.10328880702569189 0.23741552100789903 0.19897538729486794 0.22950141235625374 16 | 0.11183935930906523 0.23645881293063528 0.2007864199513981 0.22948890723346893 17 | 0.11545512563393455 0.23961815994386487 0.20013024142538005 0.22691280524591614 18 | 0.11456224300468426 0.23811144384852884 0.20343264275148323 0.22817559018420883 -------------------------------------------------------------------------------- /tools/draw_pr_recall_bar_lviscoco.py: -------------------------------------------------------------------------------- 1 | import matplotlib 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | 5 | 6 | labels = ['Proposal Recall@1k', 'AP'] 7 | men_means = [55.9, 32.8] 8 | women_means = [51.0, 18.0] 9 | 10 | x = np.arange(len(labels)) # the label locations 11 | width = 0.35 # the width of the bars 12 | 13 | matplotlib.rcParams.update({'font.size': 18}) 14 | plt.rc('ytick', labelsize=10) 15 | 16 | fig, ax = plt.subplots() 17 | rects1 = ax.bar(x - width/2, men_means, width, label='COCO') 18 | rects2 = ax.bar(x + width/2, women_means, width, label='LVIS') 19 | 20 | # Add some text for labels, title and custom x-axis tick labels, etc. 
21 | # ax.set_ylabel('Scores') 22 | # ax.set_title('Scores by group and gender') 23 | ax.set_ylim([0,65]) 24 | ax.set_xticks(x) 25 | ax.set_xticklabels(labels) 26 | ax.legend() 27 | 28 | 29 | def autolabel(rects): 30 | """Attach a text label above each bar in *rects*, displaying its height.""" 31 | for rect in rects: 32 | height = rect.get_height() 33 | ax.annotate('{}'.format(height), 34 | xy=(rect.get_x() + rect.get_width() / 2, height), 35 | xytext=(0, 3), # 3 points vertical offset 36 | textcoords="offset points", 37 | ha='center', va='bottom') 38 | 39 | 40 | autolabel(rects1) 41 | autolabel(rects2) 42 | 43 | fig.tight_layout() 44 | plt.savefig('coco_lvis_pr_recall_bar.eps', format='eps', dpi=1000) 45 | plt.show() 46 | -------------------------------------------------------------------------------- /mmdet/datasets/wider_face.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import xml.etree.ElementTree as ET 3 | 4 | import mmcv 5 | 6 | from .registry import DATASETS 7 | from .xml_style import XMLDataset 8 | 9 | 10 | @DATASETS.register_module 11 | class WIDERFaceDataset(XMLDataset): 12 | """ 13 | Reader for the WIDER Face dataset in PASCAL VOC format. 14 | Conversion scripts can be found in 15 | https://github.com/sovrasov/wider-face-pascal-voc-annotations 16 | """ 17 | CLASSES = ('face', ) 18 | 19 | def __init__(self, **kwargs): 20 | super(WIDERFaceDataset, self).__init__(**kwargs) 21 | 22 | def load_annotations(self, ann_file): 23 | img_infos = [] 24 | img_ids = mmcv.list_from_file(ann_file) 25 | for img_id in img_ids: 26 | filename = '{}.jpg'.format(img_id) 27 | xml_path = osp.join(self.img_prefix, 'Annotations', 28 | '{}.xml'.format(img_id)) 29 | tree = ET.parse(xml_path) 30 | root = tree.getroot() 31 | size = root.find('size') 32 | width = int(size.find('width').text) 33 | height = int(size.find('height').text) 34 | folder = root.find('folder').text 35 | img_infos.append( 36 | dict( 37 | id=img_id, 38 | filename=osp.join(folder, filename), 39 | width=width, 40 | height=height)) 41 | 42 | return img_infos 43 | -------------------------------------------------------------------------------- /mmdet/models/losses/smooth_l1_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from ..registry import LOSSES 5 | from .utils import weighted_loss 6 | 7 | 8 | @weighted_loss 9 | def smooth_l1_loss(pred, target, beta=1.0): 10 | assert beta > 0 11 | assert pred.size() == target.size() and target.numel() > 0 12 | diff = torch.abs(pred - target) 13 | loss = torch.where(diff < beta, 0.5 * diff * diff / beta, 14 | diff - 0.5 * beta) 15 | return loss 16 | 17 | 18 | @LOSSES.register_module 19 | class SmoothL1Loss(nn.Module): 20 | 21 | def __init__(self, beta=1.0, reduction='mean', loss_weight=1.0): 22 | super(SmoothL1Loss, self).__init__() 23 | self.beta = beta 24 | self.reduction = reduction 25 | self.loss_weight = loss_weight 26 | 27 | def forward(self, 28 | pred, 29 | target, 30 | weight=None, 31 | avg_factor=None, 32 | reduction_override=None, 33 | **kwargs): 34 | assert reduction_override in (None, 'none', 'mean', 'sum') 35 | reduction = ( 36 | reduction_override if reduction_override else self.reduction) 37 | loss_bbox = self.loss_weight * smooth_l1_loss( 38 | pred, 39 | target, 40 | weight, 41 | beta=self.beta, 42 | reduction=reduction, 43 | avg_factor=avg_factor, 44 | **kwargs) 45 | return loss_bbox 46 | 
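As a quick sanity check of the piecewise definition above (quadratic for `|diff| < beta`, linear otherwise), a small hedged example, assuming the package is importable:

```python
# Not part of the repo: verify the smooth L1 values element-wise.
# |diff| <  beta -> 0.5 * diff**2 / beta
# |diff| >= beta -> |diff| - 0.5 * beta
import torch
from mmdet.models.losses import SmoothL1Loss

pred = torch.tensor([0.0, 0.0, 0.0])
target = torch.tensor([0.5, 1.0, 3.0])
loss = SmoothL1Loss(beta=1.0, reduction='none')(pred, target)
print(loss)  # tensor([0.1250, 0.5000, 2.5000])
```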
-------------------------------------------------------------------------------- /tools/upgrade_model_version.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import re 3 | from collections import OrderedDict 4 | 5 | import torch 6 | 7 | 8 | def convert(in_file, out_file): 9 | """Convert keys in checkpoints. 10 | 11 | There can be some breaking changes during the development of mmdetection, 12 | and this tool is used for upgrading checkpoints trained with old versions 13 | to the latest one. 14 | """ 15 | checkpoint = torch.load(in_file) 16 | in_state_dict = checkpoint.pop('state_dict') 17 | out_state_dict = OrderedDict() 18 | for key, val in in_state_dict.items(): 19 | # Use ConvModule instead of nn.Conv2d in RetinaNet 20 | # cls_convs.0.weight -> cls_convs.0.conv.weight 21 | m = re.search(r'(cls_convs|reg_convs).\d.(weight|bias)', key) 22 | if m is not None: 23 | param = m.groups()[1] 24 | new_key = key.replace(param, 'conv.{}'.format(param)) 25 | out_state_dict[new_key] = val 26 | continue 27 | 28 | out_state_dict[key] = val 29 | checkpoint['state_dict'] = out_state_dict 30 | torch.save(checkpoint, out_file) 31 | 32 | 33 | def main(): 34 | parser = argparse.ArgumentParser(description='Upgrade model version') 35 | parser.add_argument('in_file', help='input checkpoint file') 36 | parser.add_argument('out_file', help='output checkpoint file') 37 | args = parser.parse_args() 38 | convert(args.in_file, args.out_file) 39 | 40 | 41 | if __name__ == '__main__': 42 | main() 43 | -------------------------------------------------------------------------------- /configs/ghm/README.md: -------------------------------------------------------------------------------- 1 | # Gradient Harmonized Single-stage Detector 2 | 3 | ## Introduction 4 | 5 | ``` 6 | @inproceedings{li2019gradient, 7 | title={Gradient Harmonized Single-stage Detector}, 8 | author={Li, Buyu and Liu, Yu and Wang, Xiaogang}, 9 | booktitle={AAAI Conference on Artificial Intelligence}, 10 | year={2019} 11 | } 12 | ``` 13 | 14 | ## Results and Models 15 | 16 | | Backbone | Style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download | 17 | | :-------------: | :-----: | :-----: | :------: | :-----------------: | :------------: | :----: | :------: | 18 | | R-50-FPN | pytorch | 1x | 3.9 | 0.500 | 9.4 | 36.9 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ghm/retinanet_ghm_r50_fpn_1x_20190608-b9aa5862.pth) | 19 | | R-101-FPN | pytorch | 1x | 5.8 | 0.625 | 8.5 | 39.0 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ghm/retinanet_ghm_r101_fpn_1x_20190608-b885b74a.pth) | 20 | | X-101-32x4d-FPN | pytorch | 1x | 7.0 | 0.818 | 7.6 | 40.5 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ghm/retinanet_ghm_x101_32x4d_fpn_1x_20190608-ed295d22.pth) | 21 | | X-101-64x4d-FPN | pytorch | 1x | 9.9 | 1.191 | 6.1 | 41.6 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ghm/retinanet_ghm_x101_64x4d_fpn_1x_20190608-7f2037ce.pth) | -------------------------------------------------------------------------------- /ft_cal_epoch_ablation_for_drawing.txt: -------------------------------------------------------------------------------- 1 | 0.0 0.133 0.214 0.27 0.18 2 | 0.030845771144278604 0.1902208185426685 0.1988776074768606 0.22835550743102145 0.203 3 | 0.10822619575390374 0.2060170475260656 0.20311018258723598 0.23287101901797713 0.202 4 | 0.12556453939637033 
0.22611426962951972 0.19653808201066472 0.2312084008481571 0.211 5 | 0.12397418846362247 0.20782131406074886 0.1906213960656182 0.22132534522222255 0.214 6 | 0.09298596704020082 0.19833452971738785 0.19461092345089875 0.22616635362890605 0.214 7 | 0.11705334712575734 0.22270016140556695 0.19239315593933407 0.22711695907796758 0.209 8 | 0.10725190918690512 0.2091746843401387 0.19448744166679882 0.21668781745355545 0.210 9 | 0.11928969016304615 0.2177085097241877 0.19644651999355758 0.2241675539773186 0.214 10 | 0.12043408881211713 0.21771775526842294 0.19264401188011646 0.22119623077589495 0.216 11 | 0.10610914582929507 0.21603875815042728 0.19157754172376854 0.2194968263538914 0.212 12 | 0.11457422927793845 0.2221024827401525 0.19368227690676004 0.22163935901414147 0.216 13 | 0.12398472309917556 0.22479705199942607 0.19510491178599484 0.22847079536768966 0.215 14 | 0.11924409072037266 0.22496923696141857 0.19835340748216393 0.22237704021269583 0.218 15 | 0.10200808193825778 0.22646670710206385 0.19897538729486794 0.22950141235625374 0.218 16 | 0.110377567607507 0.23374143119513724 0.2007864199513981 0.22948890723346893 0.219 17 | 0.11416522995583138 0.23100215512937564 0.20013024142538005 0.22691280524591614 0.220 18 | 0.09514752967834095 0.2308715098124401 0.20343264275148323 0.22817559018420883 -------------------------------------------------------------------------------- /ft_cat_epoch_ablation_for_drawing.txt: -------------------------------------------------------------------------------- 1 | 0.0 0.133 0.214 0.27 0.18 2 | 0.04280418980064317 0.20160173382295815 0.200568307268474 0.2671032643413395 0.203 3 | 0.09616575450188941 0.1954467399271925 0.1943992314872916 0.2653585561754797 0.202 4 | 0.0979132273568508 0.21812343785761834 0.19578938849837843 0.26581880358174925 0.211 5 | 0.11101177281907294 0.2227236030473715 0.19580531795819484 0.2652209941015451 0.214 6 | 0.12878948298692902 0.21914006163797856 0.19480494761560097 0.2652357706376281 0.214 7 | 0.11844617291278857 0.21264685912320885 0.19098514971537514 0.2634918945035475 0.209 8 | 0.12709551283158493 0.21645399522004918 0.19015205931533305 0.26332312509347844 0.210 9 | 0.12406608517994656 0.22257170647785493 0.19399217829271048 0.26501951230658677 0.214 10 | 0.13204309939334583 0.22897236956240652 0.1913758157437348 0.2639267886854602 0.216 11 | 0.11417922262250052 0.22094165409452865 0.1916112849432229 0.2636358997450702 0.212 12 | 0.12809983556991092 0.23119536979857622 0.1915855842657778 0.2640212324643147 0.216 13 | 0.12632375300126727 0.22515345220504207 0.1943042860285571 0.26524881489714663 0.215 14 | 0.12250187918578637 0.23654185924390544 0.19151705754236964 0.26410391351880164 0.218 15 | 0.10328880702569189 0.23741552100789903 0.19546495169655584 0.2653966386186115 0.218 16 | 0.11183935930906523 0.23645881293063528 0.1952282774372787 0.26547618736841405 0.219 17 | 0.11545512563393455 0.23961815994386487 0.19514162398787996 0.26499435229124235 0.220 18 | 0.11456224300468426 0.23811144384852884 0.19497220837245285 0.26527226129414966 0.219 -------------------------------------------------------------------------------- /tools/configs/ghm/README.md: -------------------------------------------------------------------------------- 1 | # Gradient Harmonized Single-stage Detector 2 | 3 | ## Introduction 4 | 5 | ``` 6 | @inproceedings{li2019gradient, 7 | title={Gradient Harmonized Single-stage Detector}, 8 | author={Li, Buyu and Liu, Yu and Wang, Xiaogang}, 9 | booktitle={AAAI Conference on Artificial Intelligence}, 10 | 
year={2019} 11 | } 12 | ``` 13 | 14 | ## Results and Models 15 | 16 | | Backbone | Style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download | 17 | | :-------------: | :-----: | :-----: | :------: | :-----------------: | :------------: | :----: | :------: | 18 | | R-50-FPN | pytorch | 1x | 3.9 | 0.500 | 9.4 | 36.9 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ghm/retinanet_ghm_r50_fpn_1x_20190608-b9aa5862.pth) | 19 | | R-101-FPN | pytorch | 1x | 5.8 | 0.625 | 8.5 | 39.0 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ghm/retinanet_ghm_r101_fpn_1x_20190608-b885b74a.pth) | 20 | | X-101-32x4d-FPN | pytorch | 1x | 7.0 | 0.818 | 7.6 | 40.5 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ghm/retinanet_ghm_x101_32x4d_fpn_1x_20190608-ed295d22.pth) | 21 | | X-101-64x4d-FPN | pytorch | 1x | 9.9 | 1.191 | 6.1 | 41.6 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ghm/retinanet_ghm_x101_64x4d_fpn_1x_20190608-7f2037ce.pth) | -------------------------------------------------------------------------------- /mmdet/models/utils/conv_ws.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | 5 | def conv_ws_2d(input, 6 | weight, 7 | bias=None, 8 | stride=1, 9 | padding=0, 10 | dilation=1, 11 | groups=1, 12 | eps=1e-5): 13 | c_in = weight.size(0) 14 | weight_flat = weight.view(c_in, -1) 15 | mean = weight_flat.mean(dim=1, keepdim=True).view(c_in, 1, 1, 1) 16 | std = weight_flat.std(dim=1, keepdim=True).view(c_in, 1, 1, 1) 17 | weight = (weight - mean) / (std + eps) 18 | return F.conv2d(input, weight, bias, stride, padding, dilation, groups) 19 | 20 | 21 | class ConvWS2d(nn.Conv2d): 22 | 23 | def __init__(self, 24 | in_channels, 25 | out_channels, 26 | kernel_size, 27 | stride=1, 28 | padding=0, 29 | dilation=1, 30 | groups=1, 31 | bias=True, 32 | eps=1e-5): 33 | super(ConvWS2d, self).__init__( 34 | in_channels, 35 | out_channels, 36 | kernel_size, 37 | stride=stride, 38 | padding=padding, 39 | dilation=dilation, 40 | groups=groups, 41 | bias=bias) 42 | self.eps = eps 43 | 44 | def forward(self, x): 45 | return conv_ws_2d(x, self.weight, self.bias, self.stride, self.padding, 46 | self.dilation, self.groups, self.eps) 47 | -------------------------------------------------------------------------------- /cls_head_models/2fc.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Variable 4 | import torch.nn.functional as F 5 | ############################################################## 6 | ### Code to compute batch counts and means 7 | ############################################################## 8 | 9 | class feat_extractor(torch.nn.Module): 10 | def __init__(self, input_shape = [256, 7, 7], hidden_dim=512): 11 | super(feat_extractor, self).__init__() 12 | 13 | self.cls_last_dim = input_shape[0]*input_shape[1]*input_shape[2] 14 | 15 | self.fc1 = nn.Linear(self.cls_last_dim, hidden_dim) 16 | self.fc2 = nn.Linear(hidden_dim, hidden_dim) 17 | 18 | def forward(self, x): 19 | x = x.view(x.size(0), -1) 20 | x = self.fc1(x) 21 | x = F.relu(x) 22 | x = self.fc2(x) 23 | 24 | return x 25 | 26 | class simple2fc(torch.nn.Module): 27 | 28 | def __init__(self, num_classes=1231): 29 | super(simple2fc, self).__init__() 30 | 31 | self.feat_extractor = 
feat_extractor(hidden_dim=num_classes).cuda() 32 | 33 | def forward(self, input): 34 | logits = self.feat_extractor(input) 35 | return logits 36 | 37 | 38 | 39 | # def ncm_sq_dist_bt_norm(a,b): 40 | # anorm = tf.reshape(tf.reduce_sum(tf.square(a), 1),[-1, 1]) 41 | # bnorm = tf.reshape(tf.reduce_sum(tf.square(b), 0),[1, -1]) 42 | # d = -2*tf.matmul(a,b,transpose_b=False)+anorm + bnorm 43 | # return d, anorm 44 | # 45 | # def ncm_sq_dist_bt(a,b): 46 | # d, bnorm = ncm_sq_dist_bt_norm(a,b) 47 | # return d 48 | 49 | -------------------------------------------------------------------------------- /lvis_api/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2019, Agrim Gupta and Ross Girshick 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 2. Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | 13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 14 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 15 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 16 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 17 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 18 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 19 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 20 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 21 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 22 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 23 | 24 | The views and conclusions contained in the software and documentation are those 25 | of the authors and should not be interpreted as representing official policies, 26 | either expressed or implied, of the FreeBSD Project. 
27 | -------------------------------------------------------------------------------- /cls_head_models/simple2fc.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Variable 4 | import torch.nn.functional as F 5 | ############################################################## 6 | ### Code to compute batch counts and means 7 | ############################################################## 8 | 9 | class feat_extractor(torch.nn.Module): 10 | def __init__(self, input_shape = [256, 7, 7], hidden_dim=512, num_classes=1231): 11 | super(feat_extractor, self).__init__() 12 | 13 | self.cls_last_dim = input_shape[0]*input_shape[1]*input_shape[2] 14 | 15 | self.fc1 = nn.Linear(self.cls_last_dim, hidden_dim) 16 | self.fc2 = nn.Linear(hidden_dim, num_classes) 17 | 18 | def forward(self, x): 19 | x = x.view(x.size(0), -1) 20 | x = self.fc1(x) 21 | x = F.relu(x) 22 | x = self.fc2(x) 23 | 24 | return x 25 | 26 | class simple2fc(torch.nn.Module): 27 | 28 | def __init__(self, num_classes=1231): 29 | super(simple2fc, self).__init__() 30 | 31 | self.feat_extractor = feat_extractor(hidden_dim=1024, num_classes=num_classes).cuda() 32 | 33 | def forward(self, input): 34 | logits = self.feat_extractor(input) 35 | return logits 36 | 37 | 38 | 39 | # def ncm_sq_dist_bt_norm(a,b): 40 | # anorm = tf.reshape(tf.reduce_sum(tf.square(a), 1),[-1, 1]) 41 | # bnorm = tf.reshape(tf.reduce_sum(tf.square(b), 0),[1, -1]) 42 | # d = -2*tf.matmul(a,b,transpose_b=False)+anorm + bnorm 43 | # return d, anorm 44 | # 45 | # def ncm_sq_dist_bt(a,b): 46 | # d, bnorm = ncm_sq_dist_bt_norm(a,b) 47 | # return d 48 | 49 | -------------------------------------------------------------------------------- /tools/draw_eAP_sensitivity_binnum.py: -------------------------------------------------------------------------------- 1 | 2 | import matplotlib 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | import math 6 | from matplotlib.ticker import FormatStrFormatter 7 | from matplotlib import scale as mscale 8 | from matplotlib import transforms as mtransforms 9 | 10 | 11 | epoch_results = [[51, 250, 276, 253], [62, 278, 276, 214], [67, 298, 284, 181], [71, 325, 280, 154], [77, 348, 282, 123], [81, 362, 289, 98], [85, 377, 297, 71], [90, 386, 301, 53]] 12 | 13 | epoch_results_array = np.array(epoch_results).astype(np.float) 14 | z = [8,9,10,11,12,13,14,15] 15 | # z = [0,1,2,3,4,5,6,7,8,9] 16 | 17 | eAP = epoch_results_array[:, :4].mean(axis=1).tolist() 18 | bin1 = epoch_results_array[:, 0].tolist() 19 | bin2 = epoch_results_array[:, 1].tolist() 20 | bin3 = epoch_results_array[:, 2].tolist() 21 | bin4 = epoch_results_array[:, 3].tolist() 22 | 23 | fig = plt.figure(figsize=(8,5)) 24 | ax1 = fig.add_subplot(111) 25 | 26 | matplotlib.rcParams.update({'font.size': 12}) 27 | ax1.plot(z, bin4, marker='o', linewidth=2, label='class number in bin [f^3, -)') 28 | ax1.plot(z, bin3, marker='o', linewidth=2, label='class number in bin [f^2, f^3)') 29 | ax1.plot(z, bin2, marker='o', linewidth=2, label='class number in bin [f, f^2)') 30 | ax1.plot(z, bin1, marker='o', linewidth=2, label='class number in bin (0, f)') 31 | 32 | 33 | 34 | # ax1.plot([0],[15.4], 'D', color = 'green') 35 | 36 | plt.xlabel('calibration steps (k)', size=16) 37 | plt.ylabel('AP or eAP', size=16) 38 | # ax1.set_xscale('log') 39 | 40 | plt.legend( loc='best') 41 | 42 | plt.grid() 43 | plt.savefig('eap_sensitivity_binnum.eps', format='eps', dpi=1000) 44 | plt.show() 45 | 46 
| 47 | -------------------------------------------------------------------------------- /mmdet/core/mask/mask_target.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | import numpy as np 3 | import torch 4 | from torch.nn.modules.utils import _pair 5 | 6 | 7 | def mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_masks_list, 8 | cfg): 9 | cfg_list = [cfg for _ in range(len(pos_proposals_list))] 10 | mask_targets = map(mask_target_single, pos_proposals_list, 11 | pos_assigned_gt_inds_list, gt_masks_list, cfg_list) 12 | mask_targets = torch.cat(list(mask_targets)) 13 | return mask_targets 14 | 15 | 16 | def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_masks, cfg): 17 | mask_size = _pair(cfg.mask_size) 18 | num_pos = pos_proposals.size(0) 19 | mask_targets = [] 20 | if num_pos > 0: 21 | proposals_np = pos_proposals.cpu().numpy() 22 | pos_assigned_gt_inds = pos_assigned_gt_inds.cpu().numpy() 23 | for i in range(num_pos): 24 | gt_mask = gt_masks[pos_assigned_gt_inds[i]] 25 | bbox = proposals_np[i, :].astype(np.int32) 26 | x1, y1, x2, y2 = bbox 27 | w = np.maximum(x2 - x1 + 1, 1) 28 | h = np.maximum(y2 - y1 + 1, 1) 29 | # mask is uint8 both before and after resizing 30 | # mask_size (h, w) to (w, h) 31 | target = mmcv.imresize(gt_mask[y1:y1 + h, x1:x1 + w], 32 | mask_size[::-1]) 33 | mask_targets.append(target) 34 | mask_targets = torch.from_numpy(np.stack(mask_targets)).float().to( 35 | pos_proposals.device) 36 | else: 37 | mask_targets = pos_proposals.new_zeros((0, ) + mask_size) 38 | return mask_targets 39 | -------------------------------------------------------------------------------- /mmdet/models/utils/weight_init.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch.nn as nn 3 | 4 | 5 | def xavier_init(module, gain=1, bias=0, distribution='normal'): 6 | assert distribution in ['uniform', 'normal'] 7 | if distribution == 'uniform': 8 | nn.init.xavier_uniform_(module.weight, gain=gain) 9 | else: 10 | nn.init.xavier_normal_(module.weight, gain=gain) 11 | if hasattr(module, 'bias'): 12 | nn.init.constant_(module.bias, bias) 13 | 14 | 15 | def normal_init(module, mean=0, std=1, bias=0): 16 | nn.init.normal_(module.weight, mean, std) 17 | if hasattr(module, 'bias'): 18 | nn.init.constant_(module.bias, bias) 19 | 20 | 21 | def uniform_init(module, a=0, b=1, bias=0): 22 | nn.init.uniform_(module.weight, a, b) 23 | if hasattr(module, 'bias'): 24 | nn.init.constant_(module.bias, bias) 25 | 26 | 27 | def kaiming_init(module, 28 | mode='fan_out', 29 | nonlinearity='relu', 30 | bias=0, 31 | distribution='normal'): 32 | assert distribution in ['uniform', 'normal'] 33 | if distribution == 'uniform': 34 | nn.init.kaiming_uniform_( 35 | module.weight, mode=mode, nonlinearity=nonlinearity) 36 | else: 37 | nn.init.kaiming_normal_( 38 | module.weight, mode=mode, nonlinearity=nonlinearity) 39 | if hasattr(module, 'bias'): 40 | nn.init.constant_(module.bias, bias) 41 | 42 | 43 | def bias_init_with_prob(prior_prob): 44 | """ initialize conv/fc bias value according to giving probablity""" 45 | bias_init = float(-np.log((1 - prior_prob) / prior_prob)) 46 | return bias_init 47 | -------------------------------------------------------------------------------- /cls_head_models/simple3fc.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Variable 4 | import 
torch.nn.functional as F 5 | 6 | class feat_classifier(torch.nn.Module): 7 | def __init__(self, num_classes, input_shape = [256, 7, 7], hidden_dim=1024, cls_fc_bias=True): 8 | super(feat_classifier, self).__init__() 9 | 10 | self.cls_last_dim = input_shape[0]*input_shape[1]*input_shape[2] 11 | 12 | self.fc1 = nn.Linear(self.cls_last_dim, hidden_dim) 13 | self.fc2 = nn.Linear(hidden_dim, hidden_dim) 14 | self.fc_classifier = nn.Linear(hidden_dim, num_classes, bias=cls_fc_bias) 15 | 16 | def forward(self, x): 17 | x = x.view(x.size(0), -1) 18 | x = self.fc1(x) 19 | x = F.relu(x) 20 | x = self.fc2(x) 21 | x = F.relu(x) 22 | x = self.fc_classifier(x) 23 | 24 | return x 25 | 26 | class simple3fc(torch.nn.Module): 27 | 28 | def __init__(self, num_classes=1231, cls_fc_bias=True): 29 | super(simple3fc, self).__init__() 30 | 31 | self.feat_classifier = feat_classifier(num_classes=num_classes, hidden_dim=1024, cls_fc_bias=cls_fc_bias).cuda() 32 | 33 | def forward(self, input): 34 | logits = self.feat_classifier(input) 35 | return logits 36 | 37 | 38 | 39 | # def ncm_sq_dist_bt_norm(a,b): 40 | # anorm = tf.reshape(tf.reduce_sum(tf.square(a), 1),[-1, 1]) 41 | # bnorm = tf.reshape(tf.reduce_sum(tf.square(b), 0),[1, -1]) 42 | # d = -2*tf.matmul(a,b,transpose_b=False)+anorm + bnorm 43 | # return d, anorm 44 | # 45 | # def ncm_sq_dist_bt(a,b): 46 | # d, bnorm = ncm_sq_dist_bt_norm(a,b) 47 | # return d 48 | 49 | -------------------------------------------------------------------------------- /instaboost/config.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class InstaBoostConfig: 5 | def __init__(self, action_candidate: tuple = ('normal', 'horizontal', 'skip'), 6 | action_prob: tuple = (1, 0, 0), scale: tuple = (0.8, 1.2), dx: float = 15, dy: float = 15, 7 | theta=(-1, 1), color_prob=0.5, heatmap_flag=False): 8 | """ 9 | :param action_candidate: tuple of action candidates. 'normal', 'horizontal', 'vertical', 'skip' are supported 10 | :param action_prob: tuple of corresponding action probabilities. 
Should be the same length as action_candidate 11 | :param scale: tuple of (min scale, max scale) 12 | :param dx: the maximum x-axis shift will be (instance width) / dx 13 | :param dy: the maximum y-axis shift will be (instance height) / dy 14 | :param theta: tuple of (min rotation degree, max rotation degree) 15 | :param color_prob: the probability of images for color augmentation 16 | :param heatmap_flag: whether to use heatmap guided 17 | """ 18 | assert len(action_candidate) == len(action_prob), 'Candidate & probability length mismatch' 19 | assert np.sum(action_prob) == 1, 'Probability must sum to 1' 20 | assert len(scale) == 2, 'scale should have 2 items (min scale, max scale)' 21 | assert len(theta) == 2, 'theta should have 2 items (min theta, max theta)' 22 | 23 | self.action_candidate = np.array(action_candidate) 24 | self.action_prob = np.array(action_prob) 25 | self.scale = scale 26 | self.dx = dx 27 | self.dy = dy 28 | self.theta = theta 29 | self.color_prob = color_prob 30 | self.heatmap_flag = heatmap_flag 31 | -------------------------------------------------------------------------------- /tools/draw_ft_epoch_ablation.py: -------------------------------------------------------------------------------- 1 | 2 | import matplotlib 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | import math 6 | from matplotlib.ticker import FormatStrFormatter 7 | from matplotlib import scale as mscale 8 | from matplotlib import transforms as mtransforms 9 | 10 | # z = [0,0.1,0.3,0.9,1,2,5] 11 | # z = list(range(0, 30000, 1000)) 12 | # with open('./ft_cat_epoch_ablation_for_drawing.txt', 'r') as f: 13 | with open('./ft_cal_epoch_ablation_for_drawing_compose.txt', 'r') as f: 14 | epoch_results = f.readlines() 15 | epoch_results = [i.strip().split(' ')
for i in epoch_results] 16 | epoch_results_array = np.array(epoch_results).astype(np.float) 17 | z = [0,1,2,3,4,5,6,7,8,9,10,11,13,15,20,25,30,35] 18 | # z = [0,1,2,3,4,5,6,7,8,9] 19 | 20 | eAP = epoch_results_array[:, :4].mean(axis=1).tolist() 21 | bin1 = epoch_results_array[:, 0].tolist() 22 | bin2 = epoch_results_array[:, 1].tolist() 23 | bin3 = epoch_results_array[:, 2].tolist() 24 | bin4 = epoch_results_array[:, 3].tolist() 25 | 26 | fig = plt.figure(figsize=(8,5)) 27 | ax1 = fig.add_subplot(111) 28 | 29 | matplotlib.rcParams.update({'font.size': 14}) 30 | ax1.plot(z, bin4, marker='o', linewidth=2, label='AP of class bin [1000, -)') 31 | ax1.plot(z, bin3, marker='o', linewidth=2, label='AP of class bin [100, 1000)') 32 | ax1.plot(z, bin2, marker='o', linewidth=2, label='AP of class bin [10, 100)') 33 | ax1.plot(z, bin1, marker='o', linewidth=2, label='AP of class bin (0, 10)') 34 | 35 | ax1.plot(z, eAP, linestyle='-', marker='o', linewidth=2, label='bAP') 36 | 37 | 38 | # ax1.plot([0],[15.4], 'D', color = 'green') 39 | 40 | plt.xlabel('calibration steps (k)', size=16) 41 | plt.ylabel('AP or bAP', size=16) 42 | # ax1.set_xscale('log') 43 | 44 | plt.legend( loc='best') 45 | 46 | plt.grid() 47 | plt.savefig('ablation_cal_steps.eps', format='eps', dpi=1000) 48 | plt.show() 49 | 50 | 51 | -------------------------------------------------------------------------------- /mmdet/core/evaluation/bbox_overlaps.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def bbox_overlaps(bboxes1, bboxes2, mode='iou'): 5 | """Calculate the ious between each bbox of bboxes1 and bboxes2. 6 | 7 | Args: 8 | bboxes1(ndarray): shape (n, 4) 9 | bboxes2(ndarray): shape (k, 4) 10 | mode(str): iou (intersection over union) or iof (intersection 11 | over foreground) 12 | 13 | Returns: 14 | ious(ndarray): shape (n, k) 15 | """ 16 | 17 | assert mode in ['iou', 'iof'] 18 | 19 | bboxes1 = bboxes1.astype(np.float32) 20 | bboxes2 = bboxes2.astype(np.float32) 21 | rows = bboxes1.shape[0] 22 | cols = bboxes2.shape[0] 23 | ious = np.zeros((rows, cols), dtype=np.float32) 24 | if rows * cols == 0: 25 | return ious 26 | exchange = False 27 | if bboxes1.shape[0] > bboxes2.shape[0]: 28 | bboxes1, bboxes2 = bboxes2, bboxes1 29 | ious = np.zeros((cols, rows), dtype=np.float32) 30 | exchange = True 31 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * ( 32 | bboxes1[:, 3] - bboxes1[:, 1] + 1) 33 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * ( 34 | bboxes2[:, 3] - bboxes2[:, 1] + 1) 35 | for i in range(bboxes1.shape[0]): 36 | x_start = np.maximum(bboxes1[i, 0], bboxes2[:, 0]) 37 | y_start = np.maximum(bboxes1[i, 1], bboxes2[:, 1]) 38 | x_end = np.minimum(bboxes1[i, 2], bboxes2[:, 2]) 39 | y_end = np.minimum(bboxes1[i, 3], bboxes2[:, 3]) 40 | overlap = np.maximum(x_end - x_start + 1, 0) * np.maximum( 41 | y_end - y_start + 1, 0) 42 | if mode == 'iou': 43 | union = area1[i] + area2 - overlap 44 | else: 45 | union = area1[i] if not exchange else area2 46 | ious[i, :] = overlap / union 47 | if exchange: 48 | ious = ious.T 49 | return ious 50 | -------------------------------------------------------------------------------- /mmdet/datasets/dataset_wrappers.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset 3 | 4 | from .registry import DATASETS 5 | 6 | 7 | @DATASETS.register_module 8 | class ConcatDataset(_ConcatDataset): 9 | """A wrapper of 
concatenated dataset. 10 | 11 | Same as :obj:`torch.utils.data.dataset.ConcatDataset`, but 12 | concat the group flag for image aspect ratio. 13 | 14 | Args: 15 | datasets (list[:obj:`Dataset`]): A list of datasets. 16 | """ 17 | 18 | def __init__(self, datasets): 19 | super(ConcatDataset, self).__init__(datasets) 20 | self.CLASSES = datasets[0].CLASSES 21 | if hasattr(datasets[0], 'flag'): 22 | flags = [] 23 | for i in range(0, len(datasets)): 24 | flags.append(datasets[i].flag) 25 | self.flag = np.concatenate(flags) 26 | 27 | 28 | @DATASETS.register_module 29 | class RepeatDataset(object): 30 | """A wrapper of repeated dataset. 31 | 32 | The length of repeated dataset will be `times` larger than the original 33 | dataset. This is useful when the data loading time is long but the dataset 34 | is small. Using RepeatDataset can reduce the data loading time between 35 | epochs. 36 | 37 | Args: 38 | dataset (:obj:`Dataset`): The dataset to be repeated. 39 | times (int): Repeat times. 40 | """ 41 | 42 | def __init__(self, dataset, times): 43 | self.dataset = dataset 44 | self.times = times 45 | self.CLASSES = dataset.CLASSES 46 | if hasattr(self.dataset, 'flag'): 47 | self.flag = np.tile(self.dataset.flag, times) 48 | 49 | self._ori_len = len(self.dataset) 50 | 51 | def __getitem__(self, idx): 52 | return self.dataset[idx % self._ori_len] 53 | 54 | def __len__(self): 55 | return self.times * self._ori_len 56 | -------------------------------------------------------------------------------- /mmdet/ops/sigmoid_focal_loss/sigmoid_focal_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from torch.autograd import Function 3 | from torch.autograd.function import once_differentiable 4 | 5 | from . 
import sigmoid_focal_loss_cuda 6 | 7 | 8 | class SigmoidFocalLossFunction(Function): 9 | 10 | @staticmethod 11 | def forward(ctx, input, target, gamma=2.0, alpha=0.25): 12 | ctx.save_for_backward(input, target) 13 | num_classes = input.shape[1] 14 | ctx.num_classes = num_classes 15 | ctx.gamma = gamma 16 | ctx.alpha = alpha 17 | 18 | loss = sigmoid_focal_loss_cuda.forward(input, target, num_classes, 19 | gamma, alpha) 20 | return loss 21 | 22 | @staticmethod 23 | @once_differentiable 24 | def backward(ctx, d_loss): 25 | input, target = ctx.saved_tensors 26 | num_classes = ctx.num_classes 27 | gamma = ctx.gamma 28 | alpha = ctx.alpha 29 | d_loss = d_loss.contiguous() 30 | d_input = sigmoid_focal_loss_cuda.backward(input, target, d_loss, 31 | num_classes, gamma, alpha) 32 | return d_input, None, None, None, None 33 | 34 | 35 | sigmoid_focal_loss = SigmoidFocalLossFunction.apply 36 | 37 | 38 | # TODO: remove this module 39 | class SigmoidFocalLoss(nn.Module): 40 | 41 | def __init__(self, gamma, alpha): 42 | super(SigmoidFocalLoss, self).__init__() 43 | self.gamma = gamma 44 | self.alpha = alpha 45 | 46 | def forward(self, logits, targets): 47 | assert logits.is_cuda 48 | loss = sigmoid_focal_loss(logits, targets, self.gamma, self.alpha) 49 | return loss.sum() 50 | 51 | def __repr__(self): 52 | tmpstr = self.__class__.__name__ + '(gamma={}, alpha={})'.format( 53 | self.gamma, self.alpha) 54 | return tmpstr 55 | -------------------------------------------------------------------------------- /configs/grid_rcnn/README.md: -------------------------------------------------------------------------------- 1 | # Grid R-CNN 2 | 3 | ## Introduction 4 | 5 | ``` 6 | @inproceedings{lu2019grid, 7 | title={Grid r-cnn}, 8 | author={Lu, Xin and Li, Buyu and Yue, Yuxin and Li, Quanquan and Yan, Junjie}, 9 | booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition}, 10 | year={2019} 11 | } 12 | 13 | @article{lu2019grid, 14 | title={Grid R-CNN Plus: Faster and Better}, 15 | author={Lu, Xin and Li, Buyu and Yue, Yuxin and Li, Quanquan and Yan, Junjie}, 16 | journal={arXiv preprint arXiv:1906.05688}, 17 | year={2019} 18 | } 19 | ``` 20 | 21 | ## Results and Models 22 | 23 | | Backbone | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download | 24 | |:-----------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:--------:| 25 | | R-50 | 2x | 4.8 | 1.172 | 10.9 | 40.3 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/grid_rcnn/grid_rcnn_gn_head_r50_fpn_2x_20190619-5b29cf9d.pth) | 26 | | R-101 | 2x | 6.7 | 1.214 | 10.0 | 41.7 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/grid_rcnn/grid_rcnn_gn_head_r101_fpn_2x_20190619-a4b61645.pth) | 27 | | X-101-32x4d | 2x | 8.0 | 1.335 | 8.5 | 43.0 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/grid_rcnn/grid_rcnn_gn_head_x101_32x4d_fpn_2x_20190619-0bbfd87a.pth) | 28 | | X-101-64x4d | 2x | 10.9 | 1.753 | 6.4 | 43.1 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/grid_rcnn/grid_rcnn_gn_head_x101_64x4d_fpn_2x_20190619-8f4e20bb.pth) | 29 | 30 | **Notes:** 31 | - All models are trained with 8 GPUs instead of 32 GPUs in the original paper. 32 | - The warming up lasts for 1 epoch and `2x` here indicates 25 epochs. 
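
As an illustrative sanity check (not part of the upstream README), a checkpoint downloaded from the table above can typically be loaded through the mmdet 1.x high-level inference API. The paths below are placeholders, and the snippet assumes this fork keeps the upstream `mmdet.apis` inference helpers:

```python
from mmdet.apis import init_detector, inference_detector

# Placeholder paths -- point these at the config and checkpoint you actually downloaded.
config_file = 'configs/grid_rcnn/grid_rcnn_gn_head_r50_fpn_2x.py'
checkpoint_file = 'checkpoints/grid_rcnn_gn_head_r50_fpn_2x_20190619-5b29cf9d.pth'

model = init_detector(config_file, checkpoint_file, device='cuda:0')
result = inference_detector(model, 'demo/demo.jpg')
# For a box-only detector such as Grid R-CNN, `result` is a list with one
# (n, 5) array of [x1, y1, x2, y2, score] detections per class.
print(len(result), result[0].shape)
```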
33 | -------------------------------------------------------------------------------- /tools/configs/grid_rcnn/README.md: -------------------------------------------------------------------------------- 1 | # Grid R-CNN 2 | 3 | ## Introduction 4 | 5 | ``` 6 | @inproceedings{lu2019grid, 7 | title={Grid r-cnn}, 8 | author={Lu, Xin and Li, Buyu and Yue, Yuxin and Li, Quanquan and Yan, Junjie}, 9 | booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition}, 10 | year={2019} 11 | } 12 | 13 | @article{lu2019grid, 14 | title={Grid R-CNN Plus: Faster and Better}, 15 | author={Lu, Xin and Li, Buyu and Yue, Yuxin and Li, Quanquan and Yan, Junjie}, 16 | journal={arXiv preprint arXiv:1906.05688}, 17 | year={2019} 18 | } 19 | ``` 20 | 21 | ## Results and Models 22 | 23 | | Backbone | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download | 24 | |:-----------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:--------:| 25 | | R-50 | 2x | 4.8 | 1.172 | 10.9 | 40.3 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/grid_rcnn/grid_rcnn_gn_head_r50_fpn_2x_20190619-5b29cf9d.pth) | 26 | | R-101 | 2x | 6.7 | 1.214 | 10.0 | 41.7 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/grid_rcnn/grid_rcnn_gn_head_r101_fpn_2x_20190619-a4b61645.pth) | 27 | | X-101-32x4d | 2x | 8.0 | 1.335 | 8.5 | 43.0 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/grid_rcnn/grid_rcnn_gn_head_x101_32x4d_fpn_2x_20190619-0bbfd87a.pth) | 28 | | X-101-64x4d | 2x | 10.9 | 1.753 | 6.4 | 43.1 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/grid_rcnn/grid_rcnn_gn_head_x101_64x4d_fpn_2x_20190619-8f4e20bb.pth) | 29 | 30 | **Notes:** 31 | - All models are trained with 8 GPUs instead of 32 GPUs in the original paper. 32 | - The warming up lasts for 1 epoch and `2x` here indicates 25 epochs. 33 | -------------------------------------------------------------------------------- /configs/cityscapes/README.md: -------------------------------------------------------------------------------- 1 | ## Common settings 2 | 3 | - All baselines were trained using 8 GPU with a batch size of 8 (1 images per GPU) using the [linear scaling rule](https://arxiv.org/abs/1706.02677) to scale the learning rate. 4 | - All models were trained on `cityscapes_train`, and tested on `cityscapes_val`. 5 | - 1x training schedule indicates 64 epochs which corresponds to slightly less than the 24k iterations reported in the original schedule from the [Mask R-CNN paper](https://arxiv.org/abs/1703.06870) 6 | - All pytorch-style pretrained backbones on ImageNet are from PyTorch model zoo. 7 | 8 | 9 | ## Baselines 10 | 11 | Download links and more models with different backbones and training schemes will be added to the model zoo. 
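
The linear scaling rule mentioned above simply rescales the base learning rate in proportion to the total batch size. A minimal sketch with illustrative numbers (the reference values are common mmdetection defaults, not necessarily those in the released configs):

```python
# Linear scaling rule: lr = base_lr * (total_batch_size / reference_batch_size)
reference_batch_size = 16        # e.g. 8 GPUs x 2 images per GPU
base_lr = 0.02                   # learning rate tuned for the reference batch size
total_batch_size = 8 * 1         # these baselines: 8 GPUs x 1 image per GPU
lr = base_lr * total_batch_size / reference_batch_size   # -> 0.01
```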
12 | 13 | 14 | ### Faster R-CNN 15 | 16 | | Backbone | Style | Lr schd | Scale | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download | 17 | | :-------------: | :-----: | :-----: | :---: | :------: | :-----------------: | :------------: | :----: | :------: | 18 | | R-50-FPN | pytorch | 1x | 800-1024 | 4.9 | 0.345 | 8.8 | 36.0 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/cityscapes/faster_rcnn_r50_fpn_1x_city_20190727-7b9c0534.pth) | 19 | 20 | ### Mask R-CNN 21 | 22 | | Backbone | Style | Lr schd | Scale | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP | Download | 23 | | :-------------: | :-----: | :-----: | :------: | :------: | :-----------------: | :------------: | :----: | :-----: | :------: | 24 | | R-50-FPN | pytorch | 1x | 800-1024 | 4.9 | 0.609 | 2.5 | 37.4 | 32.5 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/cityscapes/mask_rcnn_r50_fpn_1x_city_20190727-9b3c56a5.pth) | 25 | 26 | **Notes:** 27 | - In the original paper, the mask AP of Mask R-CNN R-50-FPN is 31.5. 28 | 29 | -------------------------------------------------------------------------------- /mmdet/models/utils/norm.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | norm_cfg = { 4 | # format: layer_type: (abbreviation, module) 5 | 'BN': ('bn', nn.BatchNorm2d), 6 | 'SyncBN': ('bn', nn.SyncBatchNorm), 7 | 'GN': ('gn', nn.GroupNorm), 8 | # and potentially 'SN' 9 | } 10 | 11 | 12 | def build_norm_layer(cfg, num_features, postfix=''): 13 | """ Build normalization layer 14 | 15 | Args: 16 | cfg (dict): cfg should contain: 17 | type (str): identify norm layer type. 18 | layer args: args needed to instantiate a norm layer. 19 | requires_grad (bool): [optional] whether stop gradient updates 20 | num_features (int): number of channels from input. 21 | postfix (int, str): appended into norm abbreviation to 22 | create named layer. 
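Example (illustrative usage only; the cfg dicts mirror the norm_cfg dicts used in the detector configs):

    >>> # 'BN' with postfix 1 -> name 'bn1', a BatchNorm2d over 64 channels
    >>> name, bn = build_norm_layer(dict(type='BN', requires_grad=True), 64, postfix=1)
    >>> # 'GN' requires num_groups; with no postfix the name is just 'gn'
    >>> name, gn = build_norm_layer(dict(type='GN', num_groups=32), 256)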
23 | 24 | Returns: 25 | name (str): abbreviation + postfix 26 | layer (nn.Module): created norm layer 27 | """ 28 | assert isinstance(cfg, dict) and 'type' in cfg 29 | cfg_ = cfg.copy() 30 | 31 | layer_type = cfg_.pop('type') 32 | if layer_type not in norm_cfg: 33 | raise KeyError('Unrecognized norm type {}'.format(layer_type)) 34 | else: 35 | abbr, norm_layer = norm_cfg[layer_type] 36 | if norm_layer is None: 37 | raise NotImplementedError 38 | 39 | assert isinstance(postfix, (int, str)) 40 | name = abbr + str(postfix) 41 | 42 | requires_grad = cfg_.pop('requires_grad', True) 43 | cfg_.setdefault('eps', 1e-5) 44 | if layer_type != 'GN': 45 | layer = norm_layer(num_features, **cfg_) 46 | if layer_type == 'SyncBN': 47 | layer._specify_ddp_gpu_num(1) 48 | else: 49 | assert 'num_groups' in cfg_ 50 | layer = norm_layer(num_channels=num_features, **cfg_) 51 | 52 | for param in layer.parameters(): 53 | param.requires_grad = requires_grad 54 | 55 | return name, layer 56 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/instance_balanced_pos_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from .random_sampler import RandomSampler 5 | 6 | 7 | class InstanceBalancedPosSampler(RandomSampler): 8 | 9 | def _sample_pos(self, assign_result, num_expected, **kwargs): 10 | pos_inds = torch.nonzero(assign_result.gt_inds > 0) 11 | if pos_inds.numel() != 0: 12 | pos_inds = pos_inds.squeeze(1) 13 | if pos_inds.numel() <= num_expected: 14 | return pos_inds 15 | else: 16 | unique_gt_inds = assign_result.gt_inds[pos_inds].unique() 17 | num_gts = len(unique_gt_inds) 18 | num_per_gt = int(round(num_expected / float(num_gts)) + 1) 19 | sampled_inds = [] 20 | for i in unique_gt_inds: 21 | inds = torch.nonzero(assign_result.gt_inds == i.item()) 22 | if inds.numel() != 0: 23 | inds = inds.squeeze(1) 24 | else: 25 | continue 26 | if len(inds) > num_per_gt: 27 | inds = self.random_choice(inds, num_per_gt) 28 | sampled_inds.append(inds) 29 | sampled_inds = torch.cat(sampled_inds) 30 | if len(sampled_inds) < num_expected: 31 | num_extra = num_expected - len(sampled_inds) 32 | extra_inds = np.array( 33 | list(set(pos_inds.cpu()) - set(sampled_inds.cpu()))) 34 | if len(extra_inds) > num_extra: 35 | extra_inds = self.random_choice(extra_inds, num_extra) 36 | extra_inds = torch.from_numpy(extra_inds).to( 37 | assign_result.gt_inds.device).long() 38 | sampled_inds = torch.cat([sampled_inds, extra_inds]) 39 | elif len(sampled_inds) > num_expected: 40 | sampled_inds = self.random_choice(sampled_inds, num_expected) 41 | return sampled_inds 42 | -------------------------------------------------------------------------------- /mmdet/models/detectors/fast_rcnn.py: -------------------------------------------------------------------------------- 1 | from ..registry import DETECTORS 2 | from .two_stage import TwoStageDetector 3 | 4 | 5 | @DETECTORS.register_module 6 | class FastRCNN(TwoStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | bbox_roi_extractor, 11 | bbox_head, 12 | train_cfg, 13 | test_cfg, 14 | neck=None, 15 | shared_head=None, 16 | mask_roi_extractor=None, 17 | mask_head=None, 18 | pretrained=None): 19 | super(FastRCNN, self).__init__( 20 | backbone=backbone, 21 | neck=neck, 22 | shared_head=shared_head, 23 | bbox_roi_extractor=bbox_roi_extractor, 24 | bbox_head=bbox_head, 25 | train_cfg=train_cfg, 26 | test_cfg=test_cfg, 27 | mask_roi_extractor=mask_roi_extractor, 
28 | mask_head=mask_head, 29 | pretrained=pretrained) 30 | 31 | def forward_test(self, imgs, img_metas, proposals, **kwargs): 32 | for var, name in [(imgs, 'imgs'), (img_metas, 'img_metas')]: 33 | if not isinstance(var, list): 34 | raise TypeError('{} must be a list, but got {}'.format( 35 | name, type(var))) 36 | 37 | num_augs = len(imgs) 38 | if num_augs != len(img_metas): 39 | raise ValueError( 40 | 'num of augmentations ({}) != num of image meta ({})'.format( 41 | len(imgs), len(img_metas))) 42 | # TODO: remove the restriction of imgs_per_gpu == 1 when prepared 43 | imgs_per_gpu = imgs[0].size(0) 44 | assert imgs_per_gpu == 1 45 | 46 | if num_augs == 1: 47 | return self.simple_test(imgs[0], img_metas[0], proposals[0], 48 | **kwargs) 49 | else: 50 | return self.aug_test(imgs, img_metas, proposals, **kwargs) 51 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | # cython generated cpp 107 | mmdet/ops/nms/src/soft_nms_cpu.cpp 108 | mmdet/version.py 109 | data 110 | .vscode 111 | .idea 112 | 113 | # custom 114 | *.pkl 115 | *.pkl.json 116 | *.log.json 117 | work_dirs/ 118 | 119 | # Pytorch 120 | *.pth 121 | -------------------------------------------------------------------------------- /tools/voc_eval.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser 2 | 3 | import mmcv 4 | import numpy as np 5 | 6 | from mmdet import datasets 7 | from mmdet.core import eval_map 8 | 9 | 10 | def voc_eval(result_file, dataset, iou_thr=0.5): 11 | det_results = mmcv.load(result_file) 12 | gt_bboxes = [] 13 | gt_labels = [] 14 | gt_ignore = [] 15 | for i in range(len(dataset)): 16 | ann = dataset.get_ann_info(i) 17 | bboxes = 
ann['bboxes'] 18 | labels = ann['labels'] 19 | if 'bboxes_ignore' in ann: 20 | ignore = np.concatenate([ 21 | np.zeros(bboxes.shape[0], dtype=np.bool), 22 | np.ones(ann['bboxes_ignore'].shape[0], dtype=np.bool) 23 | ]) 24 | gt_ignore.append(ignore) 25 | bboxes = np.vstack([bboxes, ann['bboxes_ignore']]) 26 | labels = np.concatenate([labels, ann['labels_ignore']]) 27 | gt_bboxes.append(bboxes) 28 | gt_labels.append(labels) 29 | if not gt_ignore: 30 | gt_ignore = None 31 | if hasattr(dataset, 'year') and dataset.year == 2007: 32 | dataset_name = 'voc07' 33 | else: 34 | dataset_name = dataset.CLASSES 35 | eval_map( 36 | det_results, 37 | gt_bboxes, 38 | gt_labels, 39 | gt_ignore=gt_ignore, 40 | scale_ranges=None, 41 | iou_thr=iou_thr, 42 | dataset=dataset_name, 43 | print_summary=True) 44 | 45 | 46 | def main(): 47 | parser = ArgumentParser(description='VOC Evaluation') 48 | parser.add_argument('result', help='result file path') 49 | parser.add_argument('config', help='config file path') 50 | parser.add_argument( 51 | '--iou-thr', 52 | type=float, 53 | default=0.5, 54 | help='IoU threshold for evaluation') 55 | args = parser.parse_args() 56 | cfg = mmcv.Config.fromfile(args.config) 57 | test_dataset = mmcv.runner.obj_from_dict(cfg.data.test, datasets) 58 | voc_eval(args.result, test_dataset, args.iou_thr) 59 | 60 | 61 | if __name__ == '__main__': 62 | main() 63 | -------------------------------------------------------------------------------- /configs/libra_rcnn/README.md: -------------------------------------------------------------------------------- 1 | # Libra R-CNN: Towards Balanced Learning for Object Detection 2 | 3 | ## Introduction 4 | 5 | We provide config files to reproduce the results in the CVPR 2019 paper [Libra R-CNN](https://arxiv.org/pdf/1904.02701.pdf). 6 | 7 | ``` 8 | @inproceedings{pang2019libra, 9 | title={Libra R-CNN: Towards Balanced Learning for Object Detection}, 10 | author={Pang, Jiangmiao and Chen, Kai and Shi, Jianping and Feng, Huajun and Ouyang, Wanli and Dahua Lin}, 11 | booktitle={IEEE Conference on Computer Vision and Pattern Recognition}, 12 | year={2019} 13 | } 14 | ``` 15 | 16 | ## Results and models 17 | 18 | The results on COCO 2017val are shown in the below table.
(results on test-dev are usually slightly higher than val) 19 | 20 | | Architecture | Backbone | Style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download | 21 | |:---------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------:| 22 | | Faster R-CNN | R-50-FPN | pytorch | 1x | 4.2 | 0.375 | 12.0 | 38.5 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/libra_rcnn/libra_faster_rcnn_r50_fpn_1x_20190610-bf0ea559.pth) | 23 | | Fast R-CNN | R-50-FPN | pytorch | 1x | 3.7 | 0.272 | 16.3 | 38.5 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/libra_rcnn/libra_fast_rcnn_r50_fpn_1x_20190525-a43f88b5.pth) | 24 | | Faster R-CNN | R-101-FPN | pytorch | 1x | 6.0 | 0.495 | 10.4 | 40.3 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/libra_rcnn/libra_faster_rcnn_r101_fpn_1x_20190525-94e94051.pth) | 25 | | Faster R-CNN | X-101-64x4d-FPN | pytorch | 1x | 10.1 | 1.050 | 6.8 | 42.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/libra_rcnn/libra_faster_rcnn_x101_64x4d_fpn_1x_20190525-359c134a.pth) | 26 | | RetinaNet | R-50-FPN | pytorch | 1x | 3.7 | 0.328 | 11.8 | 37.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/libra_rcnn/libra_retinanet_r50_fpn_1x_20190525-ead2a6bb.pth) | 27 | -------------------------------------------------------------------------------- /tools/configs/libra_rcnn/README.md: -------------------------------------------------------------------------------- 1 | # Libra R-CNN: Towards Balanced Learning for Object Detection 2 | 3 | ## Introduction 4 | 5 | We provide config files to reproduce the results in the CVPR 2019 paper [Libra R-CNN](https://arxiv.org/pdf/1904.02701.pdf). 6 | 7 | ``` 8 | @inproceedings{pang2019libra, 9 | title={Libra R-CNN: Towards Balanced Learning for Object Detection}, 10 | author={Pang, Jiangmiao and Chen, Kai and Shi, Jianping and Feng, Huajun and Ouyang, Wanli and Dahua Lin}, 11 | booktitle={IEEE Conference on Computer Vision and Pattern Recognition}, 12 | year={2019} 13 | } 14 | ``` 15 | 16 | ## Results and models 17 | 18 | The results on COCO 2017val are shown in the below table. 
(results on test-dev are usually slightly higher than val) 19 | 20 | | Architecture | Backbone | Style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download | 21 | |:---------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------:| 22 | | Faster R-CNN | R-50-FPN | pytorch | 1x | 4.2 | 0.375 | 12.0 | 38.5 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/libra_rcnn/libra_faster_rcnn_r50_fpn_1x_20190610-bf0ea559.pth) | 23 | | Fast R-CNN | R-50-FPN | pytorch | 1x | 3.7 | 0.272 | 16.3 | 38.5 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/libra_rcnn/libra_fast_rcnn_r50_fpn_1x_20190525-a43f88b5.pth) | 24 | | Faster R-CNN | R-101-FPN | pytorch | 1x | 6.0 | 0.495 | 10.4 | 40.3 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/libra_rcnn/libra_faster_rcnn_r101_fpn_1x_20190525-94e94051.pth) | 25 | | Faster R-CNN | X-101-64x4d-FPN | pytorch | 1x | 10.1 | 1.050 | 6.8 | 42.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/libra_rcnn/libra_faster_rcnn_x101_64x4d_fpn_1x_20190525-359c134a.pth) | 26 | | RetinaNet | R-50-FPN | pytorch | 1x | 3.7 | 0.328 | 11.8 | 37.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/libra_rcnn/libra_retinanet_r50_fpn_1x_20190525-ead2a6bb.pth) | 27 | -------------------------------------------------------------------------------- /mmdet/models/losses/balanced_l1_loss.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | 5 | from ..registry import LOSSES 6 | from .utils import weighted_loss 7 | 8 | 9 | @weighted_loss 10 | def balanced_l1_loss(pred, 11 | target, 12 | beta=1.0, 13 | alpha=0.5, 14 | gamma=1.5, 15 | reduction='mean'): 16 | assert beta > 0 17 | assert pred.size() == target.size() and target.numel() > 0 18 | 19 | diff = torch.abs(pred - target) 20 | b = np.e**(gamma / alpha) - 1 21 | loss = torch.where( 22 | diff < beta, alpha / b * 23 | (b * diff + 1) * torch.log(b * diff / beta + 1) - alpha * diff, 24 | gamma * diff + gamma / b - alpha * beta) 25 | 26 | return loss 27 | 28 | 29 | @LOSSES.register_module 30 | class BalancedL1Loss(nn.Module): 31 | """Balanced L1 Loss 32 | 33 | arXiv: https://arxiv.org/pdf/1904.02701.pdf (CVPR 2019) 34 | """ 35 | 36 | def __init__(self, 37 | alpha=0.5, 38 | gamma=1.5, 39 | beta=1.0, 40 | reduction='mean', 41 | loss_weight=1.0): 42 | super(BalancedL1Loss, self).__init__() 43 | self.alpha = alpha 44 | self.gamma = gamma 45 | self.beta = beta 46 | self.reduction = reduction 47 | self.loss_weight = loss_weight 48 | 49 | def forward(self, 50 | pred, 51 | target, 52 | weight=None, 53 | avg_factor=None, 54 | reduction_override=None, 55 | **kwargs): 56 | assert reduction_override in (None, 'none', 'mean', 'sum') 57 | reduction = ( 58 | reduction_override if reduction_override else self.reduction) 59 | loss_bbox = self.loss_weight * balanced_l1_loss( 60 | pred, 61 | target, 62 | weight, 63 | alpha=self.alpha, 64 | gamma=self.gamma, 65 | beta=self.beta, 66 | reduction=reduction, 67 | avg_factor=avg_factor, 68 | **kwargs) 69 | return loss_bbox 70 | -------------------------------------------------------------------------------- /mmdet/models/detectors/mask_rcnn.py: -------------------------------------------------------------------------------- 1 | from ..registry import DETECTORS 2 | from .two_stage import 
TwoStageDetector as TwoStageDetector_normal 3 | from .two_stage_calibration import TwoStageDetector as TwoStageDetector_calibration 4 | 5 | 6 | @DETECTORS.register_module 7 | class MaskRCNN_normal(TwoStageDetector_normal): 8 | 9 | def __init__(self, 10 | backbone, 11 | rpn_head, 12 | bbox_roi_extractor, 13 | bbox_head, 14 | mask_roi_extractor, 15 | mask_head, 16 | train_cfg, 17 | test_cfg, 18 | neck=None, 19 | shared_head=None, 20 | pretrained=None): 21 | super(MaskRCNN_normal, self).__init__( 22 | backbone=backbone, 23 | neck=neck, 24 | shared_head=shared_head, 25 | rpn_head=rpn_head, 26 | bbox_roi_extractor=bbox_roi_extractor, 27 | bbox_head=bbox_head, 28 | mask_roi_extractor=mask_roi_extractor, 29 | mask_head=mask_head, 30 | train_cfg=train_cfg, 31 | test_cfg=test_cfg, 32 | pretrained=pretrained) 33 | 34 | @DETECTORS.register_module 35 | class MaskRCNN_calibration(TwoStageDetector_calibration): 36 | 37 | def __init__(self, 38 | backbone, 39 | rpn_head, 40 | bbox_roi_extractor, 41 | bbox_head, 42 | mask_roi_extractor, 43 | mask_head, 44 | train_cfg, 45 | test_cfg, 46 | neck=None, 47 | shared_head=None, 48 | pretrained=None): 49 | super(MaskRCNN_calibration, self).__init__( 50 | backbone=backbone, 51 | neck=neck, 52 | shared_head=shared_head, 53 | rpn_head=rpn_head, 54 | bbox_roi_extractor=bbox_roi_extractor, 55 | bbox_head=bbox_head, 56 | mask_roi_extractor=mask_roi_extractor, 57 | mask_head=mask_head, 58 | train_cfg=train_cfg, 59 | test_cfg=test_cfg, 60 | pretrained=pretrained) -------------------------------------------------------------------------------- /mmdet/core/utils/dist_utils.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import torch.distributed as dist 4 | from mmcv.runner import OptimizerHook 5 | from torch._utils import (_flatten_dense_tensors, _take_tensors, 6 | _unflatten_dense_tensors) 7 | 8 | 9 | def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1): 10 | if bucket_size_mb > 0: 11 | bucket_size_bytes = bucket_size_mb * 1024 * 1024 12 | buckets = _take_tensors(tensors, bucket_size_bytes) 13 | else: 14 | buckets = OrderedDict() 15 | for tensor in tensors: 16 | tp = tensor.type() 17 | if tp not in buckets: 18 | buckets[tp] = [] 19 | buckets[tp].append(tensor) 20 | buckets = buckets.values() 21 | 22 | for bucket in buckets: 23 | flat_tensors = _flatten_dense_tensors(bucket) 24 | dist.all_reduce(flat_tensors) 25 | flat_tensors.div_(world_size) 26 | for tensor, synced in zip( 27 | bucket, _unflatten_dense_tensors(flat_tensors, bucket)): 28 | tensor.copy_(synced) 29 | 30 | 31 | def allreduce_grads(params, coalesce=True, bucket_size_mb=-1): 32 | grads = [ 33 | param.grad.data for param in params 34 | if param.requires_grad and param.grad is not None 35 | ] 36 | world_size = dist.get_world_size() 37 | if coalesce: 38 | _allreduce_coalesced(grads, world_size, bucket_size_mb) 39 | else: 40 | for tensor in grads: 41 | dist.all_reduce(tensor.div_(world_size)) 42 | 43 | 44 | class DistOptimizerHook(OptimizerHook): 45 | 46 | def __init__(self, grad_clip=None, coalesce=True, bucket_size_mb=-1): 47 | self.grad_clip = grad_clip 48 | self.coalesce = coalesce 49 | self.bucket_size_mb = bucket_size_mb 50 | 51 | def after_train_iter(self, runner): 52 | runner.optimizer.zero_grad() 53 | runner.outputs['loss'].backward() 54 | allreduce_grads(runner.model.parameters(), self.coalesce, 55 | self.bucket_size_mb) 56 | if self.grad_clip is not None: 57 | self.clip_grads(runner.model.parameters()) 58 | 
runner.optimizer.step() 59 | -------------------------------------------------------------------------------- /configs/gn/README.md: -------------------------------------------------------------------------------- 1 | # Group Normalization 2 | 3 | ## Introduction 4 | 5 | ``` 6 | @inproceedings{wu2018group, 7 | title={Group Normalization}, 8 | author={Wu, Yuxin and He, Kaiming}, 9 | booktitle={Proceedings of the European Conference on Computer Vision (ECCV)}, 10 | year={2018} 11 | } 12 | ``` 13 | 14 | ## Results and Models 15 | 16 | | Backbone | model | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP | Download | 17 | |:-------------:|:----------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------:| 18 | | R-50-FPN (d) | Mask R-CNN | 2x | 7.2 | 0.806 | 5.4 | 39.8 | 36.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r50_fpn_gn_2x_20180113-86832cf2.pth) | 19 | | R-50-FPN (d) | Mask R-CNN | 3x | 7.2 | 0.806 | 5.4 | 40.1 | 36.4 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r50_fpn_gn_3x_20180113-8e82f48d.pth) | 20 | | R-101-FPN (d) | Mask R-CNN | 2x | 9.9 | 0.970 | 4.8 | 41.5 | 37.0 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r101_fpn_gn_2x_20180113-9598649c.pth) | 21 | | R-101-FPN (d) | Mask R-CNN | 3x | 9.9 | 0.970 | 4.8 | 41.6 | 37.3 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r101_fpn_gn_3x_20180113-a14ffb96.pth) | 22 | | R-50-FPN (c) | Mask R-CNN | 2x | 7.2 | 0.806 | 5.4 | 39.7 | 35.9 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r50_fpn_gn_contrib_2x_20180113-ec93305c.pth) | 23 | | R-50-FPN (c) | Mask R-CNN | 3x | 7.2 | 0.806 | 5.4 | 40.0 | 36.2 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r50_fpn_gn_contrib_3x_20180113-9d230cab.pth) | 24 | 25 | **Notes:** 26 | - (d) means pretrained model converted from Detectron, and (c) means the contributed model pretrained by [@thangvubk](https://github.com/thangvubk). 27 | - The `3x` schedule is epoch [28, 34, 36]. 
28 | - **Memory, Train/Inf time is outdated.** -------------------------------------------------------------------------------- /tools/configs/gn/README.md: -------------------------------------------------------------------------------- 1 | # Group Normalization 2 | 3 | ## Introduction 4 | 5 | ``` 6 | @inproceedings{wu2018group, 7 | title={Group Normalization}, 8 | author={Wu, Yuxin and He, Kaiming}, 9 | booktitle={Proceedings of the European Conference on Computer Vision (ECCV)}, 10 | year={2018} 11 | } 12 | ``` 13 | 14 | ## Results and Models 15 | 16 | | Backbone | model | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP | Download | 17 | |:-------------:|:----------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------:| 18 | | R-50-FPN (d) | Mask R-CNN | 2x | 7.2 | 0.806 | 5.4 | 39.8 | 36.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r50_fpn_gn_2x_20180113-86832cf2.pth) | 19 | | R-50-FPN (d) | Mask R-CNN | 3x | 7.2 | 0.806 | 5.4 | 40.1 | 36.4 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r50_fpn_gn_3x_20180113-8e82f48d.pth) | 20 | | R-101-FPN (d) | Mask R-CNN | 2x | 9.9 | 0.970 | 4.8 | 41.5 | 37.0 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r101_fpn_gn_2x_20180113-9598649c.pth) | 21 | | R-101-FPN (d) | Mask R-CNN | 3x | 9.9 | 0.970 | 4.8 | 41.6 | 37.3 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r101_fpn_gn_3x_20180113-a14ffb96.pth) | 22 | | R-50-FPN (c) | Mask R-CNN | 2x | 7.2 | 0.806 | 5.4 | 39.7 | 35.9 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r50_fpn_gn_contrib_2x_20180113-ec93305c.pth) | 23 | | R-50-FPN (c) | Mask R-CNN | 3x | 7.2 | 0.806 | 5.4 | 40.0 | 36.2 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/gn/mask_rcnn_r50_fpn_gn_contrib_3x_20180113-9d230cab.pth) | 24 | 25 | **Notes:** 26 | - (d) means pretrained model converted from Detectron, and (c) means the contributed model pretrained by [@thangvubk](https://github.com/thangvubk). 27 | - The `3x` schedule is epoch [28, 34, 36]. 
28 | - **Memory, Train/Inf time is outdated.** -------------------------------------------------------------------------------- /mmdet/ops/sigmoid_focal_loss/src/sigmoid_focal_loss.cpp: -------------------------------------------------------------------------------- 1 | // modify from 2 | // https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/csrc/SigmoidFocalLoss.h 3 | #include 4 | 5 | at::Tensor SigmoidFocalLoss_forward_cuda(const at::Tensor &logits, 6 | const at::Tensor &targets, 7 | const int num_classes, 8 | const float gamma, const float alpha); 9 | 10 | at::Tensor SigmoidFocalLoss_backward_cuda(const at::Tensor &logits, 11 | const at::Tensor &targets, 12 | const at::Tensor &d_losses, 13 | const int num_classes, 14 | const float gamma, const float alpha); 15 | 16 | // Interface for Python 17 | at::Tensor SigmoidFocalLoss_forward(const at::Tensor &logits, 18 | const at::Tensor &targets, 19 | const int num_classes, const float gamma, 20 | const float alpha) { 21 | if (logits.type().is_cuda()) { 22 | return SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma, 23 | alpha); 24 | } 25 | AT_ERROR("SigmoidFocalLoss is not implemented on the CPU"); 26 | } 27 | 28 | at::Tensor SigmoidFocalLoss_backward(const at::Tensor &logits, 29 | const at::Tensor &targets, 30 | const at::Tensor &d_losses, 31 | const int num_classes, const float gamma, 32 | const float alpha) { 33 | if (logits.type().is_cuda()) { 34 | return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses, 35 | num_classes, gamma, alpha); 36 | } 37 | AT_ERROR("SigmoidFocalLoss is not implemented on the CPU"); 38 | } 39 | 40 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 41 | m.def("forward", &SigmoidFocalLoss_forward, 42 | "SigmoidFocalLoss forward (CUDA)"); 43 | m.def("backward", &SigmoidFocalLoss_backward, 44 | "SigmoidFocalLoss backward (CUDA)"); 45 | } 46 | -------------------------------------------------------------------------------- /mmdet/apis/env.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import random 4 | import subprocess 5 | 6 | import numpy as np 7 | import torch 8 | import torch.distributed as dist 9 | import torch.multiprocessing as mp 10 | from mmcv.runner import get_dist_info 11 | 12 | 13 | def init_dist(launcher, backend='nccl', **kwargs): 14 | if mp.get_start_method(allow_none=True) is None: 15 | mp.set_start_method('spawn') 16 | if launcher == 'pytorch': 17 | _init_dist_pytorch(backend, **kwargs) 18 | elif launcher == 'mpi': 19 | _init_dist_mpi(backend, **kwargs) 20 | elif launcher == 'slurm': 21 | _init_dist_slurm(backend, **kwargs) 22 | else: 23 | raise ValueError('Invalid launcher type: {}'.format(launcher)) 24 | 25 | 26 | def _init_dist_pytorch(backend, **kwargs): 27 | # TODO: use local_rank instead of rank % num_gpus 28 | rank = int(os.environ['RANK']) 29 | num_gpus = torch.cuda.device_count() 30 | torch.cuda.set_device(rank % num_gpus) 31 | dist.init_process_group(backend=backend, **kwargs) 32 | 33 | 34 | def _init_dist_mpi(backend, **kwargs): 35 | raise NotImplementedError 36 | 37 | 38 | def _init_dist_slurm(backend, port=29500, **kwargs): 39 | proc_id = int(os.environ['SLURM_PROCID']) 40 | ntasks = int(os.environ['SLURM_NTASKS']) 41 | node_list = os.environ['SLURM_NODELIST'] 42 | num_gpus = torch.cuda.device_count() 43 | torch.cuda.set_device(proc_id % num_gpus) 44 | addr = subprocess.getoutput( 45 | 'scontrol show hostname {} | head -n1'.format(node_list)) 46 | 
os.environ['MASTER_PORT'] = str(port) 47 | os.environ['MASTER_ADDR'] = addr 48 | os.environ['WORLD_SIZE'] = str(ntasks) 49 | os.environ['RANK'] = str(proc_id) 50 | dist.init_process_group(backend=backend) 51 | 52 | 53 | def set_random_seed(seed): 54 | random.seed(seed) 55 | np.random.seed(seed) 56 | torch.manual_seed(seed) 57 | torch.cuda.manual_seed_all(seed) 58 | 59 | 60 | def get_root_logger(log_level=logging.INFO): 61 | logger = logging.getLogger() 62 | if not logger.hasHandlers(): 63 | logging.basicConfig( 64 | format='%(asctime)s - %(levelname)s - %(message)s', 65 | level=log_level) 66 | rank, _ = get_dist_info() 67 | if rank != 0: 68 | logger.setLevel('ERROR') 69 | return logger 70 | -------------------------------------------------------------------------------- /tools/draw_comparison_head_design_choices.py: -------------------------------------------------------------------------------- 1 | import matplotlib 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | 5 | 6 | labels = ['AP on bin (0,10)', 'AP on bin (10,100)'] 7 | baseline = [0.0, 13.3] 8 | fc2_ncm = [6.0, 18.9] 9 | fc2 = [8.6, 22.0] 10 | fc3_rand = [9.1, 18.8] 11 | fc3_ft = [13.2, 23.1] 12 | 13 | x = np.arange(len(labels)) # the label locations 14 | width = 0.15 # the width of the bars 15 | 16 | matplotlib.rcParams.update({'font.size': 16}) 17 | # plt.rc('ytick', labelsize=10) 18 | 19 | fig, ax = plt.subplots() 20 | # rects1 = ax.bar(x - width, baseline, width, label='baseline') 21 | # rects2 = ax.bar(x - width/2, fc2_ncm, width, label='2fc_ncm') 22 | # rects3 = ax.bar(x , baseline, fc2, label='baseline') 23 | # rects4 = ax.bar(x + width/2, fc3_rand, width, label='2fc_ncm') 24 | # rects5 = ax.bar(x + width, fc3_ft, width, label='baseline') 25 | 26 | # Set position of bar on X axis 27 | r1 = np.arange(len(labels)) 28 | r2 = [x + width for x in r1] 29 | r3 = [x + width for x in r2] 30 | r4 = [x + width for x in r3] 31 | r5 = [x + width for x in r4] 32 | 33 | # Make the plot 34 | rects1 = ax.bar(r1, baseline, color='#7f6d5f', width=width, edgecolor='white', label='baseline') 35 | rects2 = ax.bar(r2, fc2_ncm, color='#557f2d', width=width, edgecolor='white', label='2fc_ncm') 36 | rects3 = ax.bar(r3, fc2, width=width, edgecolor='white', label='2fc_rand') 37 | rects4 = ax.bar(r4, fc3_rand, width=width, edgecolor='white', label='3fc_rand') 38 | rects5 = ax.bar(r5, fc3_ft, width=width, edgecolor='white', label='3fc_ft') 39 | 40 | ax.set_ylim([0,25]) 41 | ax.set_xticks([0.3, 1.3]) 42 | ax.set_xticklabels(labels) 43 | ax.legend() 44 | 45 | 46 | def autolabel(rects): 47 | """Attach a text label above each bar in *rects*, displaying its height.""" 48 | for rect in rects: 49 | height = rect.get_height() 50 | ax.annotate('{}'.format(height), 51 | xy=(rect.get_x() + rect.get_width() / 2, height), 52 | xytext=(0, 3), # 3 points vertical offset 53 | textcoords="offset points", 54 | ha='center', va='bottom') 55 | 56 | 57 | autolabel(rects1) 58 | autolabel(rects2) 59 | autolabel(rects3) 60 | autolabel(rects4) 61 | autolabel(rects5) 62 | 63 | fig.tight_layout() 64 | plt.savefig('head_design_choices.eps', format='eps', dpi=1000) 65 | plt.show() 66 | -------------------------------------------------------------------------------- /configs/ms_rcnn/README.md: -------------------------------------------------------------------------------- 1 | # Mask Scoring R-CNN 2 | 3 | ## Introduction 4 | 5 | ``` 6 | @inproceedings{huang2019msrcnn, 7 | title={Mask Scoring R-CNN}, 8 | author={Zhaojin Huang and Lichao Huang and Yongchao Gong and Chang 
Huang and Xinggang Wang}, 9 | booktitle={IEEE Conference on Computer Vision and Pattern Recognition}, 10 | year={2019}, 11 | } 12 | ``` 13 | 14 | ## Results and Models 15 | 16 | | Backbone | style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP | Download | 17 | |:-------------:|:----------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------:| 18 | | R-50-FPN | caffe | 1x | 4.3 | 0.537 | 10.1 | 37.4 | 35.5 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ms-rcnn/ms_rcnn_r50_caffe_fpn_1x_20190624-619934b5.pth) | 19 | | R-50-FPN | caffe | 2x | - | - | - | 38.2 | 35.9 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ms-rcnn/ms_rcnn_r50_caffe_fpn_2x_20190525-a07be31e.pth) | 20 | | R-101-FPN | caffe | 1x | 6.2 | 0.682 | 9.1 | 39.8 | 37.2 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ms-rcnn/ms_rcnn_r101_caffe_fpn_1x_20190624-677a5548.pth) | 21 | | R-101-FPN | caffe | 2x | - | - | - | 40.7 | 37.8 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ms-rcnn/ms_rcnn_r101_caffe_fpn_2x_20190525-4aee1528.pth) | 22 | | R-X101-32x4d | pytorch | 2x | 7.6 | 0.844 | 8.0 | 41.7 | 38.5 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ms-rcnn/ms_rcnn_x101_32x4d_fpn_2x_20190628-ab454d07.pth) | 23 | | R-X101-64x4d | pytorch | 1x | 10.5 | 1.214 | 6.4 | 42.0 | 39.1 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ms-rcnn/ms_rcnn_x101_64x4d_fpn_1x_20190628-dec32bda.pth) | 24 | | R-X101-64x4d | pytorch | 2x | - | - | - | 42.2 | 38.9 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ms-rcnn/ms_rcnn_x101_64x4d_fpn_2x_20190525-c044c25a.pth) | 25 | -------------------------------------------------------------------------------- /tools/configs/ms_rcnn/README.md: -------------------------------------------------------------------------------- 1 | # Mask Scoring R-CNN 2 | 3 | ## Introduction 4 | 5 | ``` 6 | @inproceedings{huang2019msrcnn, 7 | title={Mask Scoring R-CNN}, 8 | author={Zhaojin Huang and Lichao Huang and Yongchao Gong and Chang Huang and Xinggang Wang}, 9 | booktitle={IEEE Conference on Computer Vision and Pattern Recognition}, 10 | year={2019}, 11 | } 12 | ``` 13 | 14 | ## Results and Models 15 | 16 | | Backbone | style | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | mask AP | Download | 17 | |:-------------:|:----------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:-------:|:--------:| 18 | | R-50-FPN | caffe | 1x | 4.3 | 0.537 | 10.1 | 37.4 | 35.5 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ms-rcnn/ms_rcnn_r50_caffe_fpn_1x_20190624-619934b5.pth) | 19 | | R-50-FPN | caffe | 2x | - | - | - | 38.2 | 35.9 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ms-rcnn/ms_rcnn_r50_caffe_fpn_2x_20190525-a07be31e.pth) | 20 | | R-101-FPN | caffe | 1x | 6.2 | 0.682 | 9.1 | 39.8 | 37.2 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ms-rcnn/ms_rcnn_r101_caffe_fpn_1x_20190624-677a5548.pth) | 21 | | R-101-FPN | caffe | 2x | - | - | - | 40.7 | 37.8 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ms-rcnn/ms_rcnn_r101_caffe_fpn_2x_20190525-4aee1528.pth) | 22 | | R-X101-32x4d | pytorch | 2x | 7.6 | 0.844 | 8.0 | 41.7 | 38.5 | 
[model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ms-rcnn/ms_rcnn_x101_32x4d_fpn_2x_20190628-ab454d07.pth) | 23 | | R-X101-64x4d | pytorch | 1x | 10.5 | 1.214 | 6.4 | 42.0 | 39.1 | [model](https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/ms-rcnn/ms_rcnn_x101_64x4d_fpn_1x_20190628-dec32bda.pth) | 24 | | R-X101-64x4d | pytorch | 2x | - | - | - | 42.2 | 38.9 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/ms-rcnn/ms_rcnn_x101_64x4d_fpn_2x_20190525-c044c25a.pth) | 25 | -------------------------------------------------------------------------------- /tools/draw_eAP_sensitivity_eap.py: -------------------------------------------------------------------------------- 1 | 2 | import matplotlib 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | import math 6 | from matplotlib.ticker import FormatStrFormatter 7 | from matplotlib import scale as mscale 8 | from matplotlib import transforms as mtransforms 9 | 10 | 11 | epoch_results_ours = [0.2055814944259203, 0.2070789429222732, 0.21312734490544888, 0.21330987617112582, 0.21681729633603414, 0.21868618147095492, 0.22139531986572789, 0.2207302553866501] 12 | epoch_results_imgsample = [0.20085911273955764, 0.20221658896725567, 0.2056222469806897, 0.2051128644774435, 0.208011478430485, 0.2117306883053619, 0.21469247380489614, 0.21509586079595858] 13 | z = [8,9,10,11,12,13,14,15] 14 | 15 | # fig = plt.figure(figsize=(8,5)) 16 | fig = plt.figure() 17 | ax1 = fig.add_subplot(111) 18 | 19 | matplotlib.rcParams.update({'font.size': 16}) 20 | ax1.plot(z, epoch_results_ours, marker='o', linewidth=2, color='darkorange', label='r50-ours') 21 | ax1.plot(z, epoch_results_imgsample, marker='o', linewidth=2, color='blue', label='r50-IS') 22 | 23 | 24 | 25 | # ax1.plot([0],[15.4], 'D', color = 'green') 26 | 27 | plt.xlabel('bAP f value (m=3)', size=16) 28 | plt.ylabel('bAP', size=16) 29 | # ax1.set_xscale('log') 30 | 31 | plt.legend( loc='best') 32 | 33 | plt.grid() 34 | plt.savefig('eap_sensitivity_eap_f.eps', format='eps', dpi=1000) 35 | plt.show() 36 | 37 | ### eap m value 38 | # [0.19348653350208908, 0.20481586368658788, 0.20629655179889703] 39 | # [0.1960601492969575, 0.20901640943344768, 0.211437803122666] 40 | 41 | epoch_results_imgsample = [0.1905114721878586, 0.2056222469806897, 0.2249353584074827] 42 | epoch_results_ours = [0.19661301291002395, 0.21312734490544888, 0.2335877606226477] 43 | 44 | z = [2,3,4] 45 | 46 | fig = plt.figure() 47 | ax1 = fig.add_subplot(111) 48 | 49 | matplotlib.rcParams.update({'font.size': 16}) 50 | ax1.plot(z, epoch_results_ours, marker='o', linewidth=3, color='darkorange', label='r50-ours') 51 | ax1.plot(z, epoch_results_imgsample, marker='o', linewidth=3, color='blue', label='r50-IS') 52 | 53 | plt.xticks(np.arange(2, 5, step=1)) 54 | 55 | # ax1.plot([0],[15.4], 'D', color = 'green') 56 | 57 | plt.xlabel('bAP m value (f=10)', size=16) 58 | plt.ylabel('bAP', size=16) 59 | # ax1.set_xscale('log') 60 | 61 | plt.legend( loc='best') 62 | 63 | plt.grid() 64 | plt.savefig('eap_sensitivity_eap_m.eps', format='eps', dpi=1000) 65 | plt.show() -------------------------------------------------------------------------------- /mmdet/core/bbox/geometry.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False): 5 | """Calculate overlap between two set of bboxes. 
6 | 7 | If ``is_aligned`` is ``False``, then calculate the ious between each bbox 8 | of bboxes1 and bboxes2, otherwise the ious between each aligned pair of 9 | bboxes1 and bboxes2. 10 | 11 | Args: 12 | bboxes1 (Tensor): shape (m, 4) 13 | bboxes2 (Tensor): shape (n, 4), if is_aligned is ``True``, then m and n 14 | must be equal. 15 | mode (str): "iou" (intersection over union) or iof (intersection over 16 | foreground). 17 | 18 | Returns: 19 | ious(Tensor): shape (m, n) if is_aligned == False else shape (m, 1) 20 | """ 21 | 22 | assert mode in ['iou', 'iof'] 23 | 24 | rows = bboxes1.size(0) 25 | cols = bboxes2.size(0) 26 | if is_aligned: 27 | assert rows == cols 28 | 29 | if rows * cols == 0: 30 | return bboxes1.new(rows, 1) if is_aligned else bboxes1.new(rows, cols) 31 | 32 | if is_aligned: 33 | lt = torch.max(bboxes1[:, :2], bboxes2[:, :2]) # [rows, 2] 34 | rb = torch.min(bboxes1[:, 2:], bboxes2[:, 2:]) # [rows, 2] 35 | 36 | wh = (rb - lt + 1).clamp(min=0) # [rows, 2] 37 | overlap = wh[:, 0] * wh[:, 1] 38 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * ( 39 | bboxes1[:, 3] - bboxes1[:, 1] + 1) 40 | 41 | if mode == 'iou': 42 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * ( 43 | bboxes2[:, 3] - bboxes2[:, 1] + 1) 44 | ious = overlap / (area1 + area2 - overlap) 45 | else: 46 | ious = overlap / area1 47 | else: 48 | lt = torch.max(bboxes1[:, None, :2], bboxes2[:, :2]) # [rows, cols, 2] 49 | rb = torch.min(bboxes1[:, None, 2:], bboxes2[:, 2:]) # [rows, cols, 2] 50 | 51 | wh = (rb - lt + 1).clamp(min=0) # [rows, cols, 2] 52 | overlap = wh[:, :, 0] * wh[:, :, 1] 53 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * ( 54 | bboxes1[:, 3] - bboxes1[:, 1] + 1) 55 | 56 | if mode == 'iou': 57 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * ( 58 | bboxes2[:, 3] - bboxes2[:, 1] + 1) 59 | ious = overlap / (area1[:, None] + area2 - overlap) 60 | else: 61 | ious = overlap / (area1[:, None]) 62 | 63 | return ious 64 | -------------------------------------------------------------------------------- /mmdet/datasets/utils.py: -------------------------------------------------------------------------------- 1 | from collections import Sequence 2 | 3 | import matplotlib.pyplot as plt 4 | import mmcv 5 | import numpy as np 6 | import torch 7 | 8 | 9 | def to_tensor(data): 10 | """Convert objects of various python types to :obj:`torch.Tensor`. 11 | 12 | Supported types are: :class:`numpy.ndarray`, :class:`torch.Tensor`, 13 | :class:`Sequence`, :class:`int` and :class:`float`. 14 | """ 15 | if isinstance(data, torch.Tensor): 16 | return data 17 | elif isinstance(data, np.ndarray): 18 | return torch.from_numpy(data) 19 | elif isinstance(data, Sequence) and not mmcv.is_str(data): 20 | return torch.tensor(data) 21 | elif isinstance(data, int): 22 | return torch.LongTensor([data]) 23 | elif isinstance(data, float): 24 | return torch.FloatTensor([data]) 25 | else: 26 | raise TypeError('type {} cannot be converted to tensor.'.format( 27 | type(data))) 28 | 29 | 30 | def random_scale(img_scales, mode='range'): 31 | """Randomly select a scale from a list of scales or scale ranges. 32 | 33 | Args: 34 | img_scales (list[tuple]): Image scale or scale range. 35 | mode (str): "range" or "value". 36 | 37 | Returns: 38 | tuple: Sampled image scale. 
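    Example (illustrative; the scale tuples below are placeholders, not values
    taken from any config in this repo):

        >>> random_scale([(1333, 800)])                  # a single fixed scale is returned as-is
        (1333, 800)
        >>> random_scale([(1333, 800), (1333, 640)], mode='range')
        # long edge sampled from [1333, 1333], short edge from [640, 800],
        # so this returns e.g. (1333, 712)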
39 | """ 40 | num_scales = len(img_scales) 41 | if num_scales == 1: # fixed scale is specified 42 | img_scale = img_scales[0] 43 | elif num_scales == 2: # randomly sample a scale 44 | if mode == 'range': 45 | img_scale_long = [max(s) for s in img_scales] 46 | img_scale_short = [min(s) for s in img_scales] 47 | long_edge = np.random.randint( 48 | min(img_scale_long), 49 | max(img_scale_long) + 1) 50 | short_edge = np.random.randint( 51 | min(img_scale_short), 52 | max(img_scale_short) + 1) 53 | img_scale = (long_edge, short_edge) 54 | elif mode == 'value': 55 | img_scale = img_scales[np.random.randint(num_scales)] 56 | else: 57 | if mode != 'value': 58 | raise ValueError( 59 | 'Only "value" mode supports more than 2 image scales') 60 | img_scale = img_scales[np.random.randint(num_scales)] 61 | return img_scale 62 | 63 | 64 | def show_ann(coco, img, ann_info): 65 | plt.imshow(mmcv.bgr2rgb(img)) 66 | plt.axis('off') 67 | coco.showAnns(ann_info) 68 | plt.show() 69 | -------------------------------------------------------------------------------- /mmdet/datasets/loader/build_loader.py: -------------------------------------------------------------------------------- 1 | import platform 2 | from functools import partial 3 | 4 | from mmcv.runner import get_dist_info 5 | from mmcv.parallel import collate 6 | from torch.utils.data import DataLoader 7 | 8 | from .sampler import GroupSampler, DistributedGroupSampler, DistributedSampler, EpisodicSampler 9 | 10 | if platform.system() != 'Windows': 11 | # https://github.com/pytorch/pytorch/issues/973 12 | import resource 13 | rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) 14 | resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1])) 15 | 16 | 17 | def build_dataloader(dataset, 18 | imgs_per_gpu, 19 | workers_per_gpu, 20 | num_gpus=1, 21 | dist=True, 22 | cls_balanced_sampler=False, 23 | shuffle=True, 24 | **kwargs): 25 | if dist: 26 | rank, world_size = get_dist_info() 27 | if shuffle: 28 | sampler = DistributedGroupSampler(dataset, imgs_per_gpu, 29 | world_size, rank) 30 | else: 31 | sampler = DistributedSampler( 32 | dataset, world_size, rank, shuffle=False) 33 | batch_size = imgs_per_gpu 34 | num_workers = workers_per_gpu 35 | else: 36 | # sampler = GroupSampler(dataset, imgs_per_gpu) if shuffle else None 37 | # batch_size = num_gpus * imgs_per_gpu 38 | # num_workers = num_gpus * workers_per_gpu 39 | 40 | # sampler = GroupSampler_addrepeat(dataset, imgs_per_gpu) if shuffle else None 41 | # batch_size = num_gpus * imgs_per_gpu 42 | # num_workers = num_gpus * workers_per_gpu 43 | 44 | if cls_balanced_sampler==True: 45 | batch_size = num_gpus * imgs_per_gpu 46 | sampler = EpisodicSampler(dataset, batch_size, nc=16, episode=1000) if shuffle else None 47 | num_workers = num_gpus * workers_per_gpu 48 | else: 49 | sampler = GroupSampler(dataset, imgs_per_gpu) if shuffle else None 50 | batch_size = num_gpus * imgs_per_gpu 51 | num_workers = num_gpus * workers_per_gpu 52 | 53 | data_loader = DataLoader( 54 | dataset, 55 | batch_size=batch_size, 56 | sampler=sampler, 57 | num_workers=num_workers, 58 | collate_fn=partial(collate, samples_per_gpu=imgs_per_gpu), 59 | pin_memory=False, 60 | **kwargs) 61 | 62 | return data_loader 63 | 64 | 65 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/random_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from .base_sampler import BaseSampler 5 | 6 | 7 | class 
RandomSampler(BaseSampler): 8 | 9 | def __init__(self, 10 | num, 11 | pos_fraction, 12 | neg_pos_ub=-1, 13 | add_gt_as_proposals=True, 14 | **kwargs): 15 | super(RandomSampler, self).__init__(num, pos_fraction, neg_pos_ub, 16 | add_gt_as_proposals) 17 | 18 | @staticmethod 19 | def random_choice(gallery, num): 20 | """Random select some elements from the gallery. 21 | 22 | It seems that Pytorch's implementation is slower than numpy so we use 23 | numpy to randperm the indices. 24 | """ 25 | assert len(gallery) >= num 26 | if isinstance(gallery, list): 27 | gallery = np.array(gallery) 28 | cands = np.arange(len(gallery)) 29 | np.random.shuffle(cands) 30 | rand_inds = cands[:num] 31 | if not isinstance(gallery, np.ndarray): 32 | rand_inds = torch.from_numpy(rand_inds).long().to(gallery.device) 33 | return gallery[rand_inds] 34 | 35 | def _sample_pos(self, assign_result, num_expected, **kwargs): 36 | """Randomly sample some positive samples.""" 37 | pos_inds = torch.nonzero(assign_result.gt_inds > 0) 38 | if pos_inds.numel() != 0: 39 | pos_inds = pos_inds.squeeze(1) 40 | if pos_inds.numel() <= num_expected: 41 | return pos_inds 42 | else: 43 | return self.random_choice(pos_inds, num_expected) 44 | 45 | def _sample_neg(self, assign_result, num_expected, **kwargs): 46 | """Randomly sample some negative samples.""" 47 | neg_inds = torch.nonzero(assign_result.gt_inds == 0) 48 | if neg_inds.numel() != 0: 49 | neg_inds = neg_inds.squeeze(1) 50 | if len(neg_inds) <= num_expected: 51 | return neg_inds 52 | else: 53 | return self.random_choice(neg_inds, num_expected) 54 | 55 | def _sample_neg_lowthr(self, assign_result, num_expected, lowthr=0.1, **kwargs): 56 | neg_inds = torch.nonzero((assign_result.max_overlaps>=0.1)*(assign_result.gt_inds == 0)) 57 | if neg_inds.numel() != 0: 58 | neg_inds = neg_inds.squeeze(1) 59 | if len(neg_inds) <= num_expected: 60 | return neg_inds 61 | else: 62 | return self.random_choice(neg_inds, num_expected) 63 | -------------------------------------------------------------------------------- /mmdet/models/shared_heads/res_layer.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import torch.nn as nn 4 | from mmcv.cnn import constant_init, kaiming_init 5 | from mmcv.runner import load_checkpoint 6 | 7 | from mmdet.core import auto_fp16 8 | from ..backbones import ResNet, make_res_layer 9 | from ..registry import SHARED_HEADS 10 | 11 | 12 | @SHARED_HEADS.register_module 13 | class ResLayer(nn.Module): 14 | 15 | def __init__(self, 16 | depth, 17 | stage=3, 18 | stride=2, 19 | dilation=1, 20 | style='pytorch', 21 | norm_cfg=dict(type='BN', requires_grad=True), 22 | norm_eval=True, 23 | with_cp=False, 24 | dcn=None): 25 | super(ResLayer, self).__init__() 26 | self.norm_eval = norm_eval 27 | self.norm_cfg = norm_cfg 28 | self.stage = stage 29 | self.fp16_enabled = False 30 | block, stage_blocks = ResNet.arch_settings[depth] 31 | stage_block = stage_blocks[stage] 32 | planes = 64 * 2**stage 33 | inplanes = 64 * 2**(stage - 1) * block.expansion 34 | 35 | res_layer = make_res_layer( 36 | block, 37 | inplanes, 38 | planes, 39 | stage_block, 40 | stride=stride, 41 | dilation=dilation, 42 | style=style, 43 | with_cp=with_cp, 44 | norm_cfg=self.norm_cfg, 45 | dcn=dcn) 46 | self.add_module('layer{}'.format(stage + 1), res_layer) 47 | 48 | def init_weights(self, pretrained=None): 49 | if isinstance(pretrained, str): 50 | logger = logging.getLogger() 51 | load_checkpoint(self, pretrained, strict=False, logger=logger) 52 | elif 
pretrained is None: 53 | for m in self.modules(): 54 | if isinstance(m, nn.Conv2d): 55 | kaiming_init(m) 56 | elif isinstance(m, nn.BatchNorm2d): 57 | constant_init(m, 1) 58 | else: 59 | raise TypeError('pretrained must be a str or None') 60 | 61 | @auto_fp16() 62 | def forward(self, x): 63 | res_layer = getattr(self, 'layer{}'.format(self.stage + 1)) 64 | out = res_layer(x) 65 | return out 66 | 67 | def train(self, mode=True): 68 | super(ResLayer, self).train(mode) 69 | if self.norm_eval: 70 | for m in self.modules(): 71 | if isinstance(m, nn.BatchNorm2d): 72 | m.eval() 73 | -------------------------------------------------------------------------------- /mmdet/utils/registry.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | 3 | import mmcv 4 | 5 | 6 | class Registry(object): 7 | 8 | def __init__(self, name): 9 | self._name = name 10 | self._module_dict = dict() 11 | 12 | def __repr__(self): 13 | format_str = self.__class__.__name__ + '(name={}, items={})'.format( 14 | self._name, list(self._module_dict.keys())) 15 | return format_str 16 | 17 | @property 18 | def name(self): 19 | return self._name 20 | 21 | @property 22 | def module_dict(self): 23 | return self._module_dict 24 | 25 | def get(self, key): 26 | return self._module_dict.get(key, None) 27 | 28 | def _register_module(self, module_class): 29 | """Register a module. 30 | 31 | Args: 32 | module (:obj:`nn.Module`): Module to be registered. 33 | """ 34 | if not inspect.isclass(module_class): 35 | raise TypeError('module must be a class, but got {}'.format( 36 | type(module_class))) 37 | module_name = module_class.__name__ 38 | if module_name in self._module_dict: 39 | raise KeyError('{} is already registered in {}'.format( 40 | module_name, self.name)) 41 | self._module_dict[module_name] = module_class 42 | 43 | def register_module(self, cls): 44 | self._register_module(cls) 45 | return cls 46 | 47 | 48 | def build_from_cfg(cfg, registry, default_args=None): 49 | """Build a module from config dict. 50 | 51 | Args: 52 | cfg (dict): Config dict. It should at least contain the key "type". 53 | registry (:obj:`Registry`): The registry to search the type from. 54 | default_args (dict, optional): Default initialization arguments. 55 | 56 | Returns: 57 | obj: The constructed object. 58 | """ 59 | assert isinstance(cfg, dict) and 'type' in cfg 60 | assert isinstance(default_args, dict) or default_args is None 61 | args = cfg.copy() 62 | obj_type = args.pop('type') 63 | if mmcv.is_str(obj_type): 64 | obj_type = registry.get(obj_type) 65 | if obj_type is None: 66 | raise KeyError('{} is not in the {} registry'.format( 67 | obj_type, registry.name)) 68 | elif not inspect.isclass(obj_type): 69 | raise TypeError('type must be a str or valid type, but got {}'.format( 70 | type(obj_type))) 71 | if default_args is not None: 72 | for name, value in default_args.items(): 73 | args.setdefault(name, value) 74 | return obj_type(**args) 75 | -------------------------------------------------------------------------------- /mmdet/ops/nms/src/nms_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
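// CPU reference implementation of hard NMS: detections are visited in descending
// score order, and any later box whose IoU with an already-kept box reaches
// `threshold` is suppressed; the indices of the surviving boxes are returned and
// consumed by mmdet/ops/nms/nms_wrapper.py.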
2 | #include <torch/extension.h> 3 | 4 | template <typename scalar_t> 5 | at::Tensor nms_cpu_kernel(const at::Tensor& dets, const float threshold) { 6 | AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); 7 | 8 | if (dets.numel() == 0) { 9 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 10 | } 11 | 12 | auto x1_t = dets.select(1, 0).contiguous(); 13 | auto y1_t = dets.select(1, 1).contiguous(); 14 | auto x2_t = dets.select(1, 2).contiguous(); 15 | auto y2_t = dets.select(1, 3).contiguous(); 16 | auto scores = dets.select(1, 4).contiguous(); 17 | 18 | at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1); 19 | 20 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 21 | 22 | auto ndets = dets.size(0); 23 | at::Tensor suppressed_t = 24 | at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU)); 25 | 26 | auto suppressed = suppressed_t.data<uint8_t>(); 27 | auto order = order_t.data<int64_t>(); 28 | auto x1 = x1_t.data<scalar_t>(); 29 | auto y1 = y1_t.data<scalar_t>(); 30 | auto x2 = x2_t.data<scalar_t>(); 31 | auto y2 = y2_t.data<scalar_t>(); 32 | auto areas = areas_t.data<scalar_t>(); 33 | 34 | for (int64_t _i = 0; _i < ndets; _i++) { 35 | auto i = order[_i]; 36 | if (suppressed[i] == 1) continue; 37 | auto ix1 = x1[i]; 38 | auto iy1 = y1[i]; 39 | auto ix2 = x2[i]; 40 | auto iy2 = y2[i]; 41 | auto iarea = areas[i]; 42 | 43 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 44 | auto j = order[_j]; 45 | if (suppressed[j] == 1) continue; 46 | auto xx1 = std::max(ix1, x1[j]); 47 | auto yy1 = std::max(iy1, y1[j]); 48 | auto xx2 = std::min(ix2, x2[j]); 49 | auto yy2 = std::min(iy2, y2[j]); 50 | 51 | auto w = std::max(static_cast<scalar_t>(0), xx2 - xx1 + 1); 52 | auto h = std::max(static_cast<scalar_t>(0), yy2 - yy1 + 1); 53 | auto inter = w * h; 54 | auto ovr = inter / (iarea + areas[j] - inter); 55 | if (ovr >= threshold) suppressed[j] = 1; 56 | } 57 | } 58 | return at::nonzero(suppressed_t == 0).squeeze(1); 59 | } 60 | 61 | at::Tensor nms(const at::Tensor& dets, const float threshold) { 62 | at::Tensor result; 63 | AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms", [&] { 64 | result = nms_cpu_kernel<scalar_t>(dets, threshold); 65 | }); 66 | return result; 67 | } 68 | 69 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 70 | m.def("nms", &nms, "non-maximum suppression"); 71 | } -------------------------------------------------------------------------------- /mmdet/core/post_processing/bbox_nms.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from mmdet.ops.nms import nms_wrapper 4 | 5 | 6 | def multiclass_nms(multi_bboxes, 7 | multi_scores, 8 | score_thr, 9 | nms_cfg, 10 | max_num=-1, 11 | score_factors=None): 12 | """NMS for multi-class bboxes. 13 | 14 | Args: 15 | multi_bboxes (Tensor): shape (n, #class*4) or (n, 4) 16 | multi_scores (Tensor): shape (n, #class) 17 | score_thr (float): bbox threshold, bboxes with scores lower than it 18 | will not be considered. 19 | nms_cfg (dict): NMS config, e.g. dict(type='nms', iou_thr=0.5) 20 | max_num (int): if there are more than max_num bboxes after NMS, 21 | only top max_num will be kept. 22 | score_factors (Tensor): The factors multiplied to scores before 23 | applying NMS 24 | 25 | Returns: 26 | tuple: (bboxes, labels), tensors of shape (k, 5) and (k,). Labels 27 | are 0-based.
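Example (a minimal sketch with made-up tensors; index 0 of ``multi_scores`` is the background class, so a (n, 3) score input has two foreground classes):
        >>> import torch
        >>> multi_bboxes = torch.tensor([[10., 10., 50., 50.]])   # (1, 4), shared by all classes
        >>> multi_scores = torch.tensor([[0.05, 0.90, 0.05]])     # (1, 3)
        >>> dets, labels = multiclass_nms(
        ...     multi_bboxes, multi_scores, score_thr=0.3,
        ...     nms_cfg=dict(type='nms', iou_thr=0.5), max_num=100)
        >>> # dets: (k, 5) boxes with scores appended, labels: (k,) 0-based class ids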
28 | """ 29 | num_classes = multi_scores.shape[1] 30 | bboxes, labels = [], [] 31 | nms_cfg_ = nms_cfg.copy() 32 | nms_type = nms_cfg_.pop('type', 'nms') 33 | nms_op = getattr(nms_wrapper, nms_type) 34 | for i in range(1, num_classes): 35 | cls_inds = multi_scores[:, i] > score_thr 36 | if not cls_inds.any(): 37 | continue 38 | # get bboxes and scores of this class 39 | if multi_bboxes.shape[1] == 4: 40 | _bboxes = multi_bboxes[cls_inds, :] 41 | else: 42 | _bboxes = multi_bboxes[cls_inds, i * 4:(i + 1) * 4] 43 | _scores = multi_scores[cls_inds, i] 44 | if score_factors is not None: 45 | _scores *= score_factors[cls_inds] 46 | cls_dets = torch.cat([_bboxes, _scores[:, None]], dim=1) 47 | cls_dets, _ = nms_op(cls_dets, **nms_cfg_) 48 | cls_labels = multi_bboxes.new_full((cls_dets.shape[0], ), 49 | i - 1, 50 | dtype=torch.long) 51 | bboxes.append(cls_dets) 52 | labels.append(cls_labels) 53 | if bboxes: 54 | bboxes = torch.cat(bboxes) 55 | labels = torch.cat(labels) 56 | if bboxes.shape[0] > max_num: 57 | _, inds = bboxes[:, -1].sort(descending=True) 58 | inds = inds[:max_num] 59 | bboxes = bboxes[inds] 60 | labels = labels[inds] 61 | else: 62 | bboxes = multi_bboxes.new_zeros((0, 5)) 63 | labels = multi_bboxes.new_zeros((0, ), dtype=torch.long) 64 | 65 | return bboxes, labels 66 | -------------------------------------------------------------------------------- /mmdet/models/detectors/single_stage.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from mmdet.core import bbox2result 4 | from .. import builder 5 | from ..registry import DETECTORS 6 | from .base import BaseDetector 7 | 8 | 9 | @DETECTORS.register_module 10 | class SingleStageDetector(BaseDetector): 11 | 12 | def __init__(self, 13 | backbone, 14 | neck=None, 15 | bbox_head=None, 16 | train_cfg=None, 17 | test_cfg=None, 18 | pretrained=None): 19 | super(SingleStageDetector, self).__init__() 20 | self.backbone = builder.build_backbone(backbone) 21 | if neck is not None: 22 | self.neck = builder.build_neck(neck) 23 | self.bbox_head = builder.build_head(bbox_head) 24 | self.train_cfg = train_cfg 25 | self.test_cfg = test_cfg 26 | self.init_weights(pretrained=pretrained) 27 | 28 | def init_weights(self, pretrained=None): 29 | super(SingleStageDetector, self).init_weights(pretrained) 30 | self.backbone.init_weights(pretrained=pretrained) 31 | if self.with_neck: 32 | if isinstance(self.neck, nn.Sequential): 33 | for m in self.neck: 34 | m.init_weights() 35 | else: 36 | self.neck.init_weights() 37 | self.bbox_head.init_weights() 38 | 39 | def extract_feat(self, img): 40 | x = self.backbone(img) 41 | if self.with_neck: 42 | x = self.neck(x) 43 | return x 44 | 45 | def forward_train(self, 46 | img, 47 | img_metas, 48 | gt_bboxes, 49 | gt_labels, 50 | gt_bboxes_ignore=None): 51 | x = self.extract_feat(img) 52 | outs = self.bbox_head(x) 53 | loss_inputs = outs + (gt_bboxes, gt_labels, img_metas, self.train_cfg) 54 | losses = self.bbox_head.loss( 55 | *loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore) 56 | return losses 57 | 58 | def simple_test(self, img, img_meta, rescale=False): 59 | x = self.extract_feat(img) 60 | outs = self.bbox_head(x) 61 | bbox_inputs = outs + (img_meta, self.test_cfg, rescale) 62 | bbox_list = self.bbox_head.get_bboxes(*bbox_inputs) 63 | bbox_results = [ 64 | bbox2result(det_bboxes, det_labels, self.bbox_head.num_classes) 65 | for det_bboxes, det_labels in bbox_list 66 | ] 67 | return bbox_results[0] 68 | 69 | def aug_test(self, imgs, img_metas, 
rescale=False): 70 | raise NotImplementedError 71 | -------------------------------------------------------------------------------- /lvis_api/README.md: -------------------------------------------------------------------------------- 1 | # LVIS API 2 | 3 | 4 | LVIS (pronounced ‘el-vis’): is a new dataset for Large Vocabulary Instance Segmentation. 5 | When complete, it will feature more than 2 million high-quality instance segmentation masks for over 1200 entry-level object categories in 164k images. The LVIS API enables reading and interacting with annotation files, visualizing annotations, and evaluating results. 6 | 7 | 8 | 9 | ## LVIS v0.5 10 | 11 | LVIS v0.5 marks the halfway point in data collection. For this release, we have annotated an additional 82k images (57k train, 20k test, 5k val). Release v0.5 is publicly available at [LVIS website](http://www.lvisdataset.org) and will be used in the first LVIS Challenge to be held in conjunction with the COCO Workshop at ICCV 2019. 12 | 13 | ## Setup 14 | You can setup a virtual environment and then install `lvisapi` using pip: 15 | 16 | ```bash 17 | python3 -m venv env # Create a virtual environment 18 | source env/bin/activate # Activate virtual environment 19 | 20 | # install COCO API. COCO API requires numpy to install. Ensure that you installed numpy. 21 | pip install 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI' 22 | # install LVIS API 23 | pip install lvis 24 | # Work for a while ... 25 | deactivate # Exit virtual environment 26 | ``` 27 | 28 | You can also clone the repo first and then do the following steps inside the repo: 29 | ```bash 30 | python3 -m venv env # Create a virtual environment 31 | source env/bin/activate # Activate virtual environment 32 | 33 | # install COCO API. COCO API requires numpy to install. Ensure that you installed numpy. 34 | pip install 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI' 35 | # install LVIS API 36 | pip install . 37 | # test if the installation was correct 38 | python test.py 39 | # Work for a while ... 40 | deactivate # Exit virtual environment 41 | ``` 42 | ## Citing LVIS 43 | 44 | If you find this code/data useful in your research then please cite our [paper](http://www.lvisdataset.org/assets/lvis_v0.5.pdf): 45 | ``` 46 | @inproceedings{gupta2019lvis, 47 | title={{LVIS}: A Dataset for Large Vocabulary Instance Segmentation}, 48 | author={Gupta, Agrim and Dollar, Piotr and Girshick, Ross}, 49 | booktitle={Proceedings of the {IEEE} Conference on Computer Vision and Pattern Recognition}, 50 | year={2019} 51 | } 52 | ``` 53 | 54 | ## Credit 55 | 56 | The code is a re-write of PythonAPI for [COCO](https://github.com/cocodataset/cocoapi). 57 | The core functionality is the same with LVIS specific changes. 58 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/roi_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | from torch.nn.modules.utils import _pair 6 | 7 | from . 
import roi_pool_cuda 8 | 9 | 10 | class RoIPoolFunction(Function): 11 | 12 | @staticmethod 13 | def forward(ctx, features, rois, out_size, spatial_scale): 14 | assert features.is_cuda 15 | out_h, out_w = _pair(out_size) 16 | assert isinstance(out_h, int) and isinstance(out_w, int) 17 | ctx.save_for_backward(rois) 18 | num_channels = features.size(1) 19 | num_rois = rois.size(0) 20 | out_size = (num_rois, num_channels, out_h, out_w) 21 | output = features.new_zeros(out_size) 22 | argmax = features.new_zeros(out_size, dtype=torch.int) 23 | roi_pool_cuda.forward(features, rois, out_h, out_w, spatial_scale, 24 | output, argmax) 25 | ctx.spatial_scale = spatial_scale 26 | ctx.feature_size = features.size() 27 | ctx.argmax = argmax 28 | 29 | return output 30 | 31 | @staticmethod 32 | @once_differentiable 33 | def backward(ctx, grad_output): 34 | assert grad_output.is_cuda 35 | spatial_scale = ctx.spatial_scale 36 | feature_size = ctx.feature_size 37 | argmax = ctx.argmax 38 | rois = ctx.saved_tensors[0] 39 | assert feature_size is not None 40 | 41 | grad_input = grad_rois = None 42 | if ctx.needs_input_grad[0]: 43 | grad_input = grad_output.new_zeros(feature_size) 44 | roi_pool_cuda.backward(grad_output.contiguous(), rois, argmax, 45 | spatial_scale, grad_input) 46 | 47 | return grad_input, grad_rois, None, None 48 | 49 | 50 | roi_pool = RoIPoolFunction.apply 51 | 52 | 53 | class RoIPool(nn.Module): 54 | 55 | def __init__(self, out_size, spatial_scale, use_torchvision=False): 56 | super(RoIPool, self).__init__() 57 | 58 | self.out_size = _pair(out_size) 59 | self.spatial_scale = float(spatial_scale) 60 | self.use_torchvision = use_torchvision 61 | 62 | def forward(self, features, rois): 63 | if self.use_torchvision: 64 | from torchvision.ops import roi_pool as tv_roi_pool 65 | return tv_roi_pool(features, rois, self.out_size, 66 | self.spatial_scale) 67 | else: 68 | return roi_pool(features, rois, self.out_size, self.spatial_scale) 69 | 70 | def __repr__(self): 71 | format_str = self.__class__.__name__ 72 | format_str += '(out_size={}, spatial_scale={}'.format( 73 | self.out_size, self.spatial_scale) 74 | format_str += ', use_torchvision={})'.format(self.use_torchvision) 75 | return format_str 76 | -------------------------------------------------------------------------------- /mmdet/ops/nms/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from . import nms_cpu, nms_cuda 5 | from .soft_nms_cpu import soft_nms_cpu 6 | 7 | 8 | def nms(dets, iou_thr, device_id=None): 9 | """Dispatch to either CPU or GPU NMS implementations. 10 | 11 | The input can be either a torch tensor or numpy array. GPU NMS will be used 12 | if the input is a gpu tensor or device_id is specified, otherwise CPU NMS 13 | will be used. The returned type will always be the same as inputs. 14 | 15 | Arguments: 16 | dets (torch.Tensor or np.ndarray): bboxes with scores. 17 | iou_thr (float): IoU threshold for NMS. 18 | device_id (int, optional): when `dets` is a numpy array, if `device_id` 19 | is None, then cpu nms is used, otherwise gpu_nms will be used. 20 | 21 | Returns: 22 | tuple: kept bboxes and indice, which is always the same data type as 23 | the input. 
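Example (an illustrative sketch with made-up boxes; the compiled nms_cpu / nms_cuda extensions must have been built for the call to work):
        >>> import numpy as np
        >>> dets = np.array([[49.1, 32.4, 51.0, 35.9, 0.9],
        ...                  [49.3, 32.9, 51.0, 35.3, 0.6],
        ...                  [35.3, 11.5, 39.9, 14.5, 0.4]], dtype=np.float32)
        >>> kept_dets, inds = nms(dets, iou_thr=0.5)
        >>> # the two heavily overlapping boxes collapse to the higher-scoring
        >>> # one, so inds points at the first and third rows of dets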
24 | """ 25 | # convert dets (tensor or numpy array) to tensor 26 | if isinstance(dets, torch.Tensor): 27 | is_numpy = False 28 | dets_th = dets 29 | elif isinstance(dets, np.ndarray): 30 | is_numpy = True 31 | device = 'cpu' if device_id is None else 'cuda:{}'.format(device_id) 32 | dets_th = torch.from_numpy(dets).to(device) 33 | else: 34 | raise TypeError( 35 | 'dets must be either a Tensor or numpy array, but got {}'.format( 36 | type(dets))) 37 | 38 | # execute cpu or cuda nms 39 | if dets_th.shape[0] == 0: 40 | inds = dets_th.new_zeros(0, dtype=torch.long) 41 | else: 42 | if dets_th.is_cuda: 43 | inds = nms_cuda.nms(dets_th, iou_thr) 44 | else: 45 | inds = nms_cpu.nms(dets_th, iou_thr) 46 | 47 | if is_numpy: 48 | inds = inds.cpu().numpy() 49 | return dets[inds, :], inds 50 | 51 | 52 | def soft_nms(dets, iou_thr, method='linear', sigma=0.5, min_score=1e-3): 53 | if isinstance(dets, torch.Tensor): 54 | is_tensor = True 55 | dets_np = dets.detach().cpu().numpy() 56 | elif isinstance(dets, np.ndarray): 57 | is_tensor = False 58 | dets_np = dets 59 | else: 60 | raise TypeError( 61 | 'dets must be either a Tensor or numpy array, but got {}'.format( 62 | type(dets))) 63 | 64 | method_codes = {'linear': 1, 'gaussian': 2} 65 | if method not in method_codes: 66 | raise ValueError('Invalid method for SoftNMS: {}'.format(method)) 67 | new_dets, inds = soft_nms_cpu( 68 | dets_np, 69 | iou_thr, 70 | method=method_codes[method], 71 | sigma=sigma, 72 | min_score=min_score) 73 | 74 | if is_tensor: 75 | return dets.new_tensor(new_dets), dets.new_tensor( 76 | inds, dtype=torch.long) 77 | else: 78 | return new_dets.astype(np.float32), inds.astype(np.int64) 79 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/ohem_sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from ..transforms import bbox2roi 4 | from .base_sampler import BaseSampler 5 | 6 | 7 | class OHEMSampler(BaseSampler): 8 | 9 | def __init__(self, 10 | num, 11 | pos_fraction, 12 | context, 13 | neg_pos_ub=-1, 14 | add_gt_as_proposals=True, 15 | **kwargs): 16 | super(OHEMSampler, self).__init__(num, pos_fraction, neg_pos_ub, 17 | add_gt_as_proposals) 18 | if not hasattr(context, 'num_stages'): 19 | self.bbox_roi_extractor = context.bbox_roi_extractor 20 | self.bbox_head = context.bbox_head 21 | else: 22 | self.bbox_roi_extractor = context.bbox_roi_extractor[ 23 | context.current_stage] 24 | self.bbox_head = context.bbox_head[context.current_stage] 25 | 26 | def hard_mining(self, inds, num_expected, bboxes, labels, feats): 27 | with torch.no_grad(): 28 | rois = bbox2roi([bboxes]) 29 | bbox_feats = self.bbox_roi_extractor( 30 | feats[:self.bbox_roi_extractor.num_inputs], rois) 31 | cls_score, _ = self.bbox_head(bbox_feats) 32 | loss = self.bbox_head.loss( 33 | cls_score=cls_score, 34 | bbox_pred=None, 35 | labels=labels, 36 | label_weights=cls_score.new_ones(cls_score.size(0)), 37 | bbox_targets=None, 38 | bbox_weights=None, 39 | reduction_override='none')['loss_cls'] 40 | _, topk_loss_inds = loss.topk(num_expected) 41 | return inds[topk_loss_inds] 42 | 43 | def _sample_pos(self, 44 | assign_result, 45 | num_expected, 46 | bboxes=None, 47 | feats=None, 48 | **kwargs): 49 | # Sample some hard positive samples 50 | pos_inds = torch.nonzero(assign_result.gt_inds > 0) 51 | if pos_inds.numel() != 0: 52 | pos_inds = pos_inds.squeeze(1) 53 | if pos_inds.numel() <= num_expected: 54 | return pos_inds 55 | else: 56 | return 
self.hard_mining(pos_inds, num_expected, bboxes[pos_inds], 57 | assign_result.labels[pos_inds], feats) 58 | 59 | def _sample_neg(self, 60 | assign_result, 61 | num_expected, 62 | bboxes=None, 63 | feats=None, 64 | **kwargs): 65 | # Sample some hard negative samples 66 | neg_inds = torch.nonzero(assign_result.gt_inds == 0) 67 | if neg_inds.numel() != 0: 68 | neg_inds = neg_inds.squeeze(1) 69 | if len(neg_inds) <= num_expected: 70 | return neg_inds 71 | else: 72 | return self.hard_mining(neg_inds, num_expected, bboxes[neg_inds], 73 | assign_result.labels[neg_inds], feats) 74 | -------------------------------------------------------------------------------- /mmdet/models/losses/focal_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | from mmdet.ops import sigmoid_focal_loss as _sigmoid_focal_loss 5 | from ..registry import LOSSES 6 | from .utils import weight_reduce_loss 7 | 8 | 9 | # This method is only for debugging 10 | def py_sigmoid_focal_loss(pred, 11 | target, 12 | weight=None, 13 | gamma=2.0, 14 | alpha=0.25, 15 | reduction='mean', 16 | avg_factor=None): 17 | pred_sigmoid = pred.sigmoid() 18 | target = target.type_as(pred) 19 | pt = (1 - pred_sigmoid) * target + pred_sigmoid * (1 - target) 20 | focal_weight = (alpha * target + (1 - alpha) * 21 | (1 - target)) * pt.pow(gamma) 22 | loss = F.binary_cross_entropy_with_logits( 23 | pred, target, reduction='none') * focal_weight 24 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor) 25 | return loss 26 | 27 | 28 | def sigmoid_focal_loss(pred, 29 | target, 30 | weight=None, 31 | gamma=2.0, 32 | alpha=0.25, 33 | reduction='mean', 34 | avg_factor=None): 35 | # Function.apply does not accept keyword arguments, so the decorator 36 | # "weighted_loss" is not applicable 37 | loss = _sigmoid_focal_loss(pred, target, gamma, alpha) 38 | # TODO: find a proper way to handle the shape of weight 39 | if weight is not None: 40 | weight = weight.view(-1, 1) 41 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor) 42 | return loss 43 | 44 | 45 | @LOSSES.register_module 46 | class FocalLoss(nn.Module): 47 | 48 | def __init__(self, 49 | use_sigmoid=True, 50 | gamma=2.0, 51 | alpha=0.25, 52 | reduction='mean', 53 | loss_weight=1.0): 54 | super(FocalLoss, self).__init__() 55 | assert use_sigmoid is True, 'Only sigmoid focal loss supported now.' 
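# Only the sigmoid (per-class binary) formulation implemented above is supported,
# i.e. FL(p_t) = -alpha_t * (1 - p_t)**gamma * log(p_t) applied independently per
# class; passing use_sigmoid=False (a softmax focal loss) is rejected by this
# assert, and forward() raises NotImplementedError for that branch.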
56 | self.use_sigmoid = use_sigmoid 57 | self.gamma = gamma 58 | self.alpha = alpha 59 | self.reduction = reduction 60 | self.loss_weight = loss_weight 61 | 62 | def forward(self, 63 | pred, 64 | target, 65 | weight=None, 66 | avg_factor=None, 67 | reduction_override=None): 68 | assert reduction_override in (None, 'none', 'mean', 'sum') 69 | reduction = ( 70 | reduction_override if reduction_override else self.reduction) 71 | if self.use_sigmoid: 72 | loss_cls = self.loss_weight * sigmoid_focal_loss( 73 | pred, 74 | target, 75 | weight, 76 | gamma=self.gamma, 77 | alpha=self.alpha, 78 | reduction=reduction, 79 | avg_factor=avg_factor) 80 | else: 81 | raise NotImplementedError 82 | return loss_cls 83 | -------------------------------------------------------------------------------- /mmdet/ops/masked_conv/src/masked_conv2d_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | int MaskedIm2colForwardLaucher(const at::Tensor im, const int height, 7 | const int width, const int channels, 8 | const int kernel_h, const int kernel_w, 9 | const int pad_h, const int pad_w, 10 | const at::Tensor mask_h_idx, 11 | const at::Tensor mask_w_idx, const int mask_cnt, 12 | at::Tensor col); 13 | 14 | int MaskedCol2imForwardLaucher(const at::Tensor col, const int height, 15 | const int width, const int channels, 16 | const at::Tensor mask_h_idx, 17 | const at::Tensor mask_w_idx, const int mask_cnt, 18 | at::Tensor im); 19 | 20 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 21 | #define CHECK_CONTIGUOUS(x) \ 22 | AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") 23 | #define CHECK_INPUT(x) \ 24 | CHECK_CUDA(x); \ 25 | CHECK_CONTIGUOUS(x) 26 | 27 | int masked_im2col_forward_cuda(const at::Tensor im, const at::Tensor mask_h_idx, 28 | const at::Tensor mask_w_idx, const int kernel_h, 29 | const int kernel_w, const int pad_h, 30 | const int pad_w, at::Tensor col) { 31 | CHECK_INPUT(im); 32 | CHECK_INPUT(mask_h_idx); 33 | CHECK_INPUT(mask_w_idx); 34 | CHECK_INPUT(col); 35 | // im: (n, ic, h, w), kernel size (kh, kw) 36 | // kernel: (oc, ic * kh * kw), col: (kh * kw * ic, ow * oh) 37 | 38 | int channels = im.size(1); 39 | int height = im.size(2); 40 | int width = im.size(3); 41 | int mask_cnt = mask_h_idx.size(0); 42 | 43 | MaskedIm2colForwardLaucher(im, height, width, channels, kernel_h, kernel_w, 44 | pad_h, pad_w, mask_h_idx, mask_w_idx, mask_cnt, 45 | col); 46 | 47 | return 1; 48 | } 49 | 50 | int masked_col2im_forward_cuda(const at::Tensor col, 51 | const at::Tensor mask_h_idx, 52 | const at::Tensor mask_w_idx, int height, 53 | int width, int channels, at::Tensor im) { 54 | CHECK_INPUT(col); 55 | CHECK_INPUT(mask_h_idx); 56 | CHECK_INPUT(mask_w_idx); 57 | CHECK_INPUT(im); 58 | // im: (n, ic, h, w), kernel size (kh, kw) 59 | // kernel: (oc, ic * kh * kh), col: (kh * kw * ic, ow * oh) 60 | 61 | int mask_cnt = mask_h_idx.size(0); 62 | 63 | MaskedCol2imForwardLaucher(col, height, width, channels, mask_h_idx, 64 | mask_w_idx, mask_cnt, im); 65 | 66 | return 1; 67 | } 68 | 69 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 70 | m.def("masked_im2col_forward", &masked_im2col_forward_cuda, 71 | "masked_im2col forward (CUDA)"); 72 | m.def("masked_col2im_forward", &masked_col2im_forward_cuda, 73 | "masked_col2im forward (CUDA)"); 74 | } -------------------------------------------------------------------------------- /configs/fcos/README.md: 
-------------------------------------------------------------------------------- 1 | # FCOS: Fully Convolutional One-Stage Object Detection 2 | 3 | ## Introduction 4 | 5 | ``` 6 | @article{tian2019fcos, 7 | title={FCOS: Fully Convolutional One-Stage Object Detection}, 8 | author={Tian, Zhi and Shen, Chunhua and Chen, Hao and He, Tong}, 9 | journal={arXiv preprint arXiv:1904.01355}, 10 | year={2019} 11 | } 12 | ``` 13 | 14 | ## Results and Models 15 | 16 | | Backbone | Style | GN | MS train | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download | 17 | |:---------:|:-------:|:-------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:--------:| 18 | | R-50 | caffe | N | N | 1x | 5.5 | 0.373 | 13.7 | 35.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r50_caffe_fpn_1x_4gpu_20190516-a7cac5ff.pth) | 19 | | R-50 | caffe | Y | N | 1x | 6.9 | 0.396 | 13.6 | 36.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r50_caffe_fpn_gn_1x_4gpu_20190516-9f253a93.pth) | 20 | | R-50 | caffe | Y | N | 2x | - | - | - | 36.9 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r50_caffe_fpn_gn_2x_4gpu_20190516_-93484354.pth) | 21 | | R-101 | caffe | Y | N | 1x | 10.4 | 0.558 | 11.6 | 39.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r101_caffe_fpn_gn_1x_4gpu_20190516-e4889733.pth) | 22 | | R-101 | caffe | Y | N | 2x | - | - | - | 39.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r101_caffe_fpn_gn_2x_4gpu_20190516-c03af97b.pth) | 23 | 24 | 25 | | Backbone | Style | GN | MS train | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download | 26 | |:---------:|:-------:|:-------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:--------:| 27 | | R-50 | caffe | Y | Y | 2x | - | - | - | 38.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_mstrain_640_800_r50_caffe_fpn_gn_2x_4gpu_20190516-f7329d80.pth) | 28 | | R-101 | caffe | Y | Y | 2x | - | - | - | 40.8 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_mstrain_640_800_r101_caffe_fpn_gn_2x_4gpu_20190516-42e6f62d.pth) | 29 | | X-101 | caffe | Y | Y | 2x | 9.7 | 0.892 | 7.0 | 42.8 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_mstrain_640_800_x101_64x4d_fpn_gn_2x_20190516-a36c0872.pth) | 30 | 31 | **Notes:** 32 | - To be consistent with the author's implementation, we use 4 GPUs with 4 images/GPU for R-50 and R-101 models, and 8 GPUs with 2 image/GPU for X-101 models. 33 | - The X-101 backbone is X-101-64x4d. 
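To try one of the checkpoints above on a single image, the standard mmdetection v1.x high-level API should be enough. The snippet below is a sketch only: it assumes this repository keeps `init_detector` / `inference_detector` / `show_result` in `mmdet.apis`, and the config name `fcos_r50_caffe_fpn_gn_1x_4gpu.py` is assumed rather than verified against `configs/fcos/`.

```python
# Sketch: single-image inference with the R-50 (GN, 1x) FCOS checkpoint listed above.
from mmdet.apis import init_detector, inference_detector, show_result

config_file = 'configs/fcos/fcos_r50_caffe_fpn_gn_1x_4gpu.py'            # assumed config name
checkpoint_file = 'fcos_r50_caffe_fpn_gn_1x_4gpu_20190516-9f253a93.pth'  # downloaded from the table above

model = init_detector(config_file, checkpoint_file, device='cuda:0')
result = inference_detector(model, 'demo/demo.jpg')                      # demo image shipped with the repo
show_result('demo/demo.jpg', result, model.CLASSES, score_thr=0.3,
            out_file='fcos_demo_out.jpg')
```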
34 | -------------------------------------------------------------------------------- /tools/configs/fcos/README.md: -------------------------------------------------------------------------------- 1 | # FCOS: Fully Convolutional One-Stage Object Detection 2 | 3 | ## Introduction 4 | 5 | ``` 6 | @article{tian2019fcos, 7 | title={FCOS: Fully Convolutional One-Stage Object Detection}, 8 | author={Tian, Zhi and Shen, Chunhua and Chen, Hao and He, Tong}, 9 | journal={arXiv preprint arXiv:1904.01355}, 10 | year={2019} 11 | } 12 | ``` 13 | 14 | ## Results and Models 15 | 16 | | Backbone | Style | GN | MS train | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download | 17 | |:---------:|:-------:|:-------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:--------:| 18 | | R-50 | caffe | N | N | 1x | 5.5 | 0.373 | 13.7 | 35.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r50_caffe_fpn_1x_4gpu_20190516-a7cac5ff.pth) | 19 | | R-50 | caffe | Y | N | 1x | 6.9 | 0.396 | 13.6 | 36.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r50_caffe_fpn_gn_1x_4gpu_20190516-9f253a93.pth) | 20 | | R-50 | caffe | Y | N | 2x | - | - | - | 36.9 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r50_caffe_fpn_gn_2x_4gpu_20190516_-93484354.pth) | 21 | | R-101 | caffe | Y | N | 1x | 10.4 | 0.558 | 11.6 | 39.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r101_caffe_fpn_gn_1x_4gpu_20190516-e4889733.pth) | 22 | | R-101 | caffe | Y | N | 2x | - | - | - | 39.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_r101_caffe_fpn_gn_2x_4gpu_20190516-c03af97b.pth) | 23 | 24 | 25 | | Backbone | Style | GN | MS train | Lr schd | Mem (GB) | Train time (s/iter) | Inf time (fps) | box AP | Download | 26 | |:---------:|:-------:|:-------:|:-------:|:-------:|:--------:|:-------------------:|:--------------:|:------:|:--------:| 27 | | R-50 | caffe | Y | Y | 2x | - | - | - | 38.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_mstrain_640_800_r50_caffe_fpn_gn_2x_4gpu_20190516-f7329d80.pth) | 28 | | R-101 | caffe | Y | Y | 2x | - | - | - | 40.8 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_mstrain_640_800_r101_caffe_fpn_gn_2x_4gpu_20190516-42e6f62d.pth) | 29 | | X-101 | caffe | Y | Y | 2x | 9.7 | 0.892 | 7.0 | 42.8 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/fcos/fcos_mstrain_640_800_x101_64x4d_fpn_gn_2x_20190516-a36c0872.pth) | 30 | 31 | **Notes:** 32 | - To be consistent with the author's implementation, we use 4 GPUs with 4 images/GPU for R-50 and R-101 models, and 8 GPUs with 2 image/GPU for X-101 models. 33 | - The X-101 backbone is X-101-64x4d. 
34 | -------------------------------------------------------------------------------- /mmdet/models/anchor_heads/retina_head.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch.nn as nn 3 | from mmcv.cnn import normal_init 4 | 5 | from ..registry import HEADS 6 | from ..utils import ConvModule, bias_init_with_prob 7 | from .anchor_head import AnchorHead 8 | 9 | 10 | @HEADS.register_module 11 | class RetinaHead(AnchorHead): 12 | 13 | def __init__(self, 14 | num_classes, 15 | in_channels, 16 | stacked_convs=4, 17 | octave_base_scale=4, 18 | scales_per_octave=3, 19 | conv_cfg=None, 20 | norm_cfg=None, 21 | **kwargs): 22 | self.stacked_convs = stacked_convs 23 | self.octave_base_scale = octave_base_scale 24 | self.scales_per_octave = scales_per_octave 25 | self.conv_cfg = conv_cfg 26 | self.norm_cfg = norm_cfg 27 | octave_scales = np.array( 28 | [2**(i / scales_per_octave) for i in range(scales_per_octave)]) 29 | anchor_scales = octave_scales * octave_base_scale 30 | super(RetinaHead, self).__init__( 31 | num_classes, in_channels, anchor_scales=anchor_scales, **kwargs) 32 | 33 | def _init_layers(self): 34 | self.relu = nn.ReLU(inplace=True) 35 | self.cls_convs = nn.ModuleList() 36 | self.reg_convs = nn.ModuleList() 37 | for i in range(self.stacked_convs): 38 | chn = self.in_channels if i == 0 else self.feat_channels 39 | self.cls_convs.append( 40 | ConvModule( 41 | chn, 42 | self.feat_channels, 43 | 3, 44 | stride=1, 45 | padding=1, 46 | conv_cfg=self.conv_cfg, 47 | norm_cfg=self.norm_cfg)) 48 | self.reg_convs.append( 49 | ConvModule( 50 | chn, 51 | self.feat_channels, 52 | 3, 53 | stride=1, 54 | padding=1, 55 | conv_cfg=self.conv_cfg, 56 | norm_cfg=self.norm_cfg)) 57 | self.retina_cls = nn.Conv2d( 58 | self.feat_channels, 59 | self.num_anchors * self.cls_out_channels, 60 | 3, 61 | padding=1) 62 | self.retina_reg = nn.Conv2d( 63 | self.feat_channels, self.num_anchors * 4, 3, padding=1) 64 | 65 | def init_weights(self): 66 | for m in self.cls_convs: 67 | normal_init(m.conv, std=0.01) 68 | for m in self.reg_convs: 69 | normal_init(m.conv, std=0.01) 70 | bias_cls = bias_init_with_prob(0.01) 71 | normal_init(self.retina_cls, std=0.01, bias=bias_cls) 72 | normal_init(self.retina_reg, std=0.01) 73 | 74 | def forward_single(self, x): 75 | cls_feat = x 76 | reg_feat = x 77 | for cls_conv in self.cls_convs: 78 | cls_feat = cls_conv(cls_feat) 79 | for reg_conv in self.reg_convs: 80 | reg_feat = reg_conv(reg_feat) 81 | cls_score = self.retina_cls(cls_feat) 82 | bbox_pred = self.retina_reg(reg_feat) 83 | return cls_score, bbox_pred 84 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/base_sampler.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | import torch 4 | 5 | from .sampling_result import SamplingResult 6 | 7 | 8 | class BaseSampler(metaclass=ABCMeta): 9 | 10 | def __init__(self, 11 | num, 12 | pos_fraction, 13 | neg_pos_ub=-1, 14 | add_gt_as_proposals=True, 15 | **kwargs): 16 | self.num = num 17 | self.pos_fraction = pos_fraction 18 | self.neg_pos_ub = neg_pos_ub 19 | self.add_gt_as_proposals = add_gt_as_proposals 20 | self.pos_sampler = self 21 | self.neg_sampler = self 22 | 23 | @abstractmethod 24 | def _sample_pos(self, assign_result, num_expected, **kwargs): 25 | pass 26 | 27 | @abstractmethod 28 | def _sample_neg(self, assign_result, num_expected, **kwargs): 29 | pass 30 | 31 | 
@abstractmethod 32 | def _sample_neg_lowthr(self, assign_result, num_expected, lowthr=0.1, **kwargs): 33 | pass 34 | 35 | def sample(self, 36 | assign_result, 37 | bboxes, 38 | gt_bboxes, 39 | gt_labels=None, 40 | **kwargs): 41 | """Sample positive and negative bboxes. 42 | 43 | This is a simple implementation of bbox sampling given candidates, 44 | assigning results and ground truth bboxes. 45 | 46 | Args: 47 | assign_result (:obj:`AssignResult`): Bbox assigning results. 48 | bboxes (Tensor): Boxes to be sampled from. 49 | gt_bboxes (Tensor): Ground truth bboxes. 50 | gt_labels (Tensor, optional): Class labels of ground truth bboxes. 51 | 52 | Returns: 53 | :obj:`SamplingResult`: Sampling result. 54 | """ 55 | bboxes = bboxes[:, :4] 56 | 57 | gt_flags = bboxes.new_zeros((bboxes.shape[0], ), dtype=torch.uint8) 58 | if self.add_gt_as_proposals: 59 | bboxes = torch.cat([gt_bboxes, bboxes], dim=0) 60 | assign_result.add_gt_(gt_labels) 61 | gt_ones = bboxes.new_ones(gt_bboxes.shape[0], dtype=torch.uint8) 62 | gt_flags = torch.cat([gt_ones, gt_flags]) 63 | 64 | num_expected_pos = int(self.num * self.pos_fraction) 65 | pos_inds = self.pos_sampler._sample_pos( 66 | assign_result, num_expected_pos, bboxes=bboxes, **kwargs) 67 | # We found that sampled indices have duplicated items occasionally. 68 | # (may be a bug of PyTorch) 69 | pos_inds = pos_inds.unique() 70 | num_sampled_pos = pos_inds.numel() 71 | num_expected_neg = self.num - num_sampled_pos 72 | if self.neg_pos_ub >= 0: 73 | _pos = max(1, num_sampled_pos) 74 | neg_upper_bound = int(self.neg_pos_ub * _pos) 75 | if num_expected_neg > neg_upper_bound: 76 | num_expected_neg = neg_upper_bound 77 | neg_inds = self.neg_sampler._sample_neg_lowthr( 78 | assign_result, num_expected_neg, bboxes=bboxes, **kwargs) 79 | neg_inds = neg_inds.unique() 80 | 81 | return SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes, 82 | assign_result, gt_flags) 83 | -------------------------------------------------------------------------------- /configs/hrnet/README.md: -------------------------------------------------------------------------------- 1 | # High-resolution networks (HRNets) for object detection 2 | 3 | ## Introduction 4 | 5 | ``` 6 | @inproceedings{SunXLW19, 7 | title={Deep High-Resolution Representation Learning for Human Pose Estimation}, 8 | author={Ke Sun and Bin Xiao and Dong Liu and Jingdong Wang}, 9 | booktitle={CVPR}, 10 | year={2019} 11 | } 12 | 13 | @article{SunZJCXLMWLW19, 14 | title={High-Resolution Representations for Labeling Pixels and Regions}, 15 | author={Ke Sun and Yang Zhao and Borui Jiang and Tianheng Cheng and Bin Xiao 16 | and Dong Liu and Yadong Mu and Xinggang Wang and Wenyu Liu and Jingdong Wang}, 17 | journal = {CoRR}, 18 | volume = {abs/1904.04514}, 19 | year={2019} 20 | } 21 | ``` 22 | 23 | ## Results and Models 24 | 25 | Faster R-CNN 26 | 27 | | Backbone|#Params|GFLOPs|Lr sched|mAP|Download| 28 | | :--:|:--:|:--:|:--:|:--:|:--:| 29 | | HRNetV2-W18 |26.2M|159.1| 1x | 36.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/faster_rcnn_hrnetv2_w18_fpn_1x_20190522-e368c387.pth)| 30 | | HRNetV2-W18 |26.2M|159.1| 20-23-24e | 38.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/faster_rcnn_hrnetv2_w18_fpn_20_23_24e_20190522-ed3c0293.pth)| 31 | | HRNetV2-W32 |45.0M|245.3| 1x | 39.5 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/faster_rcnn_hrnetv2_w32_fpn_1x_20190522-d22f1fef.pth)| 32 | | HRNetV2-W32 |45.0M|245.3| 
20-23-24e | 40.8 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/faster_rcnn_hrnetv2_w32_fpn_20_23_24e_20190522-2d67a5eb.pth)| 33 | | HRNetV2-W40 |60.5M|314.9| 1x | 40.4 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/faster_rcnn_hrnetv2_w40_fpn_1x_20190522-30502318.pth)| 34 | | HRNetV2-W40 |60.5M|314.9| 20-23-24e | 41.4 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/faster_rcnn_hrnetv2_w40_fpn_20_23_24e_20190522-050a7c7f.pth)| 35 | 36 | 37 | Mask R-CNN 38 | 39 | |Backbone|Lr sched|mask mAP|box mAP|Download| 40 | |:--:|:--:|:--:|:--:|:--:| 41 | | HRNetV2-W18 | 1x | 34.2 | 37.3 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/mask_rcnn_hrnetv2_w18_fpn_1x_20190522-c8ad459f.pth)| 42 | | HRNetV2-W18 | 20-23-24e | 35.7 | 39.2 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/mask_rcnn_hrnetv2_w18_fpn_20_23_24e_20190522-5c11b7f2.pth)| 43 | | HRNetV2-W32 | 1x | 36.8 | 40.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/mask_rcnn_hrnetv2_w32_fpn_1x_20190522-374aaa00.pth)| 44 | | HRNetV2-W32 | 20-23-24e | 37.6 | 42.1 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/mask_rcnn_hrnetv2_w32_fpn_20_23_24e_20190522-4dd02a79.pth)| 45 | 46 | Cascade R-CNN 47 | 48 | |Backbone|Lr sched|mAP|Download| 49 | |:--:|:--:|:--:|:--:| 50 | | HRNetV2-W32 | 20e | 43.7 | [model](https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/hrnet/cascade_rcnn_hrnetv2_w32_fpn_20e_20190522-55bec4ee.pth)| 51 | 52 | **Note:** 53 | 54 | - HRNetV2 ImageNet pretrained models are in [HRNets for Image Classification](https://github.com/HRNet/HRNet-Image-Classification). 
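The download links above point at plain PyTorch `.pth` files. As a quick sanity check after downloading one, the sketch below only needs `torch`; the key layout described in the comments is the usual mmdetection checkpoint format and may differ slightly per release.

```python
# Sketch: inspect the HRNetV2-W18 Faster R-CNN (1x) checkpoint linked above.
import torch

ckpt = torch.load('faster_rcnn_hrnetv2_w18_fpn_1x_20190522-e368c387.pth',
                  map_location='cpu')
print(ckpt.get('meta', {}))          # typically mmdet/mmcv versions, epoch, class names
state_dict = ckpt['state_dict']
print(len(state_dict), 'parameter tensors')
print(next(iter(state_dict)))        # first parameter name, e.g. a backbone conv weight
```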
55 | --------------------------------------------------------------------------------