├── .gitignore ├── LICENSE ├── README.md ├── configs ├── efficientPS_multigpu_sample.py └── efficientPS_singlegpu_sample.py ├── efficientNet ├── geffnet │ ├── __init__.py │ ├── activations │ │ ├── __init__.py │ │ ├── activations.py │ │ ├── activations_jit.py │ │ └── activations_me.py │ ├── config.py │ ├── conv2d_layers.py │ ├── efficientnet_builder.py │ ├── gen_efficientnet.py │ ├── helpers.py │ ├── mobilenetv3.py │ ├── model_factory.py │ └── version.py └── setup.py ├── environment.yml ├── images ├── intro.png └── opendr_logo.png ├── mmdet ├── __init__.py ├── apis │ ├── __init__.py │ ├── inference.py │ ├── test.py │ └── train.py ├── core │ ├── __init__.py │ ├── anchor │ │ ├── __init__.py │ │ ├── anchor_generator.py │ │ ├── anchor_target.py │ │ ├── guided_anchor_target.py │ │ ├── point_generator.py │ │ └── point_target.py │ ├── bbox │ │ ├── __init__.py │ │ ├── assign_sampling.py │ │ ├── assigners │ │ │ ├── __init__.py │ │ │ ├── approx_max_iou_assigner.py │ │ │ ├── assign_result.py │ │ │ ├── atss_assigner.py │ │ │ ├── base_assigner.py │ │ │ ├── max_iou_assigner.py │ │ │ └── point_assigner.py │ │ ├── bbox_target.py │ │ ├── demodata.py │ │ ├── geometry.py │ │ ├── samplers │ │ │ ├── __init__.py │ │ │ ├── base_sampler.py │ │ │ ├── combined_sampler.py │ │ │ ├── instance_balanced_pos_sampler.py │ │ │ ├── iou_balanced_neg_sampler.py │ │ │ ├── ohem_sampler.py │ │ │ ├── pseudo_sampler.py │ │ │ ├── random_sampler.py │ │ │ └── sampling_result.py │ │ └── transforms.py │ ├── evaluation │ │ ├── __init__.py │ │ ├── bbox_overlaps.py │ │ ├── class_names.py │ │ ├── eval_hooks.py │ │ ├── mean_ap.py │ │ ├── panoptic.py │ │ └── recall.py │ ├── fp16 │ │ ├── __init__.py │ │ ├── decorators.py │ │ ├── hooks.py │ │ └── utils.py │ ├── mask │ │ ├── __init__.py │ │ ├── mask_target.py │ │ └── utils.py │ ├── optimizer │ │ ├── __init__.py │ │ ├── builder.py │ │ ├── copy_of_sgd.py │ │ └── registry.py │ ├── post_processing │ │ ├── __init__.py │ │ ├── bbox_nms.py │ │ └── merge_augs.py │ └── utils │ │ ├── __init__.py │ │ ├── dist_utils.py │ │ └── misc.py ├── datasets │ ├── __init__.py │ ├── builder.py │ ├── cityscapes.py │ ├── coco.py │ ├── custom.py │ ├── dataset_wrappers.py │ ├── loader │ │ ├── __init__.py │ │ ├── build_loader.py │ │ └── sampler.py │ ├── pipelines │ │ ├── __init__.py │ │ ├── auto_augment.py │ │ ├── compose.py │ │ ├── formating.py │ │ ├── instaboost.py │ │ ├── loading.py │ │ ├── test_aug.py │ │ └── transforms.py │ ├── registry.py │ ├── voc.py │ ├── wider_face.py │ └── xml_style.py ├── models │ ├── __init__.py │ ├── anchor_heads │ │ ├── __init__.py │ │ ├── anchor_head.py │ │ ├── atss_head.py │ │ ├── fcos_head.py │ │ ├── fovea_head.py │ │ ├── free_anchor_retina_head.py │ │ ├── ga_retina_head.py │ │ ├── ga_rpn_head.py │ │ ├── guided_anchor_head.py │ │ ├── reppoints_head.py │ │ ├── retina_head.py │ │ ├── retina_sepbn_head.py │ │ ├── rpn_head.py │ │ ├── sep_rpn_head.py │ │ └── ssd_head.py │ ├── backbones │ │ ├── __init__.py │ │ └── resnet.py │ ├── bbox_heads │ │ ├── __init__.py │ │ ├── bbox_head.py │ │ ├── convfc_bbox_head.py │ │ └── double_bbox_head.py │ ├── builder.py │ ├── efficientps │ │ ├── __init__.py │ │ ├── base.py │ │ ├── efficientPS.py │ │ ├── rpn.py │ │ ├── test_mixins.py │ │ └── two_stage.py │ ├── losses │ │ ├── __init__.py │ │ ├── accuracy.py │ │ ├── balanced_l1_loss.py │ │ ├── cross_entropy_loss.py │ │ ├── focal_loss.py │ │ ├── ghm_loss.py │ │ ├── iou_loss.py │ │ ├── mse_loss.py │ │ ├── smooth_l1_loss.py │ │ └── utils.py │ ├── mask_heads │ │ ├── __init__.py │ │ ├── efficientps_semantic_head.py │ │ 
├── fcn_mask_head.py │ │ ├── fcn_sep_mask_head.py │ │ ├── fused_semantic_head.py │ │ ├── grid_head.py │ │ ├── htc_mask_head.py │ │ └── maskiou_head.py │ ├── necks │ │ ├── __init__.py │ │ └── two_way_fpn.py │ ├── registry.py │ ├── roi_extractors │ │ ├── __init__.py │ │ └── single_level.py │ ├── shared_heads │ │ ├── __init__.py │ │ └── res_layer.py │ └── utils │ │ ├── __init__.py │ │ └── weight_init.py ├── ops │ ├── __init__.py │ ├── activation.py │ ├── affine_grid │ │ ├── __init__.py │ │ ├── affine_grid.py │ │ └── src │ │ │ └── affine_grid_cuda.cpp │ ├── carafe │ │ ├── __init__.py │ │ ├── carafe.py │ │ ├── grad_check.py │ │ ├── setup.py │ │ └── src │ │ │ ├── carafe_cuda.cpp │ │ │ ├── carafe_cuda_kernel.cu │ │ │ ├── carafe_naive_cuda.cpp │ │ │ └── carafe_naive_cuda_kernel.cu │ ├── context_block.py │ ├── conv.py │ ├── conv_module.py │ ├── conv_ws.py │ ├── dcn │ │ ├── __init__.py │ │ ├── deform_conv.py │ │ ├── deform_pool.py │ │ └── src │ │ │ ├── deform_conv_cuda.cpp │ │ │ ├── deform_conv_cuda_kernel.cu │ │ │ ├── deform_pool_cuda.cpp │ │ │ └── deform_pool_cuda_kernel.cu │ ├── depthwise_separable_conv_module.py │ ├── generalized_attention.py │ ├── grid_sampler │ │ ├── __init__.py │ │ ├── grid_sampler.py │ │ └── src │ │ │ ├── cpu │ │ │ ├── grid_sampler_cpu.cpp │ │ │ └── grid_sampler_cpu.h │ │ │ ├── cuda │ │ │ ├── grid_sampler_cuda.cu │ │ │ └── grid_sampler_cuda.cuh │ │ │ ├── cudnn │ │ │ └── grid_sampler_cudnn.cpp │ │ │ └── grid_sampler.cpp │ ├── masked_conv │ │ ├── __init__.py │ │ ├── masked_conv.py │ │ └── src │ │ │ ├── masked_conv2d_cuda.cpp │ │ │ └── masked_conv2d_kernel.cu │ ├── nms │ │ ├── __init__.py │ │ ├── nms_wrapper.py │ │ └── src │ │ │ ├── nms_cpu.cpp │ │ │ ├── nms_cuda.cpp │ │ │ └── nms_kernel.cu │ ├── non_local.py │ ├── norm.py │ ├── roi_align │ │ ├── __init__.py │ │ ├── gradcheck.py │ │ ├── roi_align.py │ │ └── src │ │ │ ├── roi_align_cuda.cpp │ │ │ ├── roi_align_kernel.cu │ │ │ └── roi_align_kernel_v2.cu │ ├── roi_pool │ │ ├── __init__.py │ │ ├── gradcheck.py │ │ ├── roi_pool.py │ │ └── src │ │ │ ├── roi_pool_cuda.cpp │ │ │ └── roi_pool_kernel.cu │ ├── roi_sampling │ │ ├── __init__.py │ │ ├── functions.py │ │ └── src │ │ │ ├── roi_sampling.cpp │ │ │ ├── roi_sampling.h │ │ │ ├── roi_sampling_cpu.cpp │ │ │ ├── roi_sampling_cuda.cu │ │ │ └── utils │ │ │ ├── checks.h │ │ │ ├── common.h │ │ │ └── cuda.cuh │ ├── saconv.py │ ├── scale.py │ ├── sigmoid_focal_loss │ │ ├── __init__.py │ │ ├── sigmoid_focal_loss.py │ │ └── src │ │ │ ├── sigmoid_focal_loss.cpp │ │ │ └── sigmoid_focal_loss_cuda.cu │ ├── upsample.py │ └── utils │ │ ├── __init__.py │ │ └── src │ │ └── compiling_info.cpp └── utils │ ├── __init__.py │ ├── collect_env.py │ ├── contextmanagers.py │ ├── flops_counter.py │ ├── logger.py │ ├── profiling.py │ ├── registry.py │ └── util_mixins.py ├── pytest.ini ├── requirements.txt ├── setup.py ├── tests ├── async_benchmark.py ├── test_assigner.py ├── test_async.py ├── test_config.py ├── test_forward.py ├── test_heads.py ├── test_nms.py ├── test_roi_sampling.py ├── test_sampler.py ├── test_soft_nms.py └── test_utils.py └── tools ├── cityscapes_demo.py ├── cityscapes_inference.py ├── cityscapes_save_predictions.py ├── convert_cityscapes.py ├── convert_kitti.py ├── dist_test.sh ├── dist_train.sh ├── fuse_conv_bn.py ├── kitti_demo.py ├── test.py └── train.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # 
Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | #custom 132 | work_dirs/ 133 | data 134 | mmdet/version.py 135 | *.pkl 136 | *.pkl.json 137 | *.log.json 138 | *.pth 139 | tmpDir 140 | 141 | -------------------------------------------------------------------------------- /efficientNet/geffnet/__init__.py: -------------------------------------------------------------------------------- 1 | from .gen_efficientnet import * 2 | from .mobilenetv3 import * 3 | from .model_factory import create_model 4 | from .config import is_exportable, is_scriptable, set_exportable, set_scriptable 5 | from .activations import * -------------------------------------------------------------------------------- /efficientNet/geffnet/activations/activations.py: -------------------------------------------------------------------------------- 1 | """ Activations 2 | 3 | A collection of activations fn and modules with a common interface so that they can 4 | easily be swapped. All have an `inplace` arg even if not used. 
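    A minimal usage sketch (any module below is a drop-in replacement for
    built-in activations such as `nn.ReLU`; `Swish` etc. are defined further
    down in this file):

        >>> import torch
        >>> act = Swish(inplace=False)   # or Mish(), HardSwish(), HardSigmoid()
        >>> y = act(torch.randn(2, 3))   # same call convention as nn.ReLU()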
5 | 6 | Copyright 2020 Ross Wightman 7 | """ 8 | from torch import nn as nn 9 | from torch.nn import functional as F 10 | 11 | 12 | def swish(x, inplace: bool = False): 13 | """Swish - Described originally as SiLU (https://arxiv.org/abs/1702.03118v3) 14 | and also as Swish (https://arxiv.org/abs/1710.05941). 15 | 16 | TODO Rename to SiLU with addition to PyTorch 17 | """ 18 | return x.mul_(x.sigmoid()) if inplace else x.mul(x.sigmoid()) 19 | 20 | 21 | class Swish(nn.Module): 22 | def __init__(self, inplace: bool = False): 23 | super(Swish, self).__init__() 24 | self.inplace = inplace 25 | 26 | def forward(self, x): 27 | return swish(x, self.inplace) 28 | 29 | 30 | def mish(x, inplace: bool = False): 31 | """Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681 32 | """ 33 | return x.mul(F.softplus(x).tanh()) 34 | 35 | 36 | class Mish(nn.Module): 37 | def __init__(self, inplace: bool = False): 38 | super(Mish, self).__init__() 39 | self.inplace = inplace 40 | 41 | def forward(self, x): 42 | return mish(x, self.inplace) 43 | 44 | 45 | def sigmoid(x, inplace: bool = False): 46 | return x.sigmoid_() if inplace else x.sigmoid() 47 | 48 | 49 | # PyTorch has this, but not with a consistent inplace argument interface 50 | class Sigmoid(nn.Module): 51 | def __init__(self, inplace: bool = False): 52 | super(Sigmoid, self).__init__() 53 | self.inplace = inplace 54 | 55 | def forward(self, x): 56 | return x.sigmoid_() if self.inplace else x.sigmoid() 57 | 58 | class Identity(nn.Module): 59 | def __init__(self, inplace: bool = False): 60 | super(Identity, self).__init__() 61 | self.inplace = inplace 62 | 63 | def forward(self, x): 64 | return x 65 | 66 | 67 | def tanh(x, inplace: bool = False): 68 | return x.tanh_() if inplace else x.tanh() 69 | 70 | # PyTorch has this, but not with a consistent inplace argument interface 71 | class Tanh(nn.Module): 72 | def __init__(self, inplace: bool = False): 73 | super(Tanh, self).__init__() 74 | self.inplace = inplace 75 | 76 | def forward(self, x): 77 | return x.tanh_() if self.inplace else x.tanh() 78 | 79 | 80 | def hard_swish(x, inplace: bool = False): 81 | inner = F.relu6(x + 3.).div_(6.) 82 | return x.mul_(inner) if inplace else x.mul(inner) 83 | 84 | 85 | class HardSwish(nn.Module): 86 | def __init__(self, inplace: bool = False): 87 | super(HardSwish, self).__init__() 88 | self.inplace = inplace 89 | 90 | def forward(self, x): 91 | return hard_swish(x, self.inplace) 92 | 93 | 94 | def hard_sigmoid(x, inplace: bool = False): 95 | if inplace: 96 | return x.add_(3.).clamp_(0., 6.).div_(6.) 97 | else: 98 | return F.relu6(x + 3.) / 6. 99 | 100 | 101 | class HardSigmoid(nn.Module): 102 | def __init__(self, inplace: bool = False): 103 | super(HardSigmoid, self).__init__() 104 | self.inplace = inplace 105 | 106 | def forward(self, x): 107 | return hard_sigmoid(x, self.inplace) 108 | 109 | 110 | -------------------------------------------------------------------------------- /efficientNet/geffnet/activations/activations_jit.py: -------------------------------------------------------------------------------- 1 | """ Activations (jit) 2 | 3 | A collection of jit-scripted activations fn and modules with a common interface so that they can 4 | easily be swapped. All have an `inplace` arg even if not used. 
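    A minimal equivalence sketch (the scripted body of `swish_jit` below is the
    same expression as the eager `swish`, so outputs agree):

        >>> import torch
        >>> x = torch.randn(8)
        >>> torch.allclose(swish_jit(x), x * x.sigmoid())
        True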
5 | 6 | All jit scripted activations are lacking in-place variations on purpose, scripted kernel fusion does not 7 | currently work across in-place op boundaries, thus performance is equal to or less than the non-scripted 8 | versions if they contain in-place ops. 9 | 10 | Copyright 2020 Ross Wightman 11 | """ 12 | 13 | import torch 14 | from torch import nn as nn 15 | from torch.nn import functional as F 16 | 17 | __all__ = ['swish_jit', 'SwishJit', 'mish_jit', 'MishJit', 18 | 'hard_sigmoid_jit', 'HardSigmoidJit', 'hard_swish_jit', 'HardSwishJit'] 19 | 20 | 21 | @torch.jit.script 22 | def swish_jit(x, inplace: bool = False): 23 | """Swish - Described originally as SiLU (https://arxiv.org/abs/1702.03118v3) 24 | and also as Swish (https://arxiv.org/abs/1710.05941). 25 | 26 | TODO Rename to SiLU with addition to PyTorch 27 | """ 28 | return x.mul(x.sigmoid()) 29 | 30 | 31 | @torch.jit.script 32 | def mish_jit(x, _inplace: bool = False): 33 | """Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681 34 | """ 35 | return x.mul(F.softplus(x).tanh()) 36 | 37 | 38 | class SwishJit(nn.Module): 39 | def __init__(self, inplace: bool = False): 40 | super(SwishJit, self).__init__() 41 | 42 | def forward(self, x): 43 | return swish_jit(x) 44 | 45 | 46 | class MishJit(nn.Module): 47 | def __init__(self, inplace: bool = False): 48 | super(MishJit, self).__init__() 49 | 50 | def forward(self, x): 51 | return mish_jit(x) 52 | 53 | 54 | @torch.jit.script 55 | def hard_sigmoid_jit(x, inplace: bool = False): 56 | # return F.relu6(x + 3.) / 6. 57 | return (x + 3).clamp(min=0, max=6).div(6.) # clamp seems ever so slightly faster? 58 | 59 | 60 | class HardSigmoidJit(nn.Module): 61 | def __init__(self, inplace: bool = False): 62 | super(HardSigmoidJit, self).__init__() 63 | 64 | def forward(self, x): 65 | return hard_sigmoid_jit(x) 66 | 67 | 68 | @torch.jit.script 69 | def hard_swish_jit(x, inplace: bool = False): 70 | # return x * (F.relu6(x + 3.) / 6) 71 | return x * (x + 3).clamp(min=0, max=6).div(6.) # clamp seems ever so slightly faster? 
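# A quick self-check sketch relating the clamp form above to the commented-out
# relu6 form (both compute the same hard-swish values):
#
#   x = torch.randn(16)
#   assert torch.allclose(hard_swish_jit(x), x * F.relu6(x + 3.) / 6.)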
72 | 73 | 74 | class HardSwishJit(nn.Module): 75 | def __init__(self, inplace: bool = False): 76 | super(HardSwishJit, self).__init__() 77 | 78 | def forward(self, x): 79 | return hard_swish_jit(x) 80 | -------------------------------------------------------------------------------- /efficientNet/geffnet/helpers.py: -------------------------------------------------------------------------------- 1 | """ Checkpoint loading / state_dict helpers 2 | Copyright 2020 Ross Wightman 3 | """ 4 | import torch 5 | import os 6 | from collections import OrderedDict 7 | from mmdet.utils import get_root_logger 8 | from mmcv.runner.checkpoint import load_state_dict 9 | 10 | try: 11 | from torch.hub import load_state_dict_from_url 12 | except ImportError: 13 | from torch.utils.model_zoo import load_url as load_state_dict_from_url 14 | 15 | 16 | def load_checkpoint(model, checkpoint_path): 17 | if checkpoint_path and os.path.isfile(checkpoint_path): 18 | print("=> Loading checkpoint '{}'".format(checkpoint_path)) 19 | checkpoint = torch.load(checkpoint_path) 20 | if isinstance(checkpoint, dict) and 'state_dict' in checkpoint: 21 | new_state_dict = OrderedDict() 22 | for k, v in checkpoint['state_dict'].items(): 23 | if k.startswith('module'): 24 | name = k[7:] # remove `module.` 25 | else: 26 | name = k 27 | new_state_dict[name] = v 28 | model.load_state_dict(new_state_dict) 29 | else: 30 | model.load_state_dict(checkpoint) 31 | print("=> Loaded checkpoint '{}'".format(checkpoint_path)) 32 | else: 33 | print("=> Error: No checkpoint found at '{}'".format(checkpoint_path)) 34 | raise FileNotFoundError() 35 | 36 | 37 | def load_pretrained(model, url, filter_fn=None, strict=False, logger=None): 38 | if not url: 39 | print("=> Warning: Pretrained model URL is empty, using random initialization.") 40 | return 41 | 42 | state_dict = load_state_dict_from_url(url, progress=False, map_location='cpu') 43 | 44 | input_conv = 'conv_stem' 45 | classifier = 'classifier' 46 | in_chans = getattr(model, input_conv).weight.shape[1] 47 | #num_classes = getattr(model, classifier).weight.shape[0] 48 | 49 | input_conv_weight = input_conv + '.weight' 50 | pretrained_in_chans = state_dict[input_conv_weight].shape[1] 51 | if in_chans != pretrained_in_chans: 52 | if in_chans == 1: 53 | print('=> Converting pretrained input conv {} from {} to 1 channel'.format( 54 | input_conv_weight, pretrained_in_chans)) 55 | conv1_weight = state_dict[input_conv_weight] 56 | state_dict[input_conv_weight] = conv1_weight.sum(dim=1, keepdim=True) 57 | else: 58 | print('=> Discarding pretrained input conv {} since input channel count != {}'.format( 59 | input_conv_weight, pretrained_in_chans)) 60 | del state_dict[input_conv_weight] 61 | strict = False 62 | 63 | # classifier_weight = classifier + '.weight' 64 | # pretrained_num_classes = state_dict[classifier_weight].shape[0] 65 | # if num_classes != pretrained_num_classes: 66 | # print('=> Discarding pretrained classifier since num_classes != {}'.format(pretrained_num_classes)) 67 | # del state_dict[classifier_weight] 68 | # del state_dict[classifier + '.bias'] 69 | # strict = False 70 | 71 | if filter_fn is not None: 72 | state_dict = filter_fn(state_dict) 73 | 74 | load_state_dict(model, state_dict, strict=strict, logger=get_root_logger()) 75 | -------------------------------------------------------------------------------- /efficientNet/geffnet/model_factory.py: -------------------------------------------------------------------------------- 1 | from .config import set_layer_config 2 | from .helpers 
import load_checkpoint 3 | 4 | from .gen_efficientnet import * 5 | from .mobilenetv3 import * 6 | 7 | 8 | def create_model( 9 | model_name='mnasnet_100', 10 | pretrained=None, 11 | num_classes=1000, 12 | in_chans=3, 13 | checkpoint_path='', 14 | **kwargs): 15 | 16 | model_kwargs = dict(num_classes=num_classes, in_chans=in_chans, pretrained=pretrained, **kwargs) 17 | 18 | if model_name in globals(): 19 | create_fn = globals()[model_name] 20 | model = create_fn(**model_kwargs) 21 | else: 22 | raise RuntimeError('Unknown model (%s)' % model_name) 23 | 24 | if checkpoint_path and not pretrained: 25 | load_checkpoint(model, checkpoint_path) 26 | 27 | return model 28 | -------------------------------------------------------------------------------- /efficientNet/geffnet/version.py: -------------------------------------------------------------------------------- 1 | __version__ = '1.0.1' 2 | -------------------------------------------------------------------------------- /efficientNet/setup.py: -------------------------------------------------------------------------------- 1 | """ Setup 2 | """ 3 | from setuptools import setup, find_packages 4 | from codecs import open 5 | from os import path 6 | 7 | here = path.abspath(path.dirname(__file__)) 8 | 9 | # Get the long description from the README file 10 | #with open(path.join(here, 'README.md'), encoding='utf-8') as f: 11 | # long_description = f.read() 12 | 13 | exec(open('geffnet/version.py').read()) 14 | setup( 15 | name='geffnet', 16 | version=__version__, 17 | description='(Generic) EfficientNets for PyTorch', 18 | url='https://github.com/rwightman/gen-efficientnet-pytorch', 19 | author='Ross Wightman', 20 | author_email='hello@rwightman.com', 21 | classifiers=[ 22 | # How mature is this project? Common values are 23 | # 3 - Alpha 24 | # 4 - Beta 25 | # 5 - Production/Stable 26 | 'Development Status :: 3 - Alpha', 27 | 'Intended Audience :: Education', 28 | 'Intended Audience :: Science/Research', 29 | 'License :: OSI Approved :: Apache Software License', 30 | 'Programming Language :: Python :: 3.6', 31 | 'Programming Language :: Python :: 3.7', 32 | 'Programming Language :: Python :: 3.8', 33 | 'Topic :: Scientific/Engineering', 34 | 'Topic :: Scientific/Engineering :: Artificial Intelligence', 35 | 'Topic :: Software Development', 36 | 'Topic :: Software Development :: Libraries', 37 | 'Topic :: Software Development :: Libraries :: Python Modules', 38 | ], 39 | 40 | # Note that this is a string of words separated by whitespace, not a list. 
41 | keywords='pytorch pretrained models efficientnet mixnet mobilenetv3 mnasnet', 42 | packages=find_packages(exclude=['data']), 43 | install_requires=['torch >= 1.4', 'torchvision'], 44 | python_requires='>=3.6', 45 | ) 46 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: efficientPS_env 2 | channels: 3 | - defaults 4 | dependencies: 5 | - _libgcc_mutex=0.1=main 6 | - ca-certificates=2020.12.8=h06a4308_0 7 | - certifi=2020.12.5=py37h06a4308_0 8 | - cython=0.29.21=py37h2531618_0 9 | - ld_impl_linux-64=2.33.1=h53a641e_7 10 | - libedit=3.1.20191231=h14c3975_1 11 | - libffi=3.3=he6710b0_2 12 | - libgcc-ng=9.1.0=hdf63c60_0 13 | - libstdcxx-ng=9.1.0=hdf63c60_0 14 | - ncurses=6.2=he6710b0_1 15 | - openssl=1.1.1i=h27cfd23_0 16 | - pip=20.3.3=py37h06a4308_0 17 | - python=3.7.9=h7579374_0 18 | - readline=8.0=h7b6447c_0 19 | - setuptools=51.0.0=py37h06a4308_2 20 | - sqlite=3.33.0=h62c20be_0 21 | - tk=8.6.10=hbc83047_0 22 | - wheel=0.36.2=pyhd3eb1b0_0 23 | - xz=5.2.5=h7b6447c_0 24 | - zlib=1.2.11=h7b6447c_3 25 | - pip: 26 | - absl-py==0.11.0 27 | - addict==2.4.0 28 | - appdirs==1.4.4 29 | - attrs==20.3.0 30 | - cachetools==4.2.0 31 | - chardet==4.0.0 32 | - cityscapesscripts==2.2.0 33 | - coloredlogs==15.0 34 | - cycler==0.10.0 35 | - decorator==4.4.2 36 | - future==0.18.2 37 | - google-auth==1.24.0 38 | - google-auth-oauthlib==0.4.2 39 | - grpcio==1.34.0 40 | - humanfriendly==9.1 41 | - idna==2.10 42 | - imageio==2.9.0 43 | - importlib-metadata==3.4.0 44 | - iniconfig==1.1.1 45 | - kiwisolver==1.3.1 46 | - markdown==3.3.3 47 | - matplotlib==3.3.3 48 | - mmcv==0.5.9 49 | - networkx==2.5 50 | - numpy==1.19.5 51 | - oauthlib==3.1.0 52 | - opencv-python==4.5.1.48 53 | - packaging==20.8 54 | - pandas==1.2.0 55 | - pillow==6.2.2 56 | - pluggy==0.13.1 57 | - protobuf==3.14.0 58 | - py==1.10.0 59 | - pyasn1==0.4.8 60 | - pyasn1-modules==0.2.8 61 | - pyparsing==2.4.7 62 | - pyquaternion==0.9.9 63 | - pytest==6.2.1 64 | - python-dateutil==2.8.1 65 | - pytz==2020.5 66 | - pywavelets==1.1.1 67 | - pyyaml==5.3.1 68 | - requests==2.25.1 69 | - requests-oauthlib==1.3.0 70 | - rsa==4.7 71 | - scikit-image==0.18.1 72 | - scipy==1.6.0 73 | - seaborn==0.11.1 74 | - six==1.15.0 75 | - tensorboard==2.4.0 76 | - tensorboard-plugin-wit==1.7.0 77 | - terminaltables==3.1.0 78 | - tifffile==2021.1.14 79 | - toml==0.10.2 80 | - tqdm==4.56.0 81 | - typing==3.7.4.3 82 | - typing-extensions==3.7.4.3 83 | - urllib3==1.26.2 84 | - werkzeug==1.0.1 85 | - xdoctest==0.15.0 86 | - yapf==0.30.0 87 | - zipp==3.4.0 88 | -------------------------------------------------------------------------------- /images/intro.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepSceneSeg/EfficientPS/e1c92c301b8d2a9c582797ab3cad203909f2fa9d/images/intro.png -------------------------------------------------------------------------------- /images/opendr_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepSceneSeg/EfficientPS/e1c92c301b8d2a9c582797ab3cad203909f2fa9d/images/opendr_logo.png -------------------------------------------------------------------------------- /mmdet/__init__.py: -------------------------------------------------------------------------------- 1 | from .version import __version__, short_version 2 | 3 | __all__ = ['__version__', 'short_version'] 4 
| -------------------------------------------------------------------------------- /mmdet/apis/__init__.py: -------------------------------------------------------------------------------- 1 | from .inference import (async_inference_detector, inference_detector, 2 | init_detector, show_result, show_result_pyplot) 3 | from .test import multi_gpu_test, single_gpu_test 4 | from .train import get_root_logger, set_random_seed, train_detector 5 | 6 | __all__ = [ 7 | 'get_root_logger', 'set_random_seed', 'train_detector', 'init_detector', 8 | 'async_inference_detector', 'inference_detector', 'show_result', 9 | 'show_result_pyplot', 'multi_gpu_test', 'single_gpu_test' 10 | ] 11 | -------------------------------------------------------------------------------- /mmdet/core/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor import * # noqa: F401, F403 2 | from .bbox import * # noqa: F401, F403 3 | from .evaluation import * # noqa: F401, F403 4 | from .fp16 import * # noqa: F401, F403 5 | from .mask import * # noqa: F401, F403 6 | from .optimizer import * # noqa: F401, F403 7 | from .post_processing import * # noqa: F401, F403 8 | from .utils import * # noqa: F401, F403 9 | -------------------------------------------------------------------------------- /mmdet/core/anchor/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor_generator import AnchorGenerator 2 | from .anchor_target import (anchor_inside_flags, anchor_target, 3 | images_to_levels, unmap) 4 | from .guided_anchor_target import ga_loc_target, ga_shape_target 5 | from .point_generator import PointGenerator 6 | from .point_target import point_target 7 | 8 | __all__ = [ 9 | 'AnchorGenerator', 'anchor_target', 'anchor_inside_flags', 'ga_loc_target', 10 | 'ga_shape_target', 'PointGenerator', 'point_target', 'images_to_levels', 11 | 'unmap' 12 | ] 13 | -------------------------------------------------------------------------------- /mmdet/core/anchor/anchor_generator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class AnchorGenerator(object): 5 | """ 6 | Examples: 7 | >>> from mmdet.core import AnchorGenerator 8 | >>> self = AnchorGenerator(9, [1.], [1.]) 9 | >>> all_anchors = self.grid_anchors((2, 2), device='cpu') 10 | >>> print(all_anchors) 11 | tensor([[ 0., 0., 8., 8.], 12 | [16., 0., 24., 8.], 13 | [ 0., 16., 8., 24.], 14 | [16., 16., 24., 24.]]) 15 | """ 16 | 17 | def __init__(self, base_size, scales, ratios, scale_major=True, ctr=None): 18 | self.base_size = base_size 19 | self.scales = torch.Tensor(scales) 20 | self.ratios = torch.Tensor(ratios) 21 | self.scale_major = scale_major 22 | self.ctr = ctr 23 | self.base_anchors = self.gen_base_anchors() 24 | 25 | @property 26 | def num_base_anchors(self): 27 | return self.base_anchors.size(0) 28 | 29 | def gen_base_anchors(self): 30 | w = self.base_size 31 | h = self.base_size 32 | if self.ctr is None: 33 | x_ctr = 0.5 * (w - 1) 34 | y_ctr = 0.5 * (h - 1) 35 | else: 36 | x_ctr, y_ctr = self.ctr 37 | 38 | h_ratios = torch.sqrt(self.ratios) 39 | w_ratios = 1 / h_ratios 40 | if self.scale_major: 41 | ws = (w * w_ratios[:, None] * self.scales[None, :]).view(-1) 42 | hs = (h * h_ratios[:, None] * self.scales[None, :]).view(-1) 43 | else: 44 | ws = (w * self.scales[:, None] * w_ratios[None, :]).view(-1) 45 | hs = (h * self.scales[:, None] * h_ratios[None, :]).view(-1) 46 | 47 | # yapf: disable 48 | base_anchors = 
torch.stack( 49 | [ 50 | x_ctr - 0.5 * (ws - 1), y_ctr - 0.5 * (hs - 1), 51 | x_ctr + 0.5 * (ws - 1), y_ctr + 0.5 * (hs - 1) 52 | ], 53 | dim=-1).round() 54 | # yapf: enable 55 | 56 | return base_anchors 57 | 58 | def _meshgrid(self, x, y, row_major=True): 59 | xx = x.repeat(len(y)) 60 | yy = y.view(-1, 1).repeat(1, len(x)).view(-1) 61 | if row_major: 62 | return xx, yy 63 | else: 64 | return yy, xx 65 | 66 | def grid_anchors(self, featmap_size, stride=16, device='cuda'): 67 | base_anchors = self.base_anchors.to(device) 68 | 69 | feat_h, feat_w = featmap_size 70 | shift_x = torch.arange(0, feat_w, device=device) * stride 71 | shift_y = torch.arange(0, feat_h, device=device) * stride 72 | shift_xx, shift_yy = self._meshgrid(shift_x, shift_y) 73 | shifts = torch.stack([shift_xx, shift_yy, shift_xx, shift_yy], dim=-1) 74 | shifts = shifts.type_as(base_anchors) 75 | # first feat_w elements correspond to the first row of shifts 76 | # add A anchors (1, A, 4) to K shifts (K, 1, 4) to get 77 | # shifted anchors (K, A, 4), reshape to (K*A, 4) 78 | 79 | all_anchors = base_anchors[None, :, :] + shifts[:, None, :] 80 | all_anchors = all_anchors.view(-1, 4) 81 | # first A rows correspond to A anchors of (0, 0) in feature map, 82 | # then (0, 1), (0, 2), ... 83 | return all_anchors 84 | 85 | def valid_flags(self, featmap_size, valid_size, device='cuda'): 86 | feat_h, feat_w = featmap_size 87 | valid_h, valid_w = valid_size 88 | assert valid_h <= feat_h and valid_w <= feat_w 89 | valid_x = torch.zeros(feat_w, dtype=torch.uint8, device=device) 90 | valid_y = torch.zeros(feat_h, dtype=torch.uint8, device=device) 91 | valid_x[:valid_w] = 1 92 | valid_y[:valid_h] = 1 93 | valid_xx, valid_yy = self._meshgrid(valid_x, valid_y) 94 | valid = valid_xx & valid_yy 95 | valid = valid[:, 96 | None].expand(valid.size(0), 97 | self.num_base_anchors).contiguous().view(-1) 98 | return valid 99 | -------------------------------------------------------------------------------- /mmdet/core/anchor/point_generator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class PointGenerator(object): 5 | 6 | def _meshgrid(self, x, y, row_major=True): 7 | xx = x.repeat(len(y)) 8 | yy = y.view(-1, 1).repeat(1, len(x)).view(-1) 9 | if row_major: 10 | return xx, yy 11 | else: 12 | return yy, xx 13 | 14 | def grid_points(self, featmap_size, stride=16, device='cuda'): 15 | feat_h, feat_w = featmap_size 16 | shift_x = torch.arange(0., feat_w, device=device) * stride 17 | shift_y = torch.arange(0., feat_h, device=device) * stride 18 | shift_xx, shift_yy = self._meshgrid(shift_x, shift_y) 19 | stride = shift_x.new_full((shift_xx.shape[0], ), stride) 20 | shifts = torch.stack([shift_xx, shift_yy, stride], dim=-1) 21 | all_points = shifts.to(device) 22 | return all_points 23 | 24 | def valid_flags(self, featmap_size, valid_size, device='cuda'): 25 | feat_h, feat_w = featmap_size 26 | valid_h, valid_w = valid_size 27 | assert valid_h <= feat_h and valid_w <= feat_w 28 | valid_x = torch.zeros(feat_w, dtype=torch.uint8, device=device) 29 | valid_y = torch.zeros(feat_h, dtype=torch.uint8, device=device) 30 | valid_x[:valid_w] = 1 31 | valid_y[:valid_h] = 1 32 | valid_xx, valid_yy = self._meshgrid(valid_x, valid_y) 33 | valid = valid_xx & valid_yy 34 | return valid 35 | -------------------------------------------------------------------------------- /mmdet/core/bbox/__init__.py: -------------------------------------------------------------------------------- 1 | from .assigners import 
AssignResult, BaseAssigner, MaxIoUAssigner 2 | from .bbox_target import bbox_target 3 | from .geometry import bbox_overlaps 4 | from .samplers import (BaseSampler, CombinedSampler, 5 | InstanceBalancedPosSampler, IoUBalancedNegSampler, 6 | PseudoSampler, RandomSampler, SamplingResult) 7 | from .transforms import (bbox2delta, bbox2result, bbox2roi, bbox_flip, 8 | bbox_mapping, bbox_mapping_back, delta2bbox, 9 | distance2bbox, roi2bbox) 10 | 11 | from .assign_sampling import ( # isort:skip, avoid recursive imports 12 | assign_and_sample, build_assigner, build_sampler) 13 | 14 | __all__ = [ 15 | 'bbox_overlaps', 'BaseAssigner', 'MaxIoUAssigner', 'AssignResult', 16 | 'BaseSampler', 'PseudoSampler', 'RandomSampler', 17 | 'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler', 18 | 'SamplingResult', 'build_assigner', 'build_sampler', 'assign_and_sample', 19 | 'bbox2delta', 'delta2bbox', 'bbox_flip', 'bbox_mapping', 20 | 'bbox_mapping_back', 'bbox2roi', 'roi2bbox', 'bbox2result', 21 | 'distance2bbox', 'bbox_target' 22 | ] 23 | -------------------------------------------------------------------------------- /mmdet/core/bbox/assign_sampling.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | from . import assigners, samplers 4 | 5 | 6 | def build_assigner(cfg, **kwargs): 7 | if isinstance(cfg, assigners.BaseAssigner): 8 | return cfg 9 | elif isinstance(cfg, dict): 10 | return mmcv.runner.obj_from_dict(cfg, assigners, default_args=kwargs) 11 | else: 12 | raise TypeError('Invalid type {} for building an assigner'.format( 13 | type(cfg))) 14 | 15 | 16 | def build_sampler(cfg, **kwargs): 17 | if isinstance(cfg, samplers.BaseSampler): 18 | return cfg 19 | elif isinstance(cfg, dict): 20 | return mmcv.runner.obj_from_dict(cfg, samplers, default_args=kwargs) 21 | else: 22 | raise TypeError('Invalid type {} for building a sampler'.format( 23 | type(cfg))) 24 | 25 | 26 | def assign_and_sample(bboxes, gt_bboxes, gt_bboxes_ignore, gt_labels, cfg): 27 | bbox_assigner = build_assigner(cfg.assigner) 28 | bbox_sampler = build_sampler(cfg.sampler) 29 | assign_result = bbox_assigner.assign(bboxes, gt_bboxes, gt_bboxes_ignore, 30 | gt_labels) 31 | sampling_result = bbox_sampler.sample(assign_result, bboxes, gt_bboxes, 32 | gt_labels) 33 | return assign_result, sampling_result 34 | -------------------------------------------------------------------------------- /mmdet/core/bbox/assigners/__init__.py: -------------------------------------------------------------------------------- 1 | from .approx_max_iou_assigner import ApproxMaxIoUAssigner 2 | from .assign_result import AssignResult 3 | from .atss_assigner import ATSSAssigner 4 | from .base_assigner import BaseAssigner 5 | from .max_iou_assigner import MaxIoUAssigner 6 | from .point_assigner import PointAssigner 7 | 8 | __all__ = [ 9 | 'BaseAssigner', 'MaxIoUAssigner', 'ApproxMaxIoUAssigner', 'AssignResult', 10 | 'PointAssigner', 'ATSSAssigner' 11 | ] 12 | -------------------------------------------------------------------------------- /mmdet/core/bbox/assigners/base_assigner.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | 4 | class BaseAssigner(metaclass=ABCMeta): 5 | 6 | @abstractmethod 7 | def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None): 8 | pass 9 | -------------------------------------------------------------------------------- /mmdet/core/bbox/bbox_target.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from ..utils import multi_apply 4 | from .transforms import bbox2delta 5 | 6 | 7 | def bbox_target(pos_bboxes_list, 8 | neg_bboxes_list, 9 | pos_gt_bboxes_list, 10 | pos_gt_labels_list, 11 | cfg, 12 | reg_classes=1, 13 | target_means=[.0, .0, .0, .0], 14 | target_stds=[1.0, 1.0, 1.0, 1.0], 15 | concat=True): 16 | labels, label_weights, bbox_targets, bbox_weights = multi_apply( 17 | bbox_target_single, 18 | pos_bboxes_list, 19 | neg_bboxes_list, 20 | pos_gt_bboxes_list, 21 | pos_gt_labels_list, 22 | cfg=cfg, 23 | reg_classes=reg_classes, 24 | target_means=target_means, 25 | target_stds=target_stds) 26 | 27 | if concat: 28 | labels = torch.cat(labels, 0) 29 | label_weights = torch.cat(label_weights, 0) 30 | bbox_targets = torch.cat(bbox_targets, 0) 31 | bbox_weights = torch.cat(bbox_weights, 0) 32 | return labels, label_weights, bbox_targets, bbox_weights 33 | 34 | 35 | def bbox_target_single(pos_bboxes, 36 | neg_bboxes, 37 | pos_gt_bboxes, 38 | pos_gt_labels, 39 | cfg, 40 | reg_classes=1, 41 | target_means=[.0, .0, .0, .0], 42 | target_stds=[1.0, 1.0, 1.0, 1.0]): 43 | num_pos = pos_bboxes.size(0) 44 | num_neg = neg_bboxes.size(0) 45 | num_samples = num_pos + num_neg 46 | labels = pos_bboxes.new_zeros(num_samples, dtype=torch.long) 47 | label_weights = pos_bboxes.new_zeros(num_samples) 48 | bbox_targets = pos_bboxes.new_zeros(num_samples, 4) 49 | bbox_weights = pos_bboxes.new_zeros(num_samples, 4) 50 | if num_pos > 0: 51 | labels[:num_pos] = pos_gt_labels 52 | pos_weight = 1.0 if cfg.pos_weight <= 0 else cfg.pos_weight 53 | label_weights[:num_pos] = pos_weight 54 | pos_bbox_targets = bbox2delta(pos_bboxes, pos_gt_bboxes, target_means, 55 | target_stds) 56 | bbox_targets[:num_pos, :] = pos_bbox_targets 57 | bbox_weights[:num_pos, :] = 1 58 | if num_neg > 0: 59 | label_weights[-num_neg:] = 1.0 60 | 61 | return labels, label_weights, bbox_targets, bbox_weights 62 | 63 | 64 | def expand_target(bbox_targets, bbox_weights, labels, num_classes): 65 | bbox_targets_expand = bbox_targets.new_zeros( 66 | (bbox_targets.size(0), 4 * num_classes)) 67 | bbox_weights_expand = bbox_weights.new_zeros( 68 | (bbox_weights.size(0), 4 * num_classes)) 69 | for i in torch.nonzero(labels > 0).squeeze(-1): 70 | start, end = labels[i] * 4, (labels[i] + 1) * 4 71 | bbox_targets_expand[i, start:end] = bbox_targets[i, :] 72 | bbox_weights_expand[i, start:end] = bbox_weights[i, :] 73 | return bbox_targets_expand, bbox_weights_expand 74 | -------------------------------------------------------------------------------- /mmdet/core/bbox/demodata.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | def ensure_rng(rng=None): 6 | """ 7 | Simple version of the ``kwarray.ensure_rng`` 8 | 9 | Args: 10 | rng (int | numpy.random.RandomState | None): 11 | if None, then defaults to the global rng. 
Otherwise this can be an 12 | integer or a RandomState class 13 | Returns: 14 | (numpy.random.RandomState) : rng - 15 | a numpy random number generator 16 | 17 | References: 18 | https://gitlab.kitware.com/computer-vision/kwarray/blob/master/kwarray/util_random.py#L270 19 | """ 20 | 21 | if rng is None: 22 | rng = np.random.mtrand._rand 23 | elif isinstance(rng, int): 24 | rng = np.random.RandomState(rng) 25 | else: 26 | rng = rng 27 | return rng 28 | 29 | 30 | def random_boxes(num=1, scale=1, rng=None): 31 | """ 32 | Simple version of ``kwimage.Boxes.random`` 33 | 34 | Returns: 35 | Tensor: shape (n, 4) in x1, y1, x2, y2 format. 36 | 37 | References: 38 | https://gitlab.kitware.com/computer-vision/kwimage/blob/master/kwimage/structs/boxes.py#L1390 39 | 40 | Example: 41 | >>> num = 3 42 | >>> scale = 512 43 | >>> rng = 0 44 | >>> boxes = random_boxes(num, scale, rng) 45 | >>> print(boxes) 46 | tensor([[280.9925, 278.9802, 308.6148, 366.1769], 47 | [216.9113, 330.6978, 224.0446, 456.5878], 48 | [405.3632, 196.3221, 493.3953, 270.7942]]) 49 | """ 50 | rng = ensure_rng(rng) 51 | 52 | tlbr = rng.rand(num, 4).astype(np.float32) 53 | 54 | tl_x = np.minimum(tlbr[:, 0], tlbr[:, 2]) 55 | tl_y = np.minimum(tlbr[:, 1], tlbr[:, 3]) 56 | br_x = np.maximum(tlbr[:, 0], tlbr[:, 2]) 57 | br_y = np.maximum(tlbr[:, 1], tlbr[:, 3]) 58 | 59 | tlbr[:, 0] = tl_x * scale 60 | tlbr[:, 1] = tl_y * scale 61 | tlbr[:, 2] = br_x * scale 62 | tlbr[:, 3] = br_y * scale 63 | 64 | boxes = torch.from_numpy(tlbr) 65 | return boxes 66 | -------------------------------------------------------------------------------- /mmdet/core/bbox/geometry.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False): 5 | """Calculate overlap between two set of bboxes. 6 | 7 | If ``is_aligned`` is ``False``, then calculate the ious between each bbox 8 | of bboxes1 and bboxes2, otherwise the ious between each aligned pair of 9 | bboxes1 and bboxes2. 10 | 11 | Args: 12 | bboxes1 (Tensor): shape (m, 4) in format. 13 | bboxes2 (Tensor): shape (n, 4) in format. 14 | If is_aligned is ``True``, then m and n must be equal. 15 | mode (str): "iou" (intersection over union) or iof (intersection over 16 | foreground). 
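    Note:
        The implementation below uses the legacy integer-pixel convention,
        computing widths/heights as ``x2 - x1 + 1`` and ``y2 - y1 + 1``.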
17 | 18 | Returns: 19 | ious(Tensor): shape (m, n) if is_aligned == False else shape (m, 1) 20 | 21 | Example: 22 | >>> bboxes1 = torch.FloatTensor([ 23 | >>> [0, 0, 10, 10], 24 | >>> [10, 10, 20, 20], 25 | >>> [32, 32, 38, 42], 26 | >>> ]) 27 | >>> bboxes2 = torch.FloatTensor([ 28 | >>> [0, 0, 10, 20], 29 | >>> [0, 10, 10, 19], 30 | >>> [10, 10, 20, 20], 31 | >>> ]) 32 | >>> bbox_overlaps(bboxes1, bboxes2) 33 | tensor([[0.5238, 0.0500, 0.0041], 34 | [0.0323, 0.0452, 1.0000], 35 | [0.0000, 0.0000, 0.0000]]) 36 | 37 | Example: 38 | >>> empty = torch.FloatTensor([]) 39 | >>> nonempty = torch.FloatTensor([ 40 | >>> [0, 0, 10, 9], 41 | >>> ]) 42 | >>> assert tuple(bbox_overlaps(empty, nonempty).shape) == (0, 1) 43 | >>> assert tuple(bbox_overlaps(nonempty, empty).shape) == (1, 0) 44 | >>> assert tuple(bbox_overlaps(empty, empty).shape) == (0, 0) 45 | """ 46 | 47 | assert mode in ['iou', 'iof'] 48 | 49 | rows = bboxes1.size(0) 50 | cols = bboxes2.size(0) 51 | if is_aligned: 52 | assert rows == cols 53 | 54 | if rows * cols == 0: 55 | return bboxes1.new(rows, 1) if is_aligned else bboxes1.new(rows, cols) 56 | 57 | if is_aligned: 58 | lt = torch.max(bboxes1[:, :2], bboxes2[:, :2]) # [rows, 2] 59 | rb = torch.min(bboxes1[:, 2:], bboxes2[:, 2:]) # [rows, 2] 60 | 61 | wh = (rb - lt + 1).clamp(min=0) # [rows, 2] 62 | overlap = wh[:, 0] * wh[:, 1] 63 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * ( 64 | bboxes1[:, 3] - bboxes1[:, 1] + 1) 65 | 66 | if mode == 'iou': 67 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * ( 68 | bboxes2[:, 3] - bboxes2[:, 1] + 1) 69 | ious = overlap / (area1 + area2 - overlap) 70 | else: 71 | ious = overlap / area1 72 | else: 73 | lt = torch.max(bboxes1[:, None, :2], bboxes2[:, :2]) # [rows, cols, 2] 74 | rb = torch.min(bboxes1[:, None, 2:], bboxes2[:, 2:]) # [rows, cols, 2] 75 | 76 | wh = (rb - lt + 1).clamp(min=0) # [rows, cols, 2] 77 | overlap = wh[:, :, 0] * wh[:, :, 1] 78 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * ( 79 | bboxes1[:, 3] - bboxes1[:, 1] + 1) 80 | 81 | if mode == 'iou': 82 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * ( 83 | bboxes2[:, 3] - bboxes2[:, 1] + 1) 84 | ious = overlap / (area1[:, None] + area2 - overlap) 85 | else: 86 | ious = overlap / (area1[:, None]) 87 | 88 | return ious 89 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_sampler import BaseSampler 2 | from .combined_sampler import CombinedSampler 3 | from .instance_balanced_pos_sampler import InstanceBalancedPosSampler 4 | from .iou_balanced_neg_sampler import IoUBalancedNegSampler 5 | from .ohem_sampler import OHEMSampler 6 | from .pseudo_sampler import PseudoSampler 7 | from .random_sampler import RandomSampler 8 | from .sampling_result import SamplingResult 9 | 10 | __all__ = [ 11 | 'BaseSampler', 'PseudoSampler', 'RandomSampler', 12 | 'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler', 13 | 'OHEMSampler', 'SamplingResult' 14 | ] 15 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/combined_sampler.py: -------------------------------------------------------------------------------- 1 | from ..assign_sampling import build_sampler 2 | from .base_sampler import BaseSampler 3 | 4 | 5 | class CombinedSampler(BaseSampler): 6 | 7 | def __init__(self, pos_sampler, neg_sampler, **kwargs): 8 | super(CombinedSampler, self).__init__(**kwargs) 9 | 
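        # Sketch: `pos_sampler` / `neg_sampler` are usually config dicts, e.g.
        # dict(type='InstanceBalancedPosSampler'), which build_sampler resolves
        # against mmdet.core.bbox.samplers via mmcv.runner.obj_from_dict;
        # already-built BaseSampler instances are passed through unchanged.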
self.pos_sampler = build_sampler(pos_sampler, **kwargs) 10 | self.neg_sampler = build_sampler(neg_sampler, **kwargs) 11 | 12 | def _sample_pos(self, **kwargs): 13 | raise NotImplementedError 14 | 15 | def _sample_neg(self, **kwargs): 16 | raise NotImplementedError 17 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/instance_balanced_pos_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from .random_sampler import RandomSampler 5 | 6 | 7 | class InstanceBalancedPosSampler(RandomSampler): 8 | 9 | def _sample_pos(self, assign_result, num_expected, **kwargs): 10 | pos_inds = torch.nonzero(assign_result.gt_inds > 0) 11 | if pos_inds.numel() != 0: 12 | pos_inds = pos_inds.squeeze(1) 13 | if pos_inds.numel() <= num_expected: 14 | return pos_inds 15 | else: 16 | unique_gt_inds = assign_result.gt_inds[pos_inds].unique() 17 | num_gts = len(unique_gt_inds) 18 | num_per_gt = int(round(num_expected / float(num_gts)) + 1) 19 | sampled_inds = [] 20 | for i in unique_gt_inds: 21 | inds = torch.nonzero(assign_result.gt_inds == i.item()) 22 | if inds.numel() != 0: 23 | inds = inds.squeeze(1) 24 | else: 25 | continue 26 | if len(inds) > num_per_gt: 27 | inds = self.random_choice(inds, num_per_gt) 28 | sampled_inds.append(inds) 29 | sampled_inds = torch.cat(sampled_inds) 30 | if len(sampled_inds) < num_expected: 31 | num_extra = num_expected - len(sampled_inds) 32 | extra_inds = np.array( 33 | list(set(pos_inds.cpu()) - set(sampled_inds.cpu()))) 34 | if len(extra_inds) > num_extra: 35 | extra_inds = self.random_choice(extra_inds, num_extra) 36 | extra_inds = torch.from_numpy(extra_inds).to( 37 | assign_result.gt_inds.device).long() 38 | sampled_inds = torch.cat([sampled_inds, extra_inds]) 39 | elif len(sampled_inds) > num_expected: 40 | sampled_inds = self.random_choice(sampled_inds, num_expected) 41 | return sampled_inds 42 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/ohem_sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from ..transforms import bbox2roi 4 | from .base_sampler import BaseSampler 5 | 6 | 7 | class OHEMSampler(BaseSampler): 8 | """ 9 | Online Hard Example Mining Sampler described in [1]_. 10 | 11 | References: 12 | .. 
[1] https://arxiv.org/pdf/1604.03540.pdf 13 | """ 14 | 15 | def __init__(self, 16 | num, 17 | pos_fraction, 18 | context, 19 | neg_pos_ub=-1, 20 | add_gt_as_proposals=True, 21 | **kwargs): 22 | super(OHEMSampler, self).__init__(num, pos_fraction, neg_pos_ub, 23 | add_gt_as_proposals) 24 | if not hasattr(context, 'num_stages'): 25 | self.bbox_roi_extractor = context.bbox_roi_extractor 26 | self.bbox_head = context.bbox_head 27 | else: 28 | self.bbox_roi_extractor = context.bbox_roi_extractor[ 29 | context.current_stage] 30 | self.bbox_head = context.bbox_head[context.current_stage] 31 | 32 | def hard_mining(self, inds, num_expected, bboxes, labels, feats): 33 | with torch.no_grad(): 34 | rois = bbox2roi([bboxes]) 35 | bbox_feats = self.bbox_roi_extractor( 36 | feats[:self.bbox_roi_extractor.num_inputs], rois) 37 | cls_score, _ = self.bbox_head(bbox_feats) 38 | loss = self.bbox_head.loss( 39 | cls_score=cls_score, 40 | bbox_pred=None, 41 | labels=labels, 42 | label_weights=cls_score.new_ones(cls_score.size(0)), 43 | bbox_targets=None, 44 | bbox_weights=None, 45 | reduction_override='none')['loss_cls'] 46 | _, topk_loss_inds = loss.topk(num_expected) 47 | return inds[topk_loss_inds] 48 | 49 | def _sample_pos(self, 50 | assign_result, 51 | num_expected, 52 | bboxes=None, 53 | feats=None, 54 | **kwargs): 55 | # Sample some hard positive samples 56 | pos_inds = torch.nonzero(assign_result.gt_inds > 0) 57 | if pos_inds.numel() != 0: 58 | pos_inds = pos_inds.squeeze(1) 59 | if pos_inds.numel() <= num_expected: 60 | return pos_inds 61 | else: 62 | return self.hard_mining(pos_inds, num_expected, bboxes[pos_inds], 63 | assign_result.labels[pos_inds], feats) 64 | 65 | def _sample_neg(self, 66 | assign_result, 67 | num_expected, 68 | bboxes=None, 69 | feats=None, 70 | **kwargs): 71 | # Sample some hard negative samples 72 | neg_inds = torch.nonzero(assign_result.gt_inds == 0) 73 | if neg_inds.numel() != 0: 74 | neg_inds = neg_inds.squeeze(1) 75 | if len(neg_inds) <= num_expected: 76 | return neg_inds 77 | else: 78 | return self.hard_mining(neg_inds, num_expected, bboxes[neg_inds], 79 | assign_result.labels[neg_inds], feats) 80 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/pseudo_sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .base_sampler import BaseSampler 4 | from .sampling_result import SamplingResult 5 | 6 | 7 | class PseudoSampler(BaseSampler): 8 | 9 | def __init__(self, **kwargs): 10 | pass 11 | 12 | def _sample_pos(self, **kwargs): 13 | raise NotImplementedError 14 | 15 | def _sample_neg(self, **kwargs): 16 | raise NotImplementedError 17 | 18 | def sample(self, assign_result, bboxes, gt_bboxes, **kwargs): 19 | pos_inds = torch.nonzero( 20 | assign_result.gt_inds > 0).squeeze(-1).unique() 21 | neg_inds = torch.nonzero( 22 | assign_result.gt_inds == 0).squeeze(-1).unique() 23 | gt_flags = bboxes.new_zeros(bboxes.shape[0], dtype=torch.uint8) 24 | sampling_result = SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes, 25 | assign_result, gt_flags) 26 | return sampling_result 27 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/random_sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .base_sampler import BaseSampler 4 | 5 | 6 | class RandomSampler(BaseSampler): 7 | 8 | def __init__(self, 9 | num, 10 | pos_fraction, 11 | neg_pos_ub=-1, 
12 | add_gt_as_proposals=True, 13 | **kwargs): 14 | from mmdet.core.bbox import demodata 15 | super(RandomSampler, self).__init__(num, pos_fraction, neg_pos_ub, 16 | add_gt_as_proposals) 17 | self.rng = demodata.ensure_rng(kwargs.get('rng', None)) 18 | 19 | def random_choice(self, gallery, num): 20 | """Random select some elements from the gallery. 21 | 22 | If `gallery` is a Tensor, the returned indices will be a Tensor; 23 | If `gallery` is a ndarray or list, the returned indices will be a 24 | ndarray. 25 | 26 | Args: 27 | gallery (Tensor | ndarray | list): indices pool. 28 | num (int): expected sample num. 29 | 30 | Returns: 31 | Tensor or ndarray: sampled indices. 32 | """ 33 | assert len(gallery) >= num 34 | 35 | is_tensor = isinstance(gallery, torch.Tensor) 36 | if not is_tensor: 37 | gallery = torch.tensor( 38 | gallery, dtype=torch.long, device=torch.cuda.current_device()) 39 | perm = torch.randperm(gallery.numel(), device=gallery.device)[:num] 40 | rand_inds = gallery[perm] 41 | if not is_tensor: 42 | rand_inds = rand_inds.cpu().numpy() 43 | return rand_inds 44 | 45 | def _sample_pos(self, assign_result, num_expected, **kwargs): 46 | """Randomly sample some positive samples.""" 47 | pos_inds = torch.nonzero(assign_result.gt_inds > 0) 48 | if pos_inds.numel() != 0: 49 | pos_inds = pos_inds.squeeze(1) 50 | if pos_inds.numel() <= num_expected: 51 | return pos_inds 52 | else: 53 | return self.random_choice(pos_inds, num_expected) 54 | 55 | def _sample_neg(self, assign_result, num_expected, **kwargs): 56 | """Randomly sample some negative samples.""" 57 | neg_inds = torch.nonzero(assign_result.gt_inds == 0) 58 | if neg_inds.numel() != 0: 59 | neg_inds = neg_inds.squeeze(1) 60 | if len(neg_inds) <= num_expected: 61 | return neg_inds 62 | else: 63 | return self.random_choice(neg_inds, num_expected) 64 | -------------------------------------------------------------------------------- /mmdet/core/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .class_names import (cityscapes_classes, coco_classes, dataset_aliases, 2 | get_classes, imagenet_det_classes, 3 | imagenet_vid_classes, voc_classes, 4 | cityscapes_originalIds) 5 | from .eval_hooks import DistEvalHook, EvalHook 6 | from .mean_ap import average_precision, eval_map, print_map_summary 7 | from .recall import (eval_recalls, plot_iou_recall, plot_num_recall, 8 | print_recall_summary) 9 | from .panoptic import save_panoptic_eval 10 | 11 | __all__ = [ 12 | 'voc_classes', 'imagenet_det_classes', 'imagenet_vid_classes', 13 | 'coco_classes', 'cityscapes_classes', 'cityscapes_originalIds', 'dataset_aliases', 'get_classes', 14 | 'DistEvalHook', 'EvalHook', 'average_precision', 'eval_map', 15 | 'print_map_summary', 'eval_recalls', 'print_recall_summary', 16 | 'plot_num_recall', 'plot_iou_recall', 'save_panoptic_eval' 17 | ] 18 | -------------------------------------------------------------------------------- /mmdet/core/evaluation/bbox_overlaps.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def bbox_overlaps(bboxes1, bboxes2, mode='iou'): 5 | """Calculate the ious between each bbox of bboxes1 and bboxes2. 
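    A worked sketch (using the ``+ 1`` pixel convention implemented below):

        >>> import numpy as np
        >>> b1 = np.array([[0, 0, 10, 10]], dtype=np.float32)  # area 11 * 11 = 121
        >>> b2 = np.array([[5, 5, 15, 15]], dtype=np.float32)  # overlap 6 * 6 = 36
        >>> ious = bbox_overlaps(b1, b2)  # iou = 36 / (121 + 121 - 36) ~= 0.1748
        >>> ious.shape
        (1, 1)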
6 | 7 | Args: 8 | bboxes1(ndarray): shape (n, 4) 9 | bboxes2(ndarray): shape (k, 4) 10 | mode(str): iou (intersection over union) or iof (intersection 11 | over foreground) 12 | 13 | Returns: 14 | ious(ndarray): shape (n, k) 15 | """ 16 | 17 | assert mode in ['iou', 'iof'] 18 | 19 | bboxes1 = bboxes1.astype(np.float32) 20 | bboxes2 = bboxes2.astype(np.float32) 21 | rows = bboxes1.shape[0] 22 | cols = bboxes2.shape[0] 23 | ious = np.zeros((rows, cols), dtype=np.float32) 24 | if rows * cols == 0: 25 | return ious 26 | exchange = False 27 | if bboxes1.shape[0] > bboxes2.shape[0]: 28 | bboxes1, bboxes2 = bboxes2, bboxes1 29 | ious = np.zeros((cols, rows), dtype=np.float32) 30 | exchange = True 31 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * ( 32 | bboxes1[:, 3] - bboxes1[:, 1] + 1) 33 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * ( 34 | bboxes2[:, 3] - bboxes2[:, 1] + 1) 35 | for i in range(bboxes1.shape[0]): 36 | x_start = np.maximum(bboxes1[i, 0], bboxes2[:, 0]) 37 | y_start = np.maximum(bboxes1[i, 1], bboxes2[:, 1]) 38 | x_end = np.minimum(bboxes1[i, 2], bboxes2[:, 2]) 39 | y_end = np.minimum(bboxes1[i, 3], bboxes2[:, 3]) 40 | overlap = np.maximum(x_end - x_start + 1, 0) * np.maximum( 41 | y_end - y_start + 1, 0) 42 | if mode == 'iou': 43 | union = area1[i] + area2 - overlap 44 | else: 45 | union = area1[i] if not exchange else area2 46 | ious[i, :] = overlap / union 47 | if exchange: 48 | ious = ious.T 49 | return ious 50 | -------------------------------------------------------------------------------- /mmdet/core/evaluation/eval_hooks.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from mmcv.runner import Hook 4 | from torch.utils.data import DataLoader 5 | 6 | 7 | class EvalHook(Hook): 8 | """Evaluation hook. 9 | 10 | Attributes: 11 | dataloader (DataLoader): A PyTorch dataloader. 12 | interval (int): Evaluation interval (by epochs). Default: 1. 13 | """ 14 | 15 | def __init__(self, dataloader, interval=1, **eval_kwargs): 16 | if not isinstance(dataloader, DataLoader): 17 | raise TypeError( 18 | 'dataloader must be a pytorch DataLoader, but got {}'.format( 19 | type(dataloader))) 20 | self.dataloader = dataloader 21 | self.interval = interval 22 | self.eval_kwargs = eval_kwargs 23 | 24 | def after_train_epoch(self, runner): 25 | if not self.every_n_epochs(runner, self.interval): 26 | return 27 | from mmdet.apis import single_gpu_test 28 | evalm = self.eval_kwargs['metric'] 29 | results = single_gpu_test(runner.model, self.dataloader, show=False, 30 | eval=evalm if evalm[0]=='panoptic' else None) 31 | self.evaluate(runner, results) 32 | 33 | def evaluate(self, runner, results): 34 | eval_res = self.dataloader.dataset.evaluate( 35 | results, logger=runner.logger, **self.eval_kwargs) 36 | for name, val in eval_res.items(): 37 | runner.log_buffer.output[name] = val 38 | runner.log_buffer.ready = True 39 | 40 | 41 | class DistEvalHook(EvalHook): 42 | """Distributed evaluation hook. 43 | 44 | Attributes: 45 | dataloader (DataLoader): A PyTorch dataloader. 46 | interval (int): Evaluation interval (by epochs). Default: 1. 47 | tmpdir (str | None): Temporary directory to save the results of all 48 | processes. Default: None. 49 | gpu_collect (bool): Whether to use gpu or cpu to collect results. 50 | Default: False. 
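    Example (sketch; assumes an already-constructed mmcv runner and a built
    validation dataloader, here named ``runner`` and ``val_loader``):

        >>> hook = DistEvalHook(val_loader, interval=1, metric=['panoptic'])
        >>> runner.register_hook(hook)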
51 | """ 52 | 53 | def __init__(self, 54 | dataloader, 55 | interval=1, 56 | gpu_collect=False, 57 | **eval_kwargs): 58 | if not isinstance(dataloader, DataLoader): 59 | raise TypeError( 60 | 'dataloader must be a pytorch DataLoader, but got {}'.format( 61 | type(dataloader))) 62 | self.dataloader = dataloader 63 | self.interval = interval 64 | self.gpu_collect = gpu_collect 65 | self.eval_kwargs = eval_kwargs 66 | 67 | def after_train_epoch(self, runner): 68 | if not self.every_n_epochs(runner, self.interval): 69 | return 70 | from mmdet.apis import multi_gpu_test 71 | evalm = self.eval_kwargs['metric'] 72 | results = multi_gpu_test( 73 | runner.model, 74 | self.dataloader, 75 | tmpdir=osp.join(runner.work_dir, '.eval_hook'), 76 | gpu_collect=self.gpu_collect, 77 | eval=evalm if evalm[0]=='panoptic' else None) 78 | 79 | if runner.rank == 0: 80 | print('\n') 81 | self.evaluate(runner, results) 82 | -------------------------------------------------------------------------------- /mmdet/core/evaluation/panoptic.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import json 4 | 5 | from . import cityscapes_originalIds 6 | from PIL import Image 7 | 8 | def createDir(path): 9 | if not os.path.exists(path): 10 | os.makedirs(path, exist_ok = True) 11 | 12 | def save_panoptic_eval(results): 13 | tmpDir = 'tmpDir' 14 | createDir(tmpDir) 15 | base_path = os.path.join(tmpDir, 'tmp') 16 | base_json = os.path.join(tmpDir, 'tmp_json') 17 | createDir(base_path) 18 | createDir(base_json) 19 | originalIds = cityscapes_originalIds() 20 | 21 | for result in results: 22 | images = [] 23 | annotations = [] 24 | pan_pred, cat_pred, meta = result 25 | pan_pred, cat_pred = pan_pred.numpy(), cat_pred.numpy() 26 | imgName = meta[0]['filename'].split('/')[-1] 27 | imageId = imgName.replace(".png", "") 28 | inputFileName = imgName 29 | outputFileName = imgName.replace(".png", "_panoptic.png") 30 | images.append({"id": imageId, 31 | "width": int(pan_pred.shape[1]), 32 | "height": int(pan_pred.shape[0]), 33 | "file_name": inputFileName}) 34 | 35 | pan_format = np.zeros( 36 | (pan_pred.shape[0], pan_pred.shape[1], 3), dtype=np.uint8 37 | ) 38 | 39 | panPredIds = np.unique(pan_pred) 40 | segmInfo = [] 41 | for panPredId in panPredIds: 42 | if cat_pred[panPredId] == 255: 43 | continue 44 | elif cat_pred[panPredId] <= 10: 45 | semanticId = segmentId = originalIds[cat_pred[panPredId]] 46 | else: 47 | semanticId = originalIds[cat_pred[panPredId]] 48 | segmentId = semanticId * 1000 + panPredId 49 | 50 | isCrowd = 0 51 | categoryId = semanticId 52 | 53 | mask = pan_pred == panPredId 54 | color = [segmentId % 256, segmentId // 256, segmentId // 256 // 256] 55 | pan_format[mask] = color 56 | 57 | area = np.sum(mask) 58 | 59 | # bbox computation for a segment 60 | hor = np.sum(mask, axis=0) 61 | hor_idx = np.nonzero(hor)[0] 62 | x = hor_idx[0] 63 | width = hor_idx[-1] - x + 1 64 | vert = np.sum(mask, axis=1) 65 | vert_idx = np.nonzero(vert)[0] 66 | y = vert_idx[0] 67 | height = vert_idx[-1] - y + 1 68 | bbox = [int(x), int(y), int(width), int(height)] 69 | 70 | segmInfo.append({"id": int(segmentId), 71 | "category_id": int(categoryId), 72 | "area": int(area), 73 | "bbox": bbox, 74 | "iscrowd": isCrowd}) 75 | annotations.append({'image_id': imageId, 76 | 'file_name': outputFileName, 77 | "segments_info": segmInfo}) 78 | 79 | Image.fromarray(pan_format).save(os.path.join(base_path, outputFileName)) 80 | d = {'images': images, 81 | 'annotations': annotations, 
82 | 'categories': {}} 83 | with open(os.path.join(base_json, imageId + '.json'), 'w') as f: 84 | json.dump(d, f, sort_keys=True, indent=4) 85 | 86 | 87 | 88 | -------------------------------------------------------------------------------- /mmdet/core/fp16/__init__.py: -------------------------------------------------------------------------------- 1 | from .decorators import auto_fp16, force_fp32 2 | from .hooks import Fp16OptimizerHook, wrap_fp16_model 3 | 4 | __all__ = ['auto_fp16', 'force_fp32', 'Fp16OptimizerHook', 'wrap_fp16_model'] 5 | -------------------------------------------------------------------------------- /mmdet/core/fp16/utils.py: -------------------------------------------------------------------------------- 1 | from collections import abc 2 | 3 | import numpy as np 4 | import torch 5 | 6 | 7 | def cast_tensor_type(inputs, src_type, dst_type): 8 | if isinstance(inputs, torch.Tensor): 9 | return inputs.to(dst_type) 10 | elif isinstance(inputs, str): 11 | return inputs 12 | elif isinstance(inputs, np.ndarray): 13 | return inputs 14 | elif isinstance(inputs, abc.Mapping): 15 | return type(inputs)({ 16 | k: cast_tensor_type(v, src_type, dst_type) 17 | for k, v in inputs.items() 18 | }) 19 | elif isinstance(inputs, abc.Iterable): 20 | return type(inputs)( 21 | cast_tensor_type(item, src_type, dst_type) for item in inputs) 22 | else: 23 | return inputs 24 | -------------------------------------------------------------------------------- /mmdet/core/mask/__init__.py: -------------------------------------------------------------------------------- 1 | from .mask_target import mask_target 2 | from .utils import split_combined_polys 3 | 4 | __all__ = ['split_combined_polys', 'mask_target'] 5 | -------------------------------------------------------------------------------- /mmdet/core/mask/mask_target.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | import numpy as np 3 | import torch 4 | from torch.nn.modules.utils import _pair 5 | 6 | 7 | def mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_masks_list, 8 | cfg): 9 | cfg_list = [cfg for _ in range(len(pos_proposals_list))] 10 | mask_targets = map(mask_target_single, pos_proposals_list, 11 | pos_assigned_gt_inds_list, gt_masks_list, cfg_list) 12 | mask_targets = torch.cat(list(mask_targets)) 13 | return mask_targets 14 | 15 | 16 | def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_masks, cfg): 17 | mask_size = _pair(cfg.mask_size) 18 | num_pos = pos_proposals.size(0) 19 | mask_targets = [] 20 | if num_pos > 0: 21 | proposals_np = pos_proposals.cpu().numpy() 22 | _, maxh, maxw = gt_masks.shape 23 | proposals_np[:, [0, 2]] = np.clip(proposals_np[:, [0, 2]], 0, maxw - 1) 24 | proposals_np[:, [1, 3]] = np.clip(proposals_np[:, [1, 3]], 0, maxh - 1) 25 | pos_assigned_gt_inds = pos_assigned_gt_inds.cpu().numpy() 26 | for i in range(num_pos): 27 | gt_mask = gt_masks[pos_assigned_gt_inds[i]] 28 | bbox = proposals_np[i, :].astype(np.int32) 29 | x1, y1, x2, y2 = bbox 30 | w = np.maximum(x2 - x1 + 1, 1) 31 | h = np.maximum(y2 - y1 + 1, 1) 32 | # mask is uint8 both before and after resizing 33 | # mask_size (h, w) to (w, h) 34 | target = mmcv.imresize(gt_mask[y1:y1 + h, x1:x1 + w], 35 | mask_size[::-1]) 36 | mask_targets.append(target) 37 | mask_targets = torch.from_numpy(np.stack(mask_targets)).float().to( 38 | pos_proposals.device) 39 | else: 40 | mask_targets = pos_proposals.new_zeros((0, ) + mask_size) 41 | return mask_targets 42 | 
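# A minimal sketch of what mask_target_single produces (the shapes and values
# below are illustrative assumptions, not part of the training pipeline):
#
#   import mmcv, numpy as np, torch
#   gt_masks = np.zeros((1, 32, 32), dtype=np.uint8)
#   gt_masks[0, 8:24, 8:24] = 1
#   proposals = torch.tensor([[8., 8., 23., 23.]])
#   cfg = mmcv.Config(dict(mask_size=28))
#   target = mask_target_single(proposals, torch.tensor([0]), gt_masks, cfg)
#   # -> all-ones float tensor of shape (1, 28, 28): the assigned GT mask
#   #    cropped to the proposal box and resized to cfg.mask_size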
-------------------------------------------------------------------------------- /mmdet/core/mask/utils.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | 4 | def split_combined_polys(polys, poly_lens, polys_per_mask): 5 | """Split the combined 1-D polys into masks. 6 | 7 | A mask is represented as a list of polys, and a poly is represented as 8 | a 1-D array. In dataset, all masks are concatenated into a single 1-D 9 | tensor. Here we need to split the tensor into original representations. 10 | 11 | Args: 12 | polys (list): a list (length = image num) of 1-D tensors 13 | poly_lens (list): a list (length = image num) of poly length 14 | polys_per_mask (list): a list (length = image num) of poly number 15 | of each mask 16 | 17 | Returns: 18 | list: a list (length = image num) of list (length = mask num) of 19 | list (length = poly num) of numpy array 20 | """ 21 | mask_polys_list = [] 22 | for img_id in range(len(polys)): 23 | polys_single = polys[img_id] 24 | polys_lens_single = poly_lens[img_id].tolist() 25 | polys_per_mask_single = polys_per_mask[img_id].tolist() 26 | 27 | split_polys = mmcv.slice_list(polys_single, polys_lens_single) 28 | mask_polys = mmcv.slice_list(split_polys, polys_per_mask_single) 29 | mask_polys_list.append(mask_polys) 30 | return mask_polys_list 31 | -------------------------------------------------------------------------------- /mmdet/core/optimizer/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_optimizer 2 | from .copy_of_sgd import CopyOfSGD 3 | from .registry import OPTIMIZERS 4 | 5 | __all__ = ['OPTIMIZERS', 'build_optimizer', 'CopyOfSGD'] 6 | -------------------------------------------------------------------------------- /mmdet/core/optimizer/copy_of_sgd.py: -------------------------------------------------------------------------------- 1 | from torch.optim import SGD 2 | 3 | from .registry import OPTIMIZERS 4 | 5 | 6 | @OPTIMIZERS.register_module 7 | class CopyOfSGD(SGD): 8 | """A clone of torch.optim.SGD. 9 | 10 | A customized optimizer could be defined like CopyOfSGD. 11 | You may derive from built-in optimizers in torch.optim, 12 | or directly implement a new optimizer. 
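A registration sketch (the subclass name here is illustrative):

>>> @OPTIMIZERS.register_module
>>> class SGDWithTweaks(SGD):
>>>     pass  # override __init__()/step() as needed

Once registered, build_optimizer can construct it from a config such as
dict(type='SGDWithTweaks', lr=0.01, momentum=0.9).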
13 | """ 14 | -------------------------------------------------------------------------------- /mmdet/core/optimizer/registry.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | 3 | import torch 4 | 5 | from mmdet.utils import Registry 6 | 7 | OPTIMIZERS = Registry('optimizer') 8 | 9 | 10 | def register_torch_optimizers(): 11 | torch_optimizers = [] 12 | for module_name in dir(torch.optim): 13 | if module_name.startswith('__'): 14 | continue 15 | _optim = getattr(torch.optim, module_name) 16 | if inspect.isclass(_optim) and issubclass(_optim, 17 | torch.optim.Optimizer): 18 | OPTIMIZERS.register_module(_optim) 19 | torch_optimizers.append(module_name) 20 | return torch_optimizers 21 | 22 | 23 | TORCH_OPTIMIZERS = register_torch_optimizers() 24 | -------------------------------------------------------------------------------- /mmdet/core/post_processing/__init__.py: -------------------------------------------------------------------------------- 1 | from .bbox_nms import multiclass_nms 2 | from .merge_augs import (merge_aug_bboxes, merge_aug_masks, 3 | merge_aug_proposals, merge_aug_scores) 4 | 5 | __all__ = [ 6 | 'multiclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes', 7 | 'merge_aug_scores', 'merge_aug_masks' 8 | ] 9 | -------------------------------------------------------------------------------- /mmdet/core/post_processing/bbox_nms.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from mmdet.ops.nms import nms_wrapper 4 | 5 | 6 | def multiclass_nms(multi_bboxes, 7 | multi_scores, 8 | score_thr, 9 | nms_cfg, 10 | max_num=-1, 11 | score_factors=None): 12 | """NMS for multi-class bboxes. 13 | 14 | Args: 15 | multi_bboxes (Tensor): shape (n, #class*4) or (n, 4) 16 | multi_scores (Tensor): shape (n, #class), where the 0th column 17 | contains scores of the background class, but this will be ignored. 18 | score_thr (float): bbox threshold, bboxes with scores lower than it 19 | will not be considered. 20 | nms_thr (float): NMS IoU threshold 21 | max_num (int): if there are more than max_num bboxes after NMS, 22 | only top max_num will be kept. 23 | score_factors (Tensor): The factors multiplied to scores before 24 | applying NMS 25 | 26 | Returns: 27 | tuple: (bboxes, labels), tensors of shape (k, 5) and (k, 1). Labels 28 | are 0-based. 29 | """ 30 | num_classes = multi_scores.size(1) - 1 31 | # exclude background category 32 | if multi_bboxes.shape[1] > 4: 33 | bboxes = multi_bboxes.view(multi_scores.size(0), -1, 4)[:, 1:] 34 | else: 35 | bboxes = multi_bboxes[:, None].expand(-1, num_classes, 4) 36 | scores = multi_scores[:, 1:] 37 | 38 | # filter out boxes with low scores 39 | valid_mask = scores > score_thr 40 | bboxes = bboxes[valid_mask] 41 | if score_factors is not None: 42 | scores = scores * score_factors[:, None] 43 | scores = scores[valid_mask] 44 | labels = valid_mask.nonzero(as_tuple=False)[:, 1] 45 | 46 | if bboxes.numel() == 0: 47 | bboxes = multi_bboxes.new_zeros((0, 5)) 48 | labels = multi_bboxes.new_zeros((0, ), dtype=torch.long) 49 | return bboxes, labels 50 | 51 | # Modified from https://github.com/pytorch/vision/blob 52 | # /505cd6957711af790211896d32b40291bea1bc21/torchvision/ops/boxes.py#L39. 53 | # strategy: in order to perform NMS independently per class. 54 | # we add an offset to all the boxes. 
The offset is dependent 55 | # only on the class idx, and is large enough so that boxes 56 | # from different classes do not overlap 57 | max_coordinate = bboxes.max() 58 | offsets = labels.to(bboxes) * (max_coordinate + 1) 59 | bboxes_for_nms = bboxes + offsets[:, None] 60 | nms_cfg_ = nms_cfg.copy() 61 | nms_type = nms_cfg_.pop('type', 'nms') 62 | nms_op = getattr(nms_wrapper, nms_type) 63 | dets, keep = nms_op( 64 | torch.cat([bboxes_for_nms, scores[:, None]], 1), **nms_cfg_) 65 | bboxes = bboxes[keep] 66 | scores = dets[:, -1] # soft_nms will modify scores 67 | labels = labels[keep] 68 | 69 | if max_num > 0 and keep.size(0) > max_num:  # max_num=-1 disables the cap 70 | _, inds = scores.sort(descending=True) 71 | inds = inds[:max_num] 72 | bboxes = bboxes[inds] 73 | scores = scores[inds] 74 | labels = labels[inds] 75 | 76 | return torch.cat([bboxes, scores[:, None]], 1), labels 77 | -------------------------------------------------------------------------------- /mmdet/core/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .dist_utils import DistOptimizerHook, allreduce_grads 2 | from .misc import multi_apply, tensor2imgs, unmap 3 | 4 | __all__ = [ 5 | 'allreduce_grads', 'DistOptimizerHook', 'tensor2imgs', 'unmap', 6 | 'multi_apply' 7 | ] 8 | -------------------------------------------------------------------------------- /mmdet/core/utils/dist_utils.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import torch.distributed as dist 4 | from mmcv.runner import OptimizerHook 5 | from torch._utils import (_flatten_dense_tensors, _take_tensors, 6 | _unflatten_dense_tensors) 7 | 8 | 9 | def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1): 10 | if bucket_size_mb > 0: 11 | bucket_size_bytes = bucket_size_mb * 1024 * 1024 12 | buckets = _take_tensors(tensors, bucket_size_bytes) 13 | else: 14 | buckets = OrderedDict() 15 | for tensor in tensors: 16 | tp = tensor.type() 17 | if tp not in buckets: 18 | buckets[tp] = [] 19 | buckets[tp].append(tensor) 20 | buckets = buckets.values() 21 | 22 | for bucket in buckets: 23 | flat_tensors = _flatten_dense_tensors(bucket) 24 | dist.all_reduce(flat_tensors) 25 | flat_tensors.div_(world_size) 26 | for tensor, synced in zip( 27 | bucket, _unflatten_dense_tensors(flat_tensors, bucket)): 28 | tensor.copy_(synced) 29 | 30 | 31 | def allreduce_grads(params, coalesce=True, bucket_size_mb=-1): 32 | grads = [ 33 | param.grad.data for param in params 34 | if param.requires_grad and param.grad is not None 35 | ] 36 | world_size = dist.get_world_size() 37 | if coalesce: 38 | _allreduce_coalesced(grads, world_size, bucket_size_mb) 39 | else: 40 | for tensor in grads: 41 | dist.all_reduce(tensor.div_(world_size)) 42 | 43 | 44 | class DistOptimizerHook(OptimizerHook): 45 | 46 | def __init__(self, grad_clip=None, coalesce=True, bucket_size_mb=-1): 47 | self.grad_clip = grad_clip 48 | self.coalesce = coalesce 49 | self.bucket_size_mb = bucket_size_mb 50 | 51 | def after_train_iter(self, runner): 52 | runner.optimizer.zero_grad() 53 | runner.outputs['loss'].backward() 54 | if self.grad_clip is not None: 55 | self.clip_grads(runner.model.parameters()) 56 | runner.optimizer.step() 57 | -------------------------------------------------------------------------------- /mmdet/core/utils/misc.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | 3 | import mmcv 4 | import numpy as np 5 | from six.moves
import map, zip 6 | 7 | 8 | def tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True): 9 | num_imgs = tensor.size(0) 10 | mean = np.array(mean, dtype=np.float32) 11 | std = np.array(std, dtype=np.float32) 12 | imgs = [] 13 | for img_id in range(num_imgs): 14 | img = tensor[img_id, ...].cpu().numpy().transpose(1, 2, 0) 15 | img = mmcv.imdenormalize( 16 | img, mean, std, to_bgr=to_rgb).astype(np.uint8) 17 | imgs.append(np.ascontiguousarray(img)) 18 | return imgs 19 | 20 | 21 | def multi_apply(func, *args, **kwargs): 22 | pfunc = partial(func, **kwargs) if kwargs else func 23 | map_results = map(pfunc, *args) 24 | return tuple(map(list, zip(*map_results))) 25 | 26 | 27 | def unmap(data, count, inds, fill=0): 28 | """ Unmap a subset of item (data) back to the original set of items (of 29 | size count) """ 30 | if data.dim() == 1: 31 | ret = data.new_full((count, ), fill) 32 | ret[inds] = data 33 | else: 34 | new_size = (count, ) + data.size()[1:] 35 | ret = data.new_full(new_size, fill) 36 | ret[inds, :] = data 37 | return ret 38 | -------------------------------------------------------------------------------- /mmdet/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_dataset 2 | from .cityscapes import CityscapesDataset 3 | from .coco import CocoDataset 4 | from .custom import CustomDataset 5 | from .dataset_wrappers import ConcatDataset, RepeatDataset 6 | from .loader import DistributedGroupSampler, GroupSampler, build_dataloader 7 | from .registry import DATASETS 8 | from .voc import VOCDataset 9 | from .wider_face import WIDERFaceDataset 10 | from .xml_style import XMLDataset 11 | 12 | __all__ = [ 13 | 'CustomDataset', 'XMLDataset', 'CocoDataset', 'VOCDataset', 14 | 'CityscapesDataset', 'GroupSampler', 'DistributedGroupSampler', 15 | 'build_dataloader', 'ConcatDataset', 'RepeatDataset', 'WIDERFaceDataset', 16 | 'DATASETS', 'build_dataset' 17 | ] 18 | -------------------------------------------------------------------------------- /mmdet/datasets/builder.py: -------------------------------------------------------------------------------- 1 | import copy 2 | 3 | from mmdet.utils import build_from_cfg 4 | from .dataset_wrappers import ConcatDataset, RepeatDataset 5 | from .registry import DATASETS 6 | 7 | 8 | def _concat_dataset(cfg, default_args=None): 9 | ann_files = cfg['ann_file'] 10 | img_prefixes = cfg.get('img_prefix', None) 11 | seg_prefixes = cfg.get('seg_prefix', None) 12 | proposal_files = cfg.get('proposal_file', None) 13 | 14 | datasets = [] 15 | num_dset = len(ann_files) 16 | for i in range(num_dset): 17 | data_cfg = copy.deepcopy(cfg) 18 | data_cfg['ann_file'] = ann_files[i] 19 | if isinstance(img_prefixes, (list, tuple)): 20 | data_cfg['img_prefix'] = img_prefixes[i] 21 | if isinstance(seg_prefixes, (list, tuple)): 22 | data_cfg['seg_prefix'] = seg_prefixes[i] 23 | if isinstance(proposal_files, (list, tuple)): 24 | data_cfg['proposal_file'] = proposal_files[i] 25 | datasets.append(build_dataset(data_cfg, default_args)) 26 | 27 | return ConcatDataset(datasets) 28 | 29 | 30 | def build_dataset(cfg, default_args=None): 31 | if isinstance(cfg, (list, tuple)): 32 | dataset = ConcatDataset([build_dataset(c, default_args) for c in cfg]) 33 | elif cfg['type'] == 'RepeatDataset': 34 | dataset = RepeatDataset( 35 | build_dataset(cfg['dataset'], default_args), cfg['times']) 36 | elif isinstance(cfg.get('ann_file'), (list, tuple)): 37 | dataset = _concat_dataset(cfg, default_args) 38 | else: 39 | 
dataset = build_from_cfg(cfg, DATASETS, default_args) 40 | 41 | return dataset 42 | -------------------------------------------------------------------------------- /mmdet/datasets/dataset_wrappers.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset 3 | 4 | from .registry import DATASETS 5 | 6 | 7 | @DATASETS.register_module 8 | class ConcatDataset(_ConcatDataset): 9 | """A wrapper of concatenated dataset. 10 | 11 | Same as :obj:`torch.utils.data.dataset.ConcatDataset`, but 12 | concat the group flag for image aspect ratio. 13 | 14 | Args: 15 | datasets (list[:obj:`Dataset`]): A list of datasets. 16 | """ 17 | 18 | def __init__(self, datasets): 19 | super(ConcatDataset, self).__init__(datasets) 20 | self.CLASSES = datasets[0].CLASSES 21 | if hasattr(datasets[0], 'flag'): 22 | flags = [] 23 | for i in range(0, len(datasets)): 24 | flags.append(datasets[i].flag) 25 | self.flag = np.concatenate(flags) 26 | 27 | 28 | @DATASETS.register_module 29 | class RepeatDataset(object): 30 | """A wrapper of repeated dataset. 31 | 32 | The length of repeated dataset will be `times` larger than the original 33 | dataset. This is useful when the data loading time is long but the dataset 34 | is small. Using RepeatDataset can reduce the data loading time between 35 | epochs. 36 | 37 | Args: 38 | dataset (:obj:`Dataset`): The dataset to be repeated. 39 | times (int): Repeat times. 40 | """ 41 | 42 | def __init__(self, dataset, times): 43 | self.dataset = dataset 44 | self.times = times 45 | self.CLASSES = dataset.CLASSES 46 | if hasattr(self.dataset, 'flag'): 47 | self.flag = np.tile(self.dataset.flag, times) 48 | 49 | self._ori_len = len(self.dataset) 50 | 51 | def __getitem__(self, idx): 52 | return self.dataset[idx % self._ori_len] 53 | 54 | def __len__(self): 55 | return self.times * self._ori_len 56 | -------------------------------------------------------------------------------- /mmdet/datasets/loader/__init__.py: -------------------------------------------------------------------------------- 1 | from .build_loader import build_dataloader 2 | from .sampler import DistributedGroupSampler, GroupSampler 3 | 4 | __all__ = ['GroupSampler', 'DistributedGroupSampler', 'build_dataloader'] 5 | -------------------------------------------------------------------------------- /mmdet/datasets/loader/build_loader.py: -------------------------------------------------------------------------------- 1 | import platform 2 | import random 3 | from functools import partial 4 | 5 | import numpy as np 6 | from mmcv.parallel import collate 7 | from mmcv.runner import get_dist_info 8 | from torch.utils.data import DataLoader 9 | 10 | from .sampler import DistributedGroupSampler, DistributedSampler, GroupSampler 11 | 12 | if platform.system() != 'Windows': 13 | # https://github.com/pytorch/pytorch/issues/973 14 | import resource 15 | rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) 16 | hard_limit = rlimit[1] 17 | soft_limit = min(4096, hard_limit) 18 | resource.setrlimit(resource.RLIMIT_NOFILE, (soft_limit, hard_limit)) 19 | 20 | 21 | def build_dataloader(dataset, 22 | imgs_per_gpu, 23 | workers_per_gpu, 24 | num_gpus=1, 25 | dist=True, 26 | shuffle=True, 27 | seed=None, 28 | **kwargs): 29 | """Build PyTorch DataLoader. 30 | 31 | In distributed training, each GPU/process has a dataloader. 32 | In non-distributed training, there is only one dataloader for all GPUs. 
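Example of a minimal non-distributed call, as a sketch (assumes `dataset` was produced by build_dataset):

>>> loader = build_dataloader(dataset, imgs_per_gpu=2, workers_per_gpu=2,
>>>                           num_gpus=1, dist=False)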
33 | 34 | Args: 35 | dataset (Dataset): A PyTorch dataset. 36 | imgs_per_gpu (int): Number of images on each GPU, i.e., batch size of 37 | each GPU. 38 | workers_per_gpu (int): How many subprocesses to use for data loading 39 | for each GPU. 40 | num_gpus (int): Number of GPUs. Only used in non-distributed training. 41 | dist (bool): Distributed training/test or not. Default: True. 42 | shuffle (bool): Whether to shuffle the data at every epoch. 43 | Default: True. 44 | kwargs: any keyword arguments used to initialize the DataLoader 45 | 46 | Returns: 47 | DataLoader: A PyTorch dataloader. 48 | """ 49 | rank, world_size = get_dist_info() 50 | if dist: 51 | # DistributedGroupSampler will definitely shuffle the data to ensure 52 | # that images on each GPU are in the same group 53 | if shuffle: 54 | sampler = DistributedGroupSampler(dataset, imgs_per_gpu, 55 | world_size, rank) 56 | else: 57 | sampler = DistributedSampler( 58 | dataset, world_size, rank, shuffle=False) 59 | batch_size = imgs_per_gpu 60 | num_workers = workers_per_gpu 61 | else: 62 | sampler = GroupSampler(dataset, imgs_per_gpu) if shuffle else None 63 | batch_size = num_gpus * imgs_per_gpu 64 | num_workers = num_gpus * workers_per_gpu 65 | 66 | init_fn = partial( 67 | worker_init_fn, num_workers=num_workers, rank=rank, 68 | seed=seed) if seed is not None else None 69 | 70 | data_loader = DataLoader( 71 | dataset, 72 | batch_size=batch_size, 73 | sampler=sampler, 74 | num_workers=num_workers, 75 | collate_fn=partial(collate, samples_per_gpu=imgs_per_gpu), 76 | pin_memory=False, 77 | worker_init_fn=init_fn, 78 | **kwargs) 79 | 80 | return data_loader 81 | 82 | 83 | def worker_init_fn(worker_id, num_workers, rank, seed): 84 | # The seed of each worker equals 85 | # num_workers * rank + worker_id + user_seed 86 | worker_seed = num_workers * rank + worker_id + seed 87 | np.random.seed(worker_seed) 88 | random.seed(worker_seed) 89 | -------------------------------------------------------------------------------- /mmdet/datasets/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | from .auto_augment import (AutoAugment, BrightnessTransform, ColorTransform, 2 | ContrastTransform, EqualizeTransform, Rotate, Shear, 3 | Translate) 4 | 5 | from .compose import Compose 6 | from .formating import (Collect, ImageToTensor, ToDataContainer, ToTensor, 7 | Transpose, to_tensor) 8 | from .instaboost import InstaBoost 9 | from .loading import LoadAnnotations, LoadImageFromFile, LoadProposals 10 | from .test_aug import MultiScaleFlipAug 11 | from .transforms import (Albu, Expand, MinIoURandomCrop, Normalize, Pad, 12 | PhotoMetricDistortion, RandomCrop, RandomFlip, Resize, 13 | SegRescale) 14 | 15 | __all__ = [ 16 | 'Compose', 'to_tensor', 'ToTensor', 'ImageToTensor', 'ToDataContainer', 17 | 'Transpose', 'Collect', 'LoadAnnotations', 'LoadImageFromFile', 18 | 'LoadProposals', 'MultiScaleFlipAug', 'Resize', 'RandomFlip', 'Pad', 19 | 'RandomCrop', 'Normalize', 'SegRescale', 'MinIoURandomCrop', 'Expand', 20 | 'PhotoMetricDistortion', 'Albu', 'InstaBoost', 'AutoAugment', 21 | 'BrightnessTransform', 'ColorTransform', 'ContrastTransform', 22 | 'EqualizeTransform', 'Rotate', 'Shear', 'Translate' 23 | ] 24 | 25 | -------------------------------------------------------------------------------- /mmdet/datasets/pipelines/compose.py: -------------------------------------------------------------------------------- 1 | import collections 2 | 3 | from mmdet.utils import build_from_cfg 4 | from ..registry
import PIPELINES 5 | 6 | 7 | @PIPELINES.register_module 8 | class Compose(object): 9 | 10 | def __init__(self, transforms): 11 | assert isinstance(transforms, collections.abc.Sequence) 12 | self.transforms = [] 13 | for transform in transforms: 14 | if isinstance(transform, dict): 15 | transform = build_from_cfg(transform, PIPELINES) 16 | self.transforms.append(transform) 17 | elif callable(transform): 18 | self.transforms.append(transform) 19 | else: 20 | raise TypeError('transform must be callable or a dict') 21 | 22 | def __call__(self, data): 23 | for t in self.transforms: 24 | data = t(data) 25 | if data is None: 26 | return None 27 | return data 28 | 29 | def __repr__(self): 30 | format_string = self.__class__.__name__ + '(' 31 | for t in self.transforms: 32 | format_string += '\n' 33 | format_string += ' {0}'.format(t) 34 | format_string += '\n)' 35 | return format_string 36 | -------------------------------------------------------------------------------- /mmdet/datasets/pipelines/instaboost.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from ..registry import PIPELINES 4 | 5 | 6 | @PIPELINES.register_module 7 | class InstaBoost(object): 8 | """ 9 | Data augmentation method in paper "InstaBoost: Boosting Instance 10 | Segmentation Via Probability Map Guided Copy-Pasting" 11 | Implementation details can refer to https://github.com/GothicAi/Instaboost. 12 | """ 13 | 14 | def __init__(self, 15 | action_candidate=('normal', 'horizontal', 'skip'), 16 | action_prob=(1, 0, 0), 17 | scale=(0.8, 1.2), 18 | dx=15, 19 | dy=15, 20 | theta=(-1, 1), 21 | color_prob=0.5, 22 | hflag=False, 23 | aug_ratio=0.5): 24 | try: 25 | import instaboostfast as instaboost 26 | except ImportError: 27 | raise ImportError( 28 | 'Please run "pip install instaboostfast" ' 29 | 'to install instaboostfast first for instaboost augmentation.') 30 | self.cfg = instaboost.InstaBoostConfig(action_candidate, action_prob, 31 | scale, dx, dy, theta, 32 | color_prob, hflag) 33 | self.aug_ratio = aug_ratio 34 | 35 | def _load_anns(self, results): 36 | labels = results['ann_info']['labels'] 37 | masks = results['ann_info']['masks'] 38 | bboxes = results['ann_info']['bboxes'] 39 | n = len(labels) 40 | 41 | anns = [] 42 | for i in range(n): 43 | label = labels[i] 44 | bbox = bboxes[i] 45 | mask = masks[i] 46 | x1, y1, x2, y2 = bbox 47 | bbox = [x1, y1, x2 - x1 + 1, y2 - y1 + 1] 48 | anns.append({ 49 | 'category_id': label, 50 | 'segmentation': mask, 51 | 'bbox': bbox 52 | }) 53 | 54 | return anns 55 | 56 | def _parse_anns(self, results, anns, img): 57 | gt_bboxes = [] 58 | gt_labels = [] 59 | gt_masks_ann = [] 60 | for ann in anns: 61 | x1, y1, w, h = ann['bbox'] 62 | bbox = [x1, y1, x1 + w - 1, y1 + h - 1] 63 | gt_bboxes.append(bbox) 64 | gt_labels.append(ann['category_id']) 65 | gt_masks_ann.append(ann['segmentation']) 66 | gt_bboxes = np.array(gt_bboxes, dtype=np.float32) 67 | gt_labels = np.array(gt_labels, dtype=np.int64) 68 | results['ann_info']['labels'] = gt_labels 69 | results['ann_info']['bboxes'] = gt_bboxes 70 | results['ann_info']['masks'] = gt_masks_ann 71 | results['img'] = img 72 | return results 73 | 74 | def __call__(self, results): 75 | img = results['img'] 76 | anns = self._load_anns(results) 77 | if np.random.choice([0, 1], p=[1 - self.aug_ratio, self.aug_ratio]): 78 | try: 79 | import instaboostfast as instaboost 80 | except ImportError: 81 | raise ImportError('Please run "pip install instaboostfast" ' 82 | 'to install instaboostfast 
first.') 83 | anns, img = instaboost.get_new_data( 84 | anns, img, self.cfg, background=None) 85 | results = self._parse_anns(results, anns, img) 86 | return results 87 | 88 | def __repr__(self): 89 | repr_str = self.__class__.__name__ 90 | repr_str += ('(cfg={}, aug_ratio={})').format(self.cfg, self.aug_ratio) 91 | return repr_str 92 | -------------------------------------------------------------------------------- /mmdet/datasets/pipelines/test_aug.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | from ..registry import PIPELINES 4 | from .compose import Compose 5 | 6 | 7 | @PIPELINES.register_module 8 | class MultiScaleFlipAug(object): 9 | 10 | def __init__(self, transforms, img_scale, flip=False): 11 | self.transforms = Compose(transforms) 12 | self.img_scale = img_scale if isinstance(img_scale, 13 | list) else [img_scale] 14 | assert mmcv.is_list_of(self.img_scale, tuple) 15 | self.flip = flip 16 | 17 | def __call__(self, results): 18 | aug_data = [] 19 | flip_aug = [False, True] if self.flip else [False] 20 | for scale in self.img_scale: 21 | for flip in flip_aug: 22 | _results = results.copy() 23 | _results['scale'] = scale 24 | _results['flip'] = flip 25 | data = self.transforms(_results) 26 | aug_data.append(data) 27 | # list of dict to dict of list 28 | aug_data_dict = {key: [] for key in aug_data[0]} 29 | for data in aug_data: 30 | for key, val in data.items(): 31 | aug_data_dict[key].append(val) 32 | return aug_data_dict 33 | 34 | def __repr__(self): 35 | repr_str = self.__class__.__name__ 36 | repr_str += '(transforms={}, img_scale={}, flip={})'.format( 37 | self.transforms, self.img_scale, self.flip) 38 | return repr_str 39 | -------------------------------------------------------------------------------- /mmdet/datasets/registry.py: -------------------------------------------------------------------------------- 1 | from mmdet.utils import Registry 2 | 3 | DATASETS = Registry('dataset') 4 | PIPELINES = Registry('pipeline') 5 | -------------------------------------------------------------------------------- /mmdet/datasets/voc.py: -------------------------------------------------------------------------------- 1 | from mmdet.core import eval_map, eval_recalls 2 | from .registry import DATASETS 3 | from .xml_style import XMLDataset 4 | 5 | 6 | @DATASETS.register_module 7 | class VOCDataset(XMLDataset): 8 | 9 | CLASSES = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 10 | 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 11 | 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 12 | 'tvmonitor') 13 | 14 | def __init__(self, **kwargs): 15 | super(VOCDataset, self).__init__(**kwargs) 16 | if 'VOC2007' in self.img_prefix: 17 | self.year = 2007 18 | elif 'VOC2012' in self.img_prefix: 19 | self.year = 2012 20 | else: 21 | raise ValueError('Cannot infer dataset year from img_prefix') 22 | 23 | def evaluate(self, 24 | results, 25 | metric='mAP', 26 | logger=None, 27 | proposal_nums=(100, 300, 1000), 28 | iou_thr=0.5, 29 | scale_ranges=None): 30 | if not isinstance(metric, str): 31 | assert len(metric) == 1 32 | metric = metric[0] 33 | allowed_metrics = ['mAP', 'recall'] 34 | if metric not in allowed_metrics: 35 | raise KeyError('metric {} is not supported'.format(metric)) 36 | annotations = [self.get_ann_info(i) for i in range(len(self))] 37 | eval_results = {} 38 | if metric == 'mAP': 39 | assert isinstance(iou_thr, float) 40 | if self.year == 2007: 41 | ds_name = 'voc07' 42 | else: 43 | 
ds_name = self.CLASSES  # VOCDataset has no .dataset attribute; use its own classes 44 | mean_ap, _ = eval_map( 45 | results, 46 | annotations, 47 | scale_ranges=None, 48 | iou_thr=iou_thr, 49 | dataset=ds_name, 50 | logger=logger) 51 | eval_results['mAP'] = mean_ap 52 | elif metric == 'recall': 53 | gt_bboxes = [ann['bboxes'] for ann in annotations] 54 | if isinstance(iou_thr, float): 55 | iou_thr = [iou_thr] 56 | recalls = eval_recalls( 57 | gt_bboxes, results, proposal_nums, iou_thr, logger=logger) 58 | for i, num in enumerate(proposal_nums): 59 | for j, iou in enumerate(iou_thr): 60 | eval_results['recall@{}@{}'.format(num, iou)] = recalls[i, 61 | j] 62 | if recalls.shape[1] > 1: 63 | ar = recalls.mean(axis=1) 64 | for i, num in enumerate(proposal_nums): 65 | eval_results['AR@{}'.format(num)] = ar[i] 66 | return eval_results 67 | -------------------------------------------------------------------------------- /mmdet/datasets/wider_face.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import xml.etree.ElementTree as ET 3 | 4 | import mmcv 5 | 6 | from .registry import DATASETS 7 | from .xml_style import XMLDataset 8 | 9 | 10 | @DATASETS.register_module 11 | class WIDERFaceDataset(XMLDataset): 12 | """ 13 | Reader for the WIDER Face dataset in PASCAL VOC format. 14 | Conversion scripts can be found in 15 | https://github.com/sovrasov/wider-face-pascal-voc-annotations 16 | """ 17 | CLASSES = ('face', ) 18 | 19 | def __init__(self, **kwargs): 20 | super(WIDERFaceDataset, self).__init__(**kwargs) 21 | 22 | def load_annotations(self, ann_file): 23 | img_infos = [] 24 | img_ids = mmcv.list_from_file(ann_file) 25 | for img_id in img_ids: 26 | filename = '{}.jpg'.format(img_id) 27 | xml_path = osp.join(self.img_prefix, 'Annotations', 28 | '{}.xml'.format(img_id)) 29 | tree = ET.parse(xml_path) 30 | root = tree.getroot() 31 | size = root.find('size') 32 | width = int(size.find('width').text) 33 | height = int(size.find('height').text) 34 | folder = root.find('folder').text 35 | img_infos.append( 36 | dict( 37 | id=img_id, 38 | filename=osp.join(folder, filename), 39 | width=width, 40 | height=height)) 41 | 42 | return img_infos 43 | -------------------------------------------------------------------------------- /mmdet/datasets/xml_style.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import xml.etree.ElementTree as ET 3 | 4 | import mmcv 5 | import numpy as np 6 | 7 | from .custom import CustomDataset 8 | from .registry import DATASETS 9 | 10 | 11 | @DATASETS.register_module 12 | class XMLDataset(CustomDataset): 13 | 14 | def __init__(self, min_size=None, **kwargs): 15 | super(XMLDataset, self).__init__(**kwargs) 16 | self.cat2label = {cat: i + 1 for i, cat in enumerate(self.CLASSES)} 17 | self.min_size = min_size 18 | 19 | def load_annotations(self, ann_file): 20 | img_infos = [] 21 | img_ids = mmcv.list_from_file(ann_file) 22 | for img_id in img_ids: 23 | filename = 'JPEGImages/{}.jpg'.format(img_id) 24 | xml_path = osp.join(self.img_prefix, 'Annotations', 25 | '{}.xml'.format(img_id)) 26 | tree = ET.parse(xml_path) 27 | root = tree.getroot() 28 | size = root.find('size') 29 | width = int(size.find('width').text) 30 | height = int(size.find('height').text) 31 | img_infos.append( 32 | dict(id=img_id, filename=filename, width=width, height=height)) 33 | return img_infos 34 | 35 | def get_ann_info(self, idx): 36 | img_id = self.img_infos[idx]['id'] 37 | xml_path = osp.join(self.img_prefix, 'Annotations', 38 |
'{}.xml'.format(img_id)) 39 | tree = ET.parse(xml_path) 40 | root = tree.getroot() 41 | bboxes = [] 42 | labels = [] 43 | bboxes_ignore = [] 44 | labels_ignore = [] 45 | for obj in root.findall('object'): 46 | name = obj.find('name').text 47 | label = self.cat2label[name] 48 | difficult = int(obj.find('difficult').text) 49 | bnd_box = obj.find('bndbox') 50 | # Coordinates may be float type 51 | bbox = [ 52 | int(float(bnd_box.find('xmin').text)), 53 | int(float(bnd_box.find('ymin').text)), 54 | int(float(bnd_box.find('xmax').text)), 55 | int(float(bnd_box.find('ymax').text)) 56 | ] 57 | ignore = False 58 | if self.min_size: 59 | assert not self.test_mode 60 | w = bbox[2] - bbox[0] 61 | h = bbox[3] - bbox[1] 62 | if w < self.min_size or h < self.min_size: 63 | ignore = True 64 | if difficult or ignore: 65 | bboxes_ignore.append(bbox) 66 | labels_ignore.append(label) 67 | else: 68 | bboxes.append(bbox) 69 | labels.append(label) 70 | if not bboxes: 71 | bboxes = np.zeros((0, 4)) 72 | labels = np.zeros((0, )) 73 | else: 74 | bboxes = np.array(bboxes, ndmin=2) - 1 75 | labels = np.array(labels) 76 | if not bboxes_ignore: 77 | bboxes_ignore = np.zeros((0, 4)) 78 | labels_ignore = np.zeros((0, )) 79 | else: 80 | bboxes_ignore = np.array(bboxes_ignore, ndmin=2) - 1 81 | labels_ignore = np.array(labels_ignore) 82 | ann = dict( 83 | bboxes=bboxes.astype(np.float32), 84 | labels=labels.astype(np.int64), 85 | bboxes_ignore=bboxes_ignore.astype(np.float32), 86 | labels_ignore=labels_ignore.astype(np.int64)) 87 | return ann 88 | -------------------------------------------------------------------------------- /mmdet/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor_heads import * # noqa: F401,F403 2 | from .backbones import * # noqa: F401,F403 3 | from .bbox_heads import * # noqa: F401,F403 4 | from .builder import (build_backbone, build_detector, build_head, build_loss, 5 | build_neck, build_roi_extractor, build_shared_head) 6 | from .efficientps import * # noqa: F401,F403 7 | from .losses import * # noqa: F401,F403 8 | from .mask_heads import * # noqa: F401,F403 9 | from .necks import * # noqa: F401,F403 10 | from .registry import (BACKBONES, EFFICIENTPS, HEADS, LOSSES, NECKS, 11 | ROI_EXTRACTORS, SHARED_HEADS) 12 | from .roi_extractors import * # noqa: F401,F403 13 | from .shared_heads import * # noqa: F401,F403 14 | 15 | __all__ = [ 16 | 'BACKBONES', 'NECKS', 'ROI_EXTRACTORS', 'SHARED_HEADS', 'HEADS', 'LOSSES', 17 | 'EFFICIENTPS', 'build_backbone', 'build_neck', 'build_roi_extractor', 18 | 'build_shared_head', 'build_head', 'build_loss', 'build_detector' 19 | ] 20 | -------------------------------------------------------------------------------- /mmdet/models/anchor_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor_head import AnchorHead 2 | from .atss_head import ATSSHead 3 | from .fcos_head import FCOSHead 4 | from .fovea_head import FoveaHead 5 | from .free_anchor_retina_head import FreeAnchorRetinaHead 6 | from .ga_retina_head import GARetinaHead 7 | from .ga_rpn_head import GARPNHead 8 | from .guided_anchor_head import FeatureAdaption, GuidedAnchorHead 9 | from .reppoints_head import RepPointsHead 10 | from .retina_head import RetinaHead 11 | from .retina_sepbn_head import RetinaSepBNHead 12 | from .rpn_head import RPNHead 13 | from .sep_rpn_head import SepRPNHead 14 | from .ssd_head import SSDHead 15 | 16 | __all__ = [ 17 | 'AnchorHead', 'GuidedAnchorHead', 
'FeatureAdaption', 'RPNHead', 'SepRPNHead', 18 | 'GARPNHead', 'RetinaHead', 'RetinaSepBNHead', 'GARetinaHead', 'SSDHead', 19 | 'FCOSHead', 'RepPointsHead', 'FoveaHead', 'FreeAnchorRetinaHead', 20 | 'ATSSHead' 21 | ] 22 | -------------------------------------------------------------------------------- /mmdet/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .resnet import ResNet, make_res_layer 2 | 3 | __all__ = ['ResNet', 'make_res_layer'] 4 | -------------------------------------------------------------------------------- /mmdet/models/bbox_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .bbox_head import BBoxHead 2 | from .convfc_bbox_head import ConvFCBBoxHead, SharedFCBBoxHead 3 | from .double_bbox_head import DoubleConvFCBBoxHead 4 | 5 | __all__ = [ 6 | 'BBoxHead', 'ConvFCBBoxHead', 'SharedFCBBoxHead', 'DoubleConvFCBBoxHead' 7 | ] 8 | -------------------------------------------------------------------------------- /mmdet/models/builder.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from mmdet.utils import build_from_cfg 4 | from .registry import (BACKBONES, EFFICIENTPS, HEADS, LOSSES, NECKS, 5 | ROI_EXTRACTORS, SHARED_HEADS) 6 | 7 | 8 | def build(cfg, registry, default_args=None): 9 | if isinstance(cfg, list): 10 | modules = [ 11 | build_from_cfg(cfg_, registry, default_args) for cfg_ in cfg 12 | ] 13 | return nn.Sequential(*modules) 14 | else: 15 | return build_from_cfg(cfg, registry, default_args) 16 | 17 | 18 | def build_backbone(cfg): 19 | return build(cfg, BACKBONES) 20 | 21 | 22 | def build_neck(cfg): 23 | return build(cfg, NECKS) 24 | 25 | 26 | def build_roi_extractor(cfg): 27 | return build(cfg, ROI_EXTRACTORS) 28 | 29 | 30 | def build_shared_head(cfg): 31 | return build(cfg, SHARED_HEADS) 32 | 33 | 34 | def build_head(cfg): 35 | return build(cfg, HEADS) 36 | 37 | 38 | def build_loss(cfg): 39 | return build(cfg, LOSSES) 40 | 41 | 42 | def build_detector(cfg, train_cfg=None, test_cfg=None): 43 | return build(cfg, EFFICIENTPS, dict(train_cfg=train_cfg, test_cfg=test_cfg)) 44 | -------------------------------------------------------------------------------- /mmdet/models/efficientps/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import BaseDetector 2 | from .rpn import RPN 3 | from .two_stage import TwoStageDetector 4 | from .efficientPS import EfficientPS 5 | 6 | __all__ = [ 7 | 'BaseDetector', 'TwoStageDetector', 'RPN', 'EfficientPS', 8 | ] 9 | -------------------------------------------------------------------------------- /mmdet/models/efficientps/rpn.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | from mmdet.core import bbox_mapping, tensor2imgs 4 | from .. 
import builder 5 | from ..registry import EFFICIENTPS 6 | from .base import BaseDetector 7 | from .test_mixins import RPNTestMixin 8 | 9 | 10 | @EFFICIENTPS.register_module 11 | class RPN(BaseDetector, RPNTestMixin): 12 | 13 | def __init__(self, 14 | backbone, 15 | neck, 16 | rpn_head, 17 | train_cfg, 18 | test_cfg, 19 | pretrained=None): 20 | super(RPN, self).__init__() 21 | self.backbone = builder.build_backbone(backbone) 22 | self.neck = builder.build_neck(neck) if neck is not None else None 23 | self.rpn_head = builder.build_head(rpn_head) 24 | self.train_cfg = train_cfg 25 | self.test_cfg = test_cfg 26 | self.init_weights(pretrained=pretrained) 27 | 28 | def init_weights(self, pretrained=None): 29 | super(RPN, self).init_weights(pretrained) 30 | self.backbone.init_weights(pretrained=pretrained) 31 | if self.with_neck: 32 | self.neck.init_weights() 33 | self.rpn_head.init_weights() 34 | 35 | def extract_feat(self, img): 36 | x = self.backbone(img) 37 | if self.with_neck: 38 | x = self.neck(x) 39 | return x 40 | 41 | def forward_dummy(self, img): 42 | x = self.extract_feat(img) 43 | rpn_outs = self.rpn_head(x) 44 | return rpn_outs 45 | 46 | def forward_train(self, 47 | img, 48 | img_metas, 49 | gt_bboxes=None, 50 | gt_bboxes_ignore=None): 51 | if self.train_cfg.rpn.get('debug', False): 52 | self.rpn_head.debug_imgs = tensor2imgs(img) 53 | 54 | x = self.extract_feat(img) 55 | rpn_outs = self.rpn_head(x) 56 | 57 | rpn_loss_inputs = rpn_outs + (gt_bboxes, img_metas, self.train_cfg.rpn) 58 | losses = self.rpn_head.loss( 59 | *rpn_loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore) 60 | return losses 61 | 62 | def simple_test(self, img, img_metas, rescale=False): 63 | x = self.extract_feat(img) 64 | proposal_list = self.simple_test_rpn(x, img_metas, self.test_cfg.rpn) 65 | if rescale: 66 | for proposals, meta in zip(proposal_list, img_metas): 67 | proposals[:, :4] /= meta['scale_factor'] 68 | # TODO: remove this restriction 69 | return proposal_list[0].cpu().numpy() 70 | 71 | def aug_test(self, imgs, img_metas, rescale=False): 72 | proposal_list = self.aug_test_rpn( 73 | self.extract_feats(imgs), img_metas, self.test_cfg.rpn) 74 | if not rescale: 75 | for proposals, img_meta in zip(proposal_list, img_metas[0]): 76 | img_shape = img_meta['img_shape'] 77 | scale_factor = img_meta['scale_factor'] 78 | flip = img_meta['flip'] 79 | proposals[:, :4] = bbox_mapping(proposals[:, :4], img_shape, 80 | scale_factor, flip) 81 | # TODO: remove this restriction 82 | return proposal_list[0].cpu().numpy() 83 | 84 | def show_result(self, data, result, dataset=None, top_k=20): 85 | """Show RPN proposals on the image. 86 | 87 | Although we assume batch size is 1, this method supports arbitrary 88 | batch size. 
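A usage sketch (assumes `data` comes from a test dataloader and `result`
from simple_test):

>>> model.show_result(data, result, top_k=20)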
89 | """ 90 | img_tensor = data['img'][0] 91 | img_metas = data['img_metas'][0].data[0] 92 | imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg']) 93 | assert len(imgs) == len(img_metas) 94 | for img, img_meta in zip(imgs, img_metas): 95 | h, w, _ = img_meta['img_shape'] 96 | img_show = img[:h, :w, :] 97 | mmcv.imshow_bboxes(img_show, result, top_k=top_k) 98 | -------------------------------------------------------------------------------- /mmdet/models/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .accuracy import Accuracy, accuracy 2 | from .balanced_l1_loss import BalancedL1Loss, balanced_l1_loss 3 | from .cross_entropy_loss import (CrossEntropyLoss, binary_cross_entropy, 4 | cross_entropy, mask_cross_entropy) 5 | from .focal_loss import FocalLoss, sigmoid_focal_loss 6 | from .ghm_loss import GHMC, GHMR 7 | from .iou_loss import (BoundedIoULoss, GIoULoss, IoULoss, bounded_iou_loss, 8 | iou_loss) 9 | from .mse_loss import MSELoss, mse_loss 10 | from .smooth_l1_loss import SmoothL1Loss, smooth_l1_loss 11 | from .utils import reduce_loss, weight_reduce_loss, weighted_loss 12 | 13 | __all__ = [ 14 | 'accuracy', 'Accuracy', 'cross_entropy', 'binary_cross_entropy', 15 | 'mask_cross_entropy', 'CrossEntropyLoss', 'sigmoid_focal_loss', 16 | 'FocalLoss', 'smooth_l1_loss', 'SmoothL1Loss', 'balanced_l1_loss', 17 | 'BalancedL1Loss', 'mse_loss', 'MSELoss', 'iou_loss', 'bounded_iou_loss', 18 | 'IoULoss', 'BoundedIoULoss', 'GIoULoss', 'GHMC', 'GHMR', 'reduce_loss', 19 | 'weight_reduce_loss', 'weighted_loss' 20 | ] 21 | -------------------------------------------------------------------------------- /mmdet/models/losses/accuracy.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | def accuracy(pred, target, topk=1): 5 | assert isinstance(topk, (int, tuple)) 6 | if isinstance(topk, int): 7 | topk = (topk, ) 8 | return_single = True 9 | else: 10 | return_single = False 11 | 12 | maxk = max(topk) 13 | _, pred_label = pred.topk(maxk, dim=1) 14 | pred_label = pred_label.t() 15 | correct = pred_label.eq(target.view(1, -1).expand_as(pred_label)) 16 | 17 | res = [] 18 | for k in topk: 19 | correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) 20 | res.append(correct_k.mul_(100.0 / pred.size(0))) 21 | return res[0] if return_single else res 22 | 23 | 24 | class Accuracy(nn.Module): 25 | 26 | def __init__(self, topk=(1, )): 27 | super().__init__() 28 | self.topk = topk 29 | 30 | def forward(self, pred, target): 31 | return accuracy(pred, target, self.topk) 32 | -------------------------------------------------------------------------------- /mmdet/models/losses/balanced_l1_loss.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | 5 | from ..registry import LOSSES 6 | from .utils import weighted_loss 7 | 8 | 9 | @weighted_loss 10 | def balanced_l1_loss(pred, 11 | target, 12 | beta=1.0, 13 | alpha=0.5, 14 | gamma=1.5, 15 | reduction='mean'): 16 | assert beta > 0 17 | assert pred.size() == target.size() and target.numel() > 0 18 | 19 | diff = torch.abs(pred - target) 20 | b = np.e**(gamma / alpha) - 1 21 | loss = torch.where( 22 | diff < beta, alpha / b * 23 | (b * diff + 1) * torch.log(b * diff / beta + 1) - alpha * diff, 24 | gamma * diff + gamma / b - alpha * beta) 25 | 26 | return loss 27 | 28 | 29 | @LOSSES.register_module 30 | class BalancedL1Loss(nn.Module): 31 
| """Balanced L1 Loss 32 | 33 | arXiv: https://arxiv.org/pdf/1904.02701.pdf (CVPR 2019) 34 | """ 35 | 36 | def __init__(self, 37 | alpha=0.5, 38 | gamma=1.5, 39 | beta=1.0, 40 | reduction='mean', 41 | loss_weight=1.0): 42 | super(BalancedL1Loss, self).__init__() 43 | self.alpha = alpha 44 | self.gamma = gamma 45 | self.beta = beta 46 | self.reduction = reduction 47 | self.loss_weight = loss_weight 48 | 49 | def forward(self, 50 | pred, 51 | target, 52 | weight=None, 53 | avg_factor=None, 54 | reduction_override=None, 55 | **kwargs): 56 | assert reduction_override in (None, 'none', 'mean', 'sum') 57 | reduction = ( 58 | reduction_override if reduction_override else self.reduction) 59 | loss_bbox = self.loss_weight * balanced_l1_loss( 60 | pred, 61 | target, 62 | weight, 63 | alpha=self.alpha, 64 | gamma=self.gamma, 65 | beta=self.beta, 66 | reduction=reduction, 67 | avg_factor=avg_factor, 68 | **kwargs) 69 | return loss_bbox 70 | -------------------------------------------------------------------------------- /mmdet/models/losses/cross_entropy_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from ..registry import LOSSES 6 | from .utils import weight_reduce_loss 7 | 8 | 9 | def cross_entropy(pred, label, weight=None, reduction='mean', avg_factor=None): 10 | # element-wise losses 11 | loss = F.cross_entropy(pred, label, reduction='none') 12 | 13 | # apply weights and do the reduction 14 | if weight is not None: 15 | weight = weight.float() 16 | loss = weight_reduce_loss( 17 | loss, weight=weight, reduction=reduction, avg_factor=avg_factor) 18 | 19 | return loss 20 | 21 | 22 | def _expand_binary_labels(labels, label_weights, label_channels): 23 | bin_labels = labels.new_full((labels.size(0), label_channels), 0) 24 | inds = torch.nonzero(labels >= 1).squeeze() 25 | if inds.numel() > 0: 26 | bin_labels[inds, labels[inds] - 1] = 1 27 | if label_weights is None: 28 | bin_label_weights = None 29 | else: 30 | bin_label_weights = label_weights.view(-1, 1).expand( 31 | label_weights.size(0), label_channels) 32 | return bin_labels, bin_label_weights 33 | 34 | 35 | def binary_cross_entropy(pred, 36 | label, 37 | weight=None, 38 | reduction='mean', 39 | avg_factor=None): 40 | if pred.dim() != label.dim(): 41 | label, weight = _expand_binary_labels(label, weight, pred.size(-1)) 42 | 43 | # weighted element-wise losses 44 | if weight is not None: 45 | weight = weight.float() 46 | loss = F.binary_cross_entropy_with_logits( 47 | pred, label.float(), weight, reduction='none') 48 | # do the reduction for the weighted loss 49 | loss = weight_reduce_loss(loss, reduction=reduction, avg_factor=avg_factor) 50 | 51 | return loss 52 | 53 | 54 | def mask_cross_entropy(pred, target, label, reduction='mean', avg_factor=None): 55 | # TODO: handle these two reserved arguments 56 | assert reduction == 'mean' and avg_factor is None 57 | num_rois = pred.size()[0] 58 | inds = torch.arange(0, num_rois, dtype=torch.long, device=pred.device) 59 | pred_slice = pred[inds, label].squeeze(1) 60 | return F.binary_cross_entropy_with_logits( 61 | pred_slice, target, reduction='mean')[None] 62 | 63 | 64 | @LOSSES.register_module 65 | class CrossEntropyLoss(nn.Module): 66 | 67 | def __init__(self, 68 | use_sigmoid=False, 69 | use_mask=False, 70 | reduction='mean', 71 | loss_weight=1.0): 72 | super(CrossEntropyLoss, self).__init__() 73 | assert (use_sigmoid is False) or (use_mask is False) 74 | self.use_sigmoid 
= use_sigmoid 75 | self.use_mask = use_mask 76 | self.reduction = reduction 77 | self.loss_weight = loss_weight 78 | 79 | if self.use_sigmoid: 80 | self.cls_criterion = binary_cross_entropy 81 | elif self.use_mask: 82 | self.cls_criterion = mask_cross_entropy 83 | else: 84 | self.cls_criterion = cross_entropy 85 | 86 | def forward(self, 87 | cls_score, 88 | label, 89 | weight=None, 90 | avg_factor=None, 91 | reduction_override=None, 92 | **kwargs): 93 | assert reduction_override in (None, 'none', 'mean', 'sum') 94 | reduction = ( 95 | reduction_override if reduction_override else self.reduction) 96 | loss_cls = self.loss_weight * self.cls_criterion( 97 | cls_score, 98 | label, 99 | weight, 100 | reduction=reduction, 101 | avg_factor=avg_factor, 102 | **kwargs) 103 | return loss_cls 104 | -------------------------------------------------------------------------------- /mmdet/models/losses/focal_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | from mmdet.ops import sigmoid_focal_loss as _sigmoid_focal_loss 5 | from ..registry import LOSSES 6 | from .utils import weight_reduce_loss 7 | 8 | 9 | # This method is only for debugging 10 | def py_sigmoid_focal_loss(pred, 11 | target, 12 | weight=None, 13 | gamma=2.0, 14 | alpha=0.25, 15 | reduction='mean', 16 | avg_factor=None): 17 | pred_sigmoid = pred.sigmoid() 18 | target = target.type_as(pred) 19 | pt = (1 - pred_sigmoid) * target + pred_sigmoid * (1 - target) 20 | focal_weight = (alpha * target + (1 - alpha) * 21 | (1 - target)) * pt.pow(gamma) 22 | loss = F.binary_cross_entropy_with_logits( 23 | pred, target, reduction='none') * focal_weight 24 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor) 25 | return loss 26 | 27 | 28 | def sigmoid_focal_loss(pred, 29 | target, 30 | weight=None, 31 | gamma=2.0, 32 | alpha=0.25, 33 | reduction='mean', 34 | avg_factor=None): 35 | # Function.apply does not accept keyword arguments, so the decorator 36 | # "weighted_loss" is not applicable 37 | loss = _sigmoid_focal_loss(pred, target, gamma, alpha) 38 | # TODO: find a proper way to handle the shape of weight 39 | if weight is not None: 40 | weight = weight.view(-1, 1) 41 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor) 42 | return loss 43 | 44 | 45 | @LOSSES.register_module 46 | class FocalLoss(nn.Module): 47 | 48 | def __init__(self, 49 | use_sigmoid=True, 50 | gamma=2.0, 51 | alpha=0.25, 52 | reduction='mean', 53 | loss_weight=1.0): 54 | super(FocalLoss, self).__init__() 55 | assert use_sigmoid is True, 'Only sigmoid focal loss supported now.' 
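# For reference, the loss computed here (and by py_sigmoid_focal_loss above)
# is FL(p_t) = -alpha_t * (1 - p_t)**gamma * log(p_t), where p_t is the
# predicted probability of the ground-truth class; gamma down-weights easy
# examples and alpha balances positives against negatives (Lin et al., 2017).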
56 | self.use_sigmoid = use_sigmoid 57 | self.gamma = gamma 58 | self.alpha = alpha 59 | self.reduction = reduction 60 | self.loss_weight = loss_weight 61 | 62 | def forward(self, 63 | pred, 64 | target, 65 | weight=None, 66 | avg_factor=None, 67 | reduction_override=None): 68 | assert reduction_override in (None, 'none', 'mean', 'sum') 69 | reduction = ( 70 | reduction_override if reduction_override else self.reduction) 71 | if self.use_sigmoid: 72 | loss_cls = self.loss_weight * sigmoid_focal_loss( 73 | pred, 74 | target, 75 | weight, 76 | gamma=self.gamma, 77 | alpha=self.alpha, 78 | reduction=reduction, 79 | avg_factor=avg_factor) 80 | else: 81 | raise NotImplementedError 82 | return loss_cls 83 | -------------------------------------------------------------------------------- /mmdet/models/losses/mse_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | from ..registry import LOSSES 5 | from .utils import weighted_loss 6 | 7 | 8 | @weighted_loss 9 | def mse_loss(pred, target): 10 | return F.mse_loss(pred, target, reduction='none') 11 | 12 | 13 | @LOSSES.register_module 14 | class MSELoss(nn.Module): 15 | 16 | def __init__(self, reduction='mean', loss_weight=1.0): 17 | super().__init__() 18 | self.reduction = reduction 19 | self.loss_weight = loss_weight 20 | 21 | def forward(self, pred, target, weight=None, avg_factor=None): 22 | loss = self.loss_weight * mse_loss( 23 | pred, 24 | target, 25 | weight, 26 | reduction=self.reduction, 27 | avg_factor=avg_factor) 28 | return loss 29 | -------------------------------------------------------------------------------- /mmdet/models/losses/smooth_l1_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from ..registry import LOSSES 5 | from .utils import weighted_loss 6 | 7 | 8 | @weighted_loss 9 | def smooth_l1_loss(pred, target, beta=1.0): 10 | assert beta > 0 11 | assert pred.size() == target.size() and target.numel() > 0 12 | diff = torch.abs(pred - target) 13 | loss = torch.where(diff < beta, 0.5 * diff * diff / beta, 14 | diff - 0.5 * beta) 15 | return loss 16 | 17 | 18 | @LOSSES.register_module 19 | class SmoothL1Loss(nn.Module): 20 | 21 | def __init__(self, beta=1.0, reduction='mean', loss_weight=1.0): 22 | super(SmoothL1Loss, self).__init__() 23 | self.beta = beta 24 | self.reduction = reduction 25 | self.loss_weight = loss_weight 26 | 27 | def forward(self, 28 | pred, 29 | target, 30 | weight=None, 31 | avg_factor=None, 32 | reduction_override=None, 33 | **kwargs): 34 | assert reduction_override in (None, 'none', 'mean', 'sum') 35 | reduction = ( 36 | reduction_override if reduction_override else self.reduction) 37 | loss_bbox = self.loss_weight * smooth_l1_loss( 38 | pred, 39 | target, 40 | weight, 41 | beta=self.beta, 42 | reduction=reduction, 43 | avg_factor=avg_factor, 44 | **kwargs) 45 | return loss_bbox 46 | -------------------------------------------------------------------------------- /mmdet/models/losses/utils.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | import torch.nn.functional as F 4 | 5 | 6 | def reduce_loss(loss, reduction): 7 | """Reduce loss as specified. 8 | 9 | Args: 10 | loss (Tensor): Elementwise loss tensor. 11 | reduction (str): Options are "none", "mean" and "sum". 12 | 13 | Return: 14 | Tensor: Reduced loss tensor. 
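Example (doctest-style sketch):

>>> import torch
>>> reduce_loss(torch.tensor([1., 2., 3.]), 'mean')
tensor(2.)
>>> reduce_loss(torch.tensor([1., 2., 3.]), 'sum')
tensor(6.)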
15 | """ 16 | reduction_enum = F._Reduction.get_enum(reduction) 17 | # none: 0, elementwise_mean:1, sum: 2 18 | if reduction_enum == 0: 19 | return loss 20 | elif reduction_enum == 1: 21 | return loss.mean() 22 | elif reduction_enum == 2: 23 | return loss.sum() 24 | 25 | 26 | def weight_reduce_loss(loss, weight=None, reduction='mean', avg_factor=None): 27 | """Apply element-wise weight and reduce loss. 28 | 29 | Args: 30 | loss (Tensor): Element-wise loss. 31 | weight (Tensor): Element-wise weights. 32 | reduction (str): Same as built-in losses of PyTorch. 33 | avg_factor (float): Avarage factor when computing the mean of losses. 34 | 35 | Returns: 36 | Tensor: Processed loss values. 37 | """ 38 | # if weight is specified, apply element-wise weight 39 | if weight is not None: 40 | loss = loss * weight 41 | 42 | # if avg_factor is not specified, just reduce the loss 43 | if avg_factor is None: 44 | loss = reduce_loss(loss, reduction) 45 | else: 46 | # if reduction is mean, then average the loss by avg_factor 47 | if reduction == 'mean': 48 | loss = loss.sum() / avg_factor 49 | # if reduction is 'none', then do nothing, otherwise raise an error 50 | elif reduction != 'none': 51 | raise ValueError('avg_factor can not be used with reduction="sum"') 52 | return loss 53 | 54 | 55 | def weighted_loss(loss_func): 56 | """Create a weighted version of a given loss function. 57 | 58 | To use this decorator, the loss function must have the signature like 59 | `loss_func(pred, target, **kwargs)`. The function only needs to compute 60 | element-wise loss without any reduction. This decorator will add weight 61 | and reduction arguments to the function. The decorated function will have 62 | the signature like `loss_func(pred, target, weight=None, reduction='mean', 63 | avg_factor=None, **kwargs)`. 64 | 65 | :Example: 66 | 67 | >>> import torch 68 | >>> @weighted_loss 69 | >>> def l1_loss(pred, target): 70 | >>> return (pred - target).abs() 71 | 72 | >>> pred = torch.Tensor([0, 2, 3]) 73 | >>> target = torch.Tensor([1, 1, 1]) 74 | >>> weight = torch.Tensor([1, 0, 1]) 75 | 76 | >>> l1_loss(pred, target) 77 | tensor(1.3333) 78 | >>> l1_loss(pred, target, weight) 79 | tensor(1.) 
80 | >>> l1_loss(pred, target, reduction='none') 81 | tensor([1., 1., 2.]) 82 | >>> l1_loss(pred, target, weight, avg_factor=2) 83 | tensor(1.5000) 84 | """ 85 | 86 | @functools.wraps(loss_func) 87 | def wrapper(pred, 88 | target, 89 | weight=None, 90 | reduction='mean', 91 | avg_factor=None, 92 | **kwargs): 93 | # get element-wise loss 94 | loss = loss_func(pred, target, **kwargs) 95 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor) 96 | return loss 97 | 98 | return wrapper 99 | -------------------------------------------------------------------------------- /mmdet/models/mask_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .fcn_mask_head import FCNMaskHead 2 | from .fcn_sep_mask_head import FCNSepMaskHead 3 | from .grid_head import GridHead 4 | from .htc_mask_head import HTCMaskHead 5 | from .maskiou_head import MaskIoUHead 6 | from .efficientps_semantic_head import EfficientPSSemanticHead 7 | 8 | __all__ = [ 9 | 'FCNMaskHead', 'FCNSepMaskHead', 'HTCMaskHead', 'GridHead', 10 | 'MaskIoUHead', 'EfficientPSSemanticHead' 11 | ] 12 | -------------------------------------------------------------------------------- /mmdet/models/mask_heads/htc_mask_head.py: -------------------------------------------------------------------------------- 1 | from mmdet.ops import ConvModule 2 | from ..registry import HEADS 3 | from .fcn_mask_head import FCNMaskHead 4 | 5 | 6 | @HEADS.register_module 7 | class HTCMaskHead(FCNMaskHead): 8 | 9 | def __init__(self, with_conv_res=True, *args, **kwargs): 10 | super(HTCMaskHead, self).__init__(*args, **kwargs) 11 | self.with_conv_res = with_conv_res 12 | if self.with_conv_res: 13 | self.conv_res = ConvModule( 14 | self.conv_out_channels, 15 | self.conv_out_channels, 16 | 1, 17 | conv_cfg=self.conv_cfg, 18 | norm_cfg=self.norm_cfg) 19 | 20 | def init_weights(self): 21 | super(HTCMaskHead, self).init_weights() 22 | if self.with_conv_res: 23 | self.conv_res.init_weights() 24 | 25 | def forward(self, x, res_feat=None, return_logits=True, return_feat=True): 26 | if res_feat is not None: 27 | assert self.with_conv_res 28 | res_feat = self.conv_res(res_feat) 29 | x = x + res_feat 30 | for conv in self.convs: 31 | x = conv(x) 32 | res_feat = x 33 | outs = [] 34 | if return_logits: 35 | x = self.upsample(x) 36 | if self.upsample_method == 'deconv': 37 | x = self.relu(x) 38 | mask_pred = self.conv_logits(x) 39 | outs.append(mask_pred) 40 | if return_feat: 41 | outs.append(res_feat) 42 | return outs if len(outs) > 1 else outs[0] 43 | -------------------------------------------------------------------------------- /mmdet/models/necks/__init__.py: -------------------------------------------------------------------------------- 1 | from .two_way_fpn import TWOWAYFPN 2 | 3 | __all__ = ['TWOWAYFPN'] 4 | -------------------------------------------------------------------------------- /mmdet/models/registry.py: -------------------------------------------------------------------------------- 1 | from mmdet.utils import Registry 2 | 3 | BACKBONES = Registry('backbone') 4 | NECKS = Registry('neck') 5 | ROI_EXTRACTORS = Registry('roi_extractor') 6 | SHARED_HEADS = Registry('shared_head') 7 | HEADS = Registry('head') 8 | LOSSES = Registry('loss') 9 | EFFICIENTPS = Registry('detector') 10 | -------------------------------------------------------------------------------- /mmdet/models/roi_extractors/__init__.py: -------------------------------------------------------------------------------- 1 | from .single_level 
import SingleRoIExtractor 2 | 3 | __all__ = ['SingleRoIExtractor'] 4 | -------------------------------------------------------------------------------- /mmdet/models/shared_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .res_layer import ResLayer 2 | 3 | __all__ = ['ResLayer'] 4 | -------------------------------------------------------------------------------- /mmdet/models/shared_heads/res_layer.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from mmcv.cnn import constant_init, kaiming_init 3 | from mmcv.runner import load_checkpoint 4 | 5 | from mmdet.core import auto_fp16 6 | from mmdet.utils import get_root_logger 7 | from ..backbones import ResNet, make_res_layer 8 | from ..registry import SHARED_HEADS 9 | 10 | 11 | @SHARED_HEADS.register_module 12 | class ResLayer(nn.Module): 13 | 14 | def __init__(self, 15 | depth, 16 | stage=3, 17 | stride=2, 18 | dilation=1, 19 | style='pytorch', 20 | norm_cfg=dict(type='BN', requires_grad=True), 21 | norm_eval=True, 22 | with_cp=False, 23 | dcn=None): 24 | super(ResLayer, self).__init__() 25 | self.norm_eval = norm_eval 26 | self.norm_cfg = norm_cfg 27 | self.stage = stage 28 | self.fp16_enabled = False 29 | block, stage_blocks = ResNet.arch_settings[depth] 30 | stage_block = stage_blocks[stage] 31 | planes = 64 * 2**stage 32 | inplanes = 64 * 2**(stage - 1) * block.expansion 33 | 34 | res_layer = make_res_layer( 35 | block, 36 | inplanes, 37 | planes, 38 | stage_block, 39 | stride=stride, 40 | dilation=dilation, 41 | style=style, 42 | with_cp=with_cp, 43 | norm_cfg=self.norm_cfg, 44 | dcn=dcn) 45 | self.add_module('layer{}'.format(stage + 1), res_layer) 46 | 47 | def init_weights(self, pretrained=None): 48 | if isinstance(pretrained, str): 49 | logger = get_root_logger() 50 | load_checkpoint(self, pretrained, strict=False, logger=logger) 51 | elif pretrained is None: 52 | for m in self.modules(): 53 | if isinstance(m, nn.Conv2d): 54 | kaiming_init(m) 55 | elif isinstance(m, nn.BatchNorm2d): 56 | constant_init(m, 1) 57 | else: 58 | raise TypeError('pretrained must be a str or None') 59 | 60 | @auto_fp16() 61 | def forward(self, x): 62 | res_layer = getattr(self, 'layer{}'.format(self.stage + 1)) 63 | out = res_layer(x) 64 | return out 65 | 66 | def train(self, mode=True): 67 | super(ResLayer, self).train(mode) 68 | if self.norm_eval: 69 | for m in self.modules(): 70 | if isinstance(m, nn.BatchNorm2d): 71 | m.eval() 72 | -------------------------------------------------------------------------------- /mmdet/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .weight_init import bias_init_with_prob 2 | 3 | __all__ = ['bias_init_with_prob'] 4 | -------------------------------------------------------------------------------- /mmdet/models/utils/weight_init.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def bias_init_with_prob(prior_prob): 5 | """Initialize conv/fc bias value according to a given probability.""" 6 | bias_init = float(-np.log((1 - prior_prob) / prior_prob)) 7 | return bias_init 8 | -------------------------------------------------------------------------------- /mmdet/ops/__init__.py: -------------------------------------------------------------------------------- 1 | from .context_block import ContextBlock 2 | from .conv import build_conv_layer 3 | from .conv_module import
ConvModule 4 | from .conv_ws import ConvWS2d, conv_ws_2d 5 | from .dcn import (DeformConv, DeformConvPack, DeformRoIPooling, 6 | DeformRoIPoolingPack, ModulatedDeformConv, 7 | ModulatedDeformConvPack, ModulatedDeformRoIPoolingPack, 8 | deform_conv, deform_roi_pooling, modulated_deform_conv) 9 | from .generalized_attention import GeneralizedAttention 10 | from .depthwise_separable_conv_module import DepthwiseSeparableConvModule 11 | from .masked_conv import MaskedConv2d 12 | from .nms import nms, soft_nms 13 | from .non_local import NonLocal2D 14 | from .norm import build_norm_layer 15 | from .roi_align import RoIAlign, roi_align 16 | from .roi_pool import RoIPool, roi_pool 17 | from .scale import Scale 18 | from .sigmoid_focal_loss import SigmoidFocalLoss, sigmoid_focal_loss 19 | from .upsample import build_upsample_layer 20 | from .utils import get_compiler_version, get_compiling_cuda_version 21 | 22 | __all__ = [ 23 | 'nms', 'soft_nms', 'RoIAlign', 'roi_align', 'RoIPool', 'roi_pool', 24 | 'DeformConv', 'DeformConvPack', 'DeformRoIPooling', 'DeformRoIPoolingPack', 25 | 'ModulatedDeformRoIPoolingPack', 'ModulatedDeformConv', 26 | 'ModulatedDeformConvPack', 'deform_conv', 'modulated_deform_conv', 27 | 'deform_roi_pooling', 'SigmoidFocalLoss', 'sigmoid_focal_loss', 28 | 'MaskedConv2d', 'ContextBlock', 'DepthwiseSeparableConvModule', 'GeneralizedAttention', 29 | 'NonLocal2D', 'get_compiler_version', 'get_compiling_cuda_version', 'build_conv_layer', 30 | 'ConvModule', 'ConvWS2d', 'conv_ws_2d', 'build_norm_layer', 'Scale', 31 | 'build_upsample_layer' 32 | ] 33 | -------------------------------------------------------------------------------- /mmdet/ops/activation.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | activation_cfg = { 4 | # layer_abbreviation: module 5 | 'ReLU': nn.ReLU, 6 | 'LeakyReLU': nn.LeakyReLU, 7 | 'PReLU': nn.PReLU, 8 | 'RReLU': nn.RReLU, 9 | 'ReLU6': nn.ReLU6, 10 | 'SELU': nn.SELU, 11 | 'CELU': nn.CELU 12 | } 13 | 14 | 15 | def build_activation_layer(cfg): 16 | """ Build activation layer 17 | 18 | Args: 19 | cfg (dict): cfg should contain: 20 | type (str): Identify activation layer type. 21 | layer args: args needed to instantiate an activation layer. 22 | 23 | Returns: 24 | layer (nn.Module): Created activation layer 25 | """ 26 | assert isinstance(cfg, dict) and 'type' in cfg 27 | cfg_ = cfg.copy() 28 | 29 | layer_type = cfg_.pop('type') 30 | if layer_type not in activation_cfg: 31 | raise KeyError('Unrecognized activation type {}'.format(layer_type)) 32 | else: 33 | activation = activation_cfg[layer_type] 34 | if activation is None: 35 | raise NotImplementedError 36 | 37 | layer = activation(**cfg_) 38 | return layer 39 | -------------------------------------------------------------------------------- /mmdet/ops/affine_grid/__init__.py: -------------------------------------------------------------------------------- 1 | from .affine_grid import affine_grid 2 | 3 | __all__ = ['affine_grid'] 4 | -------------------------------------------------------------------------------- /mmdet/ops/affine_grid/affine_grid.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | 6 | from .
import affine_grid_cuda 7 | 8 | 9 | class _AffineGridGenerator(Function): 10 | 11 | @staticmethod 12 | def forward(ctx, theta, size, align_corners): 13 | 14 | ctx.save_for_backward(theta) 15 | ctx.size = size 16 | ctx.align_corners = align_corners 17 | 18 | func = affine_grid_cuda.affine_grid_generator_forward 19 | 20 | output = func(theta, size, align_corners) 21 | 22 | return output 23 | 24 | @staticmethod 25 | @once_differentiable 26 | def backward(ctx, grad_output): 27 | theta, = ctx.saved_tensors # saved_tensors is a tuple; unpack the single tensor 28 | size = ctx.size 29 | align_corners = ctx.align_corners 30 | 31 | func = affine_grid_cuda.affine_grid_generator_backward 32 | 33 | grad_input = func(grad_output, theta, size, align_corners) 34 | 35 | return grad_input, None, None 36 | 37 | 38 | def affine_grid(theta, size, align_corners=False): 39 | if torch.__version__ >= '1.3': 40 | return F.affine_grid(theta, size, align_corners) 41 | elif align_corners: 42 | return F.affine_grid(theta, size) 43 | else: 44 | # enforce floating point dtype on theta 45 | if not theta.is_floating_point(): 46 | raise ValueError( 47 | 'Expected theta to have floating point type, but got {}'. 48 | format(theta.dtype)) 49 | # check that shapes and sizes match 50 | if len(size) == 4: 51 | if theta.dim() != 3 or theta.size(-2) != 2 or theta.size(-1) != 3: 52 | raise ValueError( 53 | 'Expected a batch of 2D affine matrices of shape Nx2x3 ' 54 | 'for size {}. Got {}.'.format(size, theta.shape)) 55 | elif len(size) == 5: 56 | if theta.dim() != 3 or theta.size(-2) != 3 or theta.size(-1) != 4: 57 | raise ValueError( 58 | 'Expected a batch of 3D affine matrices of shape Nx3x4 ' 59 | 'for size {}. Got {}.'.format(size, theta.shape)) 60 | else: 61 | raise NotImplementedError( 62 | 'affine_grid only supports 4D and 5D sizes, ' 63 | 'for 2D and 3D affine transforms, respectively. ' 64 | 'Got size {}.'.format(size)) 65 | if min(size) <= 0: 66 | raise ValueError( 67 | 'Expected non-zero, positive output size.
Got {}'.format(size)) 68 | return _AffineGridGenerator.apply(theta, size, align_corners) 69 | -------------------------------------------------------------------------------- /mmdet/ops/carafe/__init__.py: -------------------------------------------------------------------------------- 1 | from .carafe import CARAFE, CARAFENaive, CARAFEPack, carafe, carafe_naive 2 | 3 | __all__ = ['carafe', 'carafe_naive', 'CARAFE', 'CARAFENaive', 'CARAFEPack'] 4 | -------------------------------------------------------------------------------- /mmdet/ops/carafe/grad_check.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | import mmcv 5 | import torch 6 | from torch.autograd import gradcheck 7 | 8 | sys.path.append(osp.abspath(osp.join(__file__, '../../'))) 9 | from mmdet.ops.carafe import CARAFE, CARAFENaive # noqa: E402, isort:skip 10 | from mmdet.ops.carafe import carafe, carafe_naive # noqa: E402, isort:skip 11 | 12 | feat = torch.randn(2, 64, 3, 3, requires_grad=True, device='cuda:0').double() 13 | mask = torch.randn( 14 | 2, 100, 6, 6, requires_grad=True, device='cuda:0').sigmoid().double() 15 | 16 | print('Gradcheck for carafe...') 17 | test = gradcheck(CARAFE(5, 4, 2), (feat, mask), atol=1e-4, eps=1e-4) 18 | print(test) 19 | 20 | print('Gradcheck for carafe naive...') 21 | test = gradcheck(CARAFENaive(5, 4, 2), (feat, mask), atol=1e-4, eps=1e-4) 22 | print(test) 23 | 24 | feat = torch.randn( 25 | 2, 1024, 100, 100, requires_grad=True, device='cuda:0').float() 26 | mask = torch.randn( 27 | 2, 25, 200, 200, requires_grad=True, device='cuda:0').sigmoid().float() 28 | loop_num = 500 29 | 30 | time_forward = 0 31 | time_backward = 0 32 | bar = mmcv.ProgressBar(loop_num) 33 | timer = mmcv.Timer() 34 | for i in range(loop_num): 35 | x = carafe(feat.clone(), mask.clone(), 5, 1, 2) 36 | torch.cuda.synchronize() 37 | time_forward += timer.since_last_check() 38 | x.sum().backward(retain_graph=True) 39 | torch.cuda.synchronize() 40 | time_backward += timer.since_last_check() 41 | bar.update() 42 | print('\nCARAFE time forward: {} ms/iter | time backward: {} ms/iter'.format( 43 | (time_forward + 1e-3) * 1e3 / loop_num, 44 | (time_backward + 1e-3) * 1e3 / loop_num)) 45 | 46 | time_naive_forward = 0 47 | time_naive_backward = 0 48 | bar = mmcv.ProgressBar(loop_num) 49 | timer = mmcv.Timer() 50 | for i in range(loop_num): 51 | x = carafe_naive(feat.clone(), mask.clone(), 5, 1, 2) 52 | torch.cuda.synchronize() 53 | time_naive_forward += timer.since_last_check() 54 | x.sum().backward(retain_graph=True) 55 | torch.cuda.synchronize() 56 | time_naive_backward += timer.since_last_check() 57 | bar.update() 58 | print('\nCARAFE naive time forward: {} ms/iter | time backward: {} ms/iter'. 
59 | format((time_naive_forward + 1e-3) * 1e3 / loop_num, 60 | (time_naive_backward + 1e-3) * 1e3 / loop_num)) 61 | -------------------------------------------------------------------------------- /mmdet/ops/carafe/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 4 | 5 | NVCC_ARGS = [ 6 | '-D__CUDA_NO_HALF_OPERATORS__', 7 | '-D__CUDA_NO_HALF_CONVERSIONS__', 8 | '-D__CUDA_NO_HALF2_OPERATORS__', 9 | ] 10 | 11 | setup( 12 | name='carafe', 13 | ext_modules=[ 14 | CUDAExtension( 15 | 'carafe_cuda', 16 | ['src/carafe_cuda.cpp', 'src/carafe_cuda_kernel.cu'], 17 | extra_compile_args={ 18 | 'cxx': [], 19 | 'nvcc': NVCC_ARGS 20 | }), 21 | CUDAExtension( 22 | 'carafe_naive_cuda', 23 | ['src/carafe_naive_cuda.cpp', 'src/carafe_naive_cuda_kernel.cu'], 24 | extra_compile_args={ 25 | 'cxx': [], 26 | 'nvcc': NVCC_ARGS 27 | }) 28 | ], 29 | cmdclass={'build_ext': BuildExtension}) 30 | -------------------------------------------------------------------------------- /mmdet/ops/carafe/src/carafe_naive_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include <ATen/ATen.h> 2 | #include <torch/extension.h> 3 | 4 | #include <cmath> 5 | #include <vector> 6 | 7 | int CARAFENAIVEForwardLaucher(const at::Tensor features, const at::Tensor masks, 8 | const int kernel_size, const int group_size, 9 | const int scale_factor, const int batch_size, 10 | const int channels, const int height, 11 | const int width, at::Tensor output); 12 | 13 | int CARAFENAIVEBackwardLaucher(const at::Tensor top_grad, 14 | const at::Tensor features, 15 | const at::Tensor masks, const int kernel_size, 16 | const int group_size, const int scale_factor, 17 | const int batch_size, const int channels, 18 | const int height, const int width, 19 | at::Tensor bottom_grad, at::Tensor mask_grad); 20 | 21 | #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 22 | #define CHECK_CONTIGUOUS(x) \ 23 | TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ") 24 | #define CHECK_INPUT(x) \ 25 | CHECK_CUDA(x); \ 26 | CHECK_CONTIGUOUS(x) 27 | 28 | int carafe_naive_forward_cuda(at::Tensor features, at::Tensor masks, 29 | int kernel_size, int group_size, int scale_factor, 30 | at::Tensor output) { 31 | CHECK_INPUT(features); 32 | CHECK_INPUT(masks); 33 | CHECK_INPUT(output); 34 | at::DeviceGuard guard(features.device()); 35 | 36 | int batch_size = output.size(0); 37 | int num_channels = output.size(1); 38 | int data_height = output.size(2); 39 | int data_width = output.size(3); 40 | 41 | CARAFENAIVEForwardLaucher(features, masks, kernel_size, group_size, 42 | scale_factor, batch_size, num_channels, data_height, 43 | data_width, output); 44 | 45 | return 1; 46 | } 47 | 48 | int carafe_naive_backward_cuda(at::Tensor top_grad, at::Tensor features, 49 | at::Tensor masks, int kernel_size, 50 | int group_size, int scale_factor, 51 | at::Tensor bottom_grad, at::Tensor mask_grad) { 52 | CHECK_INPUT(top_grad); 53 | CHECK_INPUT(features); 54 | CHECK_INPUT(masks); 55 | CHECK_INPUT(bottom_grad); 56 | CHECK_INPUT(mask_grad); 57 | at::DeviceGuard guard(top_grad.device()); 58 | 59 | int batch_size = top_grad.size(0); 60 | int num_channels = top_grad.size(1); 61 | int data_height = top_grad.size(2); 62 | int data_width = top_grad.size(3); 63 | 64 | CARAFENAIVEBackwardLaucher(top_grad, features, masks, kernel_size, group_size, 65 | scale_factor, batch_size, num_channels, 66 | data_height, data_width,
bottom_grad, mask_grad); 67 | 68 | return 1; 69 | } 70 | 71 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 72 | m.def("forward", &carafe_naive_forward_cuda, "carafe_naive forward (CUDA)"); 73 | m.def("backward", &carafe_naive_backward_cuda, 74 | "carafe_naive backward (CUDA)"); 75 | } 76 | -------------------------------------------------------------------------------- /mmdet/ops/conv.py: -------------------------------------------------------------------------------- 1 | from torch import nn as nn 2 | 3 | from .conv_ws import ConvWS2d, ConvAWS2d 4 | from .dcn import DeformConvPack, ModulatedDeformConvPack 5 | from .saconv import SAConv2d 6 | 7 | conv_cfg = { 8 | 'Conv': nn.Conv2d, 9 | 'ConvWS': ConvWS2d, 10 | 'DCN': DeformConvPack, 11 | 'DCNv2': ModulatedDeformConvPack, 12 | 'ConvAWS': ConvAWS2d, 13 | 'SAC': SAConv2d, 14 | # TODO: octave conv 15 | } 16 | 17 | 18 | def build_conv_layer(cfg, *args, **kwargs): 19 | """ Build convolution layer 20 | 21 | Args: 22 | cfg (None or dict): cfg should contain: 23 | type (str): identify conv layer type. 24 | layer args: args needed to instantiate a conv layer. 25 | 26 | Returns: 27 | layer (nn.Module): created conv layer 28 | """ 29 | if cfg is None: 30 | cfg_ = dict(type='Conv') 31 | else: 32 | assert isinstance(cfg, dict) and 'type' in cfg 33 | cfg_ = cfg.copy() 34 | 35 | layer_type = cfg_.pop('type') 36 | if layer_type not in conv_cfg: 37 | raise KeyError('Unrecognized conv type {}'.format(layer_type)) 38 | else: 39 | conv_layer = conv_cfg[layer_type] 40 | 41 | layer = conv_layer(*args, **kwargs, **cfg_) 42 | 43 | return layer 44 | -------------------------------------------------------------------------------- /mmdet/ops/conv_ws.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | def conv_ws_2d(input, 7 | weight, 8 | bias=None, 9 | stride=1, 10 | padding=0, 11 | dilation=1, 12 | groups=1, 13 | eps=1e-5): 14 | c_in = weight.size(0) 15 | weight_flat = weight.view(c_in, -1) 16 | mean = weight_flat.mean(dim=1, keepdim=True).view(c_in, 1, 1, 1) 17 | std = weight_flat.std(dim=1, keepdim=True).view(c_in, 1, 1, 1) 18 | weight = (weight - mean) / (std + eps) 19 | return F.conv2d(input, weight, bias, stride, padding, dilation, groups) 20 | 21 | 22 | class ConvWS2d(nn.Conv2d): 23 | 24 | def __init__(self, 25 | in_channels, 26 | out_channels, 27 | kernel_size, 28 | stride=1, 29 | padding=0, 30 | dilation=1, 31 | groups=1, 32 | bias=True, 33 | eps=1e-5): 34 | super(ConvWS2d, self).__init__( 35 | in_channels, 36 | out_channels, 37 | kernel_size, 38 | stride=stride, 39 | padding=padding, 40 | dilation=dilation, 41 | groups=groups, 42 | bias=bias) 43 | self.eps = eps 44 | 45 | def forward(self, x): 46 | return conv_ws_2d(x, self.weight, self.bias, self.stride, self.padding, 47 | self.dilation, self.groups, self.eps) 48 | 49 | 50 | class ConvAWS2d(nn.Conv2d): 51 | 52 | def __init__(self, 53 | in_channels, 54 | out_channels, 55 | kernel_size, 56 | stride=1, 57 | padding=0, 58 | dilation=1, 59 | groups=1, 60 | bias=True): 61 | super().__init__( 62 | in_channels, 63 | out_channels, 64 | kernel_size, 65 | stride=stride, 66 | padding=padding, 67 | dilation=dilation, 68 | groups=groups, 69 | bias=bias) 70 | self.register_buffer('weight_gamma', torch.ones(self.out_channels, 1, 1, 1)) 71 | self.register_buffer('weight_beta', torch.zeros(self.out_channels, 1, 1, 1)) 72 | 73 | def _get_weight(self, weight): 74 | weight_mean = weight.mean(dim=1,
keepdim=True).mean(dim=2, 75 | keepdim=True).mean(dim=3, keepdim=True) 76 | weight = weight - weight_mean 77 | std = torch.sqrt(weight.view(weight.size(0), -1).var(dim=1) + 1e-5).view(-1, 1, 1, 1) 78 | weight = weight / std 79 | weight = self.weight_gamma * weight + self.weight_beta 80 | return weight 81 | 82 | def forward(self, x): 83 | weight = self._get_weight(self.weight) 84 | return super().conv2d_forward(x, weight) 85 | 86 | def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, 87 | missing_keys, unexpected_keys, error_msgs): 88 | self.weight_gamma.data.fill_(-1) 89 | super()._load_from_state_dict(state_dict, prefix, local_metadata, strict, 90 | missing_keys, unexpected_keys, error_msgs) 91 | if self.weight_gamma.data.mean() > 0: 92 | return 93 | weight = self.weight.data 94 | weight_mean = weight.data.mean(dim=1, keepdim=True).mean(dim=2, 95 | keepdim=True).mean(dim=3, keepdim=True) 96 | self.weight_beta.data.copy_(weight_mean) 97 | std = torch.sqrt(weight.view(weight.size(0), -1).var(dim=1) + 1e-5).view(-1, 1, 1, 1) 98 | self.weight_gamma.data.copy_(std) 99 | -------------------------------------------------------------------------------- /mmdet/ops/dcn/__init__.py: -------------------------------------------------------------------------------- 1 | from .deform_conv import (DeformConv, DeformConvPack, ModulatedDeformConv, 2 | ModulatedDeformConvPack, deform_conv, 3 | modulated_deform_conv) 4 | from .deform_pool import (DeformRoIPooling, DeformRoIPoolingPack, 5 | ModulatedDeformRoIPoolingPack, deform_roi_pooling) 6 | 7 | __all__ = [ 8 | 'DeformConv', 'DeformConvPack', 'ModulatedDeformConv', 9 | 'ModulatedDeformConvPack', 'DeformRoIPooling', 'DeformRoIPoolingPack', 10 | 'ModulatedDeformRoIPoolingPack', 'deform_conv', 'modulated_deform_conv', 11 | 'deform_roi_pooling' 12 | ] 13 | -------------------------------------------------------------------------------- /mmdet/ops/grid_sampler/__init__.py: -------------------------------------------------------------------------------- 1 | from .grid_sampler import grid_sample 2 | 3 | __all__ = ['grid_sample'] 4 | -------------------------------------------------------------------------------- /mmdet/ops/grid_sampler/src/grid_sampler.cpp: -------------------------------------------------------------------------------- 1 | #include <ATen/ATen.h> 2 | #include <torch/extension.h> 3 | 4 | namespace mmdetection { 5 | 6 | using namespace at; 7 | 8 | // No shape checking needed here. See # NOTE [ grid_sampler Native Functions ]. 9 | Tensor grid_sampler_2d_forward_cpu(const Tensor& input, const Tensor& grid, 10 | int64_t interpolation_mode, int64_t padding_mode, 11 | bool align_corners); 12 | 13 | // No shape checking needed here. See # NOTE [ grid_sampler Native Functions ]. 14 | Tensor grid_sampler_3d_forward_cpu(const Tensor& input, const Tensor& grid, 15 | int64_t interpolation_mode, int64_t padding_mode, 16 | bool align_corners); 17 | 18 | // No shape checking needed here. See # NOTE [ grid_sampler Native Functions ]. 19 | std::tuple<Tensor, Tensor> 20 | grid_sampler_2d_backward_cpu(const Tensor& grad_output, const Tensor& input, 21 | const Tensor& grid, int64_t interpolation_mode, 22 | int64_t padding_mode, bool align_corners); 23 | 24 | // No shape checking needed here. See # NOTE [ grid_sampler Native Functions ]. 25 | std::tuple<Tensor, Tensor> 26 | grid_sampler_3d_backward_cpu(const Tensor& grad_output, const Tensor& input, 27 | const Tensor& grid, int64_t interpolation_mode, int64_t padding_mode, 28 | bool align_corners); 29 | 30 | // No shape checking needed here.
See # NOTE [ grid_sampler Native Functions ]. 31 | Tensor grid_sampler_2d_forward_cuda(const Tensor& input, const Tensor& grid, 32 | int64_t interpolation_mode, int64_t padding_mode, 33 | bool align_corners); 34 | 35 | // No shape checking needed here. See # NOTE [ grid_sampler Native Functions ]. 36 | Tensor grid_sampler_3d_forward_cuda(const Tensor& input, const Tensor& grid, 37 | int64_t interpolation_mode, int64_t padding_mode, 38 | bool align_corners); 39 | 40 | // No shape checking needed here. See # NOTE [ grid_sampler Native Functions ]. 41 | std::tuple<Tensor, Tensor> 42 | grid_sampler_2d_backward_cuda(const Tensor& grad_output, const Tensor& input, 43 | const Tensor& grid, int64_t interpolation_mode, 44 | int64_t padding_mode, bool align_corners); 45 | 46 | // No shape checking needed here. See # NOTE [ grid_sampler Native Functions ]. 47 | std::tuple<Tensor, Tensor> 48 | grid_sampler_3d_backward_cuda(const Tensor& grad_output, const Tensor& input, 49 | const Tensor& grid, int64_t interpolation_mode, int64_t padding_mode, 50 | bool align_corners); 51 | 52 | 53 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 54 | 55 | m.def("grid_sampler_2d_forward_cpu", &grid_sampler_2d_forward_cpu, "grid_sampler_2d_forward (CPU)"); 56 | m.def("grid_sampler_2d_backward_cpu", &grid_sampler_2d_backward_cpu, "grid_sampler_2d_backward (CPU)"); 57 | m.def("grid_sampler_3d_forward_cpu", &grid_sampler_3d_forward_cpu, "grid_sampler_3d_forward (CPU)"); 58 | m.def("grid_sampler_3d_backward_cpu", &grid_sampler_3d_backward_cpu, "grid_sampler_3d_backward (CPU)"); 59 | 60 | m.def("grid_sampler_2d_forward_cuda", &grid_sampler_2d_forward_cuda, "grid_sampler_2d_forward (CUDA)"); 61 | m.def("grid_sampler_2d_backward_cuda", &grid_sampler_2d_backward_cuda, "grid_sampler_2d_backward (CUDA)"); 62 | m.def("grid_sampler_3d_forward_cuda", &grid_sampler_3d_forward_cuda, "grid_sampler_3d_forward (CUDA)"); 63 | m.def("grid_sampler_3d_backward_cuda", &grid_sampler_3d_backward_cuda, "grid_sampler_3d_backward (CUDA)"); 64 | } 65 | 66 | } // namespace mmdetection 67 | -------------------------------------------------------------------------------- /mmdet/ops/masked_conv/__init__.py: -------------------------------------------------------------------------------- 1 | from .masked_conv import MaskedConv2d, masked_conv2d 2 | 3 | __all__ = ['masked_conv2d', 'MaskedConv2d'] 4 | -------------------------------------------------------------------------------- /mmdet/ops/masked_conv/masked_conv.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | import torch.nn as nn 5 | from torch.autograd import Function 6 | from torch.autograd.function import once_differentiable 7 | from torch.nn.modules.utils import _pair 8 | 9 | from .
import masked_conv2d_cuda 10 | 11 | 12 | class MaskedConv2dFunction(Function): 13 | 14 | @staticmethod 15 | def forward(ctx, features, mask, weight, bias, padding=0, stride=1): 16 | assert mask.dim() == 3 and mask.size(0) == 1 17 | assert features.dim() == 4 and features.size(0) == 1 18 | assert features.size()[2:] == mask.size()[1:] 19 | pad_h, pad_w = _pair(padding) 20 | stride_h, stride_w = _pair(stride) 21 | if stride_h != 1 or stride_w != 1: 22 | raise ValueError( 23 | 'Stride must be 1 in masked_conv2d currently.') 24 | if not features.is_cuda: 25 | raise NotImplementedError 26 | 27 | out_channel, in_channel, kernel_h, kernel_w = weight.size() 28 | 29 | batch_size = features.size(0) 30 | out_h = int( 31 | math.floor((features.size(2) + 2 * pad_h - 32 | (kernel_h - 1) - 1) / stride_h + 1)) 33 | out_w = int( 34 | math.floor((features.size(3) + 2 * pad_w - 35 | (kernel_w - 1) - 1) / stride_w + 1)) 36 | mask_inds = torch.nonzero(mask[0] > 0) 37 | output = features.new_zeros(batch_size, out_channel, out_h, out_w) 38 | if mask_inds.numel() > 0: 39 | mask_h_idx = mask_inds[:, 0].contiguous() 40 | mask_w_idx = mask_inds[:, 1].contiguous() 41 | data_col = features.new_zeros(in_channel * kernel_h * kernel_w, 42 | mask_inds.size(0)) 43 | masked_conv2d_cuda.masked_im2col_forward(features, mask_h_idx, 44 | mask_w_idx, kernel_h, 45 | kernel_w, pad_h, pad_w, 46 | data_col) 47 | 48 | masked_output = torch.addmm(1, bias[:, None], 1, 49 | weight.view(out_channel, -1), data_col) 50 | masked_conv2d_cuda.masked_col2im_forward(masked_output, mask_h_idx, 51 | mask_w_idx, out_h, out_w, 52 | out_channel, output) 53 | return output 54 | 55 | @staticmethod 56 | @once_differentiable 57 | def backward(ctx, grad_output): 58 | return (None, ) * 6 # one gradient per input of forward() 59 | 60 | 61 | masked_conv2d = MaskedConv2dFunction.apply 62 | 63 | 64 | class MaskedConv2d(nn.Conv2d): 65 | """A MaskedConv2d which inherits the official Conv2d. 66 | 67 | The masked forward doesn't implement the backward function and only 68 | supports a stride of 1 for now.
69 | """ 70 | 71 | def __init__(self, 72 | in_channels, 73 | out_channels, 74 | kernel_size, 75 | stride=1, 76 | padding=0, 77 | dilation=1, 78 | groups=1, 79 | bias=True): 80 | super(MaskedConv2d, 81 | self).__init__(in_channels, out_channels, kernel_size, stride, 82 | padding, dilation, groups, bias) 83 | 84 | def forward(self, input, mask=None): 85 | if mask is None: # fallback to the normal Conv2d 86 | return super(MaskedConv2d, self).forward(input) 87 | else: 88 | return masked_conv2d(input, mask, self.weight, self.bias, 89 | self.padding) 90 | -------------------------------------------------------------------------------- /mmdet/ops/masked_conv/src/masked_conv2d_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | int MaskedIm2colForwardLaucher(const at::Tensor im, const int height, 7 | const int width, const int channels, 8 | const int kernel_h, const int kernel_w, 9 | const int pad_h, const int pad_w, 10 | const at::Tensor mask_h_idx, 11 | const at::Tensor mask_w_idx, const int mask_cnt, 12 | at::Tensor col); 13 | 14 | int MaskedCol2imForwardLaucher(const at::Tensor col, const int height, 15 | const int width, const int channels, 16 | const at::Tensor mask_h_idx, 17 | const at::Tensor mask_w_idx, const int mask_cnt, 18 | at::Tensor im); 19 | 20 | #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 21 | #define CHECK_CONTIGUOUS(x) \ 22 | TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ") 23 | #define CHECK_INPUT(x) \ 24 | CHECK_CUDA(x); \ 25 | CHECK_CONTIGUOUS(x) 26 | 27 | int masked_im2col_forward_cuda(const at::Tensor im, const at::Tensor mask_h_idx, 28 | const at::Tensor mask_w_idx, const int kernel_h, 29 | const int kernel_w, const int pad_h, 30 | const int pad_w, at::Tensor col) { 31 | CHECK_INPUT(im); 32 | CHECK_INPUT(mask_h_idx); 33 | CHECK_INPUT(mask_w_idx); 34 | CHECK_INPUT(col); 35 | // im: (n, ic, h, w), kernel size (kh, kw) 36 | // kernel: (oc, ic * kh * kw), col: (kh * kw * ic, ow * oh) 37 | at::DeviceGuard guard(im.device()); 38 | 39 | int channels = im.size(1); 40 | int height = im.size(2); 41 | int width = im.size(3); 42 | int mask_cnt = mask_h_idx.size(0); 43 | 44 | MaskedIm2colForwardLaucher(im, height, width, channels, kernel_h, kernel_w, 45 | pad_h, pad_w, mask_h_idx, mask_w_idx, mask_cnt, 46 | col); 47 | 48 | return 1; 49 | } 50 | 51 | int masked_col2im_forward_cuda(const at::Tensor col, 52 | const at::Tensor mask_h_idx, 53 | const at::Tensor mask_w_idx, int height, 54 | int width, int channels, at::Tensor im) { 55 | CHECK_INPUT(col); 56 | CHECK_INPUT(mask_h_idx); 57 | CHECK_INPUT(mask_w_idx); 58 | CHECK_INPUT(im); 59 | // im: (n, ic, h, w), kernel size (kh, kw) 60 | // kernel: (oc, ic * kh * kh), col: (kh * kw * ic, ow * oh) 61 | at::DeviceGuard guard(col.device()); 62 | 63 | int mask_cnt = mask_h_idx.size(0); 64 | 65 | MaskedCol2imForwardLaucher(col, height, width, channels, mask_h_idx, 66 | mask_w_idx, mask_cnt, im); 67 | 68 | return 1; 69 | } 70 | 71 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 72 | m.def("masked_im2col_forward", &masked_im2col_forward_cuda, 73 | "masked_im2col forward (CUDA)"); 74 | m.def("masked_col2im_forward", &masked_col2im_forward_cuda, 75 | "masked_col2im forward (CUDA)"); 76 | } 77 | -------------------------------------------------------------------------------- /mmdet/ops/nms/__init__.py: -------------------------------------------------------------------------------- 1 | from .nms_wrapper import nms, 
soft_nms 2 | 3 | __all__ = ['nms', 'soft_nms'] 4 | -------------------------------------------------------------------------------- /mmdet/ops/nms/src/nms_cuda.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #include <torch/extension.h> 3 | 4 | #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 5 | 6 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh); 7 | 8 | at::Tensor nms(const at::Tensor& dets, const float threshold) { 9 | CHECK_CUDA(dets); 10 | if (dets.numel() == 0) 11 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 12 | return nms_cuda(dets, threshold); 13 | } 14 | 15 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 16 | m.def("nms", &nms, "non-maximum suppression"); 17 | } 18 | -------------------------------------------------------------------------------- /mmdet/ops/norm.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from inplace_abn import InPlaceABN, InPlaceABNSync 3 | 4 | norm_cfg = { 5 | # format: layer_type: (abbreviation, module) 6 | 'BN': ('bn', nn.BatchNorm2d), 7 | 'SyncBN': ('bn', nn.SyncBatchNorm), 8 | 'GN': ('gn', nn.GroupNorm), 9 | 'InPlaceABN': ('bn', InPlaceABN), 10 | 'InPlaceABNSync': ('bn', InPlaceABNSync), 11 | # and potentially 'SN' 12 | } 13 | 14 | 15 | def build_norm_layer(cfg, num_features, postfix=''): 16 | """ Build normalization layer 17 | 18 | Args: 19 | cfg (dict): cfg should contain: 20 | type (str): identify norm layer type. 21 | layer args: args needed to instantiate a norm layer. 22 | requires_grad (bool): [optional] whether to stop gradient updates 23 | num_features (int): number of channels from input. 24 | postfix (int, str): appended to the norm abbreviation to 25 | create the layer name.
26 | 27 | Returns: 28 | name (str): abbreviation + postfix 29 | layer (nn.Module): created norm layer 30 | """ 31 | assert isinstance(cfg, dict) and 'type' in cfg 32 | cfg_ = cfg.copy() 33 | 34 | layer_type = cfg_.pop('type') 35 | if layer_type not in norm_cfg: 36 | raise KeyError('Unrecognized norm type {}'.format(layer_type)) 37 | else: 38 | abbr, norm_layer = norm_cfg[layer_type] 39 | if norm_layer is None: 40 | raise NotImplementedError 41 | 42 | assert isinstance(postfix, (int, str)) 43 | name = abbr + str(postfix) 44 | 45 | requires_grad = cfg_.pop('requires_grad', True) 46 | cfg_.setdefault('eps', 1e-5) 47 | if layer_type != 'GN': 48 | layer = norm_layer(num_features, **cfg_) 49 | if layer_type == 'SyncBN': 50 | layer._specify_ddp_gpu_num(1) 51 | else: 52 | assert 'num_groups' in cfg_ 53 | layer = norm_layer(num_channels=num_features, **cfg_) 54 | 55 | for param in layer.parameters(): 56 | param.requires_grad = requires_grad 57 | 58 | return name, layer 59 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/__init__.py: -------------------------------------------------------------------------------- 1 | from .roi_align import RoIAlign, roi_align 2 | 3 | __all__ = ['roi_align', 'RoIAlign'] 4 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/gradcheck.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | import numpy as np 5 | import torch 6 | from torch.autograd import gradcheck 7 | 8 | sys.path.append(osp.abspath(osp.join(__file__, '../../'))) 9 | from roi_align import RoIAlign # noqa: E402, isort:skip 10 | 11 | feat_size = 15 12 | spatial_scale = 1.0 / 8 13 | img_size = feat_size / spatial_scale 14 | num_imgs = 2 15 | num_rois = 20 16 | 17 | batch_ind = np.random.randint(num_imgs, size=(num_rois, 1)) 18 | rois = np.random.rand(num_rois, 4) * img_size * 0.5 19 | rois[:, 2:] += img_size * 0.5 20 | rois = np.hstack((batch_ind, rois)) 21 | 22 | feat = torch.randn( 23 | num_imgs, 16, feat_size, feat_size, requires_grad=True, device='cuda:0') 24 | rois = torch.from_numpy(rois).float().cuda() 25 | inputs = (feat, rois) 26 | print('Gradcheck for roi align...') 27 | test = gradcheck(RoIAlign(3, spatial_scale), inputs, atol=1e-3, eps=1e-3) 28 | print(test) 29 | test = gradcheck(RoIAlign(3, spatial_scale, 2), inputs, atol=1e-3, eps=1e-3) 30 | print(test) 31 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/__init__.py: -------------------------------------------------------------------------------- 1 | from .roi_pool import RoIPool, roi_pool 2 | 3 | __all__ = ['roi_pool', 'RoIPool'] 4 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/gradcheck.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | import torch 5 | from torch.autograd import gradcheck 6 | 7 | sys.path.append(osp.abspath(osp.join(__file__, '../../'))) 8 | from roi_pool import RoIPool # noqa: E402, isort:skip 9 | 10 | feat = torch.randn(4, 16, 15, 15, requires_grad=True).cuda() 11 | rois = torch.Tensor([[0, 0, 0, 50, 50], [0, 10, 30, 43, 55], 12 | [1, 67, 40, 110, 120]]).cuda() 13 | inputs = (feat, rois) 14 | print('Gradcheck for roi pooling...') 15 | test = gradcheck(RoIPool(4, 1.0 / 8), inputs, eps=1e-5, atol=1e-3) 16 | print(test) 17 | 
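18 | # A minimal forward-only usage sketch of RoIPool (defined in roi_pool.py below); 19 | # shapes and ROI coordinates here are illustrative. Each roi row is 20 | # (batch_idx, x1, y1, x2, y2) in image coordinates, as in the gradcheck above. 21 | feat = torch.randn(2, 16, 32, 32, device='cuda:0') 22 | rois = torch.Tensor([[0, 4, 4, 120, 120], [1, 10, 10, 80, 90]]).cuda() 23 | pooled = RoIPool(7, 1.0 / 8)(feat, rois) 24 | print('RoIPool forward sketch output shape:', tuple(pooled.shape)) # expected (2, 16, 7, 7) 25 |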
-------------------------------------------------------------------------------- /mmdet/ops/roi_pool/roi_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | from torch.nn.modules.utils import _pair 6 | 7 | from . import roi_pool_cuda 8 | 9 | 10 | class RoIPoolFunction(Function): 11 | 12 | @staticmethod 13 | def forward(ctx, features, rois, out_size, spatial_scale): 14 | assert features.is_cuda 15 | out_h, out_w = _pair(out_size) 16 | assert isinstance(out_h, int) and isinstance(out_w, int) 17 | ctx.save_for_backward(rois) 18 | num_channels = features.size(1) 19 | num_rois = rois.size(0) 20 | out_size = (num_rois, num_channels, out_h, out_w) 21 | output = features.new_zeros(out_size) 22 | argmax = features.new_zeros(out_size, dtype=torch.int) 23 | roi_pool_cuda.forward(features, rois, out_h, out_w, spatial_scale, 24 | output, argmax) 25 | ctx.spatial_scale = spatial_scale 26 | ctx.feature_size = features.size() 27 | ctx.argmax = argmax 28 | 29 | return output 30 | 31 | @staticmethod 32 | @once_differentiable 33 | def backward(ctx, grad_output): 34 | assert grad_output.is_cuda 35 | spatial_scale = ctx.spatial_scale 36 | feature_size = ctx.feature_size 37 | argmax = ctx.argmax 38 | rois = ctx.saved_tensors[0] 39 | assert feature_size is not None 40 | 41 | grad_input = grad_rois = None 42 | if ctx.needs_input_grad[0]: 43 | grad_input = grad_output.new_zeros(feature_size) 44 | roi_pool_cuda.backward(grad_output.contiguous(), rois, argmax, 45 | spatial_scale, grad_input) 46 | 47 | return grad_input, grad_rois, None, None 48 | 49 | 50 | roi_pool = RoIPoolFunction.apply 51 | 52 | 53 | class RoIPool(nn.Module): 54 | 55 | def __init__(self, out_size, spatial_scale, use_torchvision=False): 56 | super(RoIPool, self).__init__() 57 | 58 | self.out_size = _pair(out_size) 59 | self.spatial_scale = float(spatial_scale) 60 | self.use_torchvision = use_torchvision 61 | 62 | def forward(self, features, rois): 63 | if self.use_torchvision: 64 | from torchvision.ops import roi_pool as tv_roi_pool 65 | return tv_roi_pool(features, rois, self.out_size, 66 | self.spatial_scale) 67 | else: 68 | return roi_pool(features, rois, self.out_size, self.spatial_scale) 69 | 70 | def __repr__(self): 71 | format_str = self.__class__.__name__ 72 | format_str += '(out_size={}, spatial_scale={}'.format( 73 | self.out_size, self.spatial_scale) 74 | format_str += ', use_torchvision={})'.format(self.use_torchvision) 75 | return format_str 76 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/src/roi_pool_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/extension.h> 2 | 3 | #include <cmath> 4 | #include <vector> 5 | 6 | int ROIPoolForwardLaucher(const at::Tensor features, const at::Tensor rois, 7 | const float spatial_scale, const int channels, 8 | const int height, const int width, const int num_rois, 9 | const int pooled_h, const int pooled_w, 10 | at::Tensor output, at::Tensor argmax); 11 | 12 | int ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, 13 | const at::Tensor argmax, const float spatial_scale, 14 | const int batch_size, const int channels, 15 | const int height, const int width, 16 | const int num_rois, const int pooled_h, 17 | const int pooled_w, at::Tensor bottom_grad); 18 | 19 | #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x,
" must be a CUDAtensor ") 20 | #define CHECK_CONTIGUOUS(x) \ 21 | TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ") 22 | #define CHECK_INPUT(x) \ 23 | CHECK_CUDA(x); \ 24 | CHECK_CONTIGUOUS(x) 25 | 26 | int roi_pooling_forward_cuda(at::Tensor features, at::Tensor rois, 27 | int pooled_height, int pooled_width, 28 | float spatial_scale, at::Tensor output, 29 | at::Tensor argmax) { 30 | CHECK_INPUT(features); 31 | CHECK_INPUT(rois); 32 | CHECK_INPUT(output); 33 | CHECK_INPUT(argmax); 34 | at::DeviceGuard guard(features.device()); 35 | 36 | // Number of ROIs 37 | int num_rois = rois.size(0); 38 | int size_rois = rois.size(1); 39 | 40 | if (size_rois != 5) { 41 | printf("wrong roi size\n"); 42 | return 0; 43 | } 44 | 45 | int channels = features.size(1); 46 | int height = features.size(2); 47 | int width = features.size(3); 48 | 49 | ROIPoolForwardLaucher(features, rois, spatial_scale, channels, height, width, 50 | num_rois, pooled_height, pooled_width, output, argmax); 51 | 52 | return 1; 53 | } 54 | 55 | int roi_pooling_backward_cuda(at::Tensor top_grad, at::Tensor rois, 56 | at::Tensor argmax, float spatial_scale, 57 | at::Tensor bottom_grad) { 58 | CHECK_INPUT(top_grad); 59 | CHECK_INPUT(rois); 60 | CHECK_INPUT(argmax); 61 | CHECK_INPUT(bottom_grad); 62 | at::DeviceGuard guard(top_grad.device()); 63 | 64 | int pooled_height = top_grad.size(2); 65 | int pooled_width = top_grad.size(3); 66 | int num_rois = rois.size(0); 67 | int size_rois = rois.size(1); 68 | 69 | if (size_rois != 5) { 70 | printf("wrong roi size\n"); 71 | return 0; 72 | } 73 | int batch_size = bottom_grad.size(0); 74 | int channels = bottom_grad.size(1); 75 | int height = bottom_grad.size(2); 76 | int width = bottom_grad.size(3); 77 | 78 | ROIPoolBackwardLaucher(top_grad, rois, argmax, spatial_scale, batch_size, 79 | channels, height, width, num_rois, pooled_height, 80 | pooled_width, bottom_grad); 81 | 82 | return 1; 83 | } 84 | 85 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 86 | m.def("forward", &roi_pooling_forward_cuda, "Roi_Pooling forward (CUDA)"); 87 | m.def("backward", &roi_pooling_backward_cuda, "Roi_Pooling backward (CUDA)"); 88 | } 89 | -------------------------------------------------------------------------------- /mmdet/ops/roi_sampling/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions import roi_sampling, invert_roi_bbx 2 | 3 | 4 | -------------------------------------------------------------------------------- /mmdet/ops/roi_sampling/src/roi_sampling.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | #include "utils/checks.h" 7 | #include "roi_sampling.h" 8 | 9 | std::tuple roi_sampling_forward( 10 | const at::Tensor& x, const at::Tensor& bbx, const at::Tensor& idx, std::tuple out_size, 11 | Interpolation interpolation, PaddingMode padding, bool valid_mask) { 12 | // Check dimensions 13 | TORCH_CHECK(x.ndimension() == 4, "x must be a 4-dimensional tensor"); 14 | TORCH_CHECK(bbx.ndimension() == 2, "bbx must be a 2-dimensional tensor"); 15 | TORCH_CHECK(idx.ndimension() == 1, "idx must be a 1-dimensional tensor"); 16 | TORCH_CHECK(bbx.size(0) == idx.size(0), "idx and bbx must have the same size in the first dimension"); 17 | TORCH_CHECK(bbx.size(1) == 4, "bbx must be N x 4"); 18 | 19 | // Check types 20 | TORCH_CHECK(bbx.scalar_type() == at::ScalarType::Float, "bbx must have type float32"); 21 | TORCH_CHECK(idx.scalar_type() == at::ScalarType::Long, 
"idx must have type long"); 22 | 23 | if (x.is_cuda()) { 24 | CHECK_CUDA(bbx); 25 | CHECK_CUDA(idx); 26 | 27 | return roi_sampling_forward_cuda(x, bbx, idx, out_size, interpolation, padding, valid_mask); 28 | } else { 29 | CHECK_CPU(bbx); 30 | CHECK_CPU(idx); 31 | 32 | return roi_sampling_forward_cpu(x, bbx, idx, out_size, interpolation, padding, valid_mask); 33 | } 34 | } 35 | 36 | at::Tensor roi_sampling_backward( 37 | const at::Tensor& dy, const at::Tensor& bbx, const at::Tensor& idx, std::tuple in_size, 38 | Interpolation interpolation, PaddingMode padding) { 39 | // Check dimensions 40 | TORCH_CHECK(dy.ndimension() == 4, "dy must be a 4-dimensional tensor"); 41 | TORCH_CHECK(bbx.ndimension() == 2, "bbx must be a 2-dimensional tensor"); 42 | TORCH_CHECK(idx.ndimension() == 1, "idx must be a 1-dimensional tensor"); 43 | TORCH_CHECK(bbx.size(0) == idx.size(0), "idx and bbx must have the same size in the first dimension"); 44 | TORCH_CHECK(bbx.size(1) == 4, "bbx must be N x 4"); 45 | 46 | // Check types 47 | TORCH_CHECK(bbx.scalar_type() == at::ScalarType::Float, "bbx must have type float32"); 48 | TORCH_CHECK(idx.scalar_type() == at::ScalarType::Long, "idx must have type long"); 49 | 50 | if (dy.is_cuda()) { 51 | CHECK_CUDA(bbx); 52 | CHECK_CUDA(idx); 53 | 54 | return roi_sampling_backward_cuda(dy, bbx, idx, in_size, interpolation, padding); 55 | } else { 56 | CHECK_CPU(bbx); 57 | CHECK_CPU(idx); 58 | 59 | return roi_sampling_backward_cpu(dy, bbx, idx, in_size, interpolation, padding); 60 | } 61 | } 62 | 63 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 64 | pybind11::enum_(m, "PaddingMode") 65 | .value("Zero", PaddingMode::Zero) 66 | .value("Border", PaddingMode::Border); 67 | 68 | pybind11::enum_(m, "Interpolation") 69 | .value("Bilinear", Interpolation::Bilinear) 70 | .value("Nearest", Interpolation::Nearest); 71 | 72 | m.def("roi_sampling_forward", &roi_sampling_forward, "ROI sampling forward"); 73 | m.def("roi_sampling_backward", &roi_sampling_backward, "ROI sampling backward"); 74 | } 75 | -------------------------------------------------------------------------------- /mmdet/ops/roi_sampling/src/utils/checks.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | // Define AT_CHECK for old version of ATen where the same function was called AT_ASSERT 7 | #ifndef AT_CHECK 8 | #define AT_CHECK AT_ASSERT 9 | #endif 10 | 11 | #define CHECK_CUDA(x) TORCH_CHECK((x).type().is_cuda(), #x " must be a CUDA tensor") 12 | #define CHECK_CPU(x) TORCH_CHECK(!(x).type().is_cuda(), #x " must be a CPU tensor") 13 | #define CHECK_CONTIGUOUS(x) TORCH_CHECK((x).is_contiguous(), #x " must be contiguous") 14 | 15 | #define CHECK_CUDA_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) 16 | #define CHECK_CPU_INPUT(x) CHECK_CPU(x); CHECK_CONTIGUOUS(x) 17 | -------------------------------------------------------------------------------- /mmdet/ops/roi_sampling/src/utils/common.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | /* 7 | * Functions to share code between CPU and GPU 8 | */ 9 | 10 | #ifdef __CUDACC__ 11 | // CUDA versions 12 | 13 | #define HOST_DEVICE __host__ __device__ 14 | #define INLINE_HOST_DEVICE __host__ __device__ inline 15 | #define FLOOR(x) floor(x) 16 | 17 | #if __CUDA_ARCH__ >= 600 18 | // Recent compute capabilities have both grid-level and block-level atomicAdd for all data types, so we use those 19 | #define 
ACCUM_BLOCK(x,y) atomicAdd_block(&(x),(y)) 20 | #define ACCUM(x, y) atomicAdd(&(x),(y)) 21 | #else 22 | // Older architectures don't have block-level atomicAdd, nor atomicAdd for doubles, so we defer to atomicAdd for float 23 | // and use the known atomicCAS-based implementation for double 24 | template<typename data_t> 25 | __device__ inline data_t atomic_add(data_t *address, data_t val) { 26 | return atomicAdd(address, val); 27 | } 28 | 29 | template<> 30 | __device__ inline double atomic_add(double *address, double val) { 31 | unsigned long long int* address_as_ull = (unsigned long long int*)address; 32 | unsigned long long int old = *address_as_ull, assumed; 33 | do { 34 | assumed = old; 35 | old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val + __longlong_as_double(assumed))); 36 | } while (assumed != old); 37 | return __longlong_as_double(old); 38 | } 39 | 40 | #define ACCUM_BLOCK(x,y) atomic_add(&(x),(y)) 41 | #define ACCUM(x,y) atomic_add(&(x),(y)) 42 | #endif // #if __CUDA_ARCH__ >= 600 43 | 44 | #else 45 | // CPU versions 46 | 47 | #define HOST_DEVICE 48 | #define INLINE_HOST_DEVICE inline 49 | #define FLOOR(x) std::floor(x) 50 | #define ACCUM_BLOCK(x,y) (x) += (y) 51 | #define ACCUM(x,y) (x) += (y) 52 | 53 | #endif // #ifdef __CUDACC__ 54 | 55 | /* 56 | * Other utility functions 57 | */ 58 | template<typename T, int dim> 59 | INLINE_HOST_DEVICE void ind2sub(T i, T *sizes, T &i_n) { 60 | static_assert(dim == 1, "dim must be 1"); 61 | i_n = i % sizes[0]; 62 | } 63 | 64 | template<typename T, int dim, typename... Indices> 65 | INLINE_HOST_DEVICE void ind2sub(T i, T *sizes, T &i_n, Indices&...args) { 66 | static_assert(dim == sizeof...(args) + 1, "dim must equal the number of args"); 67 | i_n = i % sizes[dim - 1]; 68 | ind2sub<T, dim - 1>(i / sizes[dim - 1], sizes, args...); 69 | } 70 | 71 | template<typename T> inline T div_up(T x, T y) { 72 | static_assert(std::is_integral<T>::value, "div_up is only defined for integral types"); 73 | return x / y + (x % y > 0); 74 | } -------------------------------------------------------------------------------- /mmdet/ops/roi_sampling/src/utils/cuda.cuh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | /* 4 | * General settings and functions 5 | */ 6 | const int WARP_SIZE = 32; 7 | const int MAX_BLOCK_SIZE = 1024; 8 | 9 | static int getNumThreads(int nElem) { 10 | int threadSizes[6] = {32, 64, 128, 256, 512, MAX_BLOCK_SIZE}; 11 | for (int i = 0; i < 6; ++i) { 12 | if (nElem <= threadSizes[i]) { 13 | return threadSizes[i]; 14 | } 15 | } 16 | return MAX_BLOCK_SIZE; 17 | } -------------------------------------------------------------------------------- /mmdet/ops/scale.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class Scale(nn.Module): 6 | """ 7 | A learnable scale parameter 8 | """ 9 | 10 | def __init__(self, scale=1.0): 11 | super(Scale, self).__init__() 12 | self.scale = nn.Parameter(torch.tensor(scale, dtype=torch.float)) 13 | 14 | def forward(self, x): 15 | return x * self.scale 16 | -------------------------------------------------------------------------------- /mmdet/ops/sigmoid_focal_loss/__init__.py: -------------------------------------------------------------------------------- 1 | from .sigmoid_focal_loss import SigmoidFocalLoss, sigmoid_focal_loss 2 | 3 | __all__ = ['SigmoidFocalLoss', 'sigmoid_focal_loss'] 4 | -------------------------------------------------------------------------------- /mmdet/ops/sigmoid_focal_loss/sigmoid_focal_loss.py:
-------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from torch.autograd import Function 3 | from torch.autograd.function import once_differentiable 4 | 5 | from . import sigmoid_focal_loss_cuda 6 | 7 | 8 | class SigmoidFocalLossFunction(Function): 9 | 10 | @staticmethod 11 | def forward(ctx, input, target, gamma=2.0, alpha=0.25): 12 | ctx.save_for_backward(input, target) 13 | num_classes = input.shape[1] 14 | ctx.num_classes = num_classes 15 | ctx.gamma = gamma 16 | ctx.alpha = alpha 17 | 18 | loss = sigmoid_focal_loss_cuda.forward(input, target, num_classes, 19 | gamma, alpha) 20 | return loss 21 | 22 | @staticmethod 23 | @once_differentiable 24 | def backward(ctx, d_loss): 25 | input, target = ctx.saved_tensors 26 | num_classes = ctx.num_classes 27 | gamma = ctx.gamma 28 | alpha = ctx.alpha 29 | d_loss = d_loss.contiguous() 30 | d_input = sigmoid_focal_loss_cuda.backward(input, target, d_loss, 31 | num_classes, gamma, alpha) 32 | return d_input, None, None, None # one gradient per input of forward() 33 | 34 | 35 | sigmoid_focal_loss = SigmoidFocalLossFunction.apply 36 | 37 | 38 | # TODO: remove this module 39 | class SigmoidFocalLoss(nn.Module): 40 | 41 | def __init__(self, gamma, alpha): 42 | super(SigmoidFocalLoss, self).__init__() 43 | self.gamma = gamma 44 | self.alpha = alpha 45 | 46 | def forward(self, logits, targets): 47 | assert logits.is_cuda 48 | loss = sigmoid_focal_loss(logits, targets, self.gamma, self.alpha) 49 | return loss.sum() 50 | 51 | def __repr__(self): 52 | tmpstr = self.__class__.__name__ + '(gamma={}, alpha={})'.format( 53 | self.gamma, self.alpha) 54 | return tmpstr 55 | -------------------------------------------------------------------------------- /mmdet/ops/sigmoid_focal_loss/src/sigmoid_focal_loss.cpp: -------------------------------------------------------------------------------- 1 | // modify from 2 | // https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/csrc/SigmoidFocalLoss.h 3 | #include <torch/extension.h> 4 | 5 | at::Tensor SigmoidFocalLoss_forward_cuda(const at::Tensor &logits, 6 | const at::Tensor &targets, 7 | const int num_classes, 8 | const float gamma, const float alpha); 9 | 10 | at::Tensor SigmoidFocalLoss_backward_cuda(const at::Tensor &logits, 11 | const at::Tensor &targets, 12 | const at::Tensor &d_losses, 13 | const int num_classes, 14 | const float gamma, const float alpha); 15 | 16 | // Interface for Python 17 | at::Tensor SigmoidFocalLoss_forward(const at::Tensor &logits, 18 | const at::Tensor &targets, 19 | const int num_classes, const float gamma, 20 | const float alpha) { 21 | if (logits.type().is_cuda()) { 22 | at::DeviceGuard guard(logits.device()); 23 | return SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma, 24 | alpha); 25 | } 26 | AT_ERROR("SigmoidFocalLoss is not implemented on the CPU"); 27 | } 28 | 29 | at::Tensor SigmoidFocalLoss_backward(const at::Tensor &logits, 30 | const at::Tensor &targets, 31 | const at::Tensor &d_losses, 32 | const int num_classes, const float gamma, 33 | const float alpha) { 34 | if (logits.type().is_cuda()) { 35 | at::DeviceGuard guard(logits.device()); 36 | return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses, 37 | num_classes, gamma, alpha); 38 | } 39 | AT_ERROR("SigmoidFocalLoss is not implemented on the CPU"); 40 | } 41 | 42 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 43 | m.def("forward", &SigmoidFocalLoss_forward, 44 | "SigmoidFocalLoss forward (CUDA)"); 45 | m.def("backward", &SigmoidFocalLoss_backward, 46 |
"SigmoidFocalLoss backward (CUDA)"); 47 | } 48 | -------------------------------------------------------------------------------- /mmdet/ops/upsample.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | from mmcv.cnn import xavier_init 4 | 5 | from .carafe import CARAFEPack 6 | 7 | 8 | class PixelShufflePack(nn.Module): 9 | """ Pixel Shuffle upsample layer 10 | 11 | Args: 12 | in_channels (int): Number of input channels 13 | out_channels (int): Number of output channels 14 | scale_factor (int): Upsample ratio 15 | upsample_kernel (int): Kernel size of Conv layer to expand the channels 16 | 17 | Returns: 18 | upsampled feature map 19 | """ 20 | 21 | def __init__(self, in_channels, out_channels, scale_factor, 22 | upsample_kernel): 23 | super(PixelShufflePack, self).__init__() 24 | self.in_channels = in_channels 25 | self.out_channels = out_channels 26 | self.scale_factor = scale_factor 27 | self.upsample_kernel = upsample_kernel 28 | self.upsample_conv = nn.Conv2d( 29 | self.in_channels, 30 | self.out_channels * scale_factor * scale_factor, 31 | self.upsample_kernel, 32 | padding=(self.upsample_kernel - 1) // 2) 33 | self.init_weights() 34 | 35 | def init_weights(self): 36 | xavier_init(self.upsample_conv, distribution='uniform') 37 | 38 | def forward(self, x): 39 | x = self.upsample_conv(x) 40 | x = F.pixel_shuffle(x, self.scale_factor) 41 | return x 42 | 43 | 44 | upsample_cfg = { 45 | # layer_abbreviation: module 46 | 'nearest': nn.Upsample, 47 | 'bilinear': nn.Upsample, 48 | 'deconv': nn.ConvTranspose2d, 49 | 'pixel_shuffle': PixelShufflePack, 50 | 'carafe': CARAFEPack 51 | } 52 | 53 | 54 | def build_upsample_layer(cfg): 55 | """ Build upsample layer 56 | 57 | Args: 58 | cfg (dict): cfg should contain: 59 | type (str): Identify upsample layer type. 60 | upsample ratio (int): Upsample ratio 61 | layer args: args needed to instantiate a upsample layer. 62 | 63 | Returns: 64 | layer (nn.Module): Created upsample layer 65 | """ 66 | assert isinstance(cfg, dict) and 'type' in cfg 67 | cfg_ = cfg.copy() 68 | 69 | layer_type = cfg_.pop('type') 70 | if layer_type not in upsample_cfg: 71 | raise KeyError('Unrecognized upsample type {}'.format(layer_type)) 72 | else: 73 | upsample = upsample_cfg[layer_type] 74 | if upsample is None: 75 | raise NotImplementedError 76 | 77 | layer = upsample(**cfg_) 78 | return layer 79 | -------------------------------------------------------------------------------- /mmdet/ops/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # from . 
-------------------------------------------------------------------------------- /mmdet/ops/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # from . import compiling_info 2 | from .compiling_info import get_compiler_version, get_compiling_cuda_version 3 | 4 | # get_compiler_version = compiling_info.get_compiler_version 5 | # get_compiling_cuda_version = compiling_info.get_compiling_cuda_version 6 | 7 | __all__ = ['get_compiler_version', 'get_compiling_cuda_version'] 8 | -------------------------------------------------------------------------------- /mmdet/ops/utils/src/compiling_info.cpp: -------------------------------------------------------------------------------- 1 | // modified from 2 | // https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/vision.cpp 3 | #include <cuda_runtime_api.h> 4 | #include <torch/extension.h> 5 | 6 | #ifdef WITH_CUDA 7 | int get_cudart_version() { return CUDART_VERSION; } 8 | #endif 9 | 10 | std::string get_compiling_cuda_version() { 11 | #ifdef WITH_CUDA 12 | std::ostringstream oss; 13 | 14 | // copied from 15 | // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/cuda/detail/CUDAHooks.cpp#L231 16 | auto printCudaStyleVersion = [&](int v) { 17 | oss << (v / 1000) << "." << (v / 10 % 100); 18 | if (v % 10 != 0) { 19 | oss << "." << (v % 10); 20 | } 21 | }; 22 | printCudaStyleVersion(get_cudart_version()); 23 | return oss.str(); 24 | #else 25 | return std::string("not available"); 26 | #endif 27 | } 28 | 29 | // similar to 30 | // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/Version.cpp 31 | std::string get_compiler_version() { 32 | std::ostringstream ss; 33 | #if defined(__GNUC__) 34 | #ifndef __clang__ 35 | { ss << "GCC " << __GNUC__ << "." << __GNUC_MINOR__; } 36 | #endif 37 | #endif 38 | 39 | #if defined(__clang_major__) 40 | { 41 | ss << "clang " << __clang_major__ << "." << __clang_minor__ << "." 42 | << __clang_patchlevel__; 43 | } 44 | #endif 45 | 46 | #if defined(_MSC_VER) 47 | { ss << "MSVC " << _MSC_FULL_VER; } 48 | #endif 49 | return ss.str(); 50 | } 51 | 52 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 53 | m.def("get_compiler_version", &get_compiler_version, "get_compiler_version"); 54 | m.def("get_compiling_cuda_version", &get_compiling_cuda_version, 55 | "get_compiling_cuda_version"); 56 | } 57 | -------------------------------------------------------------------------------- /mmdet/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .collect_env import collect_env 2 | from .flops_counter import get_model_complexity_info 3 | from .logger import get_root_logger, print_log 4 | from .registry import Registry, build_from_cfg 5 | 6 | __all__ = [ 7 | 'Registry', 'build_from_cfg', 'get_model_complexity_info', 8 | 'get_root_logger', 'print_log', 'collect_env' 9 | ] 10 | -------------------------------------------------------------------------------- /mmdet/utils/collect_env.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import subprocess 3 | import sys 4 | from collections import defaultdict 5 | 6 | import cv2 7 | import mmcv 8 | import torch 9 | import torchvision 10 | 11 | import mmdet 12 | 13 | 14 | def collect_env(): 15 | env_info = {} 16 | env_info['sys.platform'] = sys.platform 17 | env_info['Python'] = sys.version.replace('\n', '') 18 | 19 | cuda_available = torch.cuda.is_available() 20 | env_info['CUDA available'] = cuda_available 21 | 22 | if cuda_available: 23 | from torch.utils.cpp_extension import CUDA_HOME 24 | env_info['CUDA_HOME'] = CUDA_HOME 25 | 26 | if CUDA_HOME is not None and osp.isdir(CUDA_HOME): 27 | try: 28 | nvcc = osp.join(CUDA_HOME,
'bin/nvcc') 29 | nvcc = subprocess.check_output( 30 | '"{}" -V | tail -n1'.format(nvcc), shell=True) 31 | nvcc = nvcc.decode('utf-8').strip() 32 | except subprocess.SubprocessError: 33 | nvcc = 'Not Available' 34 | env_info['NVCC'] = nvcc 35 | 36 | devices = defaultdict(list) 37 | for k in range(torch.cuda.device_count()): 38 | devices[torch.cuda.get_device_name(k)].append(str(k)) 39 | for name, devids in devices.items(): 40 | env_info['GPU ' + ','.join(devids)] = name 41 | 42 | gcc = subprocess.check_output('gcc --version | head -n1', shell=True) 43 | gcc = gcc.decode('utf-8').strip() 44 | env_info['GCC'] = gcc 45 | 46 | env_info['PyTorch'] = torch.__version__ 47 | env_info['PyTorch compiling details'] = torch.__config__.show() 48 | 49 | env_info['TorchVision'] = torchvision.__version__ 50 | 51 | env_info['OpenCV'] = cv2.__version__ 52 | 53 | env_info['MMCV'] = mmcv.__version__ 54 | env_info['MMDetection'] = mmdet.__version__ 55 | from mmdet.ops import get_compiler_version, get_compiling_cuda_version 56 | env_info['MMDetection Compiler'] = get_compiler_version() 57 | env_info['MMDetection CUDA Compiler'] = get_compiling_cuda_version() 58 | return env_info 59 | 60 | 61 | if __name__ == '__main__': 62 | for name, val in collect_env().items(): 63 | print('{}: {}'.format(name, val)) 64 | -------------------------------------------------------------------------------- /mmdet/utils/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from mmcv.runner import get_dist_info 4 | 5 | 6 | def get_root_logger(log_file=None, log_level=logging.INFO): 7 | """Get the root logger. 8 | 9 | The logger will be initialized if it has not been initialized. By default a 10 | StreamHandler will be added. If `log_file` is specified, a FileHandler will 11 | also be added. The name of the root logger is the top-level package name, 12 | e.g., "mmdet". 13 | 14 | Args: 15 | log_file (str | None): The log filename. If specified, a FileHandler 16 | will be added to the root logger. 17 | log_level (int): The root logger level. Note that only the process of 18 | rank 0 is affected, while other processes will set the level to 19 | "Error" and be silent most of the time. 20 | 21 | Returns: 22 | logging.Logger: The root logger. 23 | """ 24 | logger = logging.getLogger(__name__.split('.')[0]) # i.e., mmdet 25 | # if the logger has been initialized, just return it 26 | if logger.hasHandlers(): 27 | return logger 28 | 29 | format_str = '%(asctime)s - %(name)s - %(levelname)s - %(message)s' 30 | logging.basicConfig(format=format_str, level=log_level) 31 | rank, _ = get_dist_info() 32 | if rank != 0: 33 | logger.setLevel('ERROR') 34 | elif log_file is not None: 35 | file_handler = logging.FileHandler(log_file, 'w') 36 | file_handler.setFormatter(logging.Formatter(format_str)) 37 | file_handler.setLevel(log_level) 38 | logger.addHandler(file_handler) 39 | 40 | return logger 41 | 42 | 43 | def print_log(msg, logger=None, level=logging.INFO): 44 | """Print a log message. 45 | 46 | Args: 47 | msg (str): The message to be logged. 48 | logger (logging.Logger | str | None): The logger to be used. Some 49 | special loggers are: 50 | - "root": the root logger obtained with `get_root_logger()`. 51 | - "silent": no message will be printed. 52 | - None: The `print()` method will be used to print log messages. 53 | level (int): Logging level. Only available when `logger` is a Logger 54 | object or "root". 
55 | """ 56 | if logger is None: 57 | print(msg) 58 | elif logger == 'root': 59 | _logger = get_root_logger() 60 | _logger.log(level, msg) 61 | elif isinstance(logger, logging.Logger): 62 | logger.log(level, msg) 63 | elif logger != 'silent': 64 | raise TypeError( 65 | 'logger should be either a logging.Logger object, "root", ' 66 | '"silent" or None, but got {}'.format(logger)) 67 |
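A usage sketch for the two logging helpers above (illustrative only; the log-file path is hypothetical and its directory must exist):

import logging
from mmdet.utils import get_root_logger, print_log

logger = get_root_logger(log_file='work_dir/run.log', log_level=logging.INFO)
logger.info('training started')           # stream + file handler on rank 0
print_log('plain message')                # logger=None falls back to print()
print_log('via root logger', logger='root')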
-------------------------------------------------------------------------------- /mmdet/utils/profiling.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import sys 3 | import time 4 | 5 | import torch 6 | 7 | if sys.version_info >= (3, 7): 8 | 9 | @contextlib.contextmanager 10 | def profile_time(trace_name, 11 | name, 12 | enabled=True, 13 | stream=None, 14 | end_stream=None): 15 | """Print time spent by CPU and GPU. 16 | 17 | Useful as a temporary context manager to find sweet spots of 18 | code suitable for async implementation. 19 | 20 | """ 21 | if (not enabled) or not torch.cuda.is_available(): 22 | yield 23 | return 24 | stream = stream if stream else torch.cuda.current_stream() 25 | end_stream = end_stream if end_stream else stream 26 | start = torch.cuda.Event(enable_timing=True) 27 | end = torch.cuda.Event(enable_timing=True) 28 | stream.record_event(start) 29 | try: 30 | cpu_start = time.monotonic() 31 | yield 32 | finally: 33 | cpu_end = time.monotonic() 34 | end_stream.record_event(end) 35 | end.synchronize() 36 | cpu_time = (cpu_end - cpu_start) * 1000 37 | gpu_time = start.elapsed_time(end) 38 | msg = '{} {} cpu_time {:.2f} ms '.format(trace_name, name, 39 | cpu_time) 40 | msg += 'gpu_time {:.2f} ms stream {}'.format(gpu_time, stream) 41 | print(msg, end_stream) 42 | -------------------------------------------------------------------------------- /mmdet/utils/registry.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | from functools import partial 3 | 4 | import mmcv 5 | 6 | 7 | class Registry(object): 8 | 9 | def __init__(self, name): 10 | self._name = name 11 | self._module_dict = dict() 12 | 13 | def __repr__(self): 14 | format_str = self.__class__.__name__ + '(name={}, items={})'.format( 15 | self._name, list(self._module_dict.keys())) 16 | return format_str 17 | 18 | @property 19 | def name(self): 20 | return self._name 21 | 22 | @property 23 | def module_dict(self): 24 | return self._module_dict 25 | 26 | def get(self, key): 27 | return self._module_dict.get(key, None) 28 | 29 | def _register_module(self, module_class, force=False): 30 | """Register a module class. 31 | 32 | Args: 33 | module_class (type): Class to be registered. 34 | """ 35 | if not inspect.isclass(module_class): 36 | raise TypeError('module must be a class, but got {}'.format( 37 | type(module_class))) 38 | module_name = module_class.__name__ 39 | if not force and module_name in self._module_dict: 40 | raise KeyError('{} is already registered in {}'.format( 41 | module_name, self.name)) 42 | self._module_dict[module_name] = module_class 43 | 44 | def register_module(self, cls=None, force=False): 45 | if cls is None: 46 | return partial(self.register_module, force=force) 47 | self._register_module(cls, force=force) 48 | return cls 49 | 50 | 51 | def build_from_cfg(cfg, registry, default_args=None): 52 | """Build a module from config dict. 53 | 54 | Args: 55 | cfg (dict): Config dict. It should at least contain the key "type". 56 | registry (:obj:`Registry`): The registry to search the type from. 57 | default_args (dict, optional): Default initialization arguments. 58 | 59 | Returns: 60 | obj: The constructed object. 61 | """ 62 | assert isinstance(cfg, dict) and 'type' in cfg 63 | assert isinstance(default_args, dict) or default_args is None 64 | args = cfg.copy() 65 | obj_type = args.pop('type') 66 | if mmcv.is_str(obj_type): 67 | obj_cls = registry.get(obj_type) 68 | if obj_cls is None: 69 | raise KeyError('{} is not in the {} registry'.format( 70 | obj_type, registry.name)) 71 | elif inspect.isclass(obj_type): 72 | obj_cls = obj_type 73 | else: 74 | raise TypeError('type must be a str or valid type, but got {}'.format( 75 | type(obj_type))) 76 | if default_args is not None: 77 | for name, value in default_args.items(): 78 | args.setdefault(name, value) 79 | return obj_cls(**args) 80 |
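How the Registry/build_from_cfg pair above is typically used (a compact sketch; the MODELS registry and TinyHead class are hypothetical stand-ins for the registries and modules defined elsewhere in this repo):

from mmdet.utils import Registry, build_from_cfg

MODELS = Registry('models')  # hypothetical registry, for illustration

@MODELS.register_module
class TinyHead(object):
    def __init__(self, num_classes, in_channels=256):
        self.num_classes = num_classes
        self.in_channels = in_channels

# 'type' selects the registered class; remaining keys become kwargs,
# and default_args fills in anything the cfg omits.
head = build_from_cfg(dict(type='TinyHead', num_classes=8), MODELS,
                      default_args=dict(in_channels=128))
assert head.num_classes == 8 and head.in_channels == 128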
-------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | addopts = --xdoctest --xdoctest-style=auto 3 | norecursedirs = .git ignore build __pycache__ data docker docs .eggs 4 | 5 | filterwarnings= default 6 | ignore:.*No cfgstr given in Cacher constructor or call.*:Warning 7 | ignore:.*Define the __nice__ method for.*:Warning 8 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pycocotools 2 | #git+git://github.com/waspinator/pycococreator.git@0.2.0 3 | git+https://github.com/mapillary/inplace_abn.git 4 | -------------------------------------------------------------------------------- /tests/async_benchmark.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | import shutil 4 | import urllib.request 5 | 6 | import mmcv 7 | import torch 8 | 9 | from mmdet.apis import (async_inference_detector, inference_detector, 10 | init_detector, show_result) 11 | from mmdet.utils.contextmanagers import concurrent 12 | from mmdet.utils.profiling import profile_time 13 | 14 | 15 | async def main(): 16 | """ 17 | 18 | Benchmark between async and synchronous inference interfaces. 19 | 20 | Sample runs for 20 demo images on K80 GPU, model - mask_rcnn_r50_fpn_1x: 21 | 22 | async sync 23 | 24 | 7981.79 ms 9660.82 ms 25 | 8074.52 ms 9660.94 ms 26 | 7976.44 ms 9406.83 ms 27 | 28 | Async variant takes about 0.83-0.85 of the time of the synchronous 29 | interface. 30 | 31 | """ 32 | project_dir = os.path.abspath(os.path.dirname(os.path.dirname(__file__))) 33 | 34 | config_file = os.path.join(project_dir, 'configs/mask_rcnn_r50_fpn_1x.py') 35 | checkpoint_file = os.path.join( 36 | project_dir, 'checkpoints/mask_rcnn_r50_fpn_1x_20181010-069fa190.pth') 37 | 38 | if not os.path.exists(checkpoint_file): 39 | url = ('https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection' 40 | '/models/mask_rcnn_r50_fpn_1x_20181010-069fa190.pth') 41 | print('Downloading {} ...'.format(url)) 42 | local_filename, _ = urllib.request.urlretrieve(url) 43 | os.makedirs(os.path.dirname(checkpoint_file), exist_ok=True) 44 | shutil.move(local_filename, checkpoint_file) 45 | print('Saved as {}'.format(checkpoint_file)) 46 | else: 47 | print('Using existing checkpoint {}'.format(checkpoint_file)) 48 | 49 | device = 'cuda:0' 50 | model = init_detector( 51 | config_file, checkpoint=checkpoint_file, device=device) 52 | 53 | # queue is used for concurrent inference of multiple images 54 | streamqueue = asyncio.Queue() 55 | # queue size defines concurrency level 56 | streamqueue_size = 4 57 | 58 | for _ in range(streamqueue_size): 59 | streamqueue.put_nowait(torch.cuda.Stream(device=device)) 60 | 61 | # test a single image and show the results 62 | img = mmcv.imread(os.path.join(project_dir, 'demo/demo.jpg')) 63 | 64 | # warmup 65 | await async_inference_detector(model, img) 66 | 67 | async def detect(img): 68 | async with concurrent(streamqueue): 69 | return await async_inference_detector(model, img) 70 | 71 | num_of_images = 20 72 | with profile_time('benchmark', 'async'): 73 | tasks = [ 74 | asyncio.create_task(detect(img)) for _ in range(num_of_images) 75 | ] 76 | async_results = await asyncio.gather(*tasks) 77 | 78 | with torch.cuda.stream(torch.cuda.default_stream()): 79 | with profile_time('benchmark', 'sync'): 80 | sync_results = [ 81 | inference_detector(model, img) for _ in range(num_of_images) 82 | ] 83 | 84 | result_dir = os.path.join(project_dir, 'demo') 85 | show_result( 86 | img, 87 | async_results[0], 88 | model.CLASSES, 89 | score_thr=0.5, 90 | show=False, 91 | out_file=os.path.join(result_dir, 'result_async.jpg')) 92 | show_result( 93 | img, 94 | sync_results[0], 95 | model.CLASSES, 96 | score_thr=0.5, 97 | show=False, 98 | out_file=os.path.join(result_dir, 'result_sync.jpg')) 99 | 100 | 101 | if __name__ == '__main__': 102 | asyncio.run(main()) 103 |
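profile_time (from mmdet/utils/profiling.py, used in the benchmark above) also works as a standalone timing probe; a minimal sketch assuming a CUDA device and Python >= 3.7:

import torch
from mmdet.utils.profiling import profile_time

with profile_time('demo', 'matmul'):
    a = torch.randn(1024, 1024, device='cuda')
    b = a @ a  # timed on CPU (wall clock) and GPU (CUDA events) simultaneously
torch.cuda.synchronize()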
-------------------------------------------------------------------------------- /tests/test_async.py: -------------------------------------------------------------------------------- 1 | """Tests for async interface.""" 2 | 3 | import asyncio 4 | import os 5 | import sys 6 | 7 | import asynctest 8 | import mmcv 9 | import torch 10 | 11 | from mmdet.apis import async_inference_detector, init_detector 12 | 13 | if sys.version_info >= (3, 7): 14 | from mmdet.utils.contextmanagers import concurrent 15 | 16 | 17 | class AsyncTestCase(asynctest.TestCase): 18 | use_default_loop = False 19 | forbid_get_event_loop = True 20 | 21 | TEST_TIMEOUT = int(os.getenv('ASYNCIO_TEST_TIMEOUT', '30')) 22 | 23 | def _run_test_method(self, method): 24 | result = method() 25 | if asyncio.iscoroutine(result): 26 | self.loop.run_until_complete( 27 | asyncio.wait_for(result, timeout=self.TEST_TIMEOUT)) 28 | 29 | 30 | class MaskRCNNDetector: 31 | 32 | def __init__(self, 33 | model_config, 34 | checkpoint=None, 35 | streamqueue_size=3, 36 | device='cuda:0'): 37 | 38 | self.streamqueue_size = streamqueue_size 39 | self.device = device 40 | # build the model and load the checkpoint, if one is given 41 | self.model = init_detector( 42 | model_config, checkpoint=checkpoint, device=self.device) 43 | self.streamqueue = None 44 | 45 | async def init(self): 46 | self.streamqueue = asyncio.Queue() 47 | for _ in range(self.streamqueue_size): 48 | stream = torch.cuda.Stream(device=self.device) 49 | self.streamqueue.put_nowait(stream) 50 | 51 | if sys.version_info >= (3, 7): 52 | 53 | async def apredict(self, img): 54 | if isinstance(img, str): 55 | img = mmcv.imread(img) 56 | async with concurrent(self.streamqueue): 57 | result = await async_inference_detector(self.model, img) 58 | return result 59 | 60 | 61 | class AsyncInferenceTestCase(AsyncTestCase): 62 | 63 | if sys.version_info >= (3, 7): 64 | 65 | async def test_simple_inference(self): 66 | if not torch.cuda.is_available(): 67 | import pytest 68 | 69 | pytest.skip('test requires GPU and torch+cuda') 70 | 71 | root_dir = os.path.dirname(os.path.dirname(__file__)) 72 | model_config = os.path.join(root_dir, 73 | 'configs/mask_rcnn_r50_fpn_1x.py') 74 | detector = MaskRCNNDetector(model_config) 75 | await detector.init() 76 | img_path = os.path.join(root_dir, 'demo/demo.jpg') 77 | bboxes, _ = await detector.apredict(img_path) 78 | self.assertTrue(bboxes) 79 | -------------------------------------------------------------------------------- /tests/test_nms.py: -------------------------------------------------------------------------------- 1 | """ 2 | CommandLine: 3 | pytest tests/test_nms.py 4 | """ 5 | import numpy as np 6 | import torch 7 | 8 | from mmdet.ops.nms.nms_wrapper import nms 9 | 10 | 11 | def test_nms_device_and_dtypes_cpu(): 12 | """ 13 | CommandLine: 14 | xdoctest -m tests/test_nms.py test_nms_device_and_dtypes_cpu 15 | """ 16 | iou_thr = 0.7 17 | base_dets = np.array([[49.1, 32.4, 51.0, 35.9, 0.9], 18 | [49.3, 32.9, 51.0, 35.3, 0.9], 19 | [35.3, 11.5, 39.9, 14.5, 0.4], 20 | [35.2, 11.7, 39.7, 15.7, 0.3]]) 21 | 22 | # CPU can handle float32 and float64 23 | dets = base_dets.astype(np.float32) 24 | suppressed, inds = nms(dets, iou_thr) 25 | assert dets.dtype == suppressed.dtype 26 | assert len(inds) == len(suppressed) == 3 27 | 28 | dets = torch.FloatTensor(base_dets) 29 | suppressed, inds = nms(dets, iou_thr) 30 | assert dets.dtype == suppressed.dtype 31 | assert len(inds) == len(suppressed) == 3 32 | 33 | dets = base_dets.astype(np.float64) 34 | suppressed, inds = nms(dets, iou_thr) 35 | assert dets.dtype == suppressed.dtype 36 | assert len(inds) == len(suppressed) == 3 37 | 38 | dets = torch.DoubleTensor(base_dets) 39 | suppressed, inds = nms(dets, iou_thr) 40 | assert dets.dtype == suppressed.dtype 41 | assert len(inds) == len(suppressed) == 3 42 | 43 | 44 | def test_nms_device_and_dtypes_gpu(): 45 | """ 46 | CommandLine: 47 | xdoctest -m tests/test_nms.py test_nms_device_and_dtypes_gpu 48 | """ 49 | if not torch.cuda.is_available(): 50 | import pytest 51 | pytest.skip('test requires GPU and torch+cuda') 52 | 53 | iou_thr = 0.7 54 | base_dets = np.array([[49.1, 32.4, 51.0, 35.9, 0.9], 55 | [49.3, 32.9, 51.0, 35.3, 0.9], 56 | [35.3, 11.5, 39.9, 14.5, 0.4], 57 | [35.2, 11.7, 39.7, 15.7, 0.3]]) 58 | 59 | for device_id in range(torch.cuda.device_count()): 60 | print('Run NMS on device_id = {!r}'.format(device_id)) 61 | # GPU can handle float32 but not float64 62 | dets = base_dets.astype(np.float32) 63 | suppressed, inds = nms(dets, iou_thr, device_id) 64 | assert dets.dtype == suppressed.dtype 65 | assert len(inds) == len(suppressed) == 3 66 | 67 | dets = torch.FloatTensor(base_dets).to(device_id) 68 | suppressed, inds = nms(dets, iou_thr) 69 | assert dets.dtype == suppressed.dtype 70 | assert len(inds) == len(suppressed) == 3 71 | 72 |
-------------------------------------------------------------------------------- /tests/test_roi_sampling.py: -------------------------------------------------------------------------------- 1 | """ 2 | CommandLine: 3 | pytest tests/test_roi_sampling.py 4 | """ 5 | import numpy as np 6 | import torch 7 | 8 | from mmdet.ops.roi_sampling import roi_sampling, invert_roi_bbx 9 | 10 | 11 | def test_roi_sampling_device_and_dtypes_gpu(): 12 | """ 13 | CommandLine: 14 | xdoctest -m tests/test_roi_sampling.py test_roi_sampling_device_and_dtypes_gpu 15 | """ 16 | if not torch.cuda.is_available(): 17 | import pytest 18 | pytest.skip('test requires GPU and torch+cuda') 19 | 20 | bbx_dets = np.array([[ 366.8981, 503.8129, 483.4658, 558.4958], 21 | [ 314.6978, 1728.0537, 503.4887, 1786.7266], 22 | [ 312.2163, 1783.3762, 501.1314, 1858.0264], 23 | [ 357.2108, 1666.8306, 427.9639, 1692.9177], 24 | [ 352.9979, 1686.4675, 426.5080, 1708.1284], 25 | [ 369.2542, 1040.2343, 506.5618, 1213.3898], 26 | [ 344.8744, 1337.1475, 527.9548, 1576.3521], 27 | [ 379.8827, 956.4868, 471.0074, 1065.2467], 28 | [ 396.4836, 774.0189, 461.9510, 909.5333], 29 | [ 362.6898, 1572.2887, 426.6845, 1652.2772], 30 | [ 407.2052, 566.6193, 457.3239, 651.4299], 31 | [ 380.3073, 1558.8389, 402.9362, 1577.3118], 32 | [ 380.5341, 1560.0563, 411.1904, 1579.3331], 33 | [ 409.9388, 565.4561, 441.7789, 650.7983]]) 34 | 35 | mask_preds = np.random.rand(bbx_dets.shape[0], 28, 28) 36 | 37 | 38 | for device_id in range(torch.cuda.device_count()): 39 | print('Run roi_sampling on device_id = {!r}'.format(device_id)) 40 | # GPU can handle float32 but not float64 41 | dets = torch.FloatTensor(bbx_dets.astype(np.float32)).to(device_id) 42 | bbx_inv = invert_roi_bbx(dets, (28,28), (1024, 2048)) 43 | bbx_idx = torch.arange(0, dets.size(0), 44 | dtype=torch.long, device=dets.device) 45 | preds = torch.FloatTensor(mask_preds.astype(np.float32)).to(device_id) 46 | preds = roi_sampling(preds.unsqueeze(1), bbx_inv, bbx_idx, (1024, 2048), 47 | padding="zero") 48 | assert preds.shape[0] == dets.shape[0] 49 | assert preds.shape[2] == 1024 50 | assert preds.shape[3] == 2048 51 | 52 |
-------------------------------------------------------------------------------- /tests/test_soft_nms.py: -------------------------------------------------------------------------------- 1 | """ 2 | CommandLine: 3 | pytest tests/test_soft_nms.py 4 | """ 5 | import numpy as np 6 | import torch 7 | 8 | from mmdet.ops.nms.nms_wrapper import soft_nms 9 | 10 | 11 | def test_soft_nms_device_and_dtypes_cpu(): 12 | """ 13 | CommandLine: 14 | xdoctest -m tests/test_soft_nms.py test_soft_nms_device_and_dtypes_cpu 15 | """ 16 | iou_thr = 0.7 17 | base_dets = np.array([[49.1, 32.4, 51.0, 35.9, 0.9], 18 | [49.3, 32.9, 51.0, 35.3, 0.9], 19 | [35.3, 11.5, 39.9, 14.5, 0.4], 20 | [35.2, 11.7, 39.7, 15.7, 0.3]]) 21 | 22 | # CPU can handle float32 and float64 23 | dets = base_dets.astype(np.float32) 24 | new_dets, inds = soft_nms(dets, iou_thr) 25 | assert dets.dtype == new_dets.dtype 26 | assert len(inds) == len(new_dets) == 4 27 | 28 | dets = torch.FloatTensor(base_dets) 29 | new_dets, inds = soft_nms(dets, iou_thr) 30 | assert dets.dtype == new_dets.dtype 31 | assert len(inds) == len(new_dets) == 4 32 | 33 | dets = base_dets.astype(np.float64) 34 | new_dets, inds = soft_nms(dets, iou_thr) 35 | assert dets.dtype == new_dets.dtype 36 | assert len(inds) == len(new_dets) == 4 37 | 38 | dets = torch.DoubleTensor(base_dets) 39 | new_dets, inds = soft_nms(dets, iou_thr) 40 | assert dets.dtype == new_dets.dtype 41 | assert len(inds) == len(new_dets) == 4 42 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | import numpy.testing as npt 2 | 3 | from mmdet.utils.flops_counter import params_to_string 4 | 5 | 6 | def test_params_to_string(): 7 | npt.assert_equal(params_to_string(1e9), '1000.0 M') 8 | npt.assert_equal(params_to_string(2e5), '200.0 k') 9 | npt.assert_equal(params_to_string(3e-9), '3e-09') 10 | -------------------------------------------------------------------------------- /tools/cityscapes_demo.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import cv2 5 | import mmcv 6 | import torch 7 | import numpy as np 8 | import json 9 | 10 | from mmcv.parallel import MMDataParallel, MMDistributedDataParallel 11 | from mmcv.runner import get_dist_info, init_dist, load_checkpoint 12 | from tools.fuse_conv_bn import fuse_module 13 | 14 | from mmdet.apis import multi_gpu_test, single_gpu_test 15 | from mmdet.core import wrap_fp16_model 16 | from mmdet.datasets import build_dataloader, build_dataset 17 | from mmdet.datasets.cityscapes import PALETTE 18 | from mmdet.models import build_detector 19 | from mmdet.apis import init_detector, inference_detector, show_result 20 | from mmdet.core import cityscapes_originalIds 21 | 22 | from PIL import Image 23 | from skimage.morphology import dilation 24 | from skimage.segmentation import find_boundaries 25 | 26 | 27 | 28 | def parse_args(): 29 | parser = argparse.ArgumentParser(description='EfficientPS Cityscapes image demo') 30 | parser.add_argument('config', help='test config file path') 31 | parser.add_argument('checkpoint', help='checkpoint file') 32 | parser.add_argument('input', help='input folder') 33 | parser.add_argument('out', help='output folder') 34 | parser.add_argument( 35 | '--device', type=str, default='cuda:0', help='CPU/CUDA device option') 36 | parser.add_argument( 37 | '--show', action='store_true', help='display option') 38 | parser.add_argument( 39 | '--wait', type=int, default=0, help='cv2 wait time') 40 | args = parser.parse_args() 41 | return args 42 | 43 | 44 | def main(): 45 | args = parse_args() 46 | 47 | device = torch.device(args.device) 48 | os.makedirs(args.out, exist_ok=True) 49 | cfg = mmcv.Config.fromfile(args.config) 50 | 51 | model = init_detector(args.config, args.checkpoint, device=device) 52 | 53 | PALETTE.append([0,0,0]) 54 | colors = np.array(PALETTE, dtype=np.uint8) 55 | 56 | for img_file in os.listdir(args.input): 57 | img = cv2.imread(os.path.join(args.input, img_file)) 58 | img_shape = img.shape[:2][::-1] 59 | img_ = cv2.resize(img, cfg.test_pipeline[1]['img_scale']) 60 | 61 | result = inference_detector(model, img_, eval='panoptic') 62 | pan_pred, cat_pred, _ = result[0] 63 | 64 | sem = cat_pred[pan_pred].numpy() 65 | sem_tmp = sem.copy() 66 | sem_tmp[sem==255] = colors.shape[0] - 1 67 | sem_img = Image.fromarray(colors[sem_tmp]) 68 | 69 | is_background = (sem < 11) | (sem == 255) 70 | pan_pred = pan_pred.numpy() 71 | pan_pred[is_background] = 0 72 | 73 | contours = find_boundaries(pan_pred, mode="outer", background=0).astype(np.uint8) * 255 74 | contours = dilation(contours) 75 | 76 | contours = np.expand_dims(contours, -1).repeat(4, -1) 77 | contours_img = Image.fromarray(contours, mode="RGBA") 78 | 79 | out = Image.blend(Image.fromarray(img_[:,:,::-1]), sem_img, 0.5).convert(mode="RGBA") 80 | out = Image.alpha_composite(out, contours_img).convert(mode="RGB") 81 | out = cv2.resize(np.array(out)[:,:,::-1], img_shape) 82 | 83 | if args.show: 84 | cv2.imshow('img', img) 85 | cv2.imshow('panoptic', out) 86 | ch = cv2.waitKey(args.wait) 87 | 88 | cv2.imwrite(os.path.join(args.out, img_file), out) 89 | 90 | if __name__ == '__main__': 91 | main() 92 |
-------------------------------------------------------------------------------- /tools/cityscapes_save_predictions.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import mmcv 5 | import torch 6 | import numpy as np 7 | import json 8 | 9 | from mmcv.parallel import MMDataParallel, MMDistributedDataParallel 10 | from mmcv.runner import get_dist_info, init_dist, load_checkpoint 11 | from tools.fuse_conv_bn import fuse_module 12 | 13 | from mmdet.apis import multi_gpu_test, single_gpu_test 14 | from mmdet.core import wrap_fp16_model 15 | from mmdet.datasets import build_dataloader, build_dataset 16 | from mmdet.datasets.cityscapes import PALETTE 17 | from mmdet.models import build_detector 18 | from mmdet.apis import init_detector, inference_detector, show_result 19 | from mmdet.core import cityscapes_originalIds 20 | 21 | from PIL import Image 22 | from skimage.morphology import dilation 23 | from skimage.segmentation import find_boundaries 24 | 25 | def parse_args(): 26 | parser = argparse.ArgumentParser( 27 | description='Save EfficientPS panoptic prediction visualizations') 28 | parser.add_argument('config', help='test config file path') 29 | parser.add_argument('checkpoint', help='checkpoint file') 30 | parser.add_argument('input', help='input folder') 31 | parser.add_argument('out', help='output folder') 32 | parser.add_argument('--local_rank', type=int, default=0) 33 | args = parser.parse_args() 34 | if 'LOCAL_RANK' not in os.environ: 35 | os.environ['LOCAL_RANK'] = str(args.local_rank) 36 | return args 37 | 38 | 39 | def main(): 40 | args = parse_args() 41 | 42 | model = init_detector(args.config, args.checkpoint, device='cuda:0') 43 | 44 | images = [] 45 | annotations = [] 46 | if not os.path.exists(args.out): 47 | os.mkdir(args.out) 48 | 49 | PALETTE.append([0,0,0]) 50 | colors = np.array(PALETTE, dtype=np.uint8) 51 | 52 | for city in os.listdir(args.input): 53 | path = os.path.join(args.input, city) 54 | out_dir = os.path.join(args.out, city) 55 | if not os.path.exists(out_dir): 56 | os.mkdir(out_dir) 57 | 58 | prog_bar = mmcv.ProgressBar(len(os.listdir(path))) 59 | for imgName in os.listdir(path): 60 | result = inference_detector(model, os.path.join(path, imgName), eval='panoptic') 61 | pan_pred, cat_pred, _ = result[0] 62 | 63 | imageId = imgName.replace("_leftImg8bit.png", "") 64 | inputFileName = imgName 65 | outputFileName = imgName.replace("_leftImg8bit.png", "_panoptic.png") 66 | 67 | img = Image.open(os.path.join(path, imgName)) 68 | out_path = os.path.join(out_dir, outputFileName) 69 | 70 | sem = cat_pred[pan_pred].numpy() 71 | sem_tmp = sem.copy() 72 | sem_tmp[sem==255] = colors.shape[0] - 1 73 | sem_img = Image.fromarray(colors[sem_tmp]) 74 | 75 | is_background = (sem < 11) | (sem == 255) 76 | pan_pred = pan_pred.numpy() 77 | pan_pred[is_background] = 0 78 | 79 | contours = find_boundaries(pan_pred, mode="outer", background=0).astype(np.uint8) * 255 80 | contours = dilation(contours) 81 | 82 | contours = np.expand_dims(contours, -1).repeat(4, -1) 83 | contours_img = Image.fromarray(contours, mode="RGBA") 84 | 85 | out = Image.blend(img, sem_img, 0.5).convert(mode="RGBA") 86 | out = Image.alpha_composite(out, contours_img) 87 | out.convert(mode="RGB").save(out_path) 88 | 89 | prog_bar.update() 90 | 91 | if __name__ == '__main__': 92 | main() 93 |
-------------------------------------------------------------------------------- /tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | PYTHON=${PYTHON:-"python"} 4 | 5 | CONFIG=$1 6 | CHECKPOINT=$2 7 | GPUS=$3 8 | PORT=${PORT:-29500} 9 | 10 | $PYTHON -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 11 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} 12 | -------------------------------------------------------------------------------- /tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | PYTHON=${PYTHON:-"python"} 4 | 5 | CONFIG=$1 6 | GPUS=$2 7 | PORT=${PORT:-29500} 8 | 9 | $PYTHON -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 10 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} 11 | -------------------------------------------------------------------------------- /tools/fuse_conv_bn.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import torch 4 | import torch.nn as nn 5 | from mmcv.runner import save_checkpoint 6 | 7 | from mmdet.apis import init_detector 8 | 9 | 10 | def fuse_conv_bn(conv, bn): 11 | """ During inference, batch norm layers are effectively frozen and 12 | only the per-channel running mean and variance are used, which exposes the 13 | chance to fuse them into the preceding conv layers to save computation and 14 | simplify the network structure. 15 | """ 16 | conv_w = conv.weight 17 | conv_b = conv.bias if conv.bias is not None else torch.zeros_like( 18 | bn.running_mean) 19 | 20 | factor = bn.weight / torch.sqrt(bn.running_var + bn.eps) 21 | conv.weight = nn.Parameter(conv_w * 22 | factor.reshape([conv.out_channels, 1, 1, 1])) 23 | conv.bias = nn.Parameter((conv_b - bn.running_mean) * factor + bn.bias) 24 | return conv 25 | 26 | 27 | def fuse_module(m): 28 | last_conv = None 29 | last_conv_name = None 30 | 31 | for name, child in m.named_children(): 32 | if isinstance(child, (nn.BatchNorm2d, nn.SyncBatchNorm)): 33 | if last_conv is None: # only fuse BN that is after Conv 34 | continue 35 | fused_conv = fuse_conv_bn(last_conv, child) 36 | m._modules[last_conv_name] = fused_conv 37 | # To reduce changes, set BN as Identity instead of deleting it. 38 | m._modules[name] = nn.Identity() 39 | last_conv = None 40 | elif isinstance(child, nn.Conv2d): 41 | last_conv = child 42 | last_conv_name = name 43 | else: 44 | fuse_module(child) 45 | return m 46 | 47 | 48 | def parse_args(): 49 | parser = argparse.ArgumentParser( 50 | description='fuse Conv and BN layers in a model') 51 | parser.add_argument('config', help='config file path') 52 | parser.add_argument('checkpoint', help='checkpoint file path') 53 | parser.add_argument('out', help='output path of the converted model') 54 | args = parser.parse_args() 55 | return args 56 | 57 | 58 | def main(): 59 | args = parse_args() 60 | # build the model from a config file and a checkpoint file 61 | model = init_detector(args.config, args.checkpoint) 62 | # fuse conv and bn layers of the model 63 | fused_model = fuse_module(model) 64 | save_checkpoint(fused_model, args.out) 65 | 66 | 67 | if __name__ == '__main__': 68 | main() 69 |
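The fusion above folds the frozen BN into the conv per output channel: W' = W * gamma / sqrt(var + eps) and b' = (b - mean) * gamma / sqrt(var + eps) + beta. A quick equivalence check (a minimal sketch; it assumes the repo root is on PYTHONPATH so that tools.fuse_conv_bn is importable, as the demo scripts already do):

import torch
import torch.nn as nn

from tools.fuse_conv_bn import fuse_conv_bn

conv = nn.Conv2d(3, 8, 3, padding=1, bias=False).eval()
bn = nn.BatchNorm2d(8).eval()
# give BN non-trivial running statistics so the check is meaningful
bn.running_mean.uniform_(-1, 1)
bn.running_var.uniform_(0.5, 1.5)

x = torch.randn(1, 3, 16, 16)
with torch.no_grad():
    ref = bn(conv(x))               # conv followed by frozen BN
    fused = fuse_conv_bn(conv, bn)  # folds BN into the conv in place
    out = fused(x)
assert torch.allclose(ref, out, atol=1e-5)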
-------------------------------------------------------------------------------- /tools/kitti_demo.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import cv2 5 | import mmcv 6 | import torch 7 | import numpy as np 8 | import json 9 | 10 | from mmcv.parallel import MMDataParallel, MMDistributedDataParallel 11 | from mmcv.runner import get_dist_info, init_dist, load_checkpoint 12 | from tools.fuse_conv_bn import fuse_module 13 | 14 | from mmdet.apis import multi_gpu_test, single_gpu_test 15 | from mmdet.core import wrap_fp16_model 16 | from mmdet.datasets import build_dataloader, build_dataset 17 | from mmdet.datasets.cityscapes import PALETTE 18 | from mmdet.models import build_detector 19 | from mmdet.apis import init_detector, inference_detector, show_result 20 | from mmdet.core import cityscapes_originalIds 21 | 22 | from PIL import Image 23 | from skimage.morphology import dilation 24 | from skimage.segmentation import find_boundaries 25 | 26 | 27 | 28 | def parse_args(): 29 | parser = argparse.ArgumentParser(description='EfficientPS KITTI image demo') 30 | parser.add_argument('config', help='test config file path') 31 | parser.add_argument('checkpoint', help='checkpoint file') 32 | parser.add_argument('input', help='input folder') 33 | parser.add_argument('out', help='output folder') 34 | parser.add_argument( 35 | '--device', type=str, default='cuda:0', help='CPU/CUDA device option') 36 | parser.add_argument( 37 | '--show', action='store_true', help='display option') 38 | parser.add_argument( 39 | '--wait', type=int, default=0, help='cv2 wait time') 40 | args = parser.parse_args() 41 | return args 42 | 43 | 44 | def main(): 45 | args = parse_args() 46 | 47 | device = torch.device(args.device) 48 | os.makedirs(args.out, exist_ok=True) 49 | 50 | model = init_detector(args.config, args.checkpoint, device=device) 51 | 52 | PALETTE.append([0,0,0]) 53 | colors = np.array(PALETTE, dtype=np.uint8) 54 | 55 | for img_file in os.listdir(args.input): 56 | img = cv2.imread(os.path.join(args.input, img_file)) 57 | img_shape = img.shape[:2][::-1] 58 | img_ = cv2.resize(img, (384,1280)) 59 | 60 | result = inference_detector(model, img_, eval='panoptic') 61 | pan_pred, cat_pred, _ = result[0] 62 | 63 | sem = cat_pred[pan_pred].numpy() 64 | sem_tmp = sem.copy() 65 | sem_tmp[sem==255] = colors.shape[0] - 1 66 | sem_img = Image.fromarray(colors[sem_tmp]) 67 | 68 | is_background = (sem < 11) | (sem == 255) 69 | pan_pred = pan_pred.numpy() 70 | pan_pred[is_background] = 0 71 | 72 | contours =
find_boundaries(pan_pred, mode="outer", background=0).astype(np.uint8) * 255 73 | contours = dilation(contours) 74 | 75 | contours = np.expand_dims(contours, -1).repeat(4, -1) 76 | contours_img = Image.fromarray(contours, mode="RGBA") 77 | 78 | out = Image.blend(Image.fromarray(img_[:,:,::-1]), sem_img, 0.5).convert(mode="RGBA") 79 | out = Image.alpha_composite(out, contours_img).convert(mode="RGB") 80 | out = cv2.resize(np.array(out)[:,:,::-1], img_shape) 81 | 82 | if args.show: 83 | cv2.imshow('img', img) 84 | cv2.imshow('panoptic', out) 85 | ch = cv2.waitKey(args.wait) 86 | 87 | cv2.imwrite(os.path.join(args.out, img_file), out) 88 | 89 | if __name__ == '__main__': 90 | main() 91 | --------------------------------------------------------------------------------