├── .gitignore ├── .idea ├── .gitignore ├── SA-SSD.iml ├── inspectionProfiles │ └── profiles_settings.xml ├── misc.xml ├── modules.xml └── vcs.xml ├── configs ├── car_cfg.py └── multi_cfg.py ├── doc ├── hqdefault.jpg └── model.png ├── mmdet ├── __init__.py ├── core │ ├── __init__.py │ ├── anchor │ │ ├── __init__.py │ │ ├── anchor3d_generator.py │ │ ├── anchor_generator.py │ │ └── anchor_target.py │ ├── bbox │ │ ├── __init__.py │ │ ├── assignment.py │ │ ├── bbox_target.py │ │ ├── geometry.py │ │ ├── sampling.py │ │ └── transforms.py │ ├── bbox3d │ │ ├── __init__.py │ │ ├── bbox3d_target.py │ │ ├── box_coders.py │ │ ├── geometry.py │ │ ├── region_similarity.py │ │ └── target_ops.py │ ├── evaluation │ │ ├── __init__.py │ │ ├── bbox_overlaps.py │ │ ├── class_names.py │ │ ├── coco_utils.py │ │ ├── eval_hooks.py │ │ ├── kitti_eval.py │ │ ├── mean_ap.py │ │ └── recall.py │ ├── loss │ │ ├── __init__.py │ │ └── losses.py │ ├── mask │ │ ├── __init__.py │ │ ├── mask_target.py │ │ └── utils.py │ ├── point_cloud │ │ ├── __init__.py │ │ ├── point_augmentor.py │ │ └── voxel_generator.py │ ├── post_processing │ │ ├── __init__.py │ │ ├── bbox_nms.py │ │ ├── merge_augs.py │ │ └── rotate_nms_gpu.py │ └── utils │ │ ├── __init__.py │ │ ├── dist_utils.py │ │ └── misc.py ├── datasets │ ├── __init__.py │ ├── coco.py │ ├── concat_dataset.py │ ├── custom.py │ ├── kitti.py │ ├── kitti_utils.py │ ├── loader │ │ ├── __init__.py │ │ ├── build_loader.py │ │ └── sampler.py │ ├── transforms.py │ ├── utils.py │ ├── voc.py │ └── xml_style.py ├── models │ ├── __init__.py │ ├── backbones │ │ ├── __init__.py │ │ ├── pillar.py │ │ ├── resnet.py │ │ └── vxnet.py │ ├── bbox_heads │ │ ├── __init__.py │ │ ├── bbox_head.py │ │ └── convfc_bbox_head.py │ ├── builder.py │ ├── detectors │ │ ├── __init__.py │ │ ├── base.py │ │ ├── pointpillars.py │ │ ├── rpn.py │ │ ├── single_stage.py │ │ └── test_mixins.py │ ├── mask_heads │ │ ├── __init__.py │ │ └── fcn_mask_head.py │ ├── necks │ │ ├── __init__.py │ │ ├── cmn.py │ │ ├── fpn.py │ │ └── rpn.py │ ├── roi_extractors │ │ ├── __init__.py │ │ └── single_level.py │ ├── rpn_heads │ │ ├── __init__.py │ │ └── rpn_head.py │ ├── single_stage_heads │ │ ├── __init__.py │ │ ├── retina_head.py │ │ └── ssd_rotate_head.py │ └── utils │ │ ├── __init__.py │ │ ├── conv_module.py │ │ ├── empty.py │ │ ├── norm.py │ │ ├── sequential.py │ │ └── weight_init.py ├── ops │ ├── __init__.py │ ├── iou3d │ │ ├── iou3d_utils.py │ │ ├── setup.py │ │ └── src │ │ │ ├── iou3d.cpp │ │ │ └── iou3d_kernel.cu │ ├── pointnet2 │ │ ├── pointnet2_utils.py │ │ ├── setup.py │ │ └── src │ │ │ ├── cuda_utils.h │ │ │ ├── interpolate.cpp │ │ │ ├── interpolate_gpu.cu │ │ │ ├── interpolate_gpu.h │ │ │ └── pointnet2_api.cpp │ └── points_op │ │ ├── __init__.py │ │ ├── points_ops.py │ │ ├── setup.py │ │ └── src │ │ └── points_op.cpp └── version.py ├── readme.md └── tools ├── create_data.py ├── dist_train.sh ├── env.py ├── kitti_common.py ├── test.py ├── train.py └── train_utils ├── __init__.py └── optimization ├── __init__.py ├── fastai_optim.py └── learning_schedules_fastai.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled source # 2 | ################### 3 | *.com 4 | *.class 5 | *.dll 6 | *.exe 7 | *.o 8 | *.so 9 | *.pyc 10 | 11 | # Packages # 12 | ############ 13 | # it's better to unpack these files and commit the raw source 14 | # git has its own built in compression methods 15 | *.7z 16 | *.dmg 17 | *.gz 18 | *.iso 19 | *.jar 20 | *.rar 21 | *.tar 22 | *.zip 23 | 24 | 25 | # Specific directory # 26 | saved_model_vehicle/ 27 | -------------------------------------------------------------------------------- /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /workspace.xml -------------------------------------------------------------------------------- /.idea/SA-SSD.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 12 | 13 | 15 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /doc/hqdefault.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/skyhehe123/SA-SSD/2d75c973af65453186bd9242d7fa5e62dc44ec03/doc/hqdefault.jpg -------------------------------------------------------------------------------- /doc/model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/skyhehe123/SA-SSD/2d75c973af65453186bd9242d7fa5e62dc44ec03/doc/model.png -------------------------------------------------------------------------------- /mmdet/__init__.py: -------------------------------------------------------------------------------- 1 | from .version import __version__, short_version 2 | 3 | __all__ = ['__version__', 'short_version'] 4 | -------------------------------------------------------------------------------- /mmdet/core/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor import * # noqa: F401, F403 2 | from .bbox import * # noqa: F401, F403 3 | from .mask import * # noqa: F401, F403 4 | from .loss import * # noqa: F401, F403 5 | from .evaluation import * # noqa: F401, F403 6 | from .post_processing import * # noqa: F401, F403 7 | from .utils import * # noqa: F401, F403 8 | -------------------------------------------------------------------------------- /mmdet/core/anchor/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor_generator import AnchorGenerator 2 | from .anchor_target import anchor_target 3 | 4 | __all__ = ['AnchorGenerator', 'anchor_target'] 5 | -------------------------------------------------------------------------------- /mmdet/core/anchor/anchor3d_generator.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def create_anchors_3d_stride(feature_size, 4 | sizes=[1.6, 3.9, 1.56], 5 | anchor_strides=[0.4, 0.4, 0.0], 6 | anchor_offsets=[0.2, -39.8, -1.78], 7 | rotations=[0, np.pi / 2], 8 | dtype=np.float32): 9 | """ 10 | Args: 11 | feature_size: list [D, H, W](zyx) 12 | sizes: [N, 3] list of list or array, size of anchors, xyz 13 | 14 | Returns: 15 | anchors: [*feature_size, num_sizes, num_rots, 7] tensor. 16 | """ 17 | # almost 2x faster than v1 18 | x_stride, y_stride, z_stride = anchor_strides 19 | x_offset, y_offset, z_offset = anchor_offsets 20 | z_centers = np.arange(feature_size[0], dtype=dtype) 21 | y_centers = np.arange(feature_size[1], dtype=dtype) 22 | x_centers = np.arange(feature_size[2], dtype=dtype) 23 | z_centers = z_centers * z_stride + z_offset 24 | y_centers = y_centers * y_stride + y_offset 25 | x_centers = x_centers * x_stride + x_offset 26 | sizes = np.reshape(np.array(sizes, dtype=dtype), [-1, 3]) 27 | rotations = np.array(rotations, dtype=dtype) 28 | rets = np.meshgrid( 29 | x_centers, y_centers, z_centers, rotations, indexing='ij') 30 | tile_shape = [1] * 5 31 | tile_shape[-2] = int(sizes.shape[0]) 32 | for i in range(len(rets)): 33 | rets[i] = np.tile(rets[i][..., np.newaxis, :], tile_shape) 34 | rets[i] = rets[i][..., np.newaxis] # for concat 35 | sizes = np.reshape(sizes, [1, 1, 1, -1, 1, 3]) 36 | tile_size_shape = list(rets[0].shape) 37 | tile_size_shape[3] = 1 38 | sizes = np.tile(sizes, tile_size_shape) 39 | rets.insert(3, sizes) 40 | ret = np.concatenate(rets, axis=-1) 41 | return np.transpose(ret, [2, 1, 0, 3, 4, 5]) 42 | 43 | 44 | def create_anchors_3d_range(feature_size, 45 | anchor_range, 46 | sizes=[1.6, 3.9, 1.56], 47 | rotations=[0, np.pi / 2], 48 | dtype=np.float32): 49 | """ 50 | Args: 51 | feature_size: list [D, H, W](zyx) 52 | sizes: [N, 3] list of list or array, size of anchors, xyz 53 | 54 | Returns: 55 | anchors: [*feature_size, num_sizes, num_rots, 7] tensor. 56 | """ 57 | anchor_range = np.array(anchor_range, dtype) 58 | z_centers = np.linspace( 59 | anchor_range[2], anchor_range[5], feature_size[0], dtype=dtype) 60 | y_centers = np.linspace( 61 | anchor_range[1], anchor_range[4], feature_size[1], dtype=dtype) 62 | x_centers = np.linspace( 63 | anchor_range[0], anchor_range[3], feature_size[2], dtype=dtype) 64 | sizes = np.reshape(np.array(sizes, dtype=dtype), [-1, 3]) 65 | rotations = np.array(rotations, dtype=dtype) 66 | rets = np.meshgrid( 67 | x_centers, y_centers, z_centers, rotations, indexing='ij') 68 | tile_shape = [1] * 5 69 | tile_shape[-2] = int(sizes.shape[0]) 70 | for i in range(len(rets)): 71 | rets[i] = np.tile(rets[i][..., np.newaxis, :], tile_shape) 72 | rets[i] = rets[i][..., np.newaxis] # for concat 73 | sizes = np.reshape(sizes, [1, 1, 1, -1, 1, 3]) 74 | tile_size_shape = list(rets[0].shape) 75 | tile_size_shape[3] = 1 76 | sizes = np.tile(sizes, tile_size_shape) 77 | rets.insert(3, sizes) 78 | ret = np.concatenate(rets, axis=-1) 79 | return np.transpose(ret, [2, 1, 0, 3, 4, 5]) 80 | 81 | class AnchorGeneratorStride: 82 | def __init__(self, 83 | sizes=[1.6, 3.9, 1.56], 84 | anchor_strides=[0.4, 0.4, 1.0], 85 | anchor_offsets=[0.2, -39.8, -1.78], 86 | rotations=[0, np.pi / 2], 87 | dtype=np.float32): 88 | self._sizes = sizes 89 | self._anchor_strides = anchor_strides 90 | self._anchor_offsets = anchor_offsets 91 | self._rotations = rotations 92 | self._dtype = dtype 93 | 94 | @property 95 | def num_anchors_per_localization(self): 96 | num_rot = len(self._rotations) 97 | num_size = np.array(self._sizes).reshape([-1, 3]).shape[0] 98 | return num_rot * num_size 99 | 100 | def __call__(self, feature_map_size): 101 | return create_anchors_3d_stride( 102 | feature_map_size, self._sizes, self._anchor_strides, 103 | self._anchor_offsets, self._rotations, self._dtype) 104 | 105 | class AnchorGeneratorRange: 106 | def __init__(self, 107 | anchor_ranges, 108 | sizes=[1.6, 3.9, 1.56], 109 | rotations=[0, np.pi / 2], 110 | dtype=np.float32): 111 | self._sizes = sizes 112 | self._anchor_ranges = anchor_ranges 113 | self._rotations = rotations 114 | self._dtype = dtype 115 | 116 | @property 117 | def num_anchors_per_localization(self): 118 | num_rot = len(self._rotations) 119 | num_size = np.array(self._sizes).reshape([-1, 3]).shape[0] 120 | return num_rot * num_size 121 | 122 | def __call__(self, feature_map_size): 123 | return create_anchors_3d_range( 124 | feature_map_size, self._anchor_ranges, self._sizes, 125 | self._rotations, self._dtype) 126 | -------------------------------------------------------------------------------- /mmdet/core/anchor/anchor_generator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class AnchorGenerator(object): 5 | 6 | def __init__(self, base_size, scales, ratios, scale_major=True): 7 | self.base_size = base_size 8 | self.scales = torch.Tensor(scales) 9 | self.ratios = torch.Tensor(ratios) 10 | self.scale_major = scale_major 11 | self.base_anchors = self.gen_base_anchors() 12 | 13 | @property 14 | def num_base_anchors(self): 15 | return self.base_anchors.size(0) 16 | 17 | def gen_base_anchors(self): 18 | base_anchor = torch.Tensor( 19 | [0, 0, self.base_size - 1, self.base_size - 1]) 20 | 21 | w = base_anchor[2] - base_anchor[0] + 1 22 | h = base_anchor[3] - base_anchor[1] + 1 23 | x_ctr = base_anchor[0] + 0.5 * (w - 1) 24 | y_ctr = base_anchor[1] + 0.5 * (h - 1) 25 | 26 | h_ratios = torch.sqrt(self.ratios) 27 | w_ratios = 1 / h_ratios 28 | if self.scale_major: 29 | ws = (w * w_ratios[:, None] * self.scales[None, :]).view(-1) 30 | hs = (h * h_ratios[:, None] * self.scales[None, :]).view(-1) 31 | else: 32 | ws = (w * self.scales[:, None] * w_ratios[None, :]).view(-1) 33 | hs = (h * self.scales[:, None] * h_ratios[None, :]).view(-1) 34 | 35 | base_anchors = torch.stack( 36 | [ 37 | x_ctr - 0.5 * (ws - 1), y_ctr - 0.5 * (hs - 1), 38 | x_ctr + 0.5 * (ws - 1), y_ctr + 0.5 * (hs - 1) 39 | ], 40 | dim=-1).round() 41 | 42 | return base_anchors 43 | 44 | def _meshgrid(self, x, y, row_major=True): 45 | xx = x.repeat(len(y)) 46 | yy = y.view(-1, 1).repeat(1, len(x)).view(-1) 47 | if row_major: 48 | return xx, yy 49 | else: 50 | return yy, xx 51 | 52 | def grid_anchors(self, featmap_size, stride=16, device='cuda'): 53 | base_anchors = self.base_anchors.to(device) 54 | 55 | feat_h, feat_w = featmap_size 56 | shift_x = torch.arange(0, feat_w, device=device) * stride 57 | shift_y = torch.arange(0, feat_h, device=device) * stride 58 | shift_xx, shift_yy = self._meshgrid(shift_x, shift_y) 59 | shifts = torch.stack([shift_xx, shift_yy, shift_xx, shift_yy], dim=-1) 60 | shifts = shifts.type_as(base_anchors) 61 | # first feat_w elements correspond to the first row of shifts 62 | # add A anchors (1, A, 4) to K shifts (K, 1, 4) to get 63 | # shifted anchors (K, A, 4), reshape to (K*A, 4) 64 | 65 | all_anchors = base_anchors[None, :, :] + shifts[:, None, :] 66 | all_anchors = all_anchors.view(-1, 4) 67 | # first A rows correspond to A anchors of (0, 0) in feature map, 68 | # then (0, 1), (0, 2), ... 69 | return all_anchors 70 | 71 | def valid_flags(self, featmap_size, valid_size, device='cuda'): 72 | feat_h, feat_w = featmap_size 73 | valid_h, valid_w = valid_size 74 | assert valid_h <= feat_h and valid_w <= feat_w 75 | valid_x = torch.zeros(feat_w, dtype=torch.uint8, device=device) 76 | valid_y = torch.zeros(feat_h, dtype=torch.uint8, device=device) 77 | valid_x[:valid_w] = 1 78 | valid_y[:valid_h] = 1 79 | valid_xx, valid_yy = self._meshgrid(valid_x, valid_y) 80 | valid = valid_xx & valid_yy 81 | valid = valid[:, None].expand( 82 | valid.size(0), self.num_base_anchors).contiguous().view(-1) 83 | return valid 84 | -------------------------------------------------------------------------------- /mmdet/core/anchor/anchor_target.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from ..bbox import assign_and_sample, BBoxAssigner, SamplingResult, bbox2delta, rbbox3d2delta 3 | 4 | def anchor_target(flat_anchors, 5 | inside_flags, 6 | gt_bboxes, 7 | gt_labels, 8 | target_means, 9 | target_stds, 10 | cfg, 11 | cls_out_channels=1, 12 | sampling=True): 13 | 14 | # assign gt and sample anchors 15 | 16 | anchors = flat_anchors[inside_flags] 17 | 18 | if sampling: 19 | assign_result, sampling_result = assign_and_sample( 20 | anchors, gt_bboxes, None, None, cfg) 21 | else: 22 | bbox_assigner = BBoxAssigner(**cfg.assigner) 23 | assign_result = bbox_assigner.assign(anchors, gt_bboxes, None, gt_labels) 24 | pos_inds = torch.nonzero( 25 | assign_result.gt_inds > 0).squeeze(-1).unique() 26 | neg_inds = torch.nonzero( 27 | assign_result.gt_inds == 0).squeeze(-1).unique() 28 | gt_flags = anchors.new_zeros(anchors.shape[0], dtype=torch.uint8) 29 | sampling_result = SamplingResult(pos_inds, neg_inds, anchors, 30 | gt_bboxes, assign_result, gt_flags) 31 | 32 | num_valid_anchors = anchors.shape[0] 33 | bbox_targets = torch.zeros_like(anchors) 34 | bbox_weights = torch.zeros_like(anchors) 35 | labels = anchors.new_zeros(num_valid_anchors, dtype=torch.long) 36 | label_weights = anchors.new_zeros(num_valid_anchors, dtype=torch.float) 37 | dir_weights = anchors.new_zeros(num_valid_anchors, dtype=torch.float) 38 | 39 | pos_inds = sampling_result.pos_inds 40 | neg_inds = sampling_result.neg_inds 41 | if len(pos_inds) > 0: 42 | pos_bbox_targets = rbbox3d2delta(sampling_result.pos_bboxes, 43 | sampling_result.pos_gt_bboxes, 44 | target_means, target_stds) 45 | bbox_targets[pos_inds, :] = pos_bbox_targets 46 | bbox_weights[pos_inds, :] = 1.0 47 | dir_weights[pos_inds] = 1. 48 | if gt_labels is None: 49 | labels[pos_inds] = 1 50 | else: 51 | labels[pos_inds] = gt_labels[sampling_result.pos_assigned_gt_inds] 52 | if cfg.pos_weight <= 0: 53 | label_weights[pos_inds] = 1.0 54 | else: 55 | label_weights[pos_inds] = cfg.pos_weight 56 | if len(neg_inds) > 0: 57 | label_weights[neg_inds] = 1.0 58 | 59 | # map up to original set of anchors 60 | num_total_anchors = flat_anchors.shape[0] 61 | labels = unmap(labels, num_total_anchors, inside_flags) 62 | label_weights = unmap(label_weights, num_total_anchors, inside_flags) 63 | if cls_out_channels > 1: 64 | labels, label_weights = expand_binary_labels(labels, label_weights, 65 | cls_out_channels) 66 | bbox_targets = unmap(bbox_targets, num_total_anchors, inside_flags) 67 | bbox_weights = unmap(bbox_weights, num_total_anchors, inside_flags) 68 | 69 | dir_labels = get_direction_target(flat_anchors, bbox_targets) 70 | dir_weights = unmap(dir_weights, num_total_anchors, inside_flags) 71 | 72 | return (labels, label_weights, bbox_targets, bbox_weights, dir_labels, dir_weights, pos_inds, 73 | neg_inds) 74 | 75 | 76 | def expand_binary_labels(labels, label_weights, cls_out_channels): 77 | bin_labels = labels.new_full( 78 | (labels.size(0), cls_out_channels), 0, dtype=torch.float32) 79 | inds = torch.nonzero(labels >= 1).squeeze() 80 | if inds.numel() > 0: 81 | bin_labels[inds, labels[inds] - 1] = 1 82 | bin_label_weights = label_weights.view(-1, 1).expand( 83 | label_weights.size(0), cls_out_channels) 84 | return bin_labels, bin_label_weights 85 | 86 | def get_direction_target(anchors, reg_targets): 87 | anchors = anchors.view(-1, 7) 88 | rot_gt = reg_targets[:, -1] + anchors[:, -1] 89 | dir_cls_targets = (rot_gt > 0).long() 90 | return dir_cls_targets 91 | 92 | def anchor_inside_flags(flat_anchors, valid_flags, img_shape, 93 | allowed_border=0): 94 | img_h, img_w = img_shape[:2] 95 | if allowed_border >= 0: 96 | inside_flags = valid_flags & \ 97 | (flat_anchors[:, 0] >= -allowed_border) & \ 98 | (flat_anchors[:, 1] >= -allowed_border) & \ 99 | (flat_anchors[:, 2] < img_w + allowed_border) & \ 100 | (flat_anchors[:, 3] < img_h + allowed_border) 101 | else: 102 | inside_flags = valid_flags 103 | return inside_flags 104 | 105 | 106 | def unmap(data, count, inds, fill=0): 107 | """ Unmap a subset of item (data) back to the original set of items (of 108 | size count) """ 109 | if data.dim() == 1: 110 | ret = data.new_full((count, ), fill) 111 | ret[inds] = data 112 | else: 113 | new_size = (count, ) + data.size()[1:] 114 | ret = data.new_full(new_size, fill) 115 | ret[inds, :] = data 116 | return ret 117 | -------------------------------------------------------------------------------- /mmdet/core/bbox/__init__.py: -------------------------------------------------------------------------------- 1 | from .geometry import bbox_overlaps 2 | from .assignment import BBoxAssigner, AssignResult 3 | from .sampling import (BBoxSampler, SamplingResult, assign_and_sample, 4 | random_choice) 5 | from .transforms import (bbox2delta, delta2bbox, bbox_flip, bbox_mapping, rbbox3d2delta, delta2rbbox3d, add_sin_difference, 6 | bbox_mapping_back, bbox2roi, roi2bbox, bbox2result, rbbox2roi, kitti_bbox2results, tensor2points) 7 | from .bbox_target import bbox_target 8 | 9 | __all__ = [ 10 | 'bbox_overlaps', 'BBoxAssigner', 'AssignResult', 'BBoxSampler', 11 | 'SamplingResult', 'assign_and_sample', 'random_choice', 'bbox2delta', 12 | 'delta2bbox', 'bbox_flip', 'bbox_mapping', 'bbox_mapping_back', 'bbox2roi', 13 | 'roi2bbox', 'bbox2result', 'bbox_target','rbbox3d2delta','delta2rbbox3d', 14 | 'rbbox2roi', 'kitti_bbox2results','add_sin_difference','tensor2points', 15 | ] 16 | -------------------------------------------------------------------------------- /mmdet/core/bbox/bbox_target.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .transforms import bbox2delta, rbbox3d2delta 4 | from ..utils import multi_apply 5 | 6 | 7 | def bbox_target(pos_bboxes_list, 8 | neg_bboxes_list, 9 | pos_gt_bboxes_list, 10 | pos_gt_labels_list, 11 | cfg, 12 | reg_classes=1, 13 | target_means=[.0, .0, .0, .0], 14 | target_stds=[1.0, 1.0, 1.0, 1.0], 15 | concat=True): 16 | labels, label_weights, bbox_targets, bbox_weights = multi_apply( 17 | bbox_target_single, 18 | pos_bboxes_list, 19 | neg_bboxes_list, 20 | pos_gt_bboxes_list, 21 | pos_gt_labels_list, 22 | cfg=cfg, 23 | reg_classes=reg_classes, 24 | target_means=target_means, 25 | target_stds=target_stds) 26 | 27 | if concat: 28 | labels = torch.cat(labels, 0) 29 | label_weights = torch.cat(label_weights, 0) 30 | bbox_targets = torch.cat(bbox_targets, 0) 31 | bbox_weights = torch.cat(bbox_weights, 0) 32 | return labels, label_weights, bbox_targets, bbox_weights 33 | 34 | 35 | def bbox_target_single(pos_bboxes, 36 | neg_bboxes, 37 | pos_gt_bboxes, 38 | pos_gt_labels, 39 | cfg, 40 | reg_classes=1, 41 | target_means=[.0, .0, .0, .0], 42 | target_stds=[1.0, 1.0, 1.0, 1.0]): 43 | num_pos = pos_bboxes.size(0) 44 | num_neg = neg_bboxes.size(0) 45 | num_samples = num_pos + num_neg 46 | labels = pos_bboxes.new_zeros(num_samples, dtype=torch.long) 47 | label_weights = pos_bboxes.new_zeros(num_samples) 48 | bbox_targets = pos_bboxes.new_zeros(num_samples, 7) 49 | bbox_weights = pos_bboxes.new_zeros(num_samples, 7) 50 | if num_pos > 0: 51 | labels[:num_pos] = pos_gt_labels 52 | pos_weight = 1.0 if cfg.pos_weight <= 0 else cfg.pos_weight 53 | label_weights[:num_pos] = pos_weight 54 | 55 | pos_bbox_targets = rbbox3d2delta(pos_bboxes, pos_gt_bboxes, target_means, 56 | target_stds) 57 | bbox_targets[:num_pos, :] = pos_bbox_targets 58 | bbox_weights[:num_pos, :] = 1 59 | if num_neg > 0: 60 | label_weights[-num_neg:] = 1.0 61 | if reg_classes > 1: 62 | bbox_targets, bbox_weights = expand_target(bbox_targets, bbox_weights, 63 | labels, reg_classes) 64 | 65 | return labels, label_weights, bbox_targets, bbox_weights 66 | 67 | 68 | def expand_target(bbox_targets, bbox_weights, labels, num_classes): 69 | bbox_targets_expand = bbox_targets.new_zeros((bbox_targets.size(0), 70 | 7 * num_classes)) 71 | bbox_weights_expand = bbox_weights.new_zeros((bbox_weights.size(0), 72 | 7 * num_classes)) 73 | for i in torch.nonzero(labels > 0).squeeze(-1): 74 | start, end = labels[i] * 7, (labels[i] + 1) * 7 75 | bbox_targets_expand[i, start:end] = bbox_targets[i, :] 76 | bbox_weights_expand[i, start:end] = bbox_weights[i, :] 77 | return bbox_targets_expand, bbox_weights_expand 78 | -------------------------------------------------------------------------------- /mmdet/core/bbox/geometry.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False): 5 | """Calculate overlap between two set of bboxes. 6 | 7 | If ``is_aligned`` is ``False``, then calculate the ious between each bbox 8 | of bboxes1 and bboxes2, otherwise the ious between each aligned pair of 9 | bboxes1 and bboxes2. 10 | 11 | Args: 12 | bboxes1 (Tensor): shape (m, 4) 13 | bboxes2 (Tensor): shape (n, 4), if is_aligned is ``True``, then m and n 14 | must be equal. 15 | mode (str): "iou" (intersection over union) or iof (intersection over 16 | foreground). 17 | 18 | Returns: 19 | ious(Tensor): shape (n, k) if is_aligned == False else shape (n, 1) 20 | """ 21 | 22 | assert mode in ['iou', 'iof'] 23 | 24 | rows = bboxes1.size(0) 25 | cols = bboxes2.size(0) 26 | if is_aligned: 27 | assert rows == cols 28 | 29 | if rows * cols == 0: 30 | return bboxes1.new(rows, 1) if is_aligned else bboxes1.new(rows, cols) 31 | 32 | if is_aligned: 33 | lt = torch.max(bboxes1[:, :2], bboxes2[:, :2]) # [rows, 2] 34 | rb = torch.min(bboxes1[:, 2:], bboxes2[:, 2:]) # [rows, 2] 35 | 36 | wh = (rb - lt + 1).clamp(min=0) # [rows, 2] 37 | overlap = wh[:, 0] * wh[:, 1] 38 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * ( 39 | bboxes1[:, 3] - bboxes1[:, 1] + 1) 40 | 41 | if mode == 'iou': 42 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * ( 43 | bboxes2[:, 3] - bboxes2[:, 1] + 1) 44 | ious = overlap / (area1 + area2 - overlap) 45 | else: 46 | ious = overlap / area1 47 | else: 48 | lt = torch.max(bboxes1[:, None, :2], bboxes2[:, :2]) # [rows, cols, 2] 49 | rb = torch.min(bboxes1[:, None, 2:], bboxes2[:, 2:]) # [rows, cols, 2] 50 | 51 | wh = (rb - lt + 1).clamp(min=0) # [rows, cols, 2] 52 | overlap = wh[:, :, 0] * wh[:, :, 1] 53 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * ( 54 | bboxes1[:, 3] - bboxes1[:, 1] + 1) 55 | 56 | if mode == 'iou': 57 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * ( 58 | bboxes2[:, 3] - bboxes2[:, 1] + 1) 59 | ious = overlap / (area1[:, None] + area2 - overlap) 60 | else: 61 | ious = overlap / (area1[:, None]) 62 | 63 | return ious 64 | -------------------------------------------------------------------------------- /mmdet/core/bbox3d/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/skyhehe123/SA-SSD/2d75c973af65453186bd9242d7fa5e62dc44ec03/mmdet/core/bbox3d/__init__.py -------------------------------------------------------------------------------- /mmdet/core/bbox3d/bbox3d_target.py: -------------------------------------------------------------------------------- 1 | from mmdet.core.bbox3d.target_ops import create_target_np 2 | from mmdet.core.bbox3d import region_similarity as regionSimilarity 3 | from mmdet.core.bbox3d import box_coders as boxCoders 4 | 5 | class TargetEncoder: 6 | def __init__(self, 7 | box_coders, 8 | region_similarity): 9 | 10 | self._similarity_fn = getattr(regionSimilarity, region_similarity)() 11 | self._box_coder = getattr(boxCoders, box_coders)() 12 | 13 | @property 14 | def box_coder(self): 15 | return self._box_coder 16 | 17 | def assign(self, 18 | anchors, 19 | gt_boxes, 20 | anchors_mask=None, 21 | gt_classes=None, 22 | pos_iou_thr=0.6, 23 | neg_iou_thr=0.45, 24 | positive_fraction=None, 25 | sample_size=512, 26 | ): 27 | 28 | return create_target_np( 29 | anchors, 30 | gt_boxes, 31 | anchors_mask, 32 | gt_classes, 33 | similarity_fn=self._similarity_fn, 34 | box_encoding_fn = self._box_coder.encode, 35 | matched_threshold=pos_iou_thr, 36 | unmatched_threshold=neg_iou_thr, 37 | positive_fraction=positive_fraction, 38 | rpn_batch_size=sample_size, 39 | norm_by_num_examples=False, 40 | box_code_size=self.box_coder.code_size) 41 | 42 | -------------------------------------------------------------------------------- /mmdet/core/bbox3d/region_similarity.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Region Similarity Calculators for BoxLists. 17 | 18 | Region Similarity Calculators compare a pairwise measure of similarity 19 | between the boxes in two BoxLists. 20 | """ 21 | 22 | from mmdet.core.bbox3d.geometry import rbbox2d_to_near_bbox, iou_jit, distance_similarity 23 | from mmdet.core.post_processing.rotate_nms_gpu import rotate_iou_gpu, rotate_iou_gpu_eval 24 | import numba 25 | 26 | @numba.jit(nopython=True, parallel=True) 27 | def d3_box_overlap_kernel(boxes, qboxes, rinc, criterion=-1): 28 | N, K = boxes.shape[0], qboxes.shape[0] 29 | for i in range(N): 30 | for j in range(K): 31 | if rinc[i, j] > 0: 32 | iw = (min(boxes[i, 2], qboxes[j, 2]) - max( 33 | boxes[i, 2] - boxes[i, 5], qboxes[j, 2] - qboxes[j, 5])) 34 | 35 | if iw > 0: 36 | area1 = boxes[i, 3] * boxes[i, 4] * boxes[i, 5] 37 | area2 = qboxes[j, 3] * qboxes[j, 4] * qboxes[j, 5] 38 | inc = iw * rinc[i, j] 39 | if criterion == -1: 40 | ua = (area1 + area2 - inc) 41 | elif criterion == 0: 42 | ua = area1 43 | elif criterion == 1: 44 | ua = area2 45 | else: 46 | ua = 1.0 47 | rinc[i, j] = inc / ua 48 | else: 49 | rinc[i, j] = 0.0 50 | 51 | class RotateIou2dSimilarity(object): 52 | """Class to compute similarity based on Intersection over Union (IOU) metric. 53 | 54 | This class computes pairwise similarity between two BoxLists based on IOU. 55 | """ 56 | def __call__(self, boxes1, boxes2): 57 | boxes1_rbv = boxes1[:, [0, 1, 3, 4, 6]] 58 | boxes2_rbv = boxes2[:, [0, 1, 3, 4, 6]] 59 | return rotate_iou_gpu(boxes1_rbv, boxes2_rbv) 60 | 61 | class RotateIou3dSimilarity(object): 62 | """Class to compute similarity based on Intersection over Union (IOU) metric. 63 | 64 | This class computes pairwise similarity between two BoxLists based on IOU. 65 | """ 66 | def __call__(self, boxes1, boxes2): 67 | boxes1_rbv = boxes1[:, [0, 1, 3, 4, 6]] 68 | boxes2_rbv = boxes2[:, [0, 1, 3, 4, 6]] 69 | rinc = rotate_iou_gpu_eval(boxes1_rbv, boxes2_rbv, criterion=2) 70 | d3_box_overlap_kernel(boxes1, boxes2, rinc) 71 | return rinc 72 | 73 | class NearestIouSimilarity(object): 74 | """Class to compute similarity based on the squared distance metric. 75 | 76 | This class computes pairwise similarity between two BoxLists based on the 77 | negative squared distance metric. 78 | """ 79 | 80 | def __call__(self, boxes1, boxes2): 81 | """Compute matrix of (negated) sq distances. 82 | 83 | Args: 84 | boxlist1: BoxList holding N boxes. 85 | boxlist2: BoxList holding M boxes. 86 | 87 | Returns: 88 | A tensor with shape [N, M] representing negated pairwise squared distance. 89 | """ 90 | boxes1_rbv = boxes1[:, [0, 1, 3, 4, 6]] 91 | boxes2_rbv = boxes2[:, [0, 1, 3, 4, 6]] 92 | boxes1_bv = rbbox2d_to_near_bbox(boxes1_rbv) 93 | boxes2_bv = rbbox2d_to_near_bbox(boxes2_rbv) 94 | ret = iou_jit(boxes1_bv, boxes2_bv, eps=0.0) 95 | return ret 96 | 97 | 98 | class DistanceSimilarity(object): 99 | """Class to compute similarity based on Intersection over Area (IOA) metric. 100 | 101 | This class computes pairwise similarity between two BoxLists based on their 102 | pairwise intersections divided by the areas of second BoxLists. 103 | """ 104 | 105 | def __init__(self, distance_norm, with_rotation=False, rotation_alpha=0.5): 106 | self._distance_norm = distance_norm 107 | self._with_rotation = with_rotation 108 | self._rotation_alpha = rotation_alpha 109 | 110 | def __call__(self, boxes1, boxes2): 111 | """Compute matrix of (negated) sq distances. 112 | 113 | Args: 114 | boxlist1: BoxList holding N boxes. 115 | boxlist2: BoxList holding M boxes. 116 | 117 | Returns: 118 | A tensor with shape [N, M] representing negated pairwise squared distance. 119 | """ 120 | boxes1_rbv = boxes1[:, [0, 1, 3, 4, 6]] 121 | boxes2_rbv = boxes2[:, [0, 1, 3, 4, 6]] 122 | return distance_similarity( 123 | boxes1_rbv[..., [0, 1, -1]], 124 | boxes2_rbv[..., [0, 1, -1]], 125 | dist_norm=self._distance_norm, 126 | with_rotation=self._with_rotation, 127 | rot_alpha=self._rotation_alpha) 128 | 129 | -------------------------------------------------------------------------------- /mmdet/core/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .class_names import (voc_classes, imagenet_det_classes, 2 | imagenet_vid_classes, coco_classes, dataset_aliases, 3 | get_classes) 4 | from .coco_utils import coco_eval, fast_eval_recall, results2json 5 | from .eval_hooks import (DistEvalHook, CocoDistEvalRecallHook, 6 | CocoDistEvalmAPHook,KittiEvalmAPHook, DistEvalmAPHook) 7 | from .mean_ap import average_precision, eval_map, print_map_summary 8 | from .recall import (eval_recalls, print_recall_summary, plot_num_recall, 9 | plot_iou_recall) 10 | 11 | __all__ = [ 12 | 'voc_classes', 'imagenet_det_classes', 'imagenet_vid_classes', 13 | 'coco_classes', 'dataset_aliases', 'get_classes', 'coco_eval', 14 | 'fast_eval_recall', 'results2json', 'DistEvalHook', 15 | 'CocoDistEvalRecallHook', 'CocoDistEvalmAPHook', 'average_precision', 16 | 'eval_map', 'print_map_summary', 'eval_recalls', 'print_recall_summary', 17 | 'plot_num_recall', 'plot_iou_recall','KittiEvalmAPHook','DistEvalmAPHook' 18 | ] 19 | -------------------------------------------------------------------------------- /mmdet/core/evaluation/bbox_overlaps.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def bbox_overlaps(bboxes1, bboxes2, mode='iou'): 5 | """Calculate the ious between each bbox of bboxes1 and bboxes2. 6 | 7 | Args: 8 | bboxes1(ndarray): shape (n, 4) 9 | bboxes2(ndarray): shape (k, 4) 10 | mode(str): iou (intersection over union) or iof (intersection 11 | over foreground) 12 | 13 | Returns: 14 | ious(ndarray): shape (n, k) 15 | """ 16 | 17 | assert mode in ['iou', 'iof'] 18 | 19 | bboxes1 = bboxes1.astype(np.float32) 20 | bboxes2 = bboxes2.astype(np.float32) 21 | rows = bboxes1.shape[0] 22 | cols = bboxes2.shape[0] 23 | ious = np.zeros((rows, cols), dtype=np.float32) 24 | if rows * cols == 0: 25 | return ious 26 | exchange = False 27 | if bboxes1.shape[0] > bboxes2.shape[0]: 28 | bboxes1, bboxes2 = bboxes2, bboxes1 29 | ious = np.zeros((cols, rows), dtype=np.float32) 30 | exchange = True 31 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * ( 32 | bboxes1[:, 3] - bboxes1[:, 1] + 1) 33 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * ( 34 | bboxes2[:, 3] - bboxes2[:, 1] + 1) 35 | for i in range(bboxes1.shape[0]): 36 | x_start = np.maximum(bboxes1[i, 0], bboxes2[:, 0]) 37 | y_start = np.maximum(bboxes1[i, 1], bboxes2[:, 1]) 38 | x_end = np.minimum(bboxes1[i, 2], bboxes2[:, 2]) 39 | y_end = np.minimum(bboxes1[i, 3], bboxes2[:, 3]) 40 | overlap = np.maximum(x_end - x_start + 1, 0) * np.maximum( 41 | y_end - y_start + 1, 0) 42 | if mode == 'iou': 43 | union = area1[i] + area2 - overlap 44 | else: 45 | union = area1[i] if not exchange else area2 46 | ious[i, :] = overlap / union 47 | if exchange: 48 | ious = ious.T 49 | return ious 50 | -------------------------------------------------------------------------------- /mmdet/core/evaluation/class_names.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | 4 | def voc_classes(): 5 | return [ 6 | 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 7 | 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 8 | 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor' 9 | ] 10 | 11 | 12 | def imagenet_det_classes(): 13 | return [ 14 | 'accordion', 'airplane', 'ant', 'antelope', 'apple', 'armadillo', 15 | 'artichoke', 'axe', 'baby_bed', 'backpack', 'bagel', 'balance_beam', 16 | 'banana', 'band_aid', 'banjo', 'baseball', 'basketball', 'bathing_cap', 17 | 'beaker', 'bear', 'bee', 'bell_pepper', 'bench', 'bicycle', 'binder', 18 | 'bird', 'bookshelf', 'bow_tie', 'bow', 'bowl', 'brassiere', 'burrito', 19 | 'bus', 'butterfly', 'camel', 'can_opener', 'car', 'cart', 'cattle', 20 | 'cello', 'centipede', 'chain_saw', 'chair', 'chime', 'cocktail_shaker', 21 | 'coffee_maker', 'computer_keyboard', 'computer_mouse', 'corkscrew', 22 | 'cream', 'croquet_ball', 'crutch', 'cucumber', 'cup_or_mug', 'diaper', 23 | 'digital_clock', 'dishwasher', 'dog', 'domestic_cat', 'dragonfly', 24 | 'drum', 'dumbbell', 'electric_fan', 'elephant', 'face_powder', 'fig', 25 | 'filing_cabinet', 'flower_pot', 'flute', 'fox', 'french_horn', 'frog', 26 | 'frying_pan', 'giant_panda', 'goldfish', 'golf_ball', 'golfcart', 27 | 'guacamole', 'guitar', 'hair_dryer', 'hair_spray', 'hamburger', 28 | 'hammer', 'hamster', 'harmonica', 'harp', 'hat_with_a_wide_brim', 29 | 'head_cabbage', 'helmet', 'hippopotamus', 'horizontal_bar', 'horse', 30 | 'hotdog', 'iPod', 'isopod', 'jellyfish', 'koala_bear', 'ladle', 31 | 'ladybug', 'lamp', 'laptop', 'lemon', 'lion', 'lipstick', 'lizard', 32 | 'lobster', 'maillot', 'maraca', 'microphone', 'microwave', 'milk_can', 33 | 'miniskirt', 'monkey', 'motorcycle', 'mushroom', 'nail', 'neck_brace', 34 | 'oboe', 'orange', 'otter', 'pencil_box', 'pencil_sharpener', 'perfume', 35 | 'person', 'piano', 'pineapple', 'ping-pong_ball', 'pitcher', 'pizza', 36 | 'plastic_bag', 'plate_rack', 'pomegranate', 'popsicle', 'porcupine', 37 | 'power_drill', 'pretzel', 'printer', 'puck', 'punching_bag', 'purse', 38 | 'rabbit', 'racket', 'ray', 'red_panda', 'refrigerator', 39 | 'remote_control', 'rubber_eraser', 'rugby_ball', 'ruler', 40 | 'salt_or_pepper_shaker', 'saxophone', 'scorpion', 'screwdriver', 41 | 'seal', 'sheep', 'ski', 'skunk', 'snail', 'snake', 'snowmobile', 42 | 'snowplow', 'soap_dispenser', 'soccer_ball', 'sofa', 'spatula', 43 | 'squirrel', 'starfish', 'stethoscope', 'stove', 'strainer', 44 | 'strawberry', 'stretcher', 'sunglasses', 'swimming_trunks', 'swine', 45 | 'syringe', 'table', 'tape_player', 'tennis_ball', 'tick', 'tie', 46 | 'tiger', 'toaster', 'traffic_light', 'train', 'trombone', 'trumpet', 47 | 'turtle', 'tv_or_monitor', 'unicycle', 'vacuum', 'violin', 48 | 'volleyball', 'waffle_iron', 'washer', 'water_bottle', 'watercraft', 49 | 'whale', 'wine_bottle', 'zebra' 50 | ] 51 | 52 | 53 | def imagenet_vid_classes(): 54 | return [ 55 | 'airplane', 'antelope', 'bear', 'bicycle', 'bird', 'bus', 'car', 56 | 'cattle', 'dog', 'domestic_cat', 'elephant', 'fox', 'giant_panda', 57 | 'hamster', 'horse', 'lion', 'lizard', 'monkey', 'motorcycle', 'rabbit', 58 | 'red_panda', 'sheep', 'snake', 'squirrel', 'tiger', 'train', 'turtle', 59 | 'watercraft', 'whale', 'zebra' 60 | ] 61 | 62 | 63 | def coco_classes(): 64 | return [ 65 | 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 66 | 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 67 | 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 68 | 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 69 | 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 70 | 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 71 | 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 72 | 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 73 | 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 74 | 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 75 | 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 76 | 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 77 | 'scissors', 'teddy bear', 'hair drier', 'toothbrush' 78 | ] 79 | 80 | def kitti_classes(): 81 | return [ 82 | 'car', 'pedestrians','cyclists' 83 | ] 84 | 85 | dataset_aliases = { 86 | 'voc': ['voc', 'pascal_voc', 'voc07', 'voc12'], 87 | 'imagenet_det': ['det', 'imagenet_det', 'ilsvrc_det'], 88 | 'imagenet_vid': ['vid', 'imagenet_vid', 'ilsvrc_vid'], 89 | 'coco': ['coco', 'mscoco', 'ms_coco'], 90 | 'kitti': ['kitti'] 91 | } 92 | 93 | 94 | def get_classes(dataset): 95 | """Get class names of a dataset.""" 96 | alias2name = {} 97 | for name, aliases in dataset_aliases.items(): 98 | for alias in aliases: 99 | alias2name[alias] = name 100 | 101 | if mmcv.is_str(dataset): 102 | if dataset in alias2name: 103 | labels = eval(alias2name[dataset] + '_classes()') 104 | else: 105 | raise ValueError('Unrecognized dataset: {}'.format(dataset)) 106 | else: 107 | raise TypeError('dataset must a str, but got {}'.format(type(dataset))) 108 | return labels 109 | -------------------------------------------------------------------------------- /mmdet/core/evaluation/coco_utils.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | import numpy as np 3 | from pycocotools.coco import COCO 4 | from pycocotools.cocoeval import COCOeval 5 | 6 | from .recall import eval_recalls 7 | 8 | 9 | def coco_eval(result_file, result_types, coco, max_dets=(100, 300, 1000)): 10 | for res_type in result_types: 11 | assert res_type in [ 12 | 'proposal', 'proposal_fast', 'bbox', 'segm', 'keypoints' 13 | ] 14 | 15 | if mmcv.is_str(coco): 16 | coco = COCO(coco) 17 | assert isinstance(coco, COCO) 18 | 19 | if result_types == ['proposal_fast']: 20 | ar = fast_eval_recall(result_file, coco, np.array(max_dets)) 21 | for i, num in enumerate(max_dets): 22 | print('AR@{}\t= {:.4f}'.format(num, ar[i])) 23 | return 24 | 25 | assert result_file.endswith('.json') 26 | coco_dets = coco.loadRes(result_file) 27 | 28 | img_ids = coco.getImgIds() 29 | for res_type in result_types: 30 | iou_type = 'bbox' if res_type == 'proposal' else res_type 31 | cocoEval = COCOeval(coco, coco_dets, iou_type) 32 | cocoEval.params.imgIds = img_ids 33 | if res_type == 'proposal': 34 | cocoEval.params.useCats = 0 35 | cocoEval.params.maxDets = list(max_dets) 36 | cocoEval.evaluate() 37 | cocoEval.accumulate() 38 | cocoEval.summarize() 39 | 40 | 41 | def fast_eval_recall(results, 42 | coco, 43 | max_dets, 44 | iou_thrs=np.arange(0.5, 0.96, 0.05)): 45 | if mmcv.is_str(results): 46 | assert results.endswith('.pkl') 47 | results = mmcv.load(results) 48 | elif not isinstance(results, list): 49 | raise TypeError( 50 | 'results must be a list of numpy arrays or a filename, not {}'. 51 | format(type(results))) 52 | 53 | gt_bboxes = [] 54 | img_ids = coco.getImgIds() 55 | for i in range(len(img_ids)): 56 | ann_ids = coco.getAnnIds(imgIds=img_ids[i]) 57 | ann_info = coco.loadAnns(ann_ids) 58 | if len(ann_info) == 0: 59 | gt_bboxes.append(np.zeros((0, 4))) 60 | continue 61 | bboxes = [] 62 | for ann in ann_info: 63 | if ann.get('ignore', False) or ann['iscrowd']: 64 | continue 65 | x1, y1, w, h = ann['bbox'] 66 | bboxes.append([x1, y1, x1 + w - 1, y1 + h - 1]) 67 | bboxes = np.array(bboxes, dtype=np.float32) 68 | if bboxes.shape[0] == 0: 69 | bboxes = np.zeros((0, 4)) 70 | gt_bboxes.append(bboxes) 71 | 72 | recalls = eval_recalls( 73 | gt_bboxes, results, max_dets, iou_thrs, print_summary=False) 74 | ar = recalls.mean(axis=1) 75 | return ar 76 | 77 | 78 | def xyxy2xywh(bbox): 79 | _bbox = bbox.tolist() 80 | return [ 81 | _bbox[0], 82 | _bbox[1], 83 | _bbox[2] - _bbox[0] + 1, 84 | _bbox[3] - _bbox[1] + 1, 85 | ] 86 | 87 | 88 | def proposal2json(dataset, results): 89 | json_results = [] 90 | for idx in range(len(dataset)): 91 | img_id = dataset.img_ids[idx] 92 | bboxes = results[idx] 93 | for i in range(bboxes.shape[0]): 94 | data = dict() 95 | data['image_id'] = img_id 96 | data['bbox'] = xyxy2xywh(bboxes[i]) 97 | data['score'] = float(bboxes[i][4]) 98 | data['category_id'] = 1 99 | json_results.append(data) 100 | return json_results 101 | 102 | 103 | def det2json(dataset, results): 104 | json_results = [] 105 | for idx in range(len(dataset)): 106 | img_id = dataset.img_ids[idx] 107 | result = results[idx] 108 | for label in range(len(result)): 109 | bboxes = result[label] 110 | for i in range(bboxes.shape[0]): 111 | data = dict() 112 | data['image_id'] = img_id 113 | data['bbox'] = xyxy2xywh(bboxes[i]) 114 | data['score'] = float(bboxes[i][4]) 115 | data['category_id'] = dataset.cat_ids[label] 116 | json_results.append(data) 117 | return json_results 118 | 119 | 120 | def segm2json(dataset, results): 121 | json_results = [] 122 | for idx in range(len(dataset)): 123 | img_id = dataset.img_ids[idx] 124 | det, seg = results[idx] 125 | for label in range(len(det)): 126 | bboxes = det[label] 127 | segms = seg[label] 128 | for i in range(bboxes.shape[0]): 129 | data = dict() 130 | data['image_id'] = img_id 131 | data['bbox'] = xyxy2xywh(bboxes[i]) 132 | data['score'] = float(bboxes[i][4]) 133 | data['category_id'] = dataset.cat_ids[label] 134 | segms[i]['counts'] = segms[i]['counts'].decode() 135 | data['segmentation'] = segms[i] 136 | json_results.append(data) 137 | return json_results 138 | 139 | 140 | def results2json(dataset, results, out_file): 141 | if isinstance(results[0], list): 142 | json_results = det2json(dataset, results) 143 | elif isinstance(results[0], tuple): 144 | json_results = segm2json(dataset, results) 145 | elif isinstance(results[0], np.ndarray): 146 | json_results = proposal2json(dataset, results) 147 | else: 148 | raise TypeError('invalid type of results') 149 | mmcv.dump(json_results, out_file) 150 | -------------------------------------------------------------------------------- /mmdet/core/evaluation/recall.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from terminaltables import AsciiTable 3 | 4 | from .bbox_overlaps import bbox_overlaps 5 | 6 | 7 | def _recalls(all_ious, proposal_nums, thrs): 8 | 9 | img_num = all_ious.shape[0] 10 | total_gt_num = sum([ious.shape[0] for ious in all_ious]) 11 | 12 | _ious = np.zeros((proposal_nums.size, total_gt_num), dtype=np.float32) 13 | for k, proposal_num in enumerate(proposal_nums): 14 | tmp_ious = np.zeros(0) 15 | for i in range(img_num): 16 | ious = all_ious[i][:, :proposal_num].copy() 17 | gt_ious = np.zeros((ious.shape[0])) 18 | if ious.size == 0: 19 | tmp_ious = np.hstack((tmp_ious, gt_ious)) 20 | continue 21 | for j in range(ious.shape[0]): 22 | gt_max_overlaps = ious.argmax(axis=1) 23 | max_ious = ious[np.arange(0, ious.shape[0]), gt_max_overlaps] 24 | gt_idx = max_ious.argmax() 25 | gt_ious[j] = max_ious[gt_idx] 26 | box_idx = gt_max_overlaps[gt_idx] 27 | ious[gt_idx, :] = -1 28 | ious[:, box_idx] = -1 29 | tmp_ious = np.hstack((tmp_ious, gt_ious)) 30 | _ious[k, :] = tmp_ious 31 | 32 | _ious = np.fliplr(np.sort(_ious, axis=1)) 33 | recalls = np.zeros((proposal_nums.size, thrs.size)) 34 | for i, thr in enumerate(thrs): 35 | recalls[:, i] = (_ious >= thr).sum(axis=1) / float(total_gt_num) 36 | 37 | return recalls 38 | 39 | 40 | def set_recall_param(proposal_nums, iou_thrs): 41 | """Check proposal_nums and iou_thrs and set correct format. 42 | """ 43 | if isinstance(proposal_nums, list): 44 | _proposal_nums = np.array(proposal_nums) 45 | elif isinstance(proposal_nums, int): 46 | _proposal_nums = np.array([proposal_nums]) 47 | else: 48 | _proposal_nums = proposal_nums 49 | 50 | if iou_thrs is None: 51 | _iou_thrs = np.array([0.5]) 52 | elif isinstance(iou_thrs, list): 53 | _iou_thrs = np.array(iou_thrs) 54 | elif isinstance(iou_thrs, float): 55 | _iou_thrs = np.array([iou_thrs]) 56 | else: 57 | _iou_thrs = iou_thrs 58 | 59 | return _proposal_nums, _iou_thrs 60 | 61 | 62 | def eval_recalls(gts, 63 | proposals, 64 | proposal_nums=None, 65 | iou_thrs=None, 66 | print_summary=True): 67 | """Calculate recalls. 68 | 69 | Args: 70 | gts(list or ndarray): a list of arrays of shape (n, 4) 71 | proposals(list or ndarray): a list of arrays of shape (k, 4) or (k, 5) 72 | proposal_nums(int or list of int or ndarray): top N proposals 73 | thrs(float or list or ndarray): iou thresholds 74 | 75 | Returns: 76 | ndarray: recalls of different ious and proposal nums 77 | """ 78 | 79 | img_num = len(gts) 80 | assert img_num == len(proposals) 81 | 82 | proposal_nums, iou_thrs = set_recall_param(proposal_nums, iou_thrs) 83 | 84 | all_ious = [] 85 | for i in range(img_num): 86 | if proposals[i].ndim == 2 and proposals[i].shape[1] == 5: 87 | scores = proposals[i][:, 4] 88 | sort_idx = np.argsort(scores)[::-1] 89 | img_proposal = proposals[i][sort_idx, :] 90 | else: 91 | img_proposal = proposals[i] 92 | prop_num = min(img_proposal.shape[0], proposal_nums[-1]) 93 | if gts[i] is None or gts[i].shape[0] == 0: 94 | ious = np.zeros((0, img_proposal.shape[0]), dtype=np.float32) 95 | else: 96 | ious = bbox_overlaps(gts[i], img_proposal[:prop_num, :4]) 97 | all_ious.append(ious) 98 | all_ious = np.array(all_ious) 99 | recalls = _recalls(all_ious, proposal_nums, iou_thrs) 100 | if print_summary: 101 | print_recall_summary(recalls, proposal_nums, iou_thrs) 102 | return recalls 103 | 104 | 105 | def print_recall_summary(recalls, 106 | proposal_nums, 107 | iou_thrs, 108 | row_idxs=None, 109 | col_idxs=None): 110 | """Print recalls in a table. 111 | 112 | Args: 113 | recalls(ndarray): calculated from `bbox_recalls` 114 | proposal_nums(ndarray or list): top N proposals 115 | iou_thrs(ndarray or list): iou thresholds 116 | row_idxs(ndarray): which rows(proposal nums) to print 117 | col_idxs(ndarray): which cols(iou thresholds) to print 118 | """ 119 | proposal_nums = np.array(proposal_nums, dtype=np.int32) 120 | iou_thrs = np.array(iou_thrs) 121 | if row_idxs is None: 122 | row_idxs = np.arange(proposal_nums.size) 123 | if col_idxs is None: 124 | col_idxs = np.arange(iou_thrs.size) 125 | row_header = [''] + iou_thrs[col_idxs].tolist() 126 | table_data = [row_header] 127 | for i, num in enumerate(proposal_nums[row_idxs]): 128 | row = [ 129 | '{:.3f}'.format(val) 130 | for val in recalls[row_idxs[i], col_idxs].tolist() 131 | ] 132 | row.insert(0, num) 133 | table_data.append(row) 134 | table = AsciiTable(table_data) 135 | print(table.table) 136 | 137 | 138 | def plot_num_recall(recalls, proposal_nums): 139 | """Plot Proposal_num-Recalls curve. 140 | 141 | Args: 142 | recalls(ndarray or list): shape (k,) 143 | proposal_nums(ndarray or list): same shape as `recalls` 144 | """ 145 | if isinstance(proposal_nums, np.ndarray): 146 | _proposal_nums = proposal_nums.tolist() 147 | else: 148 | _proposal_nums = proposal_nums 149 | if isinstance(recalls, np.ndarray): 150 | _recalls = recalls.tolist() 151 | else: 152 | _recalls = recalls 153 | 154 | import matplotlib.pyplot as plt 155 | f = plt.figure() 156 | plt.plot([0] + _proposal_nums, [0] + _recalls) 157 | plt.xlabel('Proposal num') 158 | plt.ylabel('Recall') 159 | plt.axis([0, proposal_nums.max(), 0, 1]) 160 | f.show() 161 | 162 | 163 | def plot_iou_recall(recalls, iou_thrs): 164 | """Plot IoU-Recalls curve. 165 | 166 | Args: 167 | recalls(ndarray or list): shape (k,) 168 | iou_thrs(ndarray or list): same shape as `recalls` 169 | """ 170 | if isinstance(iou_thrs, np.ndarray): 171 | _iou_thrs = iou_thrs.tolist() 172 | else: 173 | _iou_thrs = iou_thrs 174 | if isinstance(recalls, np.ndarray): 175 | _recalls = recalls.tolist() 176 | else: 177 | _recalls = recalls 178 | 179 | import matplotlib.pyplot as plt 180 | f = plt.figure() 181 | plt.plot(_iou_thrs + [1.0], _recalls + [0.]) 182 | plt.xlabel('IoU') 183 | plt.ylabel('Recall') 184 | plt.axis([iou_thrs.min(), 1, 0, 1]) 185 | f.show() 186 | 187 | 188 | -------------------------------------------------------------------------------- /mmdet/core/loss/__init__.py: -------------------------------------------------------------------------------- 1 | from .losses import (weighted_nll_loss, weighted_cross_entropy, 2 | weighted_binary_cross_entropy, sigmoid_focal_loss, 3 | weighted_sigmoid_focal_loss, mask_cross_entropy, 4 | smooth_l1_loss, weighted_smoothl1, l1_loss, weighted_l1, accuracy) 5 | 6 | __all__ = [ 7 | 'weighted_nll_loss', 'weighted_cross_entropy', 8 | 'weighted_binary_cross_entropy', 'sigmoid_focal_loss', 9 | 'weighted_sigmoid_focal_loss', 'mask_cross_entropy', 'smooth_l1_loss', 10 | 'weighted_smoothl1', 'l1_loss', 'weighted_l1', 'accuracy' 11 | ] 12 | -------------------------------------------------------------------------------- /mmdet/core/loss/losses.py: -------------------------------------------------------------------------------- 1 | # TODO merge naive and weighted loss. 2 | import torch 3 | import torch.nn.functional as F 4 | 5 | 6 | def weighted_nll_loss(pred, label, weight, avg_factor=None): 7 | if avg_factor is None: 8 | avg_factor = max(torch.sum(weight > 0).float().item(), 1.) 9 | raw = F.nll_loss(pred, label, reduction='none') 10 | return torch.sum(raw * weight)[None] / avg_factor 11 | 12 | 13 | def weighted_cross_entropy(pred, label, weight, avg_factor=None, reduce=True): 14 | if avg_factor is None: 15 | avg_factor = max(torch.sum(weight > 0).float().item(), 1.) 16 | raw = F.cross_entropy(pred, label, reduction='none') 17 | if reduce: 18 | return torch.sum(raw * weight)[None] / avg_factor 19 | else: 20 | return raw * weight / avg_factor 21 | 22 | 23 | def weighted_binary_cross_entropy(pred, label, weight, avg_factor=None): 24 | if avg_factor is None: 25 | avg_factor = max(torch.sum(weight > 0).float().item(), 1.) 26 | return F.binary_cross_entropy_with_logits( 27 | pred, label.float(), weight.float(), 28 | reduction='sum')[None] / avg_factor 29 | 30 | 31 | def sigmoid_focal_loss(pred, 32 | target, 33 | weight, 34 | gamma=2.0, 35 | alpha=0.25, 36 | reduction='mean'): 37 | pred_sigmoid = pred.sigmoid() 38 | target = target.type_as(pred) 39 | pt = (1 - pred_sigmoid) * target + pred_sigmoid * (1 - target) 40 | weight = (alpha * target + (1 - alpha) * (1 - target)) * weight 41 | weight = weight * pt.pow(gamma) 42 | loss = F.binary_cross_entropy_with_logits( 43 | pred, target, reduction='none') * weight 44 | reduction_enum = F._Reduction.get_enum(reduction) 45 | # none: 0, mean:1, sum: 2 46 | if reduction_enum == 0: 47 | return loss 48 | elif reduction_enum == 1: 49 | return loss.mean() 50 | elif reduction_enum == 2: 51 | return loss.sum() 52 | 53 | 54 | def weighted_sigmoid_focal_loss(pred, 55 | target, 56 | weight, 57 | gamma=2.0, 58 | alpha=0.25, 59 | avg_factor=None, 60 | num_classes=80): 61 | if avg_factor is None: 62 | avg_factor = torch.sum(weight > 0).float().item() / num_classes + 1e-6 63 | return sigmoid_focal_loss( 64 | pred, target, weight, gamma=gamma, alpha=alpha, 65 | reduction='sum')[None] / avg_factor 66 | 67 | 68 | def mask_cross_entropy(pred, target, label): 69 | num_rois = pred.size()[0] 70 | inds = torch.arange(0, num_rois, dtype=torch.long, device=pred.device) 71 | pred_slice = pred[inds, label].squeeze(1) 72 | return F.binary_cross_entropy_with_logits( 73 | pred_slice, target, reduction='mean')[None] 74 | 75 | 76 | def smooth_l1_loss(pred, target, beta=1.0, reduction='mean'): 77 | assert beta > 0 78 | assert pred.size() == target.size() and target.numel() > 0 79 | diff = torch.abs(pred - target) 80 | loss = torch.where(diff < beta, 0.5 * diff * diff / beta, 81 | diff - 0.5 * beta) 82 | reduction_enum = F._Reduction.get_enum(reduction) 83 | # none: 0, mean:1, sum: 2 84 | if reduction_enum == 0: 85 | return loss 86 | elif reduction_enum == 1: 87 | return loss.sum() / pred.numel() 88 | elif reduction_enum == 2: 89 | return loss.sum() 90 | 91 | 92 | def weighted_smoothl1(pred, target, weight, beta=1.0, avg_factor=None): 93 | if avg_factor is None: 94 | avg_factor = torch.sum(weight > 0).float().item() / 4 + 1e-6 95 | loss = smooth_l1_loss(pred, target, beta, reduction='none') 96 | return torch.sum(loss * weight)[None] / avg_factor 97 | 98 | def l1_loss(pred, target, reduction='mean'): 99 | assert pred.size() == target.size() and target.numel() > 0 100 | loss = torch.abs(pred - target) 101 | reduction_enum = F._Reduction.get_enum(reduction) 102 | # none: 0, mean:1, sum: 2 103 | if reduction_enum == 0: 104 | return loss 105 | elif reduction_enum == 1: 106 | return loss.sum() / pred.numel() 107 | elif reduction_enum == 2: 108 | return loss.sum() 109 | 110 | def weighted_l1(pred, target, weight, avg_factor=None): 111 | if avg_factor is None: 112 | avg_factor = torch.sum(weight > 0).float().item() / 4 + 1e-6 113 | loss = l1_loss(pred, target, reduction='none') 114 | return torch.sum(loss * weight)[None] / avg_factor 115 | 116 | def accuracy(pred, target, topk=1): 117 | if isinstance(topk, int): 118 | topk = (topk, ) 119 | return_single = True 120 | else: 121 | return_single = False 122 | 123 | maxk = max(topk) 124 | _, pred_label = pred.topk(maxk, 1, True, True) 125 | pred_label = pred_label.t() 126 | correct = pred_label.eq(target.view(1, -1).expand_as(pred_label)) 127 | 128 | res = [] 129 | for k in topk: 130 | correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) 131 | res.append(correct_k.mul_(100.0 / pred.size(0))) 132 | return res[0] if return_single else res -------------------------------------------------------------------------------- /mmdet/core/mask/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils import split_combined_polys 2 | from .mask_target import mask_target 3 | 4 | __all__ = ['split_combined_polys', 'mask_target'] 5 | -------------------------------------------------------------------------------- /mmdet/core/mask/mask_target.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import mmcv 4 | 5 | 6 | def mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_masks_list, 7 | cfg): 8 | cfg_list = [cfg for _ in range(len(pos_proposals_list))] 9 | mask_targets = map(mask_target_single, pos_proposals_list, 10 | pos_assigned_gt_inds_list, gt_masks_list, cfg_list) 11 | mask_targets = torch.cat(list(mask_targets)) 12 | return mask_targets 13 | 14 | 15 | def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_masks, cfg): 16 | mask_size = cfg.mask_size 17 | num_pos = pos_proposals.size(0) 18 | mask_targets = [] 19 | if num_pos > 0: 20 | proposals_np = pos_proposals.cpu().numpy() 21 | pos_assigned_gt_inds = pos_assigned_gt_inds.cpu().numpy() 22 | for i in range(num_pos): 23 | gt_mask = gt_masks[pos_assigned_gt_inds[i]] 24 | bbox = proposals_np[i, :].astype(np.int32) 25 | x1, y1, x2, y2 = bbox 26 | w = np.maximum(x2 - x1 + 1, 1) 27 | h = np.maximum(y2 - y1 + 1, 1) 28 | # mask is uint8 both before and after resizing 29 | target = mmcv.imresize(gt_mask[y1:y1 + h, x1:x1 + w], 30 | (mask_size, mask_size)) 31 | mask_targets.append(target) 32 | mask_targets = torch.from_numpy(np.stack(mask_targets)).float().to( 33 | pos_proposals.device) 34 | else: 35 | mask_targets = pos_proposals.new_zeros((0, mask_size, mask_size)) 36 | return mask_targets 37 | -------------------------------------------------------------------------------- /mmdet/core/mask/utils.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | 4 | def split_combined_polys(polys, poly_lens, polys_per_mask): 5 | """Split the combined 1-D polys into masks. 6 | 7 | A mask is represented as a list of polys, and a poly is represented as 8 | a 1-D array. In dataset, all masks are concatenated into a single 1-D 9 | tensor. Here we need to split the tensor into original representations. 10 | 11 | Args: 12 | polys (list): a list (length = image num) of 1-D tensors 13 | poly_lens (list): a list (length = image num) of poly length 14 | polys_per_mask (list): a list (length = image num) of poly number 15 | of each mask 16 | 17 | Returns: 18 | list: a list (length = image num) of list (length = mask num) of 19 | list (length = poly num) of numpy array 20 | """ 21 | mask_polys_list = [] 22 | for img_id in range(len(polys)): 23 | polys_single = polys[img_id] 24 | polys_lens_single = poly_lens[img_id].tolist() 25 | polys_per_mask_single = polys_per_mask[img_id].tolist() 26 | 27 | split_polys = mmcv.slice_list(polys_single, polys_lens_single) 28 | mask_polys = mmcv.slice_list(split_polys, polys_per_mask_single) 29 | mask_polys_list.append(mask_polys) 30 | return mask_polys_list 31 | -------------------------------------------------------------------------------- /mmdet/core/point_cloud/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/skyhehe123/SA-SSD/2d75c973af65453186bd9242d7fa5e62dc44ec03/mmdet/core/point_cloud/__init__.py -------------------------------------------------------------------------------- /mmdet/core/point_cloud/voxel_generator.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from mmdet.ops.points_op import points_to_voxel 3 | 4 | class VoxelGenerator: 5 | def __init__(self, 6 | voxel_size, 7 | point_cloud_range, 8 | max_num_points, 9 | max_voxels=20000): 10 | point_cloud_range = np.array(point_cloud_range, dtype=np.float32) 11 | # [0, -40, -3, 70.4, 40, 1] 12 | voxel_size = np.array(voxel_size, dtype=np.float32) 13 | grid_size = ( 14 | point_cloud_range[3:] - point_cloud_range[:3]) / voxel_size 15 | grid_size = np.round(grid_size).astype(np.int64) 16 | self._voxel_size = voxel_size 17 | self._point_cloud_range = point_cloud_range 18 | self._max_num_points = max_num_points 19 | self._max_voxels = max_voxels 20 | self._grid_size = grid_size 21 | 22 | def generate(self, points): 23 | return points_to_voxel( 24 | points, self._voxel_size, self._point_cloud_range, 25 | self._max_num_points, True, self._max_voxels) 26 | 27 | @property 28 | def voxel_size(self): 29 | return self._voxel_size 30 | 31 | @property 32 | def max_num_points_per_voxel(self): 33 | return self._max_num_points 34 | 35 | @property 36 | def point_cloud_range(self): 37 | return self._point_cloud_range 38 | 39 | @property 40 | def grid_size(self): 41 | return self._grid_size 42 | 43 | 44 | -------------------------------------------------------------------------------- /mmdet/core/post_processing/__init__.py: -------------------------------------------------------------------------------- 1 | from .merge_augs import (merge_aug_proposals, merge_aug_bboxes, 2 | merge_aug_scores, merge_aug_masks) 3 | from .rotate_nms_gpu import rotate_nms_gpu 4 | __all__ = [ 5 | 'merge_aug_proposals', 'merge_aug_bboxes', 6 | 'merge_aug_scores', 'merge_aug_masks','rotate_nms_gpu' 7 | ] 8 | -------------------------------------------------------------------------------- /mmdet/core/post_processing/bbox_nms.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from mmdet.ops.iou3d.iou3d_utils import nms_gpu 3 | 4 | def rotate_nms_torch(rbboxes, 5 | scores, 6 | pre_max_size=None, 7 | post_max_size=None, 8 | iou_threshold=0.5): 9 | if pre_max_size is not None: 10 | num_keeped_scores = scores.shape[0] 11 | pre_max_size = min(num_keeped_scores, pre_max_size) 12 | scores, indices = torch.topk(scores, k=pre_max_size) 13 | rbboxes = rbboxes[indices] 14 | 15 | if len(rbboxes) == 0: 16 | keep = torch.empty((0,), dtype=torch.int64) 17 | else: 18 | ret = nms_gpu(rbboxes, scores, iou_threshold) 19 | keep = ret[:post_max_size] 20 | 21 | if keep.shape[0] == 0: 22 | return None 23 | 24 | if pre_max_size is not None: 25 | return indices[keep] 26 | else: 27 | return keep -------------------------------------------------------------------------------- /mmdet/core/post_processing/merge_augs.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | import numpy as np 4 | from mmdet.core.bbox.transforms import bbox_mapping_back 5 | 6 | 7 | def merge_aug_proposals(aug_proposals, img_metas, rpn_test_cfg): 8 | """Merge augmented proposals (multiscale, flip, etc.) 9 | 10 | Args: 11 | aug_proposals (list[Tensor]): proposals from different testing 12 | schemes, shape (n, 5). Note that they are not rescaled to the 13 | original image size. 14 | img_metas (list[dict]): image info including "shape_scale" and "flip". 15 | rpn_test_cfg (dict): rpn test config. 16 | 17 | Returns: 18 | Tensor: shape (n, 4), proposals corresponding to original image scale. 19 | """ 20 | recovered_proposals = [] 21 | for proposals, img_info in zip(aug_proposals, img_metas): 22 | img_shape = img_info['img_shape'] 23 | scale_factor = img_info['scale_factor'] 24 | flip = img_info['flip'] 25 | _proposals = proposals.clone() 26 | _proposals[:, :4] = bbox_mapping_back(_proposals[:, :4], img_shape, 27 | scale_factor, flip) 28 | recovered_proposals.append(_proposals) 29 | aug_proposals = torch.cat(recovered_proposals, dim=0) 30 | merged_proposals, _ = nms(aug_proposals, rpn_test_cfg.nms_thr) 31 | scores = merged_proposals[:, 4] 32 | _, order = scores.sort(0, descending=True) 33 | num = min(rpn_test_cfg.max_num, merged_proposals.shape[0]) 34 | order = order[:num] 35 | merged_proposals = merged_proposals[order, :] 36 | return merged_proposals 37 | 38 | 39 | def merge_aug_bboxes(aug_bboxes, aug_scores, img_metas, rcnn_test_cfg): 40 | """Merge augmented detection bboxes and scores. 41 | 42 | Args: 43 | aug_bboxes (list[Tensor]): shape (n, 4*#class) 44 | aug_scores (list[Tensor] or None): shape (n, #class) 45 | img_shapes (list[Tensor]): shape (3, ). 46 | rcnn_test_cfg (dict): rcnn test config. 47 | 48 | Returns: 49 | tuple: (bboxes, scores) 50 | """ 51 | recovered_bboxes = [] 52 | for bboxes, img_info in zip(aug_bboxes, img_metas): 53 | img_shape = img_info[0]['img_shape'] 54 | scale_factor = img_info[0]['scale_factor'] 55 | flip = img_info[0]['flip'] 56 | bboxes = bbox_mapping_back(bboxes, img_shape, scale_factor, flip) 57 | recovered_bboxes.append(bboxes) 58 | bboxes = torch.stack(recovered_bboxes).mean(dim=0) 59 | if aug_scores is None: 60 | return bboxes 61 | else: 62 | scores = torch.stack(aug_scores).mean(dim=0) 63 | return bboxes, scores 64 | 65 | 66 | def merge_aug_scores(aug_scores): 67 | """Merge augmented bbox scores.""" 68 | if isinstance(aug_scores[0], torch.Tensor): 69 | return torch.mean(torch.stack(aug_scores), dim=0) 70 | else: 71 | return np.mean(aug_scores, axis=0) 72 | 73 | 74 | def merge_aug_masks(aug_masks, img_metas, rcnn_test_cfg, weights=None): 75 | """Merge augmented mask prediction. 76 | 77 | Args: 78 | aug_masks (list[ndarray]): shape (n, #class, h, w) 79 | img_shapes (list[ndarray]): shape (3, ). 80 | rcnn_test_cfg (dict): rcnn test config. 81 | 82 | Returns: 83 | tuple: (bboxes, scores) 84 | """ 85 | recovered_masks = [ 86 | mask if not img_info[0]['flip'] else mask[..., ::-1] 87 | for mask, img_info in zip(aug_masks, img_metas) 88 | ] 89 | if weights is None: 90 | merged_masks = np.mean(recovered_masks, axis=0) 91 | else: 92 | merged_masks = np.average( 93 | np.array(recovered_masks), axis=0, weights=np.array(weights)) 94 | return merged_masks 95 | -------------------------------------------------------------------------------- /mmdet/core/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .dist_utils import allreduce_grads, DistOptimizerHook 2 | from .misc import tensor2imgs, unmap, multi_apply 3 | 4 | __all__ = [ 5 | 'allreduce_grads', 'DistOptimizerHook', 'tensor2imgs', 'unmap', 6 | 'multi_apply' 7 | ] 8 | -------------------------------------------------------------------------------- /mmdet/core/utils/dist_utils.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import torch.distributed as dist 4 | from torch._utils import (_flatten_dense_tensors, _unflatten_dense_tensors, 5 | _take_tensors) 6 | from mmcv.runner import OptimizerHook 7 | 8 | 9 | def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1): 10 | if bucket_size_mb > 0: 11 | bucket_size_bytes = bucket_size_mb * 1024 * 1024 12 | buckets = _take_tensors(tensors, bucket_size_bytes) 13 | else: 14 | buckets = OrderedDict() 15 | for tensor in tensors: 16 | tp = tensor.type() 17 | if tp not in buckets: 18 | buckets[tp] = [] 19 | buckets[tp].append(tensor) 20 | buckets = buckets.values() 21 | 22 | for bucket in buckets: 23 | flat_tensors = _flatten_dense_tensors(bucket) 24 | dist.all_reduce(flat_tensors) 25 | flat_tensors.div_(world_size) 26 | for tensor, synced in zip( 27 | bucket, _unflatten_dense_tensors(flat_tensors, bucket)): 28 | tensor.copy_(synced) 29 | 30 | 31 | def allreduce_grads(model, coalesce=True, bucket_size_mb=-1): 32 | grads = [ 33 | param.grad.data for param in model.parameters() 34 | if param.requires_grad and param.grad is not None 35 | ] 36 | world_size = dist.get_world_size() 37 | if coalesce: 38 | _allreduce_coalesced(grads, world_size, bucket_size_mb) 39 | else: 40 | for tensor in grads: 41 | dist.all_reduce(tensor.div_(world_size)) 42 | 43 | 44 | class DistOptimizerHook(OptimizerHook): 45 | 46 | def __init__(self, grad_clip=None, coalesce=True, bucket_size_mb=-1): 47 | self.grad_clip = grad_clip 48 | self.coalesce = coalesce 49 | self.bucket_size_mb = bucket_size_mb 50 | 51 | def after_train_iter(self, runner): 52 | runner.optimizer.zero_grad() 53 | runner.outputs['loss'].backward() 54 | allreduce_grads(runner.model, self.coalesce, self.bucket_size_mb) 55 | if self.grad_clip is not None: 56 | self.clip_grads(runner.model.parameters()) 57 | runner.optimizer.step() 58 | -------------------------------------------------------------------------------- /mmdet/core/utils/misc.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | import mmcv 3 | import numpy as np 4 | from six.moves import map, zip 5 | import time 6 | import torch 7 | 8 | def tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True): 9 | num_imgs = tensor.size(0) 10 | mean = np.array(mean, dtype=np.float32) 11 | std = np.array(std, dtype=np.float32) 12 | imgs = [] 13 | for img_id in range(num_imgs): 14 | img = tensor[img_id, ...].cpu().numpy().transpose(1, 2, 0) 15 | img = mmcv.imdenormalize( 16 | img, mean, std, to_bgr=to_rgb).astype(np.uint8) 17 | imgs.append(np.ascontiguousarray(img)) 18 | return imgs 19 | 20 | 21 | def multi_apply(func, *args, **kwargs): 22 | pfunc = partial(func, **kwargs) if kwargs else func 23 | map_results = map(pfunc, *args) 24 | return tuple(map(list, zip(*map_results))) 25 | 26 | 27 | def unmap(data, count, inds, fill=0): 28 | """ Unmap a subset of item (data) back to the original set of items (of 29 | size count) """ 30 | if data.dim() == 1: 31 | ret = data.new_full((count, ), fill) 32 | ret[inds] = data 33 | else: 34 | new_size = (count, ) + data.size()[1:] 35 | ret = data.new_full(new_size, fill) 36 | ret[inds, :] = data 37 | return ret 38 | 39 | class TimeCatcher(object): 40 | def __init__(self, show=True, cuda=True): 41 | self.show=show 42 | self.cuda = cuda 43 | 44 | def __enter__(self): 45 | if self.cuda: 46 | torch.cuda.synchronize() 47 | self.start = time.time() 48 | return self 49 | 50 | def __exit__(self, type, value, traceback): 51 | if self.cuda: 52 | torch.cuda.synchronize() 53 | self.end = time.time() 54 | ms = (self.end - self.start) * 1000 55 | if self.show: 56 | print("%.5f ms" % ms) -------------------------------------------------------------------------------- /mmdet/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .custom import CustomDataset 2 | from .coco import CocoDataset 3 | from .loader import GroupSampler, DistributedGroupSampler, build_dataloader 4 | from .utils import to_tensor, random_scale, show_ann, get_dataset 5 | from .concat_dataset import ConcatDataset 6 | from .kitti import KittiLiDAR, KittiVideo 7 | from .voc import VOCDataset 8 | __all__ = [ 9 | 'CustomDataset', 'CocoDataset', 'GroupSampler', 'DistributedGroupSampler', 10 | 'ConcatDataset', 'build_dataloader', 'to_tensor', 'random_scale', 11 | 'show_ann', 'get_dataset', 'KittiLiDAR','KittiVideo', 'VOCDataset' 12 | ] 13 | -------------------------------------------------------------------------------- /mmdet/datasets/coco.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from pycocotools.coco import COCO 3 | 4 | from .custom import CustomDataset 5 | 6 | 7 | class CocoDataset(CustomDataset): 8 | 9 | def load_annotations(self, ann_file): 10 | self.coco = COCO(ann_file) 11 | self.cat_ids = self.coco.getCatIds() 12 | self.cat2label = { 13 | cat_id: i + 1 14 | for i, cat_id in enumerate(self.cat_ids) 15 | } 16 | self.img_ids = self.coco.getImgIds() 17 | img_infos = [] 18 | for i in self.img_ids: 19 | info = self.coco.loadImgs([i])[0] 20 | info['filename'] = info['file_name'] 21 | img_infos.append(info) 22 | return img_infos 23 | 24 | def get_ann_info(self, idx): 25 | img_id = self.img_infos[idx]['id'] 26 | ann_ids = self.coco.getAnnIds(imgIds=[img_id]) 27 | ann_info = self.coco.loadAnns(ann_ids) 28 | return self._parse_ann_info(ann_info) 29 | 30 | def _filter_imgs(self, min_size=32): 31 | """Filter images too small or without ground truths.""" 32 | valid_inds = [] 33 | ids_with_ann = set(_['image_id'] for _ in self.coco.anns.values()) 34 | for i, img_info in enumerate(self.img_infos): 35 | if self.img_ids[i] not in ids_with_ann: 36 | continue 37 | if min(img_info['width'], img_info['height']) >= min_size: 38 | valid_inds.append(i) 39 | return valid_inds 40 | 41 | def _parse_ann_info(self, ann_info, with_mask=True): 42 | """Parse bbox and mask annotation. 43 | 44 | Args: 45 | ann_info (list[dict]): Annotation info of an image. 46 | with_mask (bool): Whether to parse mask annotations. 47 | 48 | Returns: 49 | dict: A dict containing the following keys: bboxes, bboxes_ignore, 50 | labels, masks, mask_polys, poly_lens. 51 | """ 52 | gt_bboxes = [] 53 | gt_labels = [] 54 | gt_bboxes_ignore = [] 55 | # Two formats are provided. 56 | # 1. mask: a binary map of the same size of the image. 57 | # 2. polys: each mask consists of one or several polys, each poly is a 58 | # list of float. 59 | if with_mask: 60 | gt_masks = [] 61 | gt_mask_polys = [] 62 | gt_poly_lens = [] 63 | for i, ann in enumerate(ann_info): 64 | if ann.get('ignore', False): 65 | continue 66 | x1, y1, w, h = ann['bbox'] 67 | if ann['area'] <= 0 or w < 1 or h < 1: 68 | continue 69 | bbox = [x1, y1, x1 + w - 1, y1 + h - 1] 70 | if ann['iscrowd']: 71 | gt_bboxes_ignore.append(bbox) 72 | else: 73 | gt_bboxes.append(bbox) 74 | gt_labels.append(self.cat2label[ann['category_id']]) 75 | if with_mask: 76 | gt_masks.append(self.coco.annToMask(ann)) 77 | mask_polys = [ 78 | p for p in ann['segmentation'] if len(p) >= 6 79 | ] # valid polygons have >= 3 points (6 coordinates) 80 | poly_lens = [len(p) for p in mask_polys] 81 | gt_mask_polys.append(mask_polys) 82 | gt_poly_lens.extend(poly_lens) 83 | if gt_bboxes: 84 | gt_bboxes = np.array(gt_bboxes, dtype=np.float32) 85 | gt_labels = np.array(gt_labels, dtype=np.int64) 86 | else: 87 | gt_bboxes = np.zeros((0, 4), dtype=np.float32) 88 | gt_labels = np.array([], dtype=np.int64) 89 | 90 | if gt_bboxes_ignore: 91 | gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32) 92 | else: 93 | gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32) 94 | 95 | ann = dict( 96 | bboxes=gt_bboxes, labels=gt_labels, bboxes_ignore=gt_bboxes_ignore) 97 | 98 | if with_mask: 99 | ann['masks'] = gt_masks 100 | # poly format is not used in the current implementation 101 | ann['mask_polys'] = gt_mask_polys 102 | ann['poly_lens'] = gt_poly_lens 103 | return ann 104 | -------------------------------------------------------------------------------- /mmdet/datasets/concat_dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset 3 | 4 | 5 | class ConcatDataset(_ConcatDataset): 6 | """ 7 | Same as torch.utils.data.dataset.ConcatDataset, but 8 | concat the group flag for image aspect ratio. 9 | """ 10 | def __init__(self, datasets): 11 | """ 12 | flag: Images with aspect ratio greater than 1 will be set as group 1, 13 | otherwise group 0. 14 | """ 15 | super(ConcatDataset, self).__init__(datasets) 16 | if hasattr(datasets[0], 'flag'): 17 | flags = [] 18 | for i in range(0, len(datasets)): 19 | flags.append(datasets[i].flag) 20 | self.flag = np.concatenate(flags) 21 | -------------------------------------------------------------------------------- /mmdet/datasets/loader/__init__.py: -------------------------------------------------------------------------------- 1 | from .build_loader import build_dataloader 2 | from .sampler import GroupSampler, DistributedGroupSampler 3 | 4 | __all__ = [ 5 | 'GroupSampler', 'DistributedGroupSampler', 'build_dataloader' 6 | ] 7 | -------------------------------------------------------------------------------- /mmdet/datasets/loader/build_loader.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | 3 | from mmcv.runner import get_dist_info 4 | from mmcv.parallel import collate 5 | from torch.utils.data import DataLoader 6 | from .sampler import GroupSampler, DistributedGroupSampler 7 | 8 | # https://github.com/pytorch/pytorch/issues/973 9 | import resource 10 | rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) 11 | resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1])) 12 | 13 | 14 | def build_dataloader(dataset, 15 | imgs_per_gpu, 16 | workers_per_gpu, 17 | num_gpus=1, 18 | dist=True, 19 | **kwargs): 20 | if dist: 21 | rank, world_size = get_dist_info() 22 | sampler = DistributedGroupSampler(dataset, imgs_per_gpu, world_size, 23 | rank) 24 | batch_size = imgs_per_gpu 25 | num_workers = workers_per_gpu 26 | else: 27 | if not kwargs.get('shuffle', True): 28 | sampler = None 29 | else: 30 | sampler = GroupSampler(dataset, imgs_per_gpu) 31 | batch_size = num_gpus * imgs_per_gpu 32 | num_workers = num_gpus * workers_per_gpu 33 | 34 | data_loader = DataLoader( 35 | dataset, 36 | batch_size=batch_size, 37 | sampler=sampler, 38 | num_workers=num_workers, 39 | collate_fn=partial(collate, samples_per_gpu=imgs_per_gpu), 40 | pin_memory=False, 41 | **kwargs) 42 | 43 | return data_loader 44 | -------------------------------------------------------------------------------- /mmdet/datasets/loader/sampler.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import math 4 | import torch 5 | import numpy as np 6 | 7 | from torch.distributed import get_world_size, get_rank 8 | from torch.utils.data.sampler import Sampler 9 | 10 | 11 | class GroupSampler(Sampler): 12 | 13 | def __init__(self, dataset, samples_per_gpu=1): 14 | assert hasattr(dataset, 'flag') 15 | self.dataset = dataset 16 | self.samples_per_gpu = samples_per_gpu 17 | self.flag = dataset.flag.astype(np.int64) 18 | self.group_sizes = np.bincount(self.flag) 19 | self.num_samples = 0 20 | for i, size in enumerate(self.group_sizes): 21 | self.num_samples += int(np.ceil( 22 | size / self.samples_per_gpu)) * self.samples_per_gpu 23 | 24 | def __iter__(self): 25 | indices = [] 26 | for i, size in enumerate(self.group_sizes): 27 | if size == 0: 28 | continue 29 | indice = np.where(self.flag == i)[0] 30 | assert len(indice) == size 31 | np.random.shuffle(indice) 32 | num_extra = int(np.ceil(size / self.samples_per_gpu) 33 | ) * self.samples_per_gpu - len(indice) 34 | indice = np.concatenate([indice, indice[:num_extra]]) 35 | indices.append(indice) 36 | indices = np.concatenate(indices) 37 | indices = [ 38 | indices[i * self.samples_per_gpu:(i + 1) * self.samples_per_gpu] 39 | for i in np.random.permutation( 40 | range(len(indices) // self.samples_per_gpu)) 41 | ] 42 | indices = np.concatenate(indices) 43 | indices = torch.from_numpy(indices).long() 44 | assert len(indices) == self.num_samples 45 | return iter(indices) 46 | 47 | def __len__(self): 48 | return self.num_samples 49 | 50 | 51 | class DistributedGroupSampler(Sampler): 52 | """Sampler that restricts data loading to a subset of the dataset. 53 | It is especially useful in conjunction with 54 | :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each 55 | process can pass a DistributedSampler instance as a DataLoader sampler, 56 | and load a subset of the original dataset that is exclusive to it. 57 | .. note:: 58 | Dataset is assumed to be of constant size. 59 | Arguments: 60 | dataset: Dataset used for sampling. 61 | num_replicas (optional): Number of processes participating in 62 | distributed training. 63 | rank (optional): Rank of the current process within num_replicas. 64 | """ 65 | 66 | def __init__(self, 67 | dataset, 68 | samples_per_gpu=1, 69 | num_replicas=None, 70 | rank=None): 71 | if num_replicas is None: 72 | num_replicas = get_world_size() 73 | if rank is None: 74 | rank = get_rank() 75 | self.dataset = dataset 76 | self.samples_per_gpu = samples_per_gpu 77 | self.num_replicas = num_replicas 78 | self.rank = rank 79 | self.epoch = 0 80 | 81 | assert hasattr(self.dataset, 'flag') 82 | self.flag = self.dataset.flag 83 | self.group_sizes = np.bincount(self.flag) 84 | 85 | self.num_samples = 0 86 | for i, j in enumerate(self.group_sizes): 87 | self.num_samples += int( 88 | math.ceil(self.group_sizes[i] * 1.0 / self.samples_per_gpu / 89 | self.num_replicas)) * self.samples_per_gpu 90 | self.total_size = self.num_samples * self.num_replicas 91 | 92 | def __iter__(self): 93 | # deterministically shuffle based on epoch 94 | g = torch.Generator() 95 | g.manual_seed(self.epoch) 96 | 97 | indices = [] 98 | for i, size in enumerate(self.group_sizes): 99 | if size > 0: 100 | indice = np.where(self.flag == i)[0] 101 | assert len(indice) == size 102 | indice = indice[list(torch.randperm(int(size), 103 | generator=g))].tolist() 104 | extra = int( 105 | math.ceil( 106 | size * 1.0 / self.samples_per_gpu / self.num_replicas) 107 | ) * self.samples_per_gpu * self.num_replicas - len(indice) 108 | indice += indice[:extra] 109 | indices += indice 110 | 111 | assert len(indices) == self.total_size 112 | 113 | indices = [ 114 | indices[j] for i in list( 115 | torch.randperm( 116 | len(indices) // self.samples_per_gpu, generator=g)) 117 | for j in range(i * self.samples_per_gpu, (i + 1) * 118 | self.samples_per_gpu) 119 | ] 120 | 121 | # subsample 122 | offset = self.num_samples * self.rank 123 | indices = indices[offset:offset + self.num_samples] 124 | assert len(indices) == self.num_samples 125 | 126 | return iter(indices) 127 | 128 | def __len__(self): 129 | return self.num_samples 130 | 131 | def set_epoch(self, epoch): 132 | self.epoch = epoch 133 | -------------------------------------------------------------------------------- /mmdet/datasets/transforms.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | import numpy as np 3 | import torch 4 | 5 | __all__ = ['ImageTransform', 'BboxTransform', 'MaskTransform', 'Numpy2Tensor'] 6 | 7 | 8 | class ImageTransform(object): 9 | """Preprocess an image. 10 | 11 | 1. rescale the image to expected size 12 | 2. normalize the image 13 | 3. flip the image (if needed) 14 | 4. pad the image (if needed) 15 | 5. transpose to (c, h, w) 16 | """ 17 | 18 | def __init__(self, 19 | mean=(0, 0, 0), 20 | std=(1, 1, 1), 21 | to_rgb=True, 22 | size_divisor=None): 23 | self.mean = np.array(mean, dtype=np.float32) 24 | self.std = np.array(std, dtype=np.float32) 25 | self.to_rgb = to_rgb 26 | self.size_divisor = size_divisor 27 | 28 | def __call__(self, img, scale, flip=False): 29 | img, scale_factor = mmcv.imrescale(img, scale, return_scale=True) 30 | img_shape = img.shape 31 | img = mmcv.imnormalize(img, self.mean, self.std, self.to_rgb) 32 | if flip: 33 | img = mmcv.imflip(img) 34 | if self.size_divisor is not None: 35 | img = mmcv.impad_to_multiple(img, self.size_divisor) 36 | pad_shape = img.shape 37 | else: 38 | pad_shape = img_shape 39 | img = img.transpose(2, 0, 1) 40 | return img, img_shape, pad_shape, scale_factor 41 | 42 | 43 | def bbox_flip(bboxes, img_shape): 44 | """Flip bboxes horizontally. 45 | 46 | Args: 47 | bboxes(ndarray): shape (..., 4*k) 48 | img_shape(tuple): (height, width) 49 | """ 50 | assert bboxes.shape[-1] % 4 == 0 51 | w = img_shape[1] 52 | flipped = bboxes.copy() 53 | flipped[..., 0::4] = w - bboxes[..., 2::4] - 1 54 | flipped[..., 2::4] = w - bboxes[..., 0::4] - 1 55 | return flipped 56 | 57 | 58 | class BboxTransform(object): 59 | """Preprocess gt bboxes. 60 | 61 | 1. rescale bboxes according to image size 62 | 2. flip bboxes (if needed) 63 | 3. pad the first dimension to `max_num_gts` 64 | """ 65 | 66 | def __init__(self, max_num_gts=None): 67 | self.max_num_gts = max_num_gts 68 | 69 | def __call__(self, bboxes, img_shape, scale_factor, flip=False): 70 | gt_bboxes = bboxes * scale_factor 71 | if flip: 72 | gt_bboxes = bbox_flip(gt_bboxes, img_shape) 73 | gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_shape[1]) 74 | gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_shape[0]) 75 | if self.max_num_gts is None: 76 | return gt_bboxes 77 | else: 78 | num_gts = gt_bboxes.shape[0] 79 | padded_bboxes = np.zeros((self.max_num_gts, 4), dtype=np.float32) 80 | padded_bboxes[:num_gts, :] = gt_bboxes 81 | return padded_bboxes 82 | 83 | 84 | class MaskTransform(object): 85 | """Preprocess masks. 86 | 87 | 1. resize masks to expected size and stack to a single array 88 | 2. flip the masks (if needed) 89 | 3. pad the masks (if needed) 90 | """ 91 | 92 | def __call__(self, masks, pad_shape, scale_factor, flip=False): 93 | masks = [ 94 | mmcv.imrescale(mask, scale_factor, interpolation='nearest') 95 | for mask in masks 96 | ] 97 | if flip: 98 | masks = [mask[:, ::-1] for mask in masks] 99 | padded_masks = [ 100 | mmcv.impad(mask, pad_shape[:2], pad_val=0) for mask in masks 101 | ] 102 | padded_masks = np.stack(padded_masks, axis=0) 103 | return padded_masks 104 | 105 | 106 | class Numpy2Tensor(object): 107 | 108 | def __init__(self): 109 | pass 110 | 111 | def __call__(self, *args): 112 | if len(args) == 1: 113 | return torch.from_numpy(args[0]) 114 | else: 115 | return tuple([torch.from_numpy(np.array(array)) for array in args]) -------------------------------------------------------------------------------- /mmdet/datasets/utils.py: -------------------------------------------------------------------------------- 1 | import copy 2 | from collections import Sequence 3 | 4 | import mmcv 5 | from mmcv.runner import obj_from_dict 6 | import torch 7 | from collections import defaultdict 8 | import matplotlib.pyplot as plt 9 | import numpy as np 10 | from .concat_dataset import ConcatDataset 11 | from .. import datasets 12 | from mmdet.core.point_cloud import voxel_generator 13 | from mmdet.core.point_cloud import point_augmentor 14 | from mmdet.core.bbox3d import bbox3d_target 15 | from mmdet.core.anchor import anchor3d_generator 16 | def to_tensor(data): 17 | """Convert objects of various python types to :obj:`torch.Tensor`. 18 | 19 | Supported types are: :class:`numpy.ndarray`, :class:`torch.Tensor`, 20 | :class:`Sequence`, :class:`int` and :class:`float`. 21 | """ 22 | if isinstance(data, torch.Tensor): 23 | return data 24 | elif isinstance(data, np.ndarray): 25 | return torch.from_numpy(data) 26 | elif isinstance(data, Sequence) and not mmcv.is_str(data): 27 | return [to_tensor(d) for d in data] 28 | elif isinstance(data, int): 29 | return torch.LongTensor([data]) 30 | elif isinstance(data, float): 31 | return torch.FloatTensor([data]) 32 | elif data is None: 33 | return data 34 | else: 35 | raise TypeError('type {} cannot be converted to tensor.'.format( 36 | type(data))) 37 | 38 | 39 | def random_scale(img_scales, mode='range'): 40 | """Randomly select a scale from a list of scales or scale ranges. 41 | 42 | Args: 43 | img_scales (list[tuple]): Image scale or scale range. 44 | mode (str): "range" or "value". 45 | 46 | Returns: 47 | tuple: Sampled image scale. 48 | """ 49 | num_scales = len(img_scales) 50 | if num_scales == 1: # fixed scale is specified 51 | img_scale = img_scales[0] 52 | elif num_scales == 2: # randomly sample a scale 53 | if mode == 'range': 54 | img_scale_long = [max(s) for s in img_scales] 55 | img_scale_short = [min(s) for s in img_scales] 56 | long_edge = np.random.randint( 57 | min(img_scale_long), 58 | max(img_scale_long) + 1) 59 | short_edge = np.random.randint( 60 | min(img_scale_short), 61 | max(img_scale_short) + 1) 62 | img_scale = (long_edge, short_edge) 63 | elif mode == 'value': 64 | img_scale = img_scales[np.random.randint(num_scales)] 65 | else: 66 | if mode != 'value': 67 | raise ValueError( 68 | 'Only "value" mode supports more than 2 image scales') 69 | img_scale = img_scales[np.random.randint(num_scales)] 70 | return img_scale 71 | 72 | 73 | def show_ann(coco, img, ann_info): 74 | plt.imshow(mmcv.bgr2rgb(img)) 75 | plt.axis('off') 76 | coco.showAnns(ann_info) 77 | plt.show() 78 | 79 | 80 | def get_dataset(data_cfg): 81 | 82 | if isinstance(data_cfg['ann_file'], (list, tuple)): 83 | ann_files = data_cfg['ann_file'] 84 | num_dset = len(ann_files) 85 | else: 86 | ann_files = [data_cfg['ann_file']] 87 | num_dset = 1 88 | 89 | if isinstance(data_cfg['img_prefix'], (list, tuple)): 90 | img_prefixes = data_cfg['img_prefix'] 91 | else: 92 | img_prefixes = [data_cfg['img_prefix']] * num_dset 93 | assert len(img_prefixes) == num_dset 94 | 95 | if 'generator' in data_cfg.keys() and data_cfg['generator'] is not None: 96 | generator = obj_from_dict(data_cfg['generator'], voxel_generator) 97 | else: 98 | generator = None 99 | 100 | if 'augmentor' in data_cfg.keys() and data_cfg['augmentor'] is not None: 101 | augmentor = obj_from_dict(data_cfg['augmentor'], point_augmentor) 102 | else: 103 | augmentor = None 104 | 105 | if 'anchor_generator' in data_cfg.keys() and data_cfg['anchor_generator'] is not None: 106 | anchor_generator = {cls: obj_from_dict(cfg, anchor3d_generator) for cls, cfg in data_cfg['anchor_generator'].items()} 107 | else: 108 | anchor_generator = None 109 | 110 | dsets = [] 111 | for i in range(num_dset): 112 | data_info = copy.deepcopy(data_cfg) 113 | data_info['ann_file'] = ann_files[i] 114 | data_info['img_prefix'] = img_prefixes[i] 115 | if generator is not None: 116 | data_info['generator'] = generator 117 | if anchor_generator is not None: 118 | data_info['anchor_generator'] = anchor_generator 119 | if augmentor is not None: 120 | data_info['augmentor'] = augmentor 121 | dset = obj_from_dict(data_info, datasets) 122 | dsets.append(dset) 123 | if len(dsets) > 1: 124 | dset = ConcatDataset(dsets) 125 | else: 126 | dset = dsets[0] 127 | return dset 128 | 129 | # def example_convert_to_torch(example, device=None) -> dict: 130 | # example_torch = {} 131 | # torch_names = [ 132 | # 'img', 'voxels','coordinates',\ 133 | # # 'anchors_mask','anchors',\ 134 | # #'gt_labels','gt_bboxes','gt_bboxes_ignore',\ 135 | # 'num_points', 'right', 'grid' 136 | # ] 137 | # for k, v in example.items(): 138 | # if k in torch_names: 139 | # example_torch[k] = to_tensor(v) 140 | # else: 141 | # example_torch[k] = v 142 | # 143 | # return example_torch 144 | 145 | # def merge_second_batch(batch_list, samples_per_gpu=1, to_torch=True): 146 | # example_merged = defaultdict(list) 147 | # for example in batch_list: 148 | # for k, v in example.items(): 149 | # example_merged[k].append(v) 150 | # ret = {} 151 | # 152 | # for key, elems in example_merged.items(): 153 | # if key in [ 154 | # 'voxels', 'num_points', 155 | # ]: 156 | # ret[key] = np.concatenate(elems, axis=0) 157 | # elif key == 'coordinates': 158 | # coors = [] 159 | # for i, coor in enumerate(elems): 160 | # coor_pad = np.pad( 161 | # coor, ((0, 0), (1, 0)), 162 | # mode='constant', 163 | # constant_values=i) 164 | # coors.append(coor_pad) 165 | # ret[key] = np.concatenate(coors, axis=0) 166 | # elif key in [ 167 | # 'img_meta', 'img_shape', 'calib', 'sample_idx', 'gt_labels', 'gt_bboxes','gt_bboxes_ignore' 168 | # ]: 169 | # ret[key] = elems 170 | # else: 171 | # ret[key] = np.stack(elems, axis=0) 172 | # 173 | # if to_torch: 174 | # ret = example_convert_to_torch(ret) 175 | # return ret -------------------------------------------------------------------------------- /mmdet/datasets/voc.py: -------------------------------------------------------------------------------- 1 | from .xml_style import XMLDataset 2 | 3 | 4 | class VOCDataset(XMLDataset): 5 | 6 | CLASSES = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 7 | 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 8 | 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 9 | 'tvmonitor') 10 | 11 | def __init__(self, **kwargs): 12 | super(VOCDataset, self).__init__(**kwargs) 13 | if 'VOC2007' in self.img_prefix: 14 | self.year = 2007 15 | elif 'VOC2012' in self.img_prefix: 16 | self.year = 2012 17 | else: 18 | raise ValueError('Cannot infer dataset year from img_prefix') -------------------------------------------------------------------------------- /mmdet/datasets/xml_style.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import xml.etree.ElementTree as ET 3 | 4 | import mmcv 5 | import numpy as np 6 | 7 | from .custom import CustomDataset 8 | 9 | 10 | class XMLDataset(CustomDataset): 11 | 12 | def __init__(self, **kwargs): 13 | super(XMLDataset, self).__init__(**kwargs) 14 | self.cat2label = {cat: i + 1 for i, cat in enumerate(self.CLASSES)} 15 | 16 | def load_annotations(self, ann_file): 17 | img_infos = [] 18 | img_ids = mmcv.list_from_file(ann_file) 19 | for img_id in img_ids: 20 | filename = 'JPEGImages/{}.jpg'.format(img_id) 21 | xml_path = osp.join(self.img_prefix, 'Annotations', 22 | '{}.xml'.format(img_id)) 23 | tree = ET.parse(xml_path) 24 | root = tree.getroot() 25 | size = root.find('size') 26 | width = int(size.find('width').text) 27 | height = int(size.find('height').text) 28 | img_infos.append( 29 | dict(id=img_id, filename=filename, width=width, height=height)) 30 | return img_infos 31 | 32 | def get_ann_info(self, idx): 33 | img_id = self.img_infos[idx]['id'] 34 | xml_path = osp.join(self.img_prefix, 'Annotations', 35 | '{}.xml'.format(img_id)) 36 | tree = ET.parse(xml_path) 37 | root = tree.getroot() 38 | bboxes = [] 39 | labels = [] 40 | bboxes_ignore = [] 41 | labels_ignore = [] 42 | for obj in root.findall('object'): 43 | name = obj.find('name').text 44 | label = self.cat2label[name] 45 | difficult = int(obj.find('difficult').text) 46 | bnd_box = obj.find('bndbox') 47 | bbox = [ 48 | int(bnd_box.find('xmin').text), 49 | int(bnd_box.find('ymin').text), 50 | int(bnd_box.find('xmax').text), 51 | int(bnd_box.find('ymax').text) 52 | ] 53 | if difficult: 54 | bboxes_ignore.append(bbox) 55 | labels_ignore.append(label) 56 | else: 57 | bboxes.append(bbox) 58 | labels.append(label) 59 | if not bboxes: 60 | bboxes = np.zeros((0, 4)) 61 | labels = np.zeros((0, )) 62 | else: 63 | bboxes = np.array(bboxes, ndmin=2) - 1 64 | labels = np.array(labels) 65 | if not bboxes_ignore: 66 | bboxes_ignore = np.zeros((0, 4)) 67 | labels_ignore = np.zeros((0, )) 68 | else: 69 | bboxes_ignore = np.array(bboxes_ignore, ndmin=2) - 1 70 | labels_ignore = np.array(labels_ignore) 71 | ann = dict( 72 | bboxes=bboxes.astype(np.float32), 73 | labels=labels.astype(np.int64), 74 | bboxes_ignore=bboxes_ignore.astype(np.float32), 75 | labels_ignore=labels_ignore.astype(np.int64)) 76 | return ann -------------------------------------------------------------------------------- /mmdet/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .detectors import (BaseDetector,RPN) 2 | from .builder import (build_neck, build_rpn_head, build_roi_extractor,build_backbone, 3 | build_bbox_head, build_mask_head, build_detector) 4 | 5 | __all__ = [ 6 | 'BaseDetector', 'RPN', 'build_backbone', 'build_neck', 'build_rpn_head', 7 | 'build_roi_extractor', 'build_bbox_head', 'build_mask_head', 8 | 'build_detector' 9 | ] 10 | -------------------------------------------------------------------------------- /mmdet/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .resnet import ResNet 2 | from .vxnet import * 3 | from .pillar import * 4 | __all__ = ['ResNet','VoxelFeatNet','SimpleVoxel', 'PillarFeatureNet', 'PointPillarsScatter'] 5 | -------------------------------------------------------------------------------- /mmdet/models/backbones/pillar.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.nn import functional as F 4 | from ..utils import change_default_args, Empty, get_paddings_indicator 5 | 6 | 7 | class PFNLayer(nn.Module): 8 | def __init__(self, 9 | in_channels, 10 | out_channels, 11 | use_norm=True, 12 | last_layer=False): 13 | super(PFNLayer, self).__init__() 14 | self.name = 'PFNLayer' 15 | self.last_vfe = last_layer 16 | if not self.last_vfe: 17 | out_channels = out_channels // 2 18 | self.units = out_channels 19 | 20 | if use_norm: 21 | BatchNorm1d = change_default_args(eps=1e-3, momentum=0.01)(nn.BatchNorm1d) 22 | Linear = change_default_args(bias=False)(nn.Linear) 23 | else: 24 | BatchNorm1d = Empty 25 | Linear = change_default_args(bias=True)(nn.Linear) 26 | 27 | self.linear = Linear(in_channels, self.units) 28 | self.norm = BatchNorm1d(self.units) 29 | 30 | def forward(self, inputs): 31 | 32 | x = self.linear(inputs) 33 | x = self.norm(x.permute(0, 2, 1).contiguous()).permute(0, 2, 1).contiguous() 34 | x = F.relu(x) 35 | x_max = torch.max(x, dim=1, keepdim=True)[0] 36 | 37 | if self.last_vfe: 38 | return x_max 39 | else: 40 | x_repeat = x_max.repeat(1, inputs.shape[1], 1) 41 | x_concatenated = torch.cat([x, x_repeat], dim=2) 42 | return x_concatenated 43 | 44 | 45 | class PillarFeatureNet(nn.Module): 46 | def __init__(self, 47 | num_input_features=4, 48 | use_norm=True, 49 | num_filters=(64,), 50 | with_distance=False, 51 | voxel_size=(0.2, 0.2, 4), 52 | pc_range=(0, -40, -3, 70.4, 40, 1) 53 | ): 54 | super(PillarFeatureNet, self).__init__() 55 | self.name = 'PillarFeatureNet' 56 | assert len(num_filters) > 0 57 | num_input_features += 5 58 | if with_distance: 59 | num_input_features += 1 60 | self._with_distance = with_distance 61 | 62 | # Create PillarFeatureNet layers 63 | num_filters = [num_input_features] + list(num_filters) 64 | pfn_layers = [] 65 | for i in range(len(num_filters) - 1): 66 | in_filters = num_filters[i] 67 | out_filters = num_filters[i + 1] 68 | if i < len(num_filters) - 2: 69 | last_layer = False 70 | else: 71 | last_layer = True 72 | pfn_layers.append(PFNLayer(in_filters, out_filters, use_norm, last_layer=last_layer)) 73 | self.pfn_layers = nn.ModuleList(pfn_layers) 74 | 75 | # Need pillar (voxel) size and x/y offset in order to calculate pillar offset 76 | self.vx = voxel_size[0] 77 | self.vy = voxel_size[1] 78 | self.x_offset = self.vx / 2 + pc_range[0] 79 | self.y_offset = self.vy / 2 + pc_range[1] 80 | 81 | nx = int((pc_range[3] - pc_range[0]) / self.vx) 82 | ny = int((pc_range[4] - pc_range[1]) / self.vy) 83 | self.scatter = PointPillarsScatter(nx, ny) 84 | 85 | def forward(self, features, coors, num_voxels, batch_size): 86 | 87 | # Find distance of x, y, and z from cluster center 88 | points_mean = features[:, :, :3].sum(dim=1, keepdim=True) / num_voxels.type_as(features).view(-1, 1, 1) 89 | f_cluster = features[:, :, :3] - points_mean 90 | 91 | # Find distance of x, y, and z from pillar center 92 | f_center = torch.zeros_like(features[:, :, :2]) 93 | f_center[:, :, 0] = features[:, :, 0] - (coors[:, 3].float().unsqueeze(1) * self.vx + self.x_offset) 94 | f_center[:, :, 1] = features[:, :, 1] - (coors[:, 2].float().unsqueeze(1) * self.vy + self.y_offset) 95 | 96 | # Combine together feature decorations 97 | features_ls = [features, f_cluster, f_center] 98 | if self._with_distance: 99 | points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True) 100 | features_ls.append(points_dist) 101 | features = torch.cat(features_ls, dim=-1) 102 | 103 | # The feature decorations were calculated without regard to whether pillar was empty. Need to ensure that 104 | # empty pillars remain set to zeros. 105 | voxel_count = features.shape[1] 106 | mask = get_paddings_indicator(num_voxels, voxel_count, axis=0) 107 | mask = torch.unsqueeze(mask, -1).type_as(features) 108 | features *= mask 109 | 110 | # Forward pass through PFNLayers 111 | for pfn in self.pfn_layers: 112 | features = pfn(features) 113 | 114 | return self.scatter(features.squeeze(), coors, batch_size) 115 | 116 | 117 | class PointPillarsScatter(nn.Module): 118 | def __init__(self, 119 | nx, ny, 120 | num_input_features=64): 121 | """ 122 | Point Pillar's Scatter. 123 | Converts learned features from dense tensor to sparse pseudo image. This replaces SECOND's 124 | second.pytorch.voxelnet.SparseMiddleExtractor. 125 | :param output_shape: ([int]: 4). Required output shape of features. 126 | :param num_input_features: . Number of input features. 127 | """ 128 | super(PointPillarsScatter, self).__init__() 129 | self.name = 'PointPillarsScatter' 130 | self.nx = nx 131 | self.ny = ny 132 | self.nchannels = num_input_features 133 | 134 | def forward(self, voxel_features, coords, batch_size): 135 | # batch_canvas will be the final output. 136 | batch_canvas = [] 137 | for batch_itt in range(batch_size): 138 | # Create the canvas for this sample 139 | canvas = torch.zeros(self.nchannels, self.nx * self.ny, dtype=voxel_features.dtype, 140 | device=voxel_features.device) 141 | 142 | # Only include non-empty pillars 143 | batch_mask = coords[:, 0] == batch_itt 144 | this_coords = coords[batch_mask, :] 145 | indices = this_coords[:, 2] * self.nx + this_coords[:, 3] 146 | indices = indices.type(torch.long) 147 | voxels = voxel_features[batch_mask, :] 148 | voxels = voxels.t() 149 | 150 | # Now scatter the blob back to the canvas. 151 | canvas[:, indices] = voxels 152 | 153 | # Append to a list for later stacking. 154 | batch_canvas.append(canvas) 155 | 156 | # Stack to 3-dim tensor (batch-size, nchannels, nrows*ncols) 157 | batch_canvas = torch.stack(batch_canvas, 0) 158 | 159 | # Undo the column stacking to final 4-dim tensor 160 | batch_canvas = batch_canvas.view(batch_size, self.nchannels, self.ny, self.nx) 161 | 162 | return batch_canvas 163 | -------------------------------------------------------------------------------- /mmdet/models/backbones/vxnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.nn import functional as F 4 | from ..utils import change_default_args, Empty, get_paddings_indicator 5 | 6 | 7 | class VFELayer(nn.Module): 8 | def __init__(self, in_channels, out_channels, use_norm=True, name='vfe'): 9 | super(VFELayer, self).__init__() 10 | self.name = name 11 | self.units = int(out_channels / 2) 12 | if use_norm: 13 | BatchNorm1d = change_default_args( 14 | eps=1e-3, momentum=0.01)(nn.BatchNorm1d) 15 | Linear = change_default_args(bias=False)(nn.Linear) 16 | else: 17 | BatchNorm1d = Empty 18 | Linear = change_default_args(bias=True)(nn.Linear) 19 | self.linear = Linear(in_channels, self.units) 20 | self.norm = BatchNorm1d(self.units) 21 | 22 | def forward(self, inputs): 23 | # [K, T, 7] tensordot [7, units] = [K, T, units] 24 | voxel_count = inputs.shape[1] 25 | x = self.linear(inputs) 26 | x = self.norm(x.permute(0, 2, 1).contiguous()).permute(0, 2, 27 | 1).contiguous() 28 | pointwise = F.relu(x) 29 | # [K, T, units] 30 | 31 | aggregated = torch.max(pointwise, dim=1, keepdim=True)[0] 32 | # [K, 1, units] 33 | repeated = aggregated.repeat(1, voxel_count, 1) 34 | 35 | concatenated = torch.cat([pointwise, repeated], dim=2) 36 | # [K, T, 2 * units] 37 | return concatenated 38 | 39 | class VoxelFeatNet(nn.Module): 40 | def __init__(self, 41 | num_input_features=4, 42 | use_norm=True, 43 | num_filters=[32, 128], 44 | with_distance=False, 45 | name='VoxelFeatureExtractor'): 46 | super(VoxelFeatNet, self).__init__() 47 | self.name = name 48 | if use_norm: 49 | BatchNorm1d = change_default_args( 50 | eps=1e-3, momentum=0.01)(nn.BatchNorm1d) 51 | Linear = change_default_args(bias=False)(nn.Linear) 52 | else: 53 | BatchNorm1d = Empty 54 | Linear = change_default_args(bias=True)(nn.Linear) 55 | assert len(num_filters) == 2 56 | num_input_features += 3 # add mean features 57 | if with_distance: 58 | num_input_features += 1 59 | self._with_distance = with_distance 60 | self.vfe1 = VFELayer(num_input_features, num_filters[0], use_norm) 61 | self.vfe2 = VFELayer(num_filters[0], num_filters[1], use_norm) 62 | self.linear = Linear(num_filters[1], num_filters[1]) 63 | # var_torch_init(self.linear.weight) 64 | # var_torch_init(self.linear.bias) 65 | self.norm = BatchNorm1d(num_filters[1]) 66 | 67 | def init_weights(self, pretrained=None): 68 | pass 69 | 70 | def forward(self, features, num_voxels): 71 | # features: [concated_num_points, num_voxel_size, 3(4)] 72 | # num_voxels: [concated_num_points] 73 | points_mean = features[:, :, :3].sum( 74 | dim=1, keepdim=True) / num_voxels.type_as(features).view(-1, 1, 1) 75 | features_relative = features[:, :, :3] - points_mean 76 | if self._with_distance: 77 | points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True) 78 | features = torch.cat( 79 | [features, features_relative, points_dist], dim=-1) 80 | else: 81 | features = torch.cat([features, features_relative], dim=-1) 82 | voxel_count = features.shape[1] 83 | mask = get_paddings_indicator(num_voxels, voxel_count, axis=0) 84 | mask = torch.unsqueeze(mask, -1).type_as(features) 85 | # mask = features.max(dim=2, keepdim=True)[0] != 0 86 | x = self.vfe1(features) 87 | x *= mask 88 | x = self.vfe2(x) 89 | x *= mask 90 | x = self.linear(x) 91 | x = self.norm(x.permute(0, 2, 1).contiguous()).permute(0, 2, 92 | 1).contiguous() 93 | x = F.relu(x) 94 | x *= mask 95 | # x: [concated_num_points, num_voxel_size, 128] 96 | voxelwise = torch.max(x, dim=1)[0] 97 | return voxelwise 98 | 99 | class SimpleVoxel(nn.Module): 100 | def __init__(self, 101 | num_input_features=4, 102 | use_norm=True, 103 | num_filters=[32, 128], 104 | with_distance=False, 105 | name='VoxelFeatureExtractor'): 106 | super(SimpleVoxel, self).__init__() 107 | self.name = name 108 | self.num_input_features = num_input_features 109 | 110 | def forward(self, features, num_voxels): 111 | #return features 112 | # features: [concated_num_points, num_voxel_size, 3(4)] 113 | # num_voxels: [concated_num_points] 114 | points_mean = features[:, :, :self.num_input_features].sum( 115 | dim=1, keepdim=False) / num_voxels.type_as(features).view(-1, 1) 116 | return points_mean.contiguous() 117 | 118 | 119 | -------------------------------------------------------------------------------- /mmdet/models/bbox_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .bbox_head import BBoxHead 2 | from .convfc_bbox_head import ConvFCBBoxHead, SharedFCBBoxHead 3 | from ..single_stage_heads import PSWarpHead 4 | __all__ = ['BBoxHead', 'ConvFCBBoxHead', 'SharedFCBBoxHead', 'PSWarpHead'] 5 | -------------------------------------------------------------------------------- /mmdet/models/builder.py: -------------------------------------------------------------------------------- 1 | from mmcv.runner import obj_from_dict 2 | from torch import nn 3 | 4 | from . import (backbones, necks, roi_extractors, rpn_heads, bbox_heads, 5 | mask_heads, single_stage_heads) 6 | 7 | __all__ = [ 8 | 'build_backbone', 'build_neck', 'build_rpn_head', 'build_roi_extractor', 9 | 'build_bbox_head', 'build_mask_head', 'build_single_stage_head','build_detector', 10 | ] 11 | 12 | 13 | def _build_module(cfg, parrent=None, default_args=None): 14 | return cfg if isinstance(cfg, nn.Module) else obj_from_dict( 15 | cfg, parrent, default_args) 16 | 17 | 18 | def build(cfg, parrent=None, default_args=None): 19 | if isinstance(cfg, list): 20 | modules = [_build_module(cfg_, parrent, default_args) for cfg_ in cfg] 21 | return nn.Sequential(*modules) 22 | else: 23 | return _build_module(cfg, parrent, default_args) 24 | 25 | 26 | def build_backbone(cfg): 27 | return build(cfg, backbones) 28 | 29 | 30 | def build_neck(cfg): 31 | return build(cfg, necks) 32 | 33 | 34 | def build_rpn_head(cfg): 35 | return build(cfg, rpn_heads) 36 | 37 | 38 | def build_roi_extractor(cfg): 39 | return build(cfg, roi_extractors) 40 | 41 | 42 | def build_bbox_head(cfg): 43 | return build(cfg, bbox_heads) 44 | 45 | 46 | def build_mask_head(cfg): 47 | return build(cfg, mask_heads) 48 | 49 | 50 | def build_single_stage_head(cfg): 51 | return build(cfg, single_stage_heads) 52 | 53 | 54 | def build_detector(cfg, train_cfg=None, test_cfg=None): 55 | from . import detectors 56 | return build(cfg, detectors, dict(train_cfg=train_cfg, test_cfg=test_cfg)) 57 | -------------------------------------------------------------------------------- /mmdet/models/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import BaseDetector 2 | from .single_stage import SingleStageDetector 3 | from .rpn import RPN 4 | from .pointpillars import PointPillars 5 | 6 | __all__ = [ 7 | 'BaseDetector', 'SingleStageDetector', 'RPN', 'PointPillars', 8 | ] 9 | -------------------------------------------------------------------------------- /mmdet/models/detectors/base.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from abc import ABCMeta, abstractmethod 3 | 4 | import mmcv 5 | import numpy as np 6 | import torch.nn as nn 7 | 8 | from mmdet.core import tensor2imgs, get_classes 9 | 10 | 11 | class BaseDetector(nn.Module): 12 | """Base class for detectors""" 13 | 14 | __metaclass__ = ABCMeta 15 | 16 | def __init__(self): 17 | super(BaseDetector, self).__init__() 18 | 19 | @property 20 | def with_neck(self): 21 | return hasattr(self, 'neck') and self.neck is not None 22 | 23 | @property 24 | def with_bbox(self): 25 | return hasattr(self, 'bbox_head') and self.bbox_head is not None 26 | 27 | @property 28 | def with_mask(self): 29 | return hasattr(self, 'mask_head') and self.mask_head is not None 30 | 31 | @abstractmethod 32 | def extract_feat(self, imgs): 33 | pass 34 | 35 | def extract_feats(self, imgs): 36 | assert isinstance(imgs, list) 37 | for img in imgs: 38 | yield self.extract_feat(img) 39 | 40 | @abstractmethod 41 | def forward_train(self, imgs, img_metas, **kwargs): 42 | pass 43 | 44 | @abstractmethod 45 | def simple_test(self, img, img_meta, **kwargs): 46 | pass 47 | 48 | @abstractmethod 49 | def aug_test(self, imgs, img_metas, **kwargs): 50 | pass 51 | 52 | def init_weights(self, pretrained=None): 53 | if pretrained is not None: 54 | logger = logging.getLogger() 55 | logger.info('load model from: {}'.format(pretrained)) 56 | 57 | def forward_test(self, imgs, img_metas, **kwargs): 58 | for var, name in [(imgs, 'imgs'), (img_metas, 'img_metas')]: 59 | if not isinstance(var, list): 60 | raise TypeError('{} must be a list, but got {}'.format( 61 | name, type(var))) 62 | 63 | num_augs = len(imgs) 64 | if num_augs != len(img_metas): 65 | raise ValueError( 66 | 'num of augmentations ({}) != num of image meta ({})'.format( 67 | len(imgs), len(img_metas))) 68 | # TODO: remove the restriction of imgs_per_gpu == 1 when prepared 69 | imgs_per_gpu = imgs[0].size(0) 70 | assert imgs_per_gpu == 1 71 | 72 | if num_augs == 1: 73 | return self.simple_test(imgs[0], img_metas[0], **kwargs) 74 | else: 75 | return self.aug_test(imgs, img_metas, **kwargs) 76 | 77 | def forward(self, img, img_meta, return_loss=True, **kwargs): 78 | if return_loss: 79 | return self.forward_train(img, img_meta, **kwargs) 80 | else: 81 | return self.forward_test(img, img_meta, **kwargs) 82 | 83 | def show_result(self, 84 | data, 85 | result, 86 | img_norm_cfg, 87 | dataset='coco', 88 | score_thr=0.3): 89 | img_tensor = data['img'][0] 90 | img_metas = data['img_meta'][0].data[0] 91 | imgs = tensor2imgs(img_tensor, **img_norm_cfg) 92 | assert len(imgs) == len(img_metas) 93 | 94 | if isinstance(dataset, str): 95 | class_names = get_classes(dataset) 96 | elif isinstance(dataset, list): 97 | class_names = dataset 98 | else: 99 | raise TypeError('dataset must be a valid dataset name or a list' 100 | ' of class names, not {}'.format(type(dataset))) 101 | 102 | for img, img_meta in zip(imgs, img_metas): 103 | h, w, _ = img_meta['img_shape'] 104 | img_show = img[:h, :w, :] 105 | labels = [ 106 | np.full(bbox.shape[0], i, dtype=np.int32) 107 | for i, bbox in enumerate(result) 108 | ] 109 | labels = np.concatenate(labels) 110 | bboxes = np.vstack(result) 111 | mmcv.imshow_det_bboxes( 112 | img_show, 113 | bboxes, 114 | labels, 115 | class_names=class_names, 116 | score_thr=score_thr) 117 | -------------------------------------------------------------------------------- /mmdet/models/detectors/pointpillars.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import torch 3 | from .. import builder 4 | from mmcv.runner import load_checkpoint 5 | from .base import BaseDetector 6 | import torch.nn.functional as F 7 | 8 | class PointPillars(BaseDetector): 9 | 10 | def __init__(self, 11 | backbone, 12 | neck, 13 | rpn_head=None, 14 | bbox_head=None, 15 | rcnn_head=None, 16 | train_cfg=None, 17 | test_cfg=None, 18 | pretrained=None): 19 | super(PointPillars, self).__init__() 20 | self.backbone = builder.build_backbone(backbone) 21 | self.neck = builder.build_neck(neck) 22 | self.bbox_head = builder.build_single_stage_head(bbox_head) 23 | 24 | if rpn_head is not None: 25 | self.rpn_head = builder.build_rpn_head(rpn_head) 26 | 27 | self.train_cfg = train_cfg 28 | self.test_cfg = test_cfg 29 | 30 | if rcnn_head is not None: 31 | self.rcnn_head = builder.build_bbox_head(rcnn_head) 32 | 33 | self.init_weights(pretrained=pretrained) 34 | 35 | @property 36 | def with_rpn(self): 37 | return hasattr(self, 'rpn_head') and self.rpn_head is not None 38 | 39 | def init_weights(self, pretrained=None): 40 | if isinstance(pretrained, str): 41 | logger = logging.getLogger() 42 | load_checkpoint(self, pretrained, strict=False, logger=logger) 43 | def freeze_layers(self, model): 44 | for param in model.parameters(): 45 | param.requires_grad = False 46 | 47 | def merge_second_batch(self, batch_args): 48 | ret = {} 49 | for key, elems in batch_args.items(): 50 | if key in [ 51 | 'voxels', 'num_points', 52 | ]: 53 | ret[key] = torch.cat(elems, dim=0) 54 | elif key == 'coordinates': 55 | coors = [] 56 | for i, coor in enumerate(elems): 57 | coor_pad = F.pad( 58 | coor, [1, 0, 0, 0], 59 | mode='constant', 60 | value=i) 61 | coors.append(coor_pad) 62 | ret[key] = torch.cat(coors, dim=0) 63 | elif key in [ 64 | 'img_meta', 'gt_labels', 'gt_bboxes', 65 | ]: 66 | ret[key] = elems 67 | else: 68 | ret[key] = torch.stack(elems, dim=0) 69 | return ret 70 | 71 | def forward_train(self, img, img_meta, **kwargs): 72 | 73 | batch_size = len(img_meta) 74 | ret = self.merge_second_batch(kwargs) 75 | 76 | losses = dict() 77 | 78 | canvas = self.backbone(ret['voxels'], ret['coordinates'], ret['num_points'], batch_size) 79 | 80 | x = self.neck(canvas) 81 | 82 | bbox_outs = self.bbox_head(x) 83 | bbox_loss_inputs = bbox_outs + (ret['gt_bboxes'], ret['gt_labels'], ret['anchors'], ret['anchors_mask'], self.train_cfg) 84 | bbox_losses = self.bbox_head.loss(*bbox_loss_inputs) 85 | losses.update(bbox_losses) 86 | 87 | return losses 88 | 89 | def forward_test(self, img, img_meta, **kwargs): 90 | 91 | batch_size = len(img_meta) 92 | ret = self.merge_second_batch(kwargs) 93 | canvas = self.backbone(ret['voxels'], ret['coordinates'], ret['num_points'], batch_size) 94 | x = self.neck(canvas) 95 | 96 | rpn_outs = self.bbox_head.forward(x) 97 | proposal_inputs = rpn_outs + (ret['anchors'], ret['anchors_mask'], img_meta, self.test_cfg) 98 | 99 | return self.bbox_head.get_det_bboxes_nms(*proposal_inputs) 100 | 101 | 102 | 103 | 104 | 105 | 106 | -------------------------------------------------------------------------------- /mmdet/models/detectors/rpn.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | from mmdet.core import tensor2imgs, bbox_mapping 4 | from .base import BaseDetector 5 | from .test_mixins import RPNTestMixin 6 | from .. import builder 7 | 8 | 9 | class RPN(BaseDetector, RPNTestMixin): 10 | 11 | def __init__(self, 12 | backbone, 13 | neck, 14 | rpn_head, 15 | train_cfg, 16 | test_cfg, 17 | pretrained=None): 18 | super(RPN, self).__init__() 19 | self.backbone = builder.build_backbone(backbone) 20 | self.neck = builder.build_neck(neck) if neck is not None else None 21 | self.rpn_head = builder.build_rpn_head(rpn_head) 22 | self.train_cfg = train_cfg 23 | self.test_cfg = test_cfg 24 | self.init_weights(pretrained=pretrained) 25 | 26 | def init_weights(self, pretrained=None): 27 | super(RPN, self).init_weights(pretrained) 28 | self.backbone.init_weights(pretrained=pretrained) 29 | if self.with_neck: 30 | self.neck.init_weights() 31 | self.rpn_head.init_weights() 32 | 33 | def extract_feat(self, img): 34 | x = self.backbone(img) 35 | if self.with_neck: 36 | x = self.neck(x) 37 | return x 38 | 39 | def forward_train(self, img, img_meta, gt_bboxes=None): 40 | if self.train_cfg.rpn.get('debug', False): 41 | self.rpn_head.debug_imgs = tensor2imgs(img) 42 | 43 | x = self.extract_feat(img) 44 | rpn_outs = self.rpn_head(x) 45 | 46 | rpn_loss_inputs = rpn_outs + (gt_bboxes, img_meta, self.train_cfg.rpn) 47 | losses = self.rpn_head.loss(*rpn_loss_inputs) 48 | return losses 49 | 50 | def simple_test(self, img, img_meta, rescale=False): 51 | x = self.extract_feat(img) 52 | proposal_list = self.simple_test_rpn(x, img_meta, self.test_cfg.rpn) 53 | if rescale: 54 | for proposals, meta in zip(proposal_list, img_meta): 55 | proposals[:, :4] /= meta['scale_factor'] 56 | # TODO: remove this restriction 57 | return proposal_list[0].cpu().numpy() 58 | 59 | def aug_test(self, imgs, img_metas, rescale=False): 60 | proposal_list = self.aug_test_rpn( 61 | self.extract_feats(imgs), img_metas, self.test_cfg.rpn) 62 | if not rescale: 63 | for proposals, img_meta in zip(proposal_list, img_metas[0]): 64 | img_shape = img_meta['img_shape'] 65 | scale_factor = img_meta['scale_factor'] 66 | flip = img_meta['flip'] 67 | proposals[:, :4] = bbox_mapping(proposals[:, :4], img_shape, 68 | scale_factor, flip) 69 | # TODO: remove this restriction 70 | return proposal_list[0].cpu().numpy() 71 | 72 | def show_result(self, data, result, img_norm_cfg): 73 | """Show RPN proposals on the image. 74 | 75 | Although we assume batch size is 1, this method supports arbitrary 76 | batch size. 77 | """ 78 | img_tensor = data['img'][0] 79 | img_metas = data['img_meta'][0].data[0] 80 | imgs = tensor2imgs(img_tensor, **img_norm_cfg) 81 | assert len(imgs) == len(img_metas) 82 | for img, img_meta in zip(imgs, img_metas): 83 | h, w, _ = img_meta['img_shape'] 84 | img_show = img[:h, :w, :] 85 | mmcv.imshow_bboxes(img_show, result, top_k=20) 86 | -------------------------------------------------------------------------------- /mmdet/models/detectors/single_stage.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import logging 4 | from mmcv.runner import load_checkpoint 5 | from .base import BaseDetector 6 | from .test_mixins import RPNTestMixin, BBoxTestMixin, MaskTestMixin 7 | from .. import builder 8 | from mmdet.core import (assign_and_sample, bbox2roi, rbbox2roi, bbox2result, multi_apply, kitti_bbox2results,\ 9 | tensor2points, delta2rbbox3d, weighted_binary_cross_entropy) 10 | import torch.nn.functional as F 11 | 12 | 13 | class SingleStageDetector(BaseDetector, RPNTestMixin, BBoxTestMixin, 14 | MaskTestMixin): 15 | 16 | def __init__(self, 17 | backbone, 18 | neck=None, 19 | bbox_head=None, 20 | extra_head=None, 21 | train_cfg=None, 22 | test_cfg=None, 23 | pretrained=None): 24 | super(SingleStageDetector, self).__init__() 25 | self.backbone = builder.build_backbone(backbone) 26 | 27 | if neck is not None: 28 | self.neck = builder.build_neck(neck) 29 | else: 30 | raise NotImplementedError 31 | 32 | if bbox_head is not None: 33 | self.rpn_head = builder.build_single_stage_head(bbox_head) 34 | 35 | if extra_head is not None: 36 | self.extra_head = builder.build_single_stage_head(extra_head) 37 | 38 | self.train_cfg = train_cfg 39 | self.test_cfg = test_cfg 40 | 41 | self.init_weights(pretrained) 42 | 43 | @property 44 | def with_rpn(self): 45 | return hasattr(self, 'rpn_head') and self.rpn_head is not None 46 | 47 | def init_weights(self, pretrained=None): 48 | if isinstance(pretrained, str): 49 | logger = logging.getLogger() 50 | load_checkpoint(self, pretrained, strict=False, logger=logger) 51 | 52 | def merge_second_batch(self, batch_args): 53 | ret = {} 54 | for key, elems in batch_args.items(): 55 | if key in ['voxels', 'num_points', ]: 56 | ret[key] = torch.cat(elems, dim=0) 57 | elif key in ['coordinates', ]: 58 | coors = [] 59 | for i, coor in enumerate(elems): 60 | coor_pad = F.pad( 61 | coor, [1, 0, 0, 0], 62 | mode='constant', 63 | value=i) 64 | coors.append(coor_pad) 65 | ret[key] = torch.cat(coors, dim=0) 66 | elif key in ['img_meta', 'gt_labels', 'gt_bboxes', 'gt_types', ]: 67 | ret[key] = elems 68 | else: 69 | if isinstance(elems, dict): 70 | ret[key] = {k: torch.stack(v, dim=0) for k, v in elems.items()} 71 | else: 72 | ret[key] = torch.stack(elems, dim=0) 73 | return ret 74 | 75 | def forward_train(self, img, img_meta, **kwargs): 76 | 77 | batch_size = len(img_meta) 78 | 79 | ret = self.merge_second_batch(kwargs) 80 | 81 | vx = self.backbone(ret['voxels'], ret['num_points']) 82 | x, conv6, point_misc = self.neck(vx, ret['coordinates'], batch_size, is_test=False) 83 | 84 | losses = dict() 85 | 86 | aux_loss = self.neck.aux_loss(*point_misc, gt_bboxes=ret['gt_bboxes']) 87 | losses.update(aux_loss) 88 | 89 | # RPN forward and loss 90 | if self.with_rpn: 91 | rpn_outs = self.rpn_head(x) 92 | rpn_loss_inputs = rpn_outs + (ret['gt_bboxes'], ret['gt_labels'], ret['gt_types'],\ 93 | ret['anchors'], ret['anchors_mask'], self.train_cfg.rpn) 94 | rpn_losses = self.rpn_head.loss(*rpn_loss_inputs) 95 | losses.update(rpn_losses) 96 | guided_anchors, _ = self.rpn_head.get_guided_anchors(*rpn_outs, ret['anchors'],\ 97 | ret['anchors_mask'], ret['gt_bboxes'], ret['gt_labels'], thr=self.train_cfg.rpn.anchor_thr) 98 | else: 99 | raise NotImplementedError 100 | 101 | # bbox head forward and loss 102 | if self.extra_head: 103 | bbox_score = self.extra_head(conv6, guided_anchors) 104 | refine_loss_inputs = (bbox_score, ret['gt_bboxes'], ret['gt_labels'], guided_anchors, self.train_cfg.extra) 105 | refine_losses = self.extra_head.loss(*refine_loss_inputs) 106 | losses.update(refine_losses) 107 | 108 | return losses 109 | 110 | def forward_test(self, img, img_meta, **kwargs): 111 | 112 | batch_size = len(img_meta) 113 | 114 | ret = self.merge_second_batch(kwargs) 115 | 116 | vx = self.backbone(ret['voxels'], ret['num_points']) 117 | (x, conv6) = self.neck(vx, ret['coordinates'], batch_size, is_test=True) 118 | 119 | rpn_outs = self.rpn_head.forward(x) 120 | 121 | guided_anchors, anchor_labels = self.rpn_head.get_guided_anchors(*rpn_outs, ret['anchors'], ret['anchors_mask'], 122 | None, None, thr=.1) 123 | 124 | bbox_score = self.extra_head(conv6, guided_anchors, is_test=True) 125 | 126 | det_bboxes, det_scores, det_labels = self.extra_head.get_rescore_bboxes( 127 | guided_anchors, bbox_score, anchor_labels, img_meta, self.test_cfg.extra) 128 | 129 | results = [kitti_bbox2results(*param, class_names=self.class_names) for param in zip(det_bboxes, det_scores, det_labels, img_meta)] 130 | 131 | return results 132 | 133 | 134 | 135 | -------------------------------------------------------------------------------- /mmdet/models/detectors/test_mixins.py: -------------------------------------------------------------------------------- 1 | from mmdet.core import (bbox2roi, bbox_mapping, merge_aug_proposals, 2 | merge_aug_bboxes, merge_aug_masks) 3 | 4 | import numpy as np 5 | 6 | class RPNTestMixin(object): 7 | 8 | def simple_test_rpn(self, x, img_meta, rpn_test_cfg): 9 | rpn_outs = self.rpn_head(x) 10 | proposal_inputs = rpn_outs + (img_meta, rpn_test_cfg) 11 | proposal_list = self.rpn_head.get_proposals(*proposal_inputs) 12 | return proposal_list 13 | 14 | def aug_test_rpn(self, feats, img_metas, rpn_test_cfg): 15 | imgs_per_gpu = len(img_metas[0]) 16 | aug_proposals = [[] for _ in range(imgs_per_gpu)] 17 | for x, img_meta in zip(feats, img_metas): 18 | proposal_list = self.simple_test_rpn(x, img_meta, rpn_test_cfg) 19 | for i, proposals in enumerate(proposal_list): 20 | aug_proposals[i].append(proposals) 21 | # after merging, proposals will be rescaled to the original image size 22 | merged_proposals = [ 23 | merge_aug_proposals(proposals, img_meta, rpn_test_cfg) 24 | for proposals, img_meta in zip(aug_proposals, img_metas) 25 | ] 26 | return merged_proposals 27 | 28 | 29 | class BBoxTestMixin(object): 30 | 31 | def simple_test_bboxes(self, 32 | x, 33 | img_meta, 34 | proposals, 35 | rcnn_test_cfg, 36 | rescale=False): 37 | """Test only det bboxes without augmentation.""" 38 | rois = bbox2roi(proposals) 39 | roi_feats = self.bbox_roi_extractor( 40 | x[:len(self.bbox_roi_extractor.featmap_strides)], rois) 41 | cls_score, bbox_pred = self.bbox_head(roi_feats) 42 | img_shape = img_meta[0]['img_shape'] 43 | scale_factor = img_meta[0]['scale_factor'] 44 | det_bboxes, det_labels = self.bbox_head.get_det_bboxes_nms( 45 | rois, 46 | cls_score, 47 | bbox_pred, 48 | img_shape, 49 | scale_factor, 50 | rescale=rescale, 51 | cfg=rcnn_test_cfg) 52 | return det_bboxes, det_labels 53 | 54 | def aug_test_bboxes(self, feats, img_metas, proposal_list, rcnn_test_cfg): 55 | aug_bboxes = [] 56 | aug_scores = [] 57 | for x, img_meta in zip(feats, img_metas): 58 | # only one image in the batch 59 | img_shape = img_meta[0]['img_shape'] 60 | scale_factor = img_meta[0]['scale_factor'] 61 | flip = img_meta[0]['flip'] 62 | # TODO more flexible 63 | proposals = bbox_mapping(proposal_list[0][:, :4], img_shape, 64 | scale_factor, flip) 65 | rois = bbox2roi([proposals]) 66 | # recompute feature maps to save GPU memory 67 | roi_feats = self.bbox_roi_extractor( 68 | x[:len(self.bbox_roi_extractor.featmap_strides)], rois) 69 | cls_score, bbox_pred = self.bbox_head(roi_feats) 70 | bboxes, scores = self.bbox_head.get_det_bboxes_nms( 71 | rois, 72 | cls_score, 73 | bbox_pred, 74 | img_shape, 75 | scale_factor, 76 | rescale=False, 77 | cfg=None) 78 | aug_bboxes.append(bboxes) 79 | aug_scores.append(scores) 80 | # after merging, bboxes will be rescaled to the original image size 81 | merged_bboxes, merged_scores = merge_aug_bboxes( 82 | aug_bboxes, aug_scores, img_metas, rcnn_test_cfg) 83 | det_bboxes, det_labels = multiclass_nms( 84 | merged_bboxes, merged_scores, rcnn_test_cfg.score_thr, 85 | rcnn_test_cfg.nms, rcnn_test_cfg.max_per_img) 86 | return det_bboxes, det_labels 87 | 88 | class MaskTestMixin(object): 89 | 90 | def simple_test_mask(self, 91 | x, 92 | img_meta, 93 | det_bboxes, 94 | det_labels, 95 | rescale=False): 96 | # image shape of the first image in the batch (only one) 97 | ori_shape = img_meta[0]['ori_shape'] 98 | scale_factor = img_meta[0]['scale_factor'] 99 | if det_bboxes.shape[0] == 0: 100 | segm_result = [[] for _ in range(self.mask_head.num_classes - 1)] 101 | else: 102 | # if det_bboxes is rescaled to the original image size, we need to 103 | # rescale it back to the testing scale to obtain RoIs. 104 | _bboxes = (det_bboxes[:, :4] * scale_factor 105 | if rescale else det_bboxes) 106 | mask_rois = bbox2roi([_bboxes]) 107 | mask_feats = self.mask_roi_extractor( 108 | x[:len(self.mask_roi_extractor.featmap_strides)], mask_rois) 109 | mask_pred = self.mask_head(mask_feats) 110 | segm_result = self.mask_head.get_seg_masks( 111 | mask_pred, _bboxes, det_labels, self.test_cfg.rcnn, ori_shape, 112 | scale_factor, rescale) 113 | return segm_result 114 | 115 | def aug_test_mask(self, feats, img_metas, det_bboxes, det_labels): 116 | if det_bboxes.shape[0] == 0: 117 | segm_result = [[] for _ in range(self.mask_head.num_classes - 1)] 118 | else: 119 | aug_masks = [] 120 | for x, img_meta in zip(feats, img_metas): 121 | img_shape = img_meta[0]['img_shape'] 122 | scale_factor = img_meta[0]['scale_factor'] 123 | flip = img_meta[0]['flip'] 124 | _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape, 125 | scale_factor, flip) 126 | mask_rois = bbox2roi([_bboxes]) 127 | mask_feats = self.mask_roi_extractor( 128 | x[:len(self.mask_roi_extractor.featmap_strides)], 129 | mask_rois) 130 | mask_pred = self.mask_head(mask_feats) 131 | # convert to numpy array to save memory 132 | aug_masks.append(mask_pred.sigmoid().cpu().numpy()) 133 | merged_masks = merge_aug_masks(aug_masks, img_metas, 134 | self.test_cfg.rcnn) 135 | 136 | ori_shape = img_metas[0][0]['ori_shape'] 137 | segm_result = self.mask_head.get_seg_masks( 138 | merged_masks, 139 | det_bboxes, 140 | det_labels, 141 | self.test_cfg.rcnn, 142 | ori_shape, 143 | scale_factor=1.0, 144 | rescale=False) 145 | return segm_result 146 | -------------------------------------------------------------------------------- /mmdet/models/mask_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .fcn_mask_head import FCNMaskHead 2 | 3 | __all__ = ['FCNMaskHead'] 4 | -------------------------------------------------------------------------------- /mmdet/models/mask_heads/fcn_mask_head.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | import numpy as np 3 | import pycocotools.mask as mask_util 4 | import torch 5 | import torch.nn as nn 6 | 7 | from ..utils import ConvModule 8 | from mmdet.core import mask_cross_entropy, mask_target 9 | 10 | 11 | class FCNMaskHead(nn.Module): 12 | 13 | def __init__(self, 14 | num_convs=4, 15 | roi_feat_size=14, 16 | in_channels=256, 17 | conv_kernel_size=3, 18 | conv_out_channels=256, 19 | upsample_method='deconv', 20 | upsample_ratio=2, 21 | num_classes=81, 22 | class_agnostic=False, 23 | normalize=None): 24 | super(FCNMaskHead, self).__init__() 25 | if upsample_method not in [None, 'deconv', 'nearest', 'bilinear']: 26 | raise ValueError( 27 | 'Invalid upsample method {}, accepted methods ' 28 | 'are "deconv", "nearest", "bilinear"'.format(upsample_method)) 29 | self.num_convs = num_convs 30 | self.roi_feat_size = roi_feat_size # WARN: not used and reserved 31 | self.in_channels = in_channels 32 | self.conv_kernel_size = conv_kernel_size 33 | self.conv_out_channels = conv_out_channels 34 | self.upsample_method = upsample_method 35 | self.upsample_ratio = upsample_ratio 36 | self.num_classes = num_classes 37 | self.class_agnostic = class_agnostic 38 | self.normalize = normalize 39 | self.with_bias = normalize is None 40 | 41 | self.convs = nn.ModuleList() 42 | for i in range(self.num_convs): 43 | in_channels = (self.in_channels 44 | if i == 0 else self.conv_out_channels) 45 | padding = (self.conv_kernel_size - 1) // 2 46 | self.convs.append( 47 | ConvModule( 48 | in_channels, 49 | self.conv_out_channels, 50 | 3, 51 | padding=padding, 52 | normalize=normalize, 53 | bias=self.with_bias)) 54 | if self.upsample_method is None: 55 | self.upsample = None 56 | elif self.upsample_method == 'deconv': 57 | self.upsample = nn.ConvTranspose2d( 58 | self.conv_out_channels, 59 | self.conv_out_channels, 60 | self.upsample_ratio, 61 | stride=self.upsample_ratio) 62 | else: 63 | self.upsample = nn.Upsample( 64 | scale_factor=self.upsample_ratio, mode=self.upsample_method) 65 | 66 | out_channels = 1 if self.class_agnostic else self.num_classes 67 | self.conv_logits = nn.Conv2d(self.conv_out_channels, out_channels, 1) 68 | self.relu = nn.ReLU(inplace=True) 69 | self.debug_imgs = None 70 | 71 | def init_weights(self): 72 | for m in [self.upsample, self.conv_logits]: 73 | if m is None: 74 | continue 75 | nn.init.kaiming_normal_( 76 | m.weight, mode='fan_out', nonlinearity='relu') 77 | nn.init.constant_(m.bias, 0) 78 | 79 | def forward(self, x): 80 | for conv in self.convs: 81 | x = conv(x) 82 | if self.upsample is not None: 83 | x = self.upsample(x) 84 | if self.upsample_method == 'deconv': 85 | x = self.relu(x) 86 | mask_pred = self.conv_logits(x) 87 | return mask_pred 88 | 89 | def get_target(self, sampling_results, gt_masks, rcnn_train_cfg): 90 | pos_proposals = [res.pos_bboxes for res in sampling_results] 91 | pos_assigned_gt_inds = [ 92 | res.pos_assigned_gt_inds for res in sampling_results 93 | ] 94 | mask_targets = mask_target(pos_proposals, pos_assigned_gt_inds, 95 | gt_masks, rcnn_train_cfg) 96 | return mask_targets 97 | 98 | def loss(self, mask_pred, mask_targets, labels): 99 | loss = dict() 100 | loss_mask = mask_cross_entropy(mask_pred, mask_targets, labels) 101 | loss['loss_mask'] = loss_mask 102 | return loss 103 | 104 | def get_seg_masks(self, mask_pred, det_bboxes, det_labels, rcnn_test_cfg, 105 | ori_shape, scale_factor, rescale): 106 | """Get segmentation masks from mask_pred and bboxes. 107 | 108 | Args: 109 | mask_pred (Tensor or ndarray): shape (n, #class+1, h, w). 110 | For single-scale testing, mask_pred is the direct output of 111 | model, whose type is Tensor, while for multi-scale testing, 112 | it will be converted to numpy array outside of this method. 113 | det_bboxes (Tensor): shape (n, 4/5) 114 | det_labels (Tensor): shape (n, ) 115 | img_shape (Tensor): shape (3, ) 116 | rcnn_test_cfg (dict): rcnn testing config 117 | ori_shape: original image size 118 | 119 | Returns: 120 | list[list]: encoded masks 121 | """ 122 | if isinstance(mask_pred, torch.Tensor): 123 | mask_pred = mask_pred.sigmoid().cpu().numpy() 124 | assert isinstance(mask_pred, np.ndarray) 125 | 126 | cls_segms = [[] for _ in range(self.num_classes - 1)] 127 | bboxes = det_bboxes.cpu().numpy()[:, :4] 128 | labels = det_labels.cpu().numpy() + 1 129 | 130 | if rescale: 131 | img_h, img_w = ori_shape[:2] 132 | else: 133 | img_h = np.round(ori_shape[0] * scale_factor).astype(np.int32) 134 | img_w = np.round(ori_shape[1] * scale_factor).astype(np.int32) 135 | scale_factor = 1.0 136 | 137 | for i in range(bboxes.shape[0]): 138 | bbox = (bboxes[i, :] / scale_factor).astype(np.int32) 139 | label = labels[i] 140 | w = max(bbox[2] - bbox[0] + 1, 1) 141 | h = max(bbox[3] - bbox[1] + 1, 1) 142 | 143 | if not self.class_agnostic: 144 | mask_pred_ = mask_pred[i, label, :, :] 145 | else: 146 | mask_pred_ = mask_pred[i, 0, :, :] 147 | im_mask = np.zeros((img_h, img_w), dtype=np.uint8) 148 | 149 | bbox_mask = mmcv.imresize(mask_pred_, (w, h)) 150 | bbox_mask = (bbox_mask > rcnn_test_cfg.mask_thr_binary).astype( 151 | np.uint8) 152 | im_mask[bbox[1]:bbox[1] + h, bbox[0]:bbox[0] + w] = bbox_mask 153 | rle = mask_util.encode( 154 | np.array(im_mask[:, :, np.newaxis], order='F'))[0] 155 | cls_segms[label - 1].append(rle) 156 | 157 | return cls_segms 158 | -------------------------------------------------------------------------------- /mmdet/models/necks/__init__.py: -------------------------------------------------------------------------------- 1 | from .fpn import FPN 2 | from .cmn import SpMiddleFHD 3 | from .rpn import RPN 4 | __all__ = ['FPN','SpMiddleFHD','RPN'] 5 | -------------------------------------------------------------------------------- /mmdet/models/necks/fpn.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | from ..utils import ConvModule 4 | from ..utils import xavier_init 5 | 6 | 7 | class FPN(nn.Module): 8 | 9 | def __init__(self, 10 | in_channels, 11 | out_channels, 12 | num_outs, 13 | start_level=0, 14 | end_level=-1, 15 | add_extra_convs=False, 16 | normalize=None, 17 | activation=None): 18 | super(FPN, self).__init__() 19 | assert isinstance(in_channels, list) 20 | self.in_channels = in_channels 21 | self.out_channels = out_channels 22 | self.num_ins = len(in_channels) 23 | self.num_outs = num_outs 24 | self.activation = activation 25 | self.with_bias = normalize is None 26 | 27 | if end_level == -1: 28 | self.backbone_end_level = self.num_ins 29 | assert num_outs >= self.num_ins - start_level 30 | else: 31 | # if end_level < inputs, no extra level is allowed 32 | self.backbone_end_level = end_level 33 | assert end_level <= len(in_channels) 34 | assert num_outs == end_level - start_level 35 | self.start_level = start_level 36 | self.end_level = end_level 37 | self.add_extra_convs = add_extra_convs 38 | 39 | self.lateral_convs = nn.ModuleList() 40 | self.fpn_convs = nn.ModuleList() 41 | 42 | for i in range(self.start_level, self.backbone_end_level): 43 | l_conv = ConvModule( 44 | in_channels[i], 45 | out_channels, 46 | 1, 47 | normalize=normalize, 48 | bias=self.with_bias, 49 | activation=self.activation, 50 | inplace=False) 51 | fpn_conv = ConvModule( 52 | out_channels, 53 | out_channels, 54 | 3, 55 | padding=1, 56 | normalize=normalize, 57 | bias=self.with_bias, 58 | activation=self.activation, 59 | inplace=False) 60 | 61 | self.lateral_convs.append(l_conv) 62 | self.fpn_convs.append(fpn_conv) 63 | 64 | # lvl_id = i - self.start_level 65 | # setattr(self, 'lateral_conv{}'.format(lvl_id), l_conv) 66 | # setattr(self, 'fpn_conv{}'.format(lvl_id), fpn_conv) 67 | 68 | # add extra conv layers (e.g., RetinaNet) 69 | extra_levels = num_outs - self.backbone_end_level + self.start_level 70 | if add_extra_convs and extra_levels >= 1: 71 | for i in range(extra_levels): 72 | in_channels = (self.in_channels[self.backbone_end_level - 1] 73 | if i == 0 else out_channels) 74 | extra_fpn_conv = ConvModule( 75 | in_channels, 76 | out_channels, 77 | 3, 78 | stride=2, 79 | padding=1, 80 | normalize=normalize, 81 | bias=self.with_bias, 82 | activation=self.activation, 83 | inplace=False) 84 | self.fpn_convs.append(extra_fpn_conv) 85 | 86 | # default init_weights for conv(msra) and norm in ConvModule 87 | def init_weights(self): 88 | for m in self.modules(): 89 | if isinstance(m, nn.Conv2d): 90 | xavier_init(m, distribution='uniform') 91 | 92 | def forward(self, inputs): 93 | assert len(inputs) == len(self.in_channels) 94 | 95 | # build laterals 96 | laterals = [ 97 | lateral_conv(inputs[i + self.start_level]) 98 | for i, lateral_conv in enumerate(self.lateral_convs) 99 | ] 100 | 101 | # build top-down path 102 | used_backbone_levels = len(laterals) 103 | for i in range(used_backbone_levels - 1, 0, -1): 104 | laterals[i - 1] += F.interpolate( 105 | laterals[i], scale_factor=2, mode='nearest') 106 | 107 | # build outputs 108 | # part 1: from original levels 109 | outs = [ 110 | self.fpn_convs[i](laterals[i]) for i in range(used_backbone_levels) 111 | ] 112 | # part 2: add extra levels 113 | if self.num_outs > len(outs): 114 | # use max pool to get more levels on top of outputs 115 | # (e.g., Faster R-CNN, Mask R-CNN) 116 | if not self.add_extra_convs: 117 | for i in range(self.num_outs - used_backbone_levels): 118 | outs.append(F.max_pool2d(outs[-1], 1, stride=2)) 119 | # add conv layers on top of original feature maps (RetinaNet) 120 | else: 121 | orig = inputs[self.backbone_end_level - 1] 122 | outs.append(self.fpn_convs[used_backbone_levels](orig)) 123 | for i in range(used_backbone_levels + 1, self.num_outs): 124 | # BUG: we should add relu before each extra conv 125 | outs.append(self.fpn_convs[i](outs[-1])) 126 | return tuple(outs) 127 | -------------------------------------------------------------------------------- /mmdet/models/necks/rpn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from torch import nn 3 | from ..utils import Empty, change_default_args, Sequential 4 | import torch 5 | 6 | class RPNBase(nn.Module): 7 | def __init__(self, 8 | use_norm=True, 9 | layer_nums=(3, 5, 5), 10 | layer_strides=(2, 2, 2), 11 | num_filters=(128, 128, 256), 12 | upsample_strides=(1, 2, 4), 13 | num_upsample_filters=(256, 256, 256), 14 | num_input_features=128): 15 | 16 | """upsample_strides support float: [0.25, 0.5, 1] 17 | if upsample_strides < 1, conv2d will be used instead of convtranspose2d. 18 | """ 19 | super(RPNBase, self).__init__() 20 | self._layer_strides = layer_strides 21 | self._num_filters = num_filters 22 | self._layer_nums = layer_nums 23 | self._upsample_strides = upsample_strides 24 | self._num_upsample_filters = num_upsample_filters 25 | self._num_input_features = num_input_features 26 | self._use_norm = use_norm 27 | 28 | assert len(layer_strides) == len(layer_nums) 29 | assert len(num_filters) == len(layer_nums) 30 | assert len(num_upsample_filters) == len(upsample_strides) 31 | self._upsample_start_idx = len(layer_nums) - len(upsample_strides) 32 | must_equal_list = [] 33 | for i in range(len(upsample_strides)): 34 | must_equal_list.append(upsample_strides[i] / np.prod( 35 | layer_strides[:i + self._upsample_start_idx + 1])) 36 | for val in must_equal_list: 37 | assert val == must_equal_list[0] 38 | 39 | if use_norm: 40 | BatchNorm2d = change_default_args( 41 | eps=1e-3, momentum=0.01)(nn.BatchNorm2d) 42 | Conv2d = change_default_args(bias=False)(nn.Conv2d) 43 | ConvTranspose2d = change_default_args(bias=False)( 44 | nn.ConvTranspose2d) 45 | else: 46 | BatchNorm2d = Empty 47 | Conv2d = change_default_args(bias=True)(nn.Conv2d) 48 | ConvTranspose2d = change_default_args(bias=True)( 49 | nn.ConvTranspose2d) 50 | 51 | in_filters = [num_input_features, *num_filters[:-1]] 52 | blocks = [] 53 | deblocks = [] 54 | 55 | for i, layer_num in enumerate(layer_nums): 56 | block, num_out_filters = self._make_layer( 57 | in_filters[i], 58 | num_filters[i], 59 | layer_num, 60 | stride=layer_strides[i]) 61 | blocks.append(block) 62 | if i - self._upsample_start_idx >= 0: 63 | stride = upsample_strides[i - self._upsample_start_idx] 64 | if stride >= 1: 65 | stride = np.round(stride).astype(np.int64) 66 | deblock = nn.Sequential( 67 | ConvTranspose2d( 68 | num_out_filters, 69 | num_upsample_filters[i - self._upsample_start_idx], 70 | stride, 71 | stride=stride), 72 | BatchNorm2d( 73 | num_upsample_filters[i - self._upsample_start_idx]), 74 | nn.ReLU(), 75 | ) 76 | else: 77 | stride = np.round(1 / stride).astype(np.int64) 78 | deblock = nn.Sequential( 79 | Conv2d( 80 | num_out_filters, 81 | num_upsample_filters[i - self._upsample_start_idx], 82 | stride, 83 | stride=stride), 84 | BatchNorm2d( 85 | num_upsample_filters[i - self._upsample_start_idx]), 86 | nn.ReLU(), 87 | ) 88 | deblocks.append(deblock) 89 | 90 | self._num_out_filters = num_out_filters 91 | self.blocks = nn.ModuleList(blocks) 92 | self.deblocks = nn.ModuleList(deblocks) 93 | 94 | @property 95 | def downsample_factor(self): 96 | factor = np.prod(self._layer_strides) 97 | if len(self._upsample_strides) > 0: 98 | factor /= self._upsample_strides[-1] 99 | return factor 100 | 101 | def _make_layer(self, inplanes, planes, num_blocks, stride=1): 102 | raise NotImplementedError 103 | 104 | def forward(self, x): 105 | ups = [] 106 | stage_outputs = [] 107 | for i in range(len(self.blocks)): 108 | x = self.blocks[i](x) 109 | stage_outputs.append(x) 110 | if i - self._upsample_start_idx >= 0: 111 | ups.append(self.deblocks[i - self._upsample_start_idx](x)) 112 | 113 | if len(ups) > 0: 114 | x = torch.cat(ups, dim=1) 115 | 116 | return x 117 | 118 | class RPN(RPNBase): 119 | def _make_layer(self, inplanes, planes, num_blocks, stride=1): 120 | if self._use_norm: 121 | BatchNorm2d = change_default_args( 122 | eps=1e-3, momentum=0.01)(nn.BatchNorm2d) 123 | Conv2d = change_default_args(bias=False)(nn.Conv2d) 124 | ConvTranspose2d = change_default_args(bias=False)( 125 | nn.ConvTranspose2d) 126 | else: 127 | BatchNorm2d = Empty 128 | Conv2d = change_default_args(bias=True)(nn.Conv2d) 129 | ConvTranspose2d = change_default_args(bias=True)( 130 | nn.ConvTranspose2d) 131 | 132 | block = Sequential( 133 | nn.ZeroPad2d(1), 134 | Conv2d(inplanes, planes, 3, stride=stride), 135 | BatchNorm2d(planes), 136 | nn.ReLU(), 137 | ) 138 | for j in range(num_blocks): 139 | block.add(Conv2d(planes, planes, 3, padding=1)) 140 | block.add(BatchNorm2d(planes)) 141 | block.add(nn.ReLU()) 142 | 143 | return block, planes -------------------------------------------------------------------------------- /mmdet/models/roi_extractors/__init__.py: -------------------------------------------------------------------------------- 1 | from .single_level import SingleRoIExtractor 2 | 3 | __all__ = ['SingleRoIExtractor'] 4 | -------------------------------------------------------------------------------- /mmdet/models/roi_extractors/single_level.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import torch 3 | import torch.nn as nn 4 | from mmdet.core import tensor2points 5 | from mmdet import ops 6 | import numpy as np 7 | 8 | class SingleRoIExtractor(nn.Module): 9 | """Extract RoI features from a single level feature map. 10 | 11 | If there are mulitple input feature levels, each RoI is mapped to a level 12 | according to its scale. 13 | 14 | Args: 15 | roi_layer (dict): Specify RoI layer type and arguments. 16 | out_channels (int): Output channels of RoI layers. 17 | featmap_strides (int): Strides of input feature maps. 18 | finest_scale (int): Scale threshold of mapping to level 0. 19 | """ 20 | 21 | def __init__(self, 22 | roi_layer, 23 | out_channels, 24 | featmap_strides, 25 | finest_scale=56): 26 | super(SingleRoIExtractor, self).__init__() 27 | self.roi_layers = self.build_roi_layers(roi_layer, featmap_strides) 28 | self.out_channels = out_channels 29 | self.featmap_strides = featmap_strides 30 | self.finest_scale = finest_scale 31 | 32 | @property 33 | def num_inputs(self): 34 | """int: Input feature map levels.""" 35 | return len(self.featmap_strides) 36 | 37 | def init_weights(self): 38 | pass 39 | 40 | def build_roi_layers(self, layer_cfg, featmap_strides): 41 | cfg = layer_cfg.copy() 42 | layer_type = cfg.pop('type') 43 | assert hasattr(ops, layer_type) 44 | layer_cls = getattr(ops, layer_type) 45 | roi_layers = nn.ModuleList( 46 | [layer_cls(spatial_scale=1 / s, **cfg) for s in featmap_strides]) 47 | return roi_layers 48 | 49 | def map_roi_levels(self, rois, num_levels): 50 | """Map rois to corresponding feature levels by scales. 51 | 52 | - scale < finest_scale: level 0 53 | - finest_scale <= scale < finest_scale * 2: level 1 54 | - finest_scale * 2 <= scale < finest_scale * 4: level 2 55 | - scale >= finest_scale * 4: level 3 56 | 57 | Args: 58 | rois (Tensor): Input RoIs, shape (k, 5). 59 | num_levels (int): Total level number. 60 | 61 | Returns: 62 | Tensor: Level index (0-based) of each RoI, shape (k, ) 63 | """ 64 | scale = torch.sqrt( 65 | (rois[:, 3] - rois[:, 1] + 1) * (rois[:, 4] - rois[:, 2] + 1)) 66 | target_lvls = torch.floor(torch.log2(scale / self.finest_scale + 1e-6)) 67 | target_lvls = target_lvls.clamp(min=0, max=num_levels - 1).long() 68 | return target_lvls 69 | 70 | def forward(self, feats, rois): 71 | if len(feats) == 1: 72 | return self.roi_layers[0](feats[0], rois) 73 | 74 | out_size = self.roi_layers[0].out_size 75 | num_levels = len(feats) 76 | target_lvls = self.map_roi_levels(rois, num_levels) 77 | roi_feats = torch.cuda.FloatTensor(rois.size()[0], self.out_channels, 78 | out_size, out_size).fill_(0) 79 | for i in range(num_levels): 80 | inds = target_lvls == i 81 | if inds.any(): 82 | rois_ = rois[inds, :] 83 | roi_feats_t = self.roi_layers[i](feats[i], rois_) 84 | roi_feats[inds] += roi_feats_t 85 | return roi_feats 86 | 87 | 88 | 89 | -------------------------------------------------------------------------------- /mmdet/models/rpn_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .rpn_head import RPNHead 2 | 3 | __all__ = ['RPNHead'] 4 | -------------------------------------------------------------------------------- /mmdet/models/single_stage_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .retina_head import RetinaHead 2 | from .ssd_rotate_head import * 3 | 4 | __all__ = ['RetinaHead', "SSDRotateHead"] 5 | -------------------------------------------------------------------------------- /mmdet/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .conv_module import ConvModule 2 | from .norm import build_norm_layer 3 | from .weight_init import (xavier_init, normal_init, uniform_init, kaiming_init, 4 | bias_init_with_prob) 5 | from .empty import Empty 6 | from .sequential import Sequential 7 | import inspect 8 | import torch 9 | 10 | def get_paddings_indicator(actual_num, max_num, axis=0): 11 | """Create boolean mask by actually number of a padded tensor. 12 | Args: 13 | actual_num ([type]): [description] 14 | max_num ([type]): [description] 15 | Returns: 16 | [type]: [description] 17 | """ 18 | 19 | actual_num = torch.unsqueeze(actual_num, axis + 1) 20 | # tiled_actual_num: [N, M, 1] 21 | max_num_shape = [1] * len(actual_num.shape) 22 | max_num_shape[axis + 1] = -1 23 | max_num = torch.arange( 24 | max_num, dtype=torch.int, device=actual_num.device).view(max_num_shape) 25 | # tiled_actual_num: [[3,3,3,3,3], [4,4,4,4,4], [2,2,2,2,2]] 26 | # tiled_max_num: [[0,1,2,3,4], [0,1,2,3,4], [0,1,2,3,4]] 27 | paddings_indicator = actual_num.int() > max_num 28 | # paddings_indicator shape: [batch_size, max_num] 29 | return paddings_indicator 30 | 31 | def get_pos_to_kw_map(func): 32 | pos_to_kw = {} 33 | fsig = inspect.signature(func) 34 | pos = 0 35 | for name, info in fsig.parameters.items(): 36 | if info.kind is info.POSITIONAL_OR_KEYWORD: 37 | pos_to_kw[pos] = name 38 | pos += 1 39 | return pos_to_kw 40 | 41 | def change_default_args(**kwargs): 42 | def layer_wrapper(layer_class): 43 | class DefaultArgLayer(layer_class): 44 | def __init__(self, *args, **kw): 45 | pos_to_kw = get_pos_to_kw_map(layer_class.__init__) 46 | kw_to_pos = {kw: pos for pos, kw in pos_to_kw.items()} 47 | for key, val in kwargs.items(): 48 | if key not in kw and kw_to_pos[key] > len(args): 49 | kw[key] = val 50 | super().__init__(*args, **kw) 51 | 52 | return DefaultArgLayer 53 | 54 | return layer_wrapper 55 | 56 | def one_hot(tensor, depth, dim=-1, on_value=1.0, dtype=torch.float32): 57 | tensor_onehot = torch.zeros( 58 | *list(tensor.shape), depth, dtype=dtype, device=tensor.device) 59 | tensor_onehot.scatter_(dim, tensor.unsqueeze(dim).long(), on_value) 60 | return tensor_onehot 61 | 62 | __all__ = [ 63 | 'ConvModule', 'build_norm_layer', 'xavier_init', 'normal_init', 64 | 'uniform_init', 'kaiming_init', 'bias_init_with_prob','Empty', 65 | 'change_default_args','Sequential','one_hot', 'get_paddings_indicator' 66 | ] 67 | -------------------------------------------------------------------------------- /mmdet/models/utils/conv_module.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | import torch.nn as nn 4 | from mmcv.cnn import kaiming_init, constant_init 5 | 6 | from .norm import build_norm_layer 7 | 8 | class ConvModule(nn.Module): 9 | 10 | def __init__(self, 11 | in_channels, 12 | out_channels, 13 | kernel_size, 14 | stride=1, 15 | padding=0, 16 | dilation=1, 17 | groups=1, 18 | bias=True, 19 | normalize=None, 20 | activation='relu', 21 | inplace=True, 22 | activate_last=True): 23 | super(ConvModule, self).__init__() 24 | self.with_norm = normalize is not None 25 | self.with_activatation = activation is not None 26 | self.with_bias = bias 27 | self.activation = activation 28 | self.activate_last = activate_last 29 | 30 | if self.with_norm and self.with_bias: 31 | warnings.warn('ConvModule has norm and bias at the same time') 32 | 33 | self.conv = nn.Conv2d( 34 | in_channels, 35 | out_channels, 36 | kernel_size, 37 | stride, 38 | padding, 39 | dilation, 40 | groups, 41 | bias=bias) 42 | 43 | self.in_channels = self.conv.in_channels 44 | self.out_channels = self.conv.out_channels 45 | self.kernel_size = self.conv.kernel_size 46 | self.stride = self.conv.stride 47 | self.padding = self.conv.padding 48 | self.dilation = self.conv.dilation 49 | self.transposed = self.conv.transposed 50 | self.output_padding = self.conv.output_padding 51 | self.groups = self.conv.groups 52 | 53 | if self.with_norm: 54 | norm_channels = out_channels if self.activate_last else in_channels 55 | self.norm = build_norm_layer(normalize, norm_channels) 56 | 57 | if self.with_activatation: 58 | assert activation in ['relu'], 'Only ReLU supported.' 59 | if self.activation == 'relu': 60 | self.activate = nn.ReLU(inplace=inplace) 61 | 62 | # Default using msra init 63 | self.init_weights() 64 | 65 | def init_weights(self): 66 | nonlinearity = 'relu' if self.activation is None else self.activation 67 | kaiming_init(self.conv, nonlinearity=nonlinearity) 68 | if self.with_norm: 69 | constant_init(self.norm, 1, bias=0) 70 | 71 | def forward(self, x, activate=True, norm=True): 72 | if self.activate_last: 73 | x = self.conv(x) 74 | if norm and self.with_norm: 75 | x = self.norm(x) 76 | if activate and self.with_activatation: 77 | x = self.activate(x) 78 | else: 79 | if norm and self.with_norm: 80 | x = self.norm(x) 81 | if activate and self.with_activatation: 82 | x = self.activate(x) 83 | x = self.conv(x) 84 | return x 85 | 86 | 87 | -------------------------------------------------------------------------------- /mmdet/models/utils/empty.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Empty(torch.nn.Module): 5 | def __init__(self, *args, **kwargs): 6 | super(Empty, self).__init__() 7 | 8 | def forward(self, *args, **kwargs): 9 | if len(args) == 1: 10 | return args[0] 11 | elif len(args) == 0: 12 | return None 13 | return args -------------------------------------------------------------------------------- /mmdet/models/utils/norm.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | norm_cfg = {'BN': nn.BatchNorm2d, 'SyncBN': None, 'GN': None} 4 | 5 | 6 | def build_norm_layer(cfg, num_features): 7 | assert isinstance(cfg, dict) and 'type' in cfg 8 | cfg_ = cfg.copy() 9 | cfg_.setdefault('eps', 1e-5) 10 | layer_type = cfg_.pop('type') 11 | 12 | if layer_type not in norm_cfg: 13 | raise KeyError('Unrecognized norm type {}'.format(layer_type)) 14 | elif norm_cfg[layer_type] is None: 15 | raise NotImplementedError 16 | 17 | return norm_cfg[layer_type](num_features, **cfg_) 18 | -------------------------------------------------------------------------------- /mmdet/models/utils/sequential.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from collections import OrderedDict 3 | class Sequential(torch.nn.Module): 4 | r"""A sequential container. 5 | Modules will be added to it in the order they are passed in the constructor. 6 | Alternatively, an ordered dict of modules can also be passed in. 7 | 8 | To make it easier to understand, given is a small example:: 9 | 10 | # Example of using Sequential 11 | model = Sequential( 12 | nn.Conv2d(1,20,5), 13 | nn.ReLU(), 14 | nn.Conv2d(20,64,5), 15 | nn.ReLU() 16 | ) 17 | 18 | # Example of using Sequential with OrderedDict 19 | model = Sequential(OrderedDict([ 20 | ('conv1', nn.Conv2d(1,20,5)), 21 | ('relu1', nn.ReLU()), 22 | ('conv2', nn.Conv2d(20,64,5)), 23 | ('relu2', nn.ReLU()) 24 | ])) 25 | 26 | # Example of using Sequential with kwargs(python 3.6+) 27 | model = Sequential( 28 | conv1=nn.Conv2d(1,20,5), 29 | relu1=nn.ReLU(), 30 | conv2=nn.Conv2d(20,64,5), 31 | relu2=nn.ReLU() 32 | ) 33 | """ 34 | 35 | def __init__(self, *args, **kwargs): 36 | super(Sequential, self).__init__() 37 | if len(args) == 1 and isinstance(args[0], OrderedDict): 38 | for key, module in args[0].items(): 39 | self.add_module(key, module) 40 | else: 41 | for idx, module in enumerate(args): 42 | self.add_module(str(idx), module) 43 | for name, module in kwargs.items(): 44 | if sys.version_info < (3, 6): 45 | raise ValueError("kwargs only supported in py36+") 46 | if name in self._modules: 47 | raise ValueError("name exists.") 48 | self.add_module(name, module) 49 | 50 | def __getitem__(self, idx): 51 | if not (-len(self) <= idx < len(self)): 52 | raise IndexError('index {} is out of range'.format(idx)) 53 | if idx < 0: 54 | idx += len(self) 55 | it = iter(self._modules.values()) 56 | for i in range(idx): 57 | next(it) 58 | return next(it) 59 | 60 | def __len__(self): 61 | return len(self._modules) 62 | 63 | def add(self, module, name=None): 64 | if name is None: 65 | name = str(len(self._modules)) 66 | if name in self._modules: 67 | raise KeyError("name exists") 68 | self.add_module(name, module) 69 | 70 | def forward(self, input): 71 | # i = 0 72 | for module in self._modules.values(): 73 | # print(i) 74 | input = module(input) 75 | # i += 1 76 | return input -------------------------------------------------------------------------------- /mmdet/models/utils/weight_init.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch.nn as nn 3 | 4 | 5 | def xavier_init(module, gain=1, bias=0, distribution='normal'): 6 | assert distribution in ['uniform', 'normal'] 7 | if distribution == 'uniform': 8 | nn.init.xavier_uniform_(module.weight, gain=gain) 9 | else: 10 | nn.init.xavier_normal_(module.weight, gain=gain) 11 | if hasattr(module, 'bias'): 12 | nn.init.constant_(module.bias, bias) 13 | 14 | 15 | def normal_init(module, mean=0, std=1, bias=0): 16 | nn.init.normal_(module.weight, mean, std) 17 | if hasattr(module, 'bias'): 18 | nn.init.constant_(module.bias, bias) 19 | 20 | 21 | def uniform_init(module, a=0, b=1, bias=0): 22 | nn.init.uniform_(module.weight, a, b) 23 | if hasattr(module, 'bias'): 24 | nn.init.constant_(module.bias, bias) 25 | 26 | 27 | def kaiming_init(module, 28 | mode='fan_out', 29 | nonlinearity='relu', 30 | bias=0, 31 | distribution='normal'): 32 | assert distribution in ['uniform', 'normal'] 33 | if distribution == 'uniform': 34 | nn.init.kaiming_uniform_( 35 | module.weight, mode=mode, nonlinearity=nonlinearity) 36 | else: 37 | nn.init.kaiming_normal_( 38 | module.weight, mode=mode, nonlinearity=nonlinearity) 39 | if hasattr(module, 'bias'): 40 | nn.init.constant_(module.bias, bias) 41 | 42 | 43 | def bias_init_with_prob(prior_prob): 44 | """ initialize conv/fc bias value according to giving probablity""" 45 | bias_init = float(-np.log((1 - prior_prob) / prior_prob)) 46 | return bias_init 47 | -------------------------------------------------------------------------------- /mmdet/ops/__init__.py: -------------------------------------------------------------------------------- 1 | from .points_op import pts_in_boxes3d 2 | __all__ = ['pts_in_boxes3d'] 3 | -------------------------------------------------------------------------------- /mmdet/ops/iou3d/iou3d_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import mmdet.ops.iou3d.iou3d_cuda as iou3d_cuda 3 | import math 4 | 5 | def limit_period(val, offset=0.5, period=math.pi): 6 | return val - torch.floor(val / period + offset) * period 7 | 8 | def boxes3d_to_near_torch(boxes3d): 9 | rboxes = boxes3d[:, [0, 1, 3, 4, 6]] 10 | """convert rotated bbox to nearest 'standing' or 'lying' bbox. 11 | Args: 12 | rboxes: [N, 5(x, y, xdim, ydim, rad)] rotated bboxes 13 | Returns: 14 | boxes_near: [N, 4(xmin, ymin, xmax, ymax)] nearest boxes 15 | """ 16 | rots = rboxes[..., -1] 17 | rots_0_pi_div_2 = torch.abs(limit_period(rots, 0.5, math.pi)) 18 | cond = (rots_0_pi_div_2 > math.pi / 4)[..., None] 19 | boxes_center = torch.where(cond, rboxes[:, [0, 1, 3, 2]], rboxes[:, :4]) 20 | boxes_near = torch.cat([boxes_center[:, :2] - boxes_center[:, 2:] / 2, \ 21 | boxes_center[:, :2] + boxes_center[:, 2:] / 2], dim=-1) 22 | return boxes_near 23 | 24 | def boxes_iou(bboxes1, bboxes2, mode='iou', eps=0.0): 25 | assert mode in ['iou', 'iof'] 26 | 27 | rows = bboxes1.size(0) 28 | cols = bboxes2.size(0) 29 | 30 | if rows * cols == 0: 31 | return bboxes1.new(rows, cols) 32 | 33 | lt = torch.max(bboxes1[:, None, :2], bboxes2[:, :2]) # [rows, cols, 2] 34 | rb = torch.min(bboxes1[:, None, 2:], bboxes2[:, 2:]) # [rows, cols, 2] 35 | wh = (rb - lt + eps).clamp(min=0) # [rows, cols, 2] 36 | overlap = wh[:, :, 0] * wh[:, :, 1] 37 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + eps) * ( 38 | bboxes1[:, 3] - bboxes1[:, 1] + eps) 39 | if mode == 'iou': 40 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + eps) * ( 41 | bboxes2[:, 3] - bboxes2[:, 1] + eps) 42 | ious = overlap / (area1[:, None] + area2 - overlap) 43 | else: 44 | ious = overlap / (area1[:, None]) 45 | return ious 46 | 47 | def boxes3d_to_bev_torch(boxes3d): 48 | """ 49 | :param boxes3d: (N, 7) [x, y, z, h, w, l, ry] 50 | :return: 51 | boxes_bev: (N, 5) [x1, y1, x2, y2, ry] 52 | """ 53 | boxes_bev = boxes3d.new(torch.Size((boxes3d.shape[0], 5))) 54 | 55 | cu, cv = boxes3d[:, 0], boxes3d[:, 1] 56 | half_l, half_w = boxes3d[:, 3] / 2, boxes3d[:, 4] / 2 57 | boxes_bev[:, 0], boxes_bev[:, 1] = cu - half_l, cv - half_w 58 | boxes_bev[:, 2], boxes_bev[:, 3] = cu + half_l, cv + half_w 59 | boxes_bev[:, 4] = boxes3d[:, 6] 60 | return boxes_bev 61 | 62 | def boxes_iou_bev(boxes_a, boxes_b): 63 | """ 64 | :param boxes_a: (M, 5) 65 | :param boxes_b: (N, 5) 66 | :return: 67 | ans_iou: (M, N) 68 | """ 69 | boxes_a_bev = boxes3d_to_bev_torch(boxes_a) 70 | boxes_b_bev = boxes3d_to_bev_torch(boxes_b) 71 | 72 | ans_iou = torch.cuda.FloatTensor(torch.Size((boxes_a_bev.shape[0], boxes_b_bev.shape[0]))).zero_() 73 | 74 | iou3d_cuda.boxes_iou_bev_gpu(boxes_a_bev.contiguous(), boxes_b_bev.contiguous(), ans_iou) 75 | 76 | return ans_iou 77 | 78 | 79 | def boxes_iou3d_gpu(boxes_a, boxes_b): 80 | """ 81 | :param boxes_a: (N, 7) [x, y, z, h, w, l, ry] 82 | :param boxes_b: (M, 7) [x, y, z, h, w, l, ry] 83 | :return: 84 | ans_iou: (M, N) 85 | """ 86 | boxes_a_bev = boxes3d_to_bev_torch(boxes_a) 87 | boxes_b_bev = boxes3d_to_bev_torch(boxes_b) 88 | 89 | # bev overlap 90 | overlaps_bev = torch.cuda.FloatTensor(torch.Size((boxes_a.shape[0], boxes_b.shape[0]))).zero_() # (N, M) 91 | iou3d_cuda.boxes_overlap_bev_gpu(boxes_a_bev.contiguous(), boxes_b_bev.contiguous(), overlaps_bev) 92 | 93 | # height overlap 94 | boxes_a_height_max = (boxes_a[:, 2] + boxes_a[:, 5]).view(-1, 1) 95 | boxes_a_height_min = boxes_a[:, 2].view(-1, 1) 96 | boxes_b_height_max = (boxes_b[:, 2] + boxes_b[:, 5]).view(1, -1) 97 | boxes_b_height_min = boxes_b[:, 2].view(1, -1) 98 | 99 | max_of_min = torch.max(boxes_a_height_min, boxes_b_height_min) 100 | min_of_max = torch.min(boxes_a_height_max, boxes_b_height_max) 101 | overlaps_h = torch.clamp(min_of_max - max_of_min, min=0) 102 | 103 | # 3d iou 104 | overlaps_3d = overlaps_bev * overlaps_h 105 | 106 | vol_a = (boxes_a[:, 3] * boxes_a[:, 4] * boxes_a[:, 5]).view(-1, 1) 107 | vol_b = (boxes_b[:, 3] * boxes_b[:, 4] * boxes_b[:, 5]).view(1, -1) 108 | 109 | iou3d = overlaps_3d / torch.clamp(vol_a + vol_b - overlaps_3d, min=1e-7) 110 | 111 | return iou3d 112 | 113 | 114 | def nms_gpu(boxes, scores, thresh): 115 | """ 116 | :param boxes: (N, 5) [x1, y1, x2, y2, ry] 117 | :param scores: (N) 118 | :param thresh: 119 | :return: 120 | """ 121 | # areas = (x2 - x1) * (y2 - y1) 122 | order = scores.sort(0, descending=True)[1] 123 | 124 | boxes = boxes[order].contiguous() 125 | 126 | keep = torch.LongTensor(boxes.size(0)) 127 | num_out = iou3d_cuda.nms_gpu(boxes, keep, thresh) 128 | return order[keep[:num_out].cuda()].contiguous() 129 | 130 | def nms_normal_gpu(boxes, scores, thresh): 131 | """ 132 | :param boxes: (N, 5) [x1, y1, x2, y2, ry] 133 | :param scores: (N) 134 | :param thresh: 135 | :return: 136 | """ 137 | # areas = (x2 - x1) * (y2 - y1) 138 | order = scores.sort(0, descending=True)[1] 139 | 140 | boxes = boxes[order].contiguous() 141 | 142 | keep = torch.LongTensor(boxes.size(0)) 143 | num_out = iou3d_cuda.nms_normal_gpu(boxes, keep, thresh) 144 | return order[keep[:num_out].cuda()].contiguous() 145 | 146 | class RotateIou2dSimilarity(object): 147 | """Class to compute similarity based on Intersection over Union (IOU) metric. 148 | 149 | This class computes pairwise similarity between two BoxLists based on IOU. 150 | """ 151 | def __call__(self, boxes1, boxes2): 152 | return boxes_iou_bev(boxes1, boxes2) 153 | 154 | class RotateIou3dSimilarity(object): 155 | """Class to compute similarity based on Intersection over Union (IOU) metric. 156 | 157 | This class computes pairwise similarity between two BoxLists based on IOU. 158 | """ 159 | def __call__(self, boxes1, boxes2): 160 | return boxes_iou3d_gpu(boxes1, boxes2) 161 | 162 | 163 | class NearestIouSimilarity(object): 164 | """Class to compute similarity based on the squared distance metric. 165 | 166 | This class computes pairwise similarity between two BoxLists based on the 167 | negative squared distance metric. 168 | """ 169 | 170 | def __call__(self, boxes1, boxes2): 171 | """Compute matrix of (negated) sq distances. 172 | 173 | Args: 174 | boxlist1: BoxList holding N boxes. 175 | boxlist2: BoxList holding M boxes. 176 | 177 | Returns: 178 | A tensor with shape [N, M] representing negated pairwise squared distance. 179 | """ 180 | 181 | boxes1_near = boxes3d_to_near_torch(boxes1) 182 | boxes2_near = boxes3d_to_near_torch(boxes2) 183 | return boxes_iou(boxes1_near, boxes2_near) 184 | 185 | if __name__ == '__main__': 186 | pass -------------------------------------------------------------------------------- /mmdet/ops/iou3d/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='iou3d', 6 | ext_modules=[ 7 | CUDAExtension('iou3d_cuda', [ 8 | 'src/iou3d.cpp', 9 | 'src/iou3d_kernel.cu', 10 | ], 11 | extra_compile_args={'cxx': ['-g'], 12 | 'nvcc': ['-O2']}) 13 | ], 14 | cmdclass={'build_ext': BuildExtension}) 15 | -------------------------------------------------------------------------------- /mmdet/ops/pointnet2/pointnet2_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Variable 3 | from torch.autograd import Function 4 | import torch.nn as nn 5 | from typing import Tuple 6 | 7 | import mmdet.ops.pointnet2.pointnet2_cuda as pointnet2 8 | 9 | class ThreeNN(Function): 10 | 11 | @staticmethod 12 | def forward(ctx, unknown: torch.Tensor, known: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: 13 | """ 14 | Find the three nearest neighbors of unknown in known 15 | :param ctx: 16 | :param unknown: (N, 3) 17 | :param known: (M, 3) 18 | :return: 19 | dist: (N, 3) l2 distance to the three nearest neighbors 20 | idx: (N, 3) index of 3 nearest neighbors 21 | """ 22 | assert unknown.is_contiguous() 23 | assert known.is_contiguous() 24 | 25 | N, _ = unknown.size() 26 | m = known.size(0) 27 | dist2 = torch.cuda.FloatTensor(N, 3) 28 | idx = torch.cuda.IntTensor(N, 3) 29 | 30 | pointnet2.three_nn_wrapper(N, m, unknown, known, dist2, idx) 31 | return torch.sqrt(dist2), idx 32 | 33 | @staticmethod 34 | def backward(ctx, a=None, b=None): 35 | return None, None 36 | 37 | 38 | three_nn = ThreeNN.apply 39 | 40 | 41 | class ThreeInterpolate(Function): 42 | 43 | @staticmethod 44 | def forward(ctx, features: torch.Tensor, idx: torch.Tensor, weight: torch.Tensor) -> torch.Tensor: 45 | """ 46 | Performs weight linear interpolation on 3 features 47 | :param ctx: 48 | :param features: (M, C) Features descriptors to be interpolated from 49 | :param idx: (n, 3) three nearest neighbors of the target features in features 50 | :param weight: (n, 3) weights 51 | :return: 52 | output: (N, C) tensor of the interpolated features 53 | """ 54 | assert features.is_contiguous() 55 | assert idx.is_contiguous() 56 | assert weight.is_contiguous() 57 | 58 | m, c = features.size() 59 | n = idx.size(0) 60 | ctx.three_interpolate_for_backward = (idx, weight, m) 61 | output = torch.cuda.FloatTensor(n, c) 62 | 63 | pointnet2.three_interpolate_wrapper(c, m, n, features, idx, weight, output) 64 | return output 65 | 66 | @staticmethod 67 | def backward(ctx, grad_out: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: 68 | """ 69 | :param ctx: 70 | :param grad_out: (N, C) tensor with gradients of outputs 71 | :return: 72 | grad_features: (M, C) tensor with gradients of features 73 | None: 74 | None: 75 | """ 76 | idx, weight, m = ctx.three_interpolate_for_backward 77 | n, c = grad_out.size() 78 | 79 | grad_features = Variable(torch.cuda.FloatTensor(m, c).zero_()) 80 | grad_out_data = grad_out.data.contiguous() 81 | 82 | pointnet2.three_interpolate_grad_wrapper( c, n, m, grad_out_data, idx, weight, grad_features.data) 83 | return grad_features, None, None 84 | 85 | 86 | three_interpolate = ThreeInterpolate.apply 87 | 88 | 89 | -------------------------------------------------------------------------------- /mmdet/ops/pointnet2/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='pointnet2', 6 | ext_modules=[ 7 | CUDAExtension('pointnet2_cuda', [ 8 | 'src/pointnet2_api.cpp', 9 | 'src/interpolate.cpp', 10 | 'src/interpolate_gpu.cu', 11 | ], 12 | extra_compile_args={'cxx': ['-g'], 13 | 'nvcc': ['-O2']}) 14 | ], 15 | cmdclass={'build_ext': BuildExtension} 16 | ) 17 | -------------------------------------------------------------------------------- /mmdet/ops/pointnet2/src/cuda_utils.h: -------------------------------------------------------------------------------- 1 | #ifndef _CUDA_UTILS_H 2 | #define _CUDA_UTILS_H 3 | 4 | #include 5 | 6 | #define TOTAL_THREADS 1024 7 | #define THREADS_PER_BLOCK 256 8 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 9 | 10 | inline int opt_n_threads(int work_size) { 11 | const int pow_2 = std::log(static_cast(work_size)) / std::log(2.0); 12 | 13 | return max(min(1 << pow_2, TOTAL_THREADS), 1); 14 | } 15 | #endif 16 | -------------------------------------------------------------------------------- /mmdet/ops/pointnet2/src/interpolate.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "interpolate_gpu.h" 10 | 11 | extern THCState *state; 12 | 13 | void three_nn_wrapper_fast(int n, int m, at::Tensor unknown_tensor, 14 | at::Tensor known_tensor, at::Tensor dist2_tensor, at::Tensor idx_tensor) { 15 | const float *unknown = unknown_tensor.data(); 16 | const float *known = known_tensor.data(); 17 | float *dist2 = dist2_tensor.data(); 18 | int *idx = idx_tensor.data(); 19 | 20 | cudaStream_t stream = THCState_getCurrentStream(state); 21 | three_nn_kernel_launcher_fast(n, m, unknown, known, dist2, idx, stream); 22 | } 23 | 24 | 25 | void three_interpolate_wrapper_fast(int c, int m, int n, 26 | at::Tensor points_tensor, 27 | at::Tensor idx_tensor, 28 | at::Tensor weight_tensor, 29 | at::Tensor out_tensor) { 30 | 31 | const float *points = points_tensor.data(); 32 | const float *weight = weight_tensor.data(); 33 | float *out = out_tensor.data(); 34 | const int *idx = idx_tensor.data(); 35 | 36 | cudaStream_t stream = THCState_getCurrentStream(state); 37 | three_interpolate_kernel_launcher_fast(c, m, n, points, idx, weight, out, stream); 38 | } 39 | 40 | void three_interpolate_grad_wrapper_fast(int c, int n, int m, 41 | at::Tensor grad_out_tensor, 42 | at::Tensor idx_tensor, 43 | at::Tensor weight_tensor, 44 | at::Tensor grad_points_tensor) { 45 | 46 | const float *grad_out = grad_out_tensor.data(); 47 | const float *weight = weight_tensor.data(); 48 | float *grad_points = grad_points_tensor.data(); 49 | const int *idx = idx_tensor.data(); 50 | 51 | cudaStream_t stream = THCState_getCurrentStream(state); 52 | three_interpolate_grad_kernel_launcher_fast(c, n, m, grad_out, idx, weight, grad_points, stream); 53 | } -------------------------------------------------------------------------------- /mmdet/ops/pointnet2/src/interpolate_gpu.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "cuda_utils.h" 6 | #include "interpolate_gpu.h" 7 | 8 | 9 | __global__ void three_nn_kernel_fast(int n, int m, const float *__restrict__ unknown, 10 | const float *__restrict__ known, float *__restrict__ dist2, int *__restrict__ idx) { 11 | // unknown: (N, 4) 12 | // known: (M, 4) 13 | // output: 14 | // dist2: (N, 3) 15 | // idx: (N, 3) 16 | 17 | 18 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; 19 | if (pt_idx >= n) return; 20 | 21 | unknown += pt_idx * 4; 22 | 23 | dist2 += pt_idx * 3; 24 | idx += pt_idx * 3; 25 | 26 | float ub = unknown[0]; 27 | float ux = unknown[1]; 28 | float uy = unknown[2]; 29 | float uz = unknown[3]; 30 | 31 | double best1 = 1e40, best2 = 1e40, best3 = 1e40; 32 | int besti1 = 0, besti2 = 0, besti3 = 0; 33 | for (int k = 0; k < m; ++k) { 34 | float b = known[k * 4 + 0]; //batch number 35 | if (b!=ub) 36 | continue; 37 | float x = known[k * 4 + 1]; 38 | float y = known[k * 4 + 2]; 39 | float z = known[k * 4 + 3]; 40 | float d = (ux - x) * (ux - x) + (uy - y) * (uy - y) + (uz - z) * (uz - z); 41 | if (d < best1) { 42 | best3 = best2; besti3 = besti2; 43 | best2 = best1; besti2 = besti1; 44 | best1 = d; besti1 = k; 45 | } 46 | else if (d < best2) { 47 | best3 = best2; besti3 = besti2; 48 | best2 = d; besti2 = k; 49 | } 50 | else if (d < best3) { 51 | best3 = d; besti3 = k; 52 | } 53 | } 54 | dist2[0] = best1; dist2[1] = best2; dist2[2] = best3; 55 | idx[0] = besti1; idx[1] = besti2; idx[2] = besti3; 56 | } 57 | 58 | void three_nn_kernel_launcher_fast(int n, int m, const float *unknown, 59 | const float *known, float *dist2, int *idx, cudaStream_t stream) { 60 | // unknown: (N, 4) 61 | // known: (M, 4) 62 | // output: 63 | // dist2: (N, 3) 64 | // idx: (N, 3) 65 | 66 | cudaError_t err; 67 | dim3 blocks(DIVUP(n, THREADS_PER_BLOCK)); // blockIdx.x(col), blockIdx.y(row) 68 | dim3 threads(THREADS_PER_BLOCK); 69 | 70 | three_nn_kernel_fast<<>>(n, m, unknown, known, dist2, idx); 71 | 72 | err = cudaGetLastError(); 73 | if (cudaSuccess != err) { 74 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 75 | exit(-1); 76 | } 77 | } 78 | 79 | 80 | __global__ void three_interpolate_kernel_fast(int c, int m, int n, const float *__restrict__ points, 81 | const int *__restrict__ idx, const float *__restrict__ weight, float *__restrict__ out) { 82 | // points: (M, C) 83 | // idx: (N, 3) 84 | // weight: (N, 3) 85 | // output: 86 | // out: (N, C) 87 | 88 | 89 | int c_idx = blockIdx.y; 90 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; 91 | 92 | if (c_idx >= c || pt_idx >= n) return; 93 | 94 | weight += pt_idx * 3; 95 | //points += c_idx * m; 96 | 97 | idx += pt_idx * 3; 98 | 99 | out += pt_idx * c; 100 | 101 | out[c_idx] = weight[0] * points[idx[0] * c + c_idx] + weight[1] * points[idx[1] * c + c_idx] + weight[2] * points[idx[2] * c + c_idx]; 102 | } 103 | 104 | void three_interpolate_kernel_launcher_fast(int c, int m, int n, 105 | const float *points, const int *idx, const float *weight, float *out, cudaStream_t stream) { 106 | // points: (M, C) 107 | // idx: (N, 3) 108 | // weight: (N, 3) 109 | // output: 110 | // out: (N, C) 111 | 112 | cudaError_t err; 113 | dim3 blocks(DIVUP(n, THREADS_PER_BLOCK), c); // blockIdx.x(col), blockIdx.y(row) 114 | dim3 threads(THREADS_PER_BLOCK); 115 | three_interpolate_kernel_fast<<>>(c, m, n, points, idx, weight, out); 116 | 117 | err = cudaGetLastError(); 118 | if (cudaSuccess != err) { 119 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 120 | exit(-1); 121 | } 122 | } 123 | 124 | __global__ void three_interpolate_grad_kernel_fast(int c, int n, int m, const float *__restrict__ grad_out, 125 | const int *__restrict__ idx, const float *__restrict__ weight, float *__restrict__ grad_points) { 126 | // grad_out: (N, C) 127 | // weight: (N, 3) 128 | // idx: (N, 3) 129 | // output: 130 | // grad_points: (M, C) 131 | 132 | 133 | int c_idx = blockIdx.y; 134 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; 135 | 136 | if (c_idx >= c || pt_idx >= n) return; 137 | 138 | grad_out += pt_idx * c + c_idx; 139 | weight += pt_idx * 3; 140 | //grad_points += c_idx * m; 141 | idx += pt_idx * 3; 142 | 143 | atomicAdd(grad_points + idx[0] * c + c_idx, grad_out[0] * weight[0]); 144 | atomicAdd(grad_points + idx[1] * c + c_idx, grad_out[0] * weight[1]); 145 | atomicAdd(grad_points + idx[2] * c + c_idx, grad_out[0] * weight[2]); 146 | } 147 | 148 | void three_interpolate_grad_kernel_launcher_fast(int c, int n, int m, const float *grad_out, 149 | const int *idx, const float *weight, float *grad_points, cudaStream_t stream) { 150 | // grad_out: (N, C) 151 | // weight: (N, 3) 152 | // idx: (N, 3) 153 | // output: 154 | // grad_points: (M, C) 155 | 156 | cudaError_t err; 157 | dim3 blocks(DIVUP(n, THREADS_PER_BLOCK), c); // blockIdx.x(col), blockIdx.y(row) 158 | dim3 threads(THREADS_PER_BLOCK); 159 | three_interpolate_grad_kernel_fast<<>>(c, n, m, grad_out, idx, weight, grad_points); 160 | 161 | err = cudaGetLastError(); 162 | if (cudaSuccess != err) { 163 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); 164 | exit(-1); 165 | } 166 | } -------------------------------------------------------------------------------- /mmdet/ops/pointnet2/src/interpolate_gpu.h: -------------------------------------------------------------------------------- 1 | #ifndef _INTERPOLATE_GPU_H 2 | #define _INTERPOLATE_GPU_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | 10 | void three_nn_wrapper_fast(int n, int m, at::Tensor unknown_tensor, 11 | at::Tensor known_tensor, at::Tensor dist2_tensor, at::Tensor idx_tensor); 12 | 13 | void three_nn_kernel_launcher_fast(int n, int m, const float *unknown, 14 | const float *known, float *dist2, int *idx, cudaStream_t stream); 15 | 16 | 17 | void three_interpolate_wrapper_fast(int c, int m, int n, at::Tensor points_tensor, 18 | at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor out_tensor); 19 | 20 | void three_interpolate_kernel_launcher_fast(int c, int m, int n, 21 | const float *points, const int *idx, const float *weight, float *out, cudaStream_t stream); 22 | 23 | 24 | void three_interpolate_grad_wrapper_fast(int c, int n, int m, at::Tensor grad_out_tensor, 25 | at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_points_tensor); 26 | 27 | void three_interpolate_grad_kernel_launcher_fast(int c, int n, int m, const float *grad_out, 28 | const int *idx, const float *weight, float *grad_points, cudaStream_t stream); 29 | 30 | #endif 31 | -------------------------------------------------------------------------------- /mmdet/ops/pointnet2/src/pointnet2_api.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "interpolate_gpu.h" 5 | 6 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 7 | m.def("three_nn_wrapper", &three_nn_wrapper_fast, "three_nn_wrapper_fast"); 8 | m.def("three_interpolate_wrapper", &three_interpolate_wrapper_fast, "three_interpolate_wrapper_fast"); 9 | m.def("three_interpolate_grad_wrapper", &three_interpolate_grad_wrapper_fast, "three_interpolate_grad_wrapper_fast"); 10 | } 11 | -------------------------------------------------------------------------------- /mmdet/ops/points_op/__init__.py: -------------------------------------------------------------------------------- 1 | from .points_ops import * 2 | from mmdet.ops.points_op import points_op_cpu 3 | import torch 4 | 5 | def pts_in_boxes3d(pts, boxes3d): 6 | N = len(pts) 7 | M = len(boxes3d) 8 | pts_in_flag = torch.IntTensor(M, N).fill_(0) 9 | reg_target = torch.FloatTensor(N, 3).fill_(0) 10 | points_op_cpu.pts_in_boxes3d(pts.contiguous(), boxes3d.contiguous(), pts_in_flag, reg_target) 11 | return pts_in_flag, reg_target 12 | 13 | -------------------------------------------------------------------------------- /mmdet/ops/points_op/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CppExtension, CUDAExtension 3 | 4 | class get_pybind_include(object): 5 | """Helper class to determine the pybind11 include path 6 | The purpose of this class is to postpone importing pybind11 7 | until it is actually installed, so that the ``get_include()`` 8 | method can be invoked. """ 9 | 10 | def __init__(self, user=False): 11 | self.user = user 12 | 13 | def __str__(self): 14 | import pybind11 15 | return pybind11.get_include(self.user) 16 | 17 | ext_modules = [ 18 | CppExtension( 19 | name='points_op_cpu', 20 | sources = ['src/points_op.cpp'], 21 | extra_compile_args=['-g'], 22 | include_dirs=[ 23 | # Path to pybind11 headers 24 | get_pybind_include(), 25 | get_pybind_include(user=True) 26 | ], 27 | ), 28 | ] 29 | 30 | setup( 31 | name='cpplib', 32 | ext_modules=ext_modules, 33 | cmdclass={ 34 | 'build_ext': BuildExtension 35 | }) 36 | 37 | -------------------------------------------------------------------------------- /mmdet/version.py: -------------------------------------------------------------------------------- 1 | # GENERATED VERSION FILE 2 | # TIME: Thu Mar 7 20:30:16 2019 3 | 4 | __version__ = '0.5.4+a6ee053' 5 | short_version = '0.5.4' 6 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | ## SA-SSD: Structure Aware Single-stage 3D Object Detection from Point Cloud (CVPR 2020) [\[paper\]](https://www4.comp.polyu.edu.hk/~cslzhang/paper/SA-SSD.pdf) 2 | Currently 1st place in KITTI BEV and 3rd in KITTI 3D. The detector can run at 25 FPS. 3 | 4 | **Authors**: [Chenhang He](https://github.com/skyhehe123), [Zeng Hui](https://github.com/HuiZeng), Jianqiang Huang, Xiansheng Hua, [Lei Zhang](https://www4.comp.polyu.edu.hk/~cslzhang/). 5 | 6 | ## Updates 7 | 2020-04-13: Add one_cycle (with Adam) training as default scheduler. 8 | 9 | 2020-08-04: Multi-class training is supported. (The multi-class traning is not well tuned and will slightly deteriote the performance of model with single class training (i.e. each class has a individual model), please find the bellow AP@(11 recall points) for your reference.) 10 | ``` 11 | Car AP@0.70, 0.70, 0.70: 12 | bbox AP:98.96, 90.06, 89.52 13 | bev AP:90.59, 88.43, 87.49 14 | 3d AP:89.69, 79.41, 78.33 15 | aos AP:98.94, 89.89, 89.19 16 | Car AP@0.70, 0.50, 0.50: 17 | bbox AP:98.96, 90.06, 89.52 18 | bev AP:98.99, 90.13, 89.68 19 | 3d AP:98.97, 90.10, 89.63 20 | aos AP:98.94, 89.89, 89.19 21 | 22 | Pedestrian AP@0.50, 0.50, 0.50: 23 | bbox AP:62.88, 60.26, 53.58 24 | bev AP:58.52, 50.29, 44.10 25 | 3d AP:55.75, 48.01, 41.94 26 | aos AP:58.57, 55.19, 49.07 27 | Pedestrian AP@0.50, 0.25, 0.25: 28 | bbox AP:62.88, 60.26, 53.58 29 | bev AP:71.34, 62.80, 55.64 30 | 3d AP:71.33, 62.76, 55.60 31 | aos AP:58.57, 55.19, 49.07 32 | 33 | Cyclist AP@0.50, 0.50, 0.50: 34 | bbox AP:87.25, 73.74, 67.84 35 | bev AP:85.40, 70.48, 64.59 36 | 3d AP:82.80, 63.37, 61.60 37 | aos AP:86.93, 73.26, 67.41 38 | Cyclist AP@0.50, 0.25, 0.25: 39 | bbox AP:87.25, 73.74, 67.84 40 | bev AP:86.78, 71.55, 65.85 41 | 3d AP:86.78, 71.54, 65.85 42 | aos AP:86.93, 73.26, 67.41 43 | ``` 44 | 45 | ## Demo 46 | [![Demo](https://github.com/skyhehe123/SA-SSD/blob/master/doc/hqdefault.jpg)](https://www.youtube.com/watch?v=jrAb3ts4tAs) 47 | 48 | # Introduction 49 | ![model](https://github.com/skyhehe123/SA-SSD/blob/master/doc/model.png) 50 | Current single-stage detectors are efficient by progressively downscaling the 3D point clouds in a fully convolutional manner. However, the downscaled features inevitably lose spatial information and cannot make full use of the structure information of 3D point cloud, degrading their localization precision. In this work, we propose to improve the localization precision of single-stage detectors by explicitly leveraging the structure information of 3D point cloud. Specifically, we design an auxiliary network which converts the convolutional features in the backbone network back to point-level representations. The auxiliary network is jointly optimized, by two point-level supervisions, to guide the convolutional features in the backbone network to be aware of the object structure. The auxiliary network can be detached after training and therefore introduces no extra computation in the inference stage. Besides, considering that single-stage detectors suffer from the discordance between the predicted bounding boxes and corresponding classification confidences, we develop an efficient part-sensitive warping operation to align the confidences to the predicted bounding boxes. 51 | 52 | # Dependencies 53 | - `python3.5+` 54 | - `pytorch` (tested on 1.1.0) 55 | - `opencv` 56 | - `shapely` 57 | - `mayavi` 58 | - `spconv` (v1.0) 59 | 60 | # Installation 61 | 1. Clone this repository. 62 | 2. Compile C++/CUDA modules in mmdet/ops by running the following command at each directory, e.g. 63 | ```bash 64 | $ cd mmdet/ops/points_op 65 | $ python3 setup.py build_ext --inplace 66 | ``` 67 | 3. Setup following Environment variables, you may add them to ~/.bashrc: 68 | ```bash 69 | export NUMBAPRO_CUDA_DRIVER=/usr/lib/x86_64-linux-gnu/libcuda.so 70 | export NUMBAPRO_NVVM=/usr/local/cuda/nvvm/lib64/libnvvm.so 71 | export NUMBAPRO_LIBDEVICE=/usr/local/cuda/nvvm/libdevice 72 | export LD_LIBRARY_PATH=/home/billyhe/anaconda3/lib/python3.7/site-packages/spconv; 73 | ``` 74 | 75 | # Data Preparation 76 | 1. Download the 3D KITTI detection dataset from [here](http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d). Data to download include: 77 | * Velodyne point clouds (29 GB): input data to VoxelNet 78 | * Training labels of object data set (5 MB): input label to VoxelNet 79 | * Camera calibration matrices of object data set (16 MB): for visualization of predictions 80 | * Left color images of object data set (12 GB): for visualization of predictions 81 | 82 | 2. Create cropped point cloud and sample pool for data augmentation, please refer to [SECOND](https://github.com/traveller59/second.pytorch). 83 | ```bash 84 | $ python3 tools/create_data.py 85 | ``` 86 | 87 | 3. Split the training set into training and validation set according to the protocol [here](https://xiaozhichen.github.io/files/mv3d/imagesets.tar.gz). 88 | ```plain 89 | └── DATA_DIR 90 | ├── training <-- training data 91 | | ├── image_2 92 | | ├── label_2 93 | | ├── velodyne 94 | | └── velodyne_reduced 95 | └── testing <--- testing data 96 | | ├── image_2 97 | | ├── label_2 98 | | ├── velodyne 99 | | └── velodyne_reduced 100 | ``` 101 | 102 | # Pretrained Model 103 | You can download the pretrained model [here](https://drive.google.com/file/d/1WJnJDMOeNKszdZH3P077wKXcoty7XOUb/view?usp=sharing), 104 | which is trained on the train split (3712 samples) and evaluated on the val split (3769 samples) and test split (7518 samples). 105 | The performance (using 40 recall poisitions) on validation set is as follows: 106 | ``` 107 | Car AP@0.70, 0.70, 0.70: 108 | bbox AP:99.12, 96.09, 93.61 109 | bev AP:96.55, 92.79, 90.32 110 | 3d AP:93.13, 84.54, 81.71 111 | ``` 112 | # Train 113 | To train the SA-SSD with single GPU, run the following command: 114 | ``` 115 | cd mmdet/tools 116 | python3 train.py ../configs/car_cfg.py 117 | ``` 118 | To train the SA-SSD with multiple GPUs, run the following command: 119 | ``` 120 | bash dist_train.sh 121 | ``` 122 | # Eval 123 | To evaluate the model, run the following command: 124 | ``` 125 | cd mmdet/tools 126 | python3 test.py ../configs/car_cfg.py ../saved_model_vehicle/epoch_50.pth 127 | ``` 128 | ## Citation 129 | If you find this work useful in your research, please consider cite: 130 | ``` 131 | @inproceedings{he2020sassd, 132 | title={Structure Aware Single-stage 3D Object Detection from Point Cloud}, 133 | author={He, Chenhang and Zeng, Hui and Huang, Jianqiang and Hua, Xian-Sheng and Zhang, Lei}, 134 | booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition}, 135 | year={2020} 136 | } 137 | ``` 138 | 139 | ## Acknowledgement 140 | The code is devloped based on mmdetection, some part of codes are borrowed from SECOND and PointRCNN. 141 | * [mmdetection](https://github.com/open-mmlab/mmdetection) 142 | * [mmcv](https://github.com/open-mmlab/mmcv) 143 | * [second.pytorch](https://github.com/traveller59/second.pytorch) 144 | * [PointRCNN](https://github.com/sshaoshuai/PointRCNN) 145 | 146 | 147 | -------------------------------------------------------------------------------- /tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | PYTHON=${PYTHON:-"python"} 4 | 5 | $PYTHON -m torch.distributed.launch --nproc_per_node=2 $(dirname "$0")/train.py ../configs/car_cfg.py --launcher pytorch ${@:3} 6 | -------------------------------------------------------------------------------- /tools/env.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import random 4 | import time 5 | import numpy as np 6 | import torch 7 | import torch.distributed as dist 8 | import torch.multiprocessing as mp 9 | from mmcv.runner import get_dist_info 10 | 11 | 12 | def init_dist(launcher, backend='nccl', **kwargs): 13 | if mp.get_start_method(allow_none=True) is None: 14 | mp.set_start_method('spawn') 15 | if launcher == 'pytorch': 16 | _init_dist_pytorch(backend, **kwargs) 17 | elif launcher == 'mpi': 18 | _init_dist_mpi(backend, **kwargs) 19 | elif launcher == 'slurm': 20 | _init_dist_slurm(backend, **kwargs) 21 | else: 22 | raise ValueError('Invalid launcher type: {}'.format(launcher)) 23 | 24 | 25 | def _init_dist_pytorch(backend, **kwargs): 26 | # TODO: use local_rank instead of rank % num_gpus 27 | rank = int(os.environ['RANK']) 28 | num_gpus = torch.cuda.device_count() 29 | torch.cuda.set_device(rank % num_gpus) 30 | dist.init_process_group(backend=backend, **kwargs) 31 | 32 | 33 | def _init_dist_mpi(backend, **kwargs): 34 | raise NotImplementedError 35 | 36 | 37 | def _init_dist_slurm(backend, **kwargs): 38 | raise NotImplementedError 39 | 40 | 41 | def set_random_seed(seed): 42 | random.seed(seed) 43 | np.random.seed(seed) 44 | torch.manual_seed(seed) 45 | torch.cuda.manual_seed_all(seed) 46 | 47 | 48 | def get_root_logger(work_dir): 49 | logging.basicConfig( 50 | format='%(asctime)s - %(levelname)s - %(message)s', 51 | level=logging.INFO) 52 | 53 | logger = logging.getLogger() 54 | rank, _ = get_dist_info() 55 | if rank != 0: 56 | logger.setLevel('ERROR') 57 | 58 | filename = '{}.log'.format(time.strftime('%Y%m%d_%H%M%S', time.localtime())) 59 | log_file = os.path.join(work_dir, filename) 60 | file_handler = logging.FileHandler(log_file, 'w') 61 | file_handler.setLevel(logging.INFO) 62 | logger.addHandler(file_handler) 63 | 64 | return logger 65 | -------------------------------------------------------------------------------- /tools/test.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | sys.path.append('/home/billyhe/SA-SSD') 4 | import torch 5 | import mmcv 6 | from mmcv.runner import load_checkpoint, parallel_test 7 | from mmcv.parallel import scatter, collate, MMDataParallel 8 | from mmdet.core.evaluation.kitti_eval import get_official_eval_result 9 | from mmdet.core import results2json, coco_eval 10 | from mmdet.datasets import build_dataloader 11 | from mmdet.models import build_detector, detectors 12 | import tools.kitti_common as kitti 13 | import numpy as np 14 | import torch.utils.data 15 | import os 16 | from tools.train_utils import load_params_from_file 17 | from mmdet.datasets import utils 18 | 19 | def single_test(model, data_loader, saveto=None, class_names=['Car']): 20 | template = '{} ' + ' '.join(['{:.4f}' for _ in range(15)]) + '\n' 21 | if saveto is not None: 22 | mmcv.mkdir_or_exist(saveto) 23 | 24 | model.eval() 25 | annos = [] 26 | 27 | prog_bar = mmcv.ProgressBar(len(data_loader)) 28 | 29 | for i, data in enumerate(data_loader): 30 | with torch.no_grad(): 31 | results = model(return_loss=False, **data) 32 | annos+=results 33 | # image_shape = (375,1242) 34 | # for re in results: 35 | # img_idx = re['image_idx'] 36 | # if re['bbox'] is not None: 37 | # box2d = re['bbox'] 38 | # box3d = re['box3d_camera'] 39 | # labels = re['label_preds'] 40 | # scores = re['scores'] 41 | # alphas = re['alphas'] 42 | # anno = kitti.get_start_result_anno() 43 | # num_example = 0 44 | # for bbox2d, bbox3d, label, score, alpha in zip(box2d, box3d, labels, scores, alphas): 45 | # if bbox2d[0] > image_shape[1] or bbox2d[1] > image_shape[0]: 46 | # continue 47 | # if bbox2d[2] < 0 or bbox2d[3] < 0: 48 | # continue 49 | # bbox2d[2:] = np.minimum(bbox2d[2:], image_shape[::-1]) 50 | # bbox2d[:2] = np.maximum(bbox2d[:2], [0, 0]) 51 | # anno["name"].append(class_names[int(label)]) 52 | # anno["truncated"].append(0.0) 53 | # anno["occluded"].append(0) 54 | # # anno["alpha"].append(-10) 55 | # anno["alpha"].append(alpha) 56 | # anno["bbox"].append(bbox2d) 57 | # # anno["dimensions"].append(np.array([-1,-1,-1])) 58 | # anno["dimensions"].append(bbox3d[[3, 4, 5]]) 59 | # # anno["location"].append(np.array([-1000,-1000,-1000])) 60 | # anno["location"].append(bbox3d[:3]) 61 | # # anno["rotation_y"].append(-10) 62 | # anno["rotation_y"].append(bbox3d[6]) 63 | # anno["score"].append(score) 64 | # num_example += 1 65 | # if num_example != 0: 66 | # if saveto is not None: 67 | # of_path = os.path.join(saveto, '%06d.txt' % img_idx) 68 | # with open(of_path, 'w+') as f: 69 | # for name, bbox, dim, loc, ry, score, alpha in zip(anno['name'], anno["bbox"], \ 70 | # anno["dimensions"], anno["location"], anno["rotation_y"], anno["score"],anno["alpha"]): 71 | # line = template.format(name, 0, 0, alpha, *bbox, *dim[[1,2,0]], *loc, ry, score) 72 | # f.write(line) 73 | # 74 | # anno = {n: np.stack(v) for n, v in anno.items()} 75 | # annos.append(anno) 76 | # else: 77 | # if saveto is not None: 78 | # of_path = os.path.join(saveto, '%06d.txt' % img_idx) 79 | # f = open(of_path, 'w+') 80 | # f.close() 81 | # annos.append(kitti.empty_result_anno()) 82 | # else: 83 | # if saveto is not None: 84 | # of_path = os.path.join(saveto, '%06d.txt' % img_idx) 85 | # f = open(of_path, 'w+') 86 | # f.close() 87 | # annos.append(kitti.empty_result_anno()) 88 | # 89 | # num_example = annos[-1]["name"].shape[0] 90 | # annos[-1]["image_idx"] = np.array( 91 | # [img_idx] * num_example, dtype=np.int64) 92 | prog_bar.update() 93 | 94 | return annos 95 | 96 | 97 | def _data_func(data, device_id): 98 | data = scatter(collate([data], samples_per_gpu=1), [device_id])[0] 99 | return dict(return_loss=False, rescale=True, **data) 100 | 101 | 102 | def parse_args(): 103 | parser = argparse.ArgumentParser(description='MMDet test detector') 104 | parser.add_argument('config', help='test config file path') 105 | parser.add_argument('checkpoint', help='checkpoint file') 106 | parser.add_argument( 107 | '--gpus', default=1, type=int, help='GPU number used for testing') 108 | parser.add_argument( 109 | '--proc_per_gpu', 110 | default=1, 111 | type=int, 112 | help='Number of processes per GPU') 113 | parser.add_argument('--out', help='output result file') 114 | parser.add_argument( 115 | '--eval', 116 | type=str, 117 | nargs='+', 118 | choices=['proposal', 'proposal_fast', 'bbox', 'segm', 'keypoints'], 119 | help='eval types') 120 | parser.add_argument('--show', action='store_true', help='show results') 121 | args = parser.parse_args() 122 | return args 123 | 124 | 125 | def main(): 126 | args = parse_args() 127 | 128 | cfg = mmcv.Config.fromfile(args.config) 129 | cfg.model.pretrained = None 130 | 131 | dataset = utils.get_dataset(cfg.data.val) 132 | class_names = cfg.data.val.class_names 133 | 134 | if args.gpus == 1: 135 | model = build_detector( 136 | cfg.model, train_cfg=None, test_cfg=cfg.test_cfg) 137 | 138 | print("Evaluate on", cfg.data.val.class_names) 139 | setattr(model, 'class_names', class_names) 140 | 141 | #load_checkpoint(model, args.checkpoint) 142 | model = MMDataParallel(model, device_ids=[0]) 143 | load_params_from_file(model, args.checkpoint) 144 | data_loader = build_dataloader( 145 | dataset, 146 | 1, 147 | cfg.data.workers_per_gpu, 148 | num_gpus=1, 149 | shuffle=False, 150 | dist=False) 151 | outputs = single_test(model, data_loader, args.out) 152 | else: 153 | NotImplementedError 154 | 155 | # kitti evaluation 156 | gt_annos = kitti.get_label_annos(dataset.label_prefix, dataset.sample_ids) 157 | result = get_official_eval_result(gt_annos, outputs, current_classes=class_names) 158 | print(result) 159 | 160 | 161 | if __name__ == '__main__': 162 | main() 163 | -------------------------------------------------------------------------------- /tools/train.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import argparse 3 | import sys 4 | sys.path.append('/home/billyhe/SA-SSD') 5 | from mmcv.parallel import MMDataParallel, MMDistributedDataParallel 6 | from mmdet.datasets import build_dataloader 7 | from tools.env import get_root_logger, init_dist, set_random_seed 8 | from tools.train_utils import train_model 9 | import pathlib 10 | from mmcv import Config 11 | from mmdet.datasets import get_dataset 12 | from mmdet.models import build_detector 13 | from tools.train_utils.optimization import build_optimizer, build_scheduler 14 | 15 | def parse_args(): 16 | parser = argparse.ArgumentParser(description='Train a detector') 17 | parser.add_argument('config', help='train config file path') 18 | parser.add_argument('--work_dir', help='the dir to save logs and models') 19 | parser.add_argument( 20 | '--validate', 21 | action='store_true', 22 | help='whether to evaluate the checkpoint during training') 23 | parser.add_argument( 24 | '--gpus', 25 | type=int, 26 | default=1, 27 | help='number of gpus to use ' 28 | '(only applicable to non-distributed training)') 29 | parser.add_argument('--seed', type=int, default=0, help='random seed') 30 | parser.add_argument( 31 | '--launcher', 32 | choices=['none', 'pytorch', 'slurm', 'mpi'], 33 | default='none', 34 | help='job launcher') 35 | parser.add_argument('--local_rank', type=int, default=0) 36 | parser.add_argument('--max_ckpt_save_num', type=int, default=10) 37 | 38 | args = parser.parse_args() 39 | 40 | return args 41 | 42 | 43 | 44 | def main(): 45 | 46 | args = parse_args() 47 | 48 | cfg = Config.fromfile(args.config) 49 | 50 | if args.work_dir is not None: 51 | cfg.work_dir = args.work_dir 52 | 53 | pathlib.Path(cfg.work_dir).mkdir(parents=True, exist_ok=True) 54 | 55 | cfg.gpus = args.gpus 56 | 57 | # init distributed env first, since logger depends on the dist info. 58 | if args.launcher == 'none': 59 | distributed = False 60 | else: 61 | distributed = True 62 | init_dist(args.launcher, **cfg.dist_params) 63 | 64 | # init logger before other steps 65 | logger = get_root_logger(cfg.work_dir) 66 | 67 | logger.info('Distributed training: {}'.format(distributed)) 68 | 69 | # set random seeds 70 | if args.seed is not None: 71 | logger.info('Set random seed to {}'.format(args.seed)) 72 | set_random_seed(args.seed) 73 | 74 | model = build_detector( 75 | cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg) 76 | 77 | if distributed: 78 | model = MMDistributedDataParallel(model.cuda()) 79 | else: 80 | model = MMDataParallel(model, device_ids=range(cfg.gpus)).cuda() 81 | 82 | train_dataset = get_dataset(cfg.data.train) 83 | 84 | optimizer = build_optimizer(model, cfg.optimizer) 85 | 86 | train_loader = build_dataloader( 87 | train_dataset, 88 | cfg.data.imgs_per_gpu, 89 | cfg.data.workers_per_gpu, 90 | dist=distributed) 91 | 92 | start_epoch = it = 0 93 | last_epoch = -1 94 | 95 | lr_scheduler, lr_warmup_scheduler = build_scheduler( 96 | optimizer, total_iters_each_epoch=len(train_loader), total_epochs=cfg.total_epochs, 97 | last_epoch=last_epoch, optim_cfg=cfg.optimizer, lr_cfg=cfg.lr_config 98 | ) 99 | # -----------------------start training--------------------------- 100 | logger.info('**********************Start training**********************') 101 | 102 | train_model( 103 | model, 104 | optimizer, 105 | train_loader, 106 | lr_scheduler=lr_scheduler, 107 | optim_cfg=cfg.optimizer, 108 | start_epoch=start_epoch, 109 | total_epochs=cfg.total_epochs, 110 | start_iter=it, 111 | rank=args.local_rank, 112 | logger = logger, 113 | ckpt_save_dir=cfg.work_dir, 114 | lr_warmup_scheduler=lr_warmup_scheduler, 115 | ckpt_save_interval=cfg.checkpoint_config.interval, 116 | max_ckpt_save_num=args.max_ckpt_save_num, 117 | log_interval = cfg.log_config.interval 118 | ) 119 | 120 | logger.info('**********************End training**********************') 121 | 122 | 123 | 124 | 125 | 126 | if __name__ == '__main__': 127 | main() 128 | -------------------------------------------------------------------------------- /tools/train_utils/optimization/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.optim as optim 4 | from functools import partial 5 | from .fastai_optim import OptimWrapper 6 | from .learning_schedules_fastai import OneCycle, CosineWarmupLR 7 | 8 | 9 | def build_optimizer(model, optim_cfg): 10 | if optim_cfg.type == 'adam': 11 | optimizer = optim.Adam(model.parameters(), lr=optim_cfg.lr, weight_decay=optim_cfg.weight_decay) 12 | elif optim_cfg.type == 'sgd': 13 | optimizer = optim.SGD( 14 | model.parameters(), lr=optim_cfg.lr, weight_decay=optim_cfg.weight_decay, 15 | momentum=optim_cfg.momentum 16 | ) 17 | elif optim_cfg.type == 'adam_onecycle': 18 | def children(m: nn.Module): 19 | return list(m.children()) 20 | 21 | def num_children(m: nn.Module) -> int: 22 | return len(children(m)) 23 | 24 | flatten_model = lambda m: sum(map(flatten_model, m.children()), []) if num_children(m) else [m] 25 | get_layer_groups = lambda m: [nn.Sequential(*flatten_model(m))] 26 | 27 | optimizer_func = partial(optim.Adam, betas=(0.9, 0.99)) 28 | optimizer = OptimWrapper.create( 29 | optimizer_func, optim_cfg.lr, get_layer_groups(model), wd=optim_cfg.weight_decay, true_wd=True, bn_wd=True 30 | ) 31 | else: 32 | raise NotImplementedError 33 | 34 | return optimizer 35 | 36 | 37 | def build_scheduler(optimizer, total_iters_each_epoch, total_epochs, last_epoch, optim_cfg, lr_cfg): 38 | 39 | lr_warmup_scheduler = None 40 | total_steps = total_iters_each_epoch * total_epochs 41 | 42 | if lr_cfg.policy == 'onecycle': 43 | lr_scheduler = OneCycle( 44 | optimizer, total_steps, optim_cfg.lr, list(lr_cfg.moms), lr_cfg.div_factor, lr_cfg.pct_start 45 | ) 46 | 47 | elif lr_cfg.policy == 'cosine': 48 | lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, total_steps, last_epoch=last_epoch) 49 | 50 | elif lr_cfg.policy == 'step': 51 | 52 | lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, lr_cfg.step, last_epoch=last_epoch) 53 | 54 | else: 55 | raise NotImplementedError 56 | 57 | if 'warmup' in lr_cfg: 58 | lr_warmup_scheduler = CosineWarmupLR( 59 | optimizer, T_max=lr_cfg.warmup_iters, 60 | eta_min=optim_cfg.lr * lr_cfg.warmup_ratio 61 | ) 62 | 63 | return lr_scheduler, lr_warmup_scheduler 64 | -------------------------------------------------------------------------------- /tools/train_utils/optimization/learning_schedules_fastai.py: -------------------------------------------------------------------------------- 1 | # This file is modified from https://github.com/traveller59/second.pytorch 2 | 3 | import numpy as np 4 | import math 5 | from functools import partial 6 | import torch.optim.lr_scheduler as lr_sched 7 | from .fastai_optim import OptimWrapper 8 | 9 | 10 | class LRSchedulerStep(object): 11 | def __init__(self, fai_optimizer: OptimWrapper, total_step, lr_phases, 12 | mom_phases): 13 | # if not isinstance(fai_optimizer, OptimWrapper): 14 | # raise TypeError('{} is not a fastai OptimWrapper'.format( 15 | # type(fai_optimizer).__name__)) 16 | self.optimizer = fai_optimizer 17 | self.total_step = total_step 18 | self.lr_phases = [] 19 | 20 | for i, (start, lambda_func) in enumerate(lr_phases): 21 | if len(self.lr_phases) != 0: 22 | assert self.lr_phases[-1][0] < start 23 | if isinstance(lambda_func, str): 24 | lambda_func = eval(lambda_func) 25 | if i < len(lr_phases) - 1: 26 | self.lr_phases.append((int(start * total_step), int(lr_phases[i + 1][0] * total_step), lambda_func)) 27 | else: 28 | self.lr_phases.append((int(start * total_step), total_step, lambda_func)) 29 | assert self.lr_phases[0][0] == 0 30 | self.mom_phases = [] 31 | for i, (start, lambda_func) in enumerate(mom_phases): 32 | if len(self.mom_phases) != 0: 33 | assert self.mom_phases[-1][0] < start 34 | if isinstance(lambda_func, str): 35 | lambda_func = eval(lambda_func) 36 | if i < len(mom_phases) - 1: 37 | self.mom_phases.append((int(start * total_step), int(mom_phases[i + 1][0] * total_step), lambda_func)) 38 | else: 39 | self.mom_phases.append((int(start * total_step), total_step, lambda_func)) 40 | assert self.mom_phases[0][0] == 0 41 | 42 | def step(self, step): 43 | for start, end, func in self.lr_phases: 44 | if step >= start: 45 | self.optimizer.lr = func((step - start) / (end - start)) 46 | for start, end, func in self.mom_phases: 47 | if step >= start: 48 | self.optimizer.mom = func((step - start) / (end - start)) 49 | 50 | 51 | def annealing_cos(start, end, pct): 52 | # print(pct, start, end) 53 | "Cosine anneal from `start` to `end` as pct goes from 0.0 to 1.0." 54 | cos_out = np.cos(np.pi * pct) + 1 55 | return end + (start - end) / 2 * cos_out 56 | 57 | 58 | class OneCycle(LRSchedulerStep): 59 | def __init__(self, fai_optimizer, total_step, lr_max, moms, div_factor, 60 | pct_start): 61 | self.lr_max = lr_max 62 | self.moms = moms 63 | self.div_factor = div_factor 64 | self.pct_start = pct_start 65 | a1 = int(total_step * self.pct_start) 66 | a2 = total_step - a1 67 | low_lr = self.lr_max / self.div_factor 68 | lr_phases = ((0, partial(annealing_cos, low_lr, self.lr_max)), 69 | (self.pct_start, 70 | partial(annealing_cos, self.lr_max, low_lr / 1e4))) 71 | mom_phases = ((0, partial(annealing_cos, *self.moms)), 72 | (self.pct_start, partial(annealing_cos, 73 | *self.moms[::-1]))) 74 | fai_optimizer.lr, fai_optimizer.mom = low_lr, self.moms[0] 75 | super().__init__(fai_optimizer, total_step, lr_phases, mom_phases) 76 | 77 | 78 | class CosineWarmupLR(lr_sched._LRScheduler): 79 | def __init__(self, optimizer, T_max, eta_min=0, last_epoch=-1): 80 | self.T_max = T_max 81 | self.eta_min = eta_min 82 | super(CosineWarmupLR, self).__init__(optimizer, last_epoch) 83 | 84 | def get_lr(self): 85 | return [self.eta_min + (base_lr - self.eta_min) * 86 | (1 - math.cos(math.pi * self.last_epoch / self.T_max)) / 2 87 | for base_lr in self.base_lrs] 88 | 89 | 90 | class FakeOptim: 91 | def __init__(self): 92 | self.lr = 0 93 | self.mom = 0 94 | 95 | 96 | if __name__ == "__main__": 97 | import matplotlib.pyplot as plt 98 | 99 | opt = FakeOptim() # 3e-3, wd=0.4, div_factor=10 100 | schd = OneCycle(opt, 100, 3e-3, (0.95, 0.85), 10.0, 0.1) 101 | 102 | lrs = [] 103 | moms = [] 104 | for i in range(100): 105 | schd.step(i) 106 | lrs.append(opt.lr) 107 | moms.append(opt.mom) 108 | plt.plot(lrs) 109 | # plt.plot(moms) 110 | plt.show() 111 | plt.plot(moms) 112 | plt.show() 113 | --------------------------------------------------------------------------------