├── .gitignore
├── .idea
├── .gitignore
├── SA-SSD.iml
├── inspectionProfiles
│ └── profiles_settings.xml
├── misc.xml
├── modules.xml
└── vcs.xml
├── configs
├── car_cfg.py
└── multi_cfg.py
├── doc
├── hqdefault.jpg
└── model.png
├── mmdet
├── __init__.py
├── core
│ ├── __init__.py
│ ├── anchor
│ │ ├── __init__.py
│ │ ├── anchor3d_generator.py
│ │ ├── anchor_generator.py
│ │ └── anchor_target.py
│ ├── bbox
│ │ ├── __init__.py
│ │ ├── assignment.py
│ │ ├── bbox_target.py
│ │ ├── geometry.py
│ │ ├── sampling.py
│ │ └── transforms.py
│ ├── bbox3d
│ │ ├── __init__.py
│ │ ├── bbox3d_target.py
│ │ ├── box_coders.py
│ │ ├── geometry.py
│ │ ├── region_similarity.py
│ │ └── target_ops.py
│ ├── evaluation
│ │ ├── __init__.py
│ │ ├── bbox_overlaps.py
│ │ ├── class_names.py
│ │ ├── coco_utils.py
│ │ ├── eval_hooks.py
│ │ ├── kitti_eval.py
│ │ ├── mean_ap.py
│ │ └── recall.py
│ ├── loss
│ │ ├── __init__.py
│ │ └── losses.py
│ ├── mask
│ │ ├── __init__.py
│ │ ├── mask_target.py
│ │ └── utils.py
│ ├── point_cloud
│ │ ├── __init__.py
│ │ ├── point_augmentor.py
│ │ └── voxel_generator.py
│ ├── post_processing
│ │ ├── __init__.py
│ │ ├── bbox_nms.py
│ │ ├── merge_augs.py
│ │ └── rotate_nms_gpu.py
│ └── utils
│ │ ├── __init__.py
│ │ ├── dist_utils.py
│ │ └── misc.py
├── datasets
│ ├── __init__.py
│ ├── coco.py
│ ├── concat_dataset.py
│ ├── custom.py
│ ├── kitti.py
│ ├── kitti_utils.py
│ ├── loader
│ │ ├── __init__.py
│ │ ├── build_loader.py
│ │ └── sampler.py
│ ├── transforms.py
│ ├── utils.py
│ ├── voc.py
│ └── xml_style.py
├── models
│ ├── __init__.py
│ ├── backbones
│ │ ├── __init__.py
│ │ ├── pillar.py
│ │ ├── resnet.py
│ │ └── vxnet.py
│ ├── bbox_heads
│ │ ├── __init__.py
│ │ ├── bbox_head.py
│ │ └── convfc_bbox_head.py
│ ├── builder.py
│ ├── detectors
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── pointpillars.py
│ │ ├── rpn.py
│ │ ├── single_stage.py
│ │ └── test_mixins.py
│ ├── mask_heads
│ │ ├── __init__.py
│ │ └── fcn_mask_head.py
│ ├── necks
│ │ ├── __init__.py
│ │ ├── cmn.py
│ │ ├── fpn.py
│ │ └── rpn.py
│ ├── roi_extractors
│ │ ├── __init__.py
│ │ └── single_level.py
│ ├── rpn_heads
│ │ ├── __init__.py
│ │ └── rpn_head.py
│ ├── single_stage_heads
│ │ ├── __init__.py
│ │ ├── retina_head.py
│ │ └── ssd_rotate_head.py
│ └── utils
│ │ ├── __init__.py
│ │ ├── conv_module.py
│ │ ├── empty.py
│ │ ├── norm.py
│ │ ├── sequential.py
│ │ └── weight_init.py
├── ops
│ ├── __init__.py
│ ├── iou3d
│ │ ├── iou3d_utils.py
│ │ ├── setup.py
│ │ └── src
│ │ │ ├── iou3d.cpp
│ │ │ └── iou3d_kernel.cu
│ ├── pointnet2
│ │ ├── pointnet2_utils.py
│ │ ├── setup.py
│ │ └── src
│ │ │ ├── cuda_utils.h
│ │ │ ├── interpolate.cpp
│ │ │ ├── interpolate_gpu.cu
│ │ │ ├── interpolate_gpu.h
│ │ │ └── pointnet2_api.cpp
│ └── points_op
│ │ ├── __init__.py
│ │ ├── points_ops.py
│ │ ├── setup.py
│ │ └── src
│ │ └── points_op.cpp
└── version.py
├── readme.md
└── tools
├── create_data.py
├── dist_train.sh
├── env.py
├── kitti_common.py
├── test.py
├── train.py
└── train_utils
├── __init__.py
└── optimization
├── __init__.py
├── fastai_optim.py
└── learning_schedules_fastai.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Compiled source #
2 | ###################
3 | *.com
4 | *.class
5 | *.dll
6 | *.exe
7 | *.o
8 | *.so
9 | *.pyc
10 |
11 | # Packages #
12 | ############
13 | # it's better to unpack these files and commit the raw source
14 | # git has its own built in compression methods
15 | *.7z
16 | *.dmg
17 | *.gz
18 | *.iso
19 | *.jar
20 | *.rar
21 | *.tar
22 | *.zip
23 |
24 |
25 | # Specific directory #
26 | saved_model_vehicle/
27 |
--------------------------------------------------------------------------------
/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /workspace.xml
--------------------------------------------------------------------------------
/.idea/SA-SSD.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/doc/hqdefault.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skyhehe123/SA-SSD/2d75c973af65453186bd9242d7fa5e62dc44ec03/doc/hqdefault.jpg
--------------------------------------------------------------------------------
/doc/model.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skyhehe123/SA-SSD/2d75c973af65453186bd9242d7fa5e62dc44ec03/doc/model.png
--------------------------------------------------------------------------------
/mmdet/__init__.py:
--------------------------------------------------------------------------------
1 | from .version import __version__, short_version
2 |
3 | __all__ = ['__version__', 'short_version']
4 |
--------------------------------------------------------------------------------
/mmdet/core/__init__.py:
--------------------------------------------------------------------------------
1 | from .anchor import * # noqa: F401, F403
2 | from .bbox import * # noqa: F401, F403
3 | from .mask import * # noqa: F401, F403
4 | from .loss import * # noqa: F401, F403
5 | from .evaluation import * # noqa: F401, F403
6 | from .post_processing import * # noqa: F401, F403
7 | from .utils import * # noqa: F401, F403
8 |
--------------------------------------------------------------------------------
/mmdet/core/anchor/__init__.py:
--------------------------------------------------------------------------------
1 | from .anchor_generator import AnchorGenerator
2 | from .anchor_target import anchor_target
3 |
4 | __all__ = ['AnchorGenerator', 'anchor_target']
5 |
--------------------------------------------------------------------------------
/mmdet/core/anchor/anchor3d_generator.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | def create_anchors_3d_stride(feature_size,
4 | sizes=[1.6, 3.9, 1.56],
5 | anchor_strides=[0.4, 0.4, 0.0],
6 | anchor_offsets=[0.2, -39.8, -1.78],
7 | rotations=[0, np.pi / 2],
8 | dtype=np.float32):
9 | """
10 | Args:
11 | feature_size: list [D, H, W](zyx)
12 | sizes: [N, 3] list of list or array, size of anchors, xyz
13 |
14 | Returns:
15 | anchors: [*feature_size, num_sizes, num_rots, 7] tensor.
16 | """
17 | # almost 2x faster than v1
18 | x_stride, y_stride, z_stride = anchor_strides
19 | x_offset, y_offset, z_offset = anchor_offsets
20 | z_centers = np.arange(feature_size[0], dtype=dtype)
21 | y_centers = np.arange(feature_size[1], dtype=dtype)
22 | x_centers = np.arange(feature_size[2], dtype=dtype)
23 | z_centers = z_centers * z_stride + z_offset
24 | y_centers = y_centers * y_stride + y_offset
25 | x_centers = x_centers * x_stride + x_offset
26 | sizes = np.reshape(np.array(sizes, dtype=dtype), [-1, 3])
27 | rotations = np.array(rotations, dtype=dtype)
28 | rets = np.meshgrid(
29 | x_centers, y_centers, z_centers, rotations, indexing='ij')
30 | tile_shape = [1] * 5
31 | tile_shape[-2] = int(sizes.shape[0])
32 | for i in range(len(rets)):
33 | rets[i] = np.tile(rets[i][..., np.newaxis, :], tile_shape)
34 | rets[i] = rets[i][..., np.newaxis] # for concat
35 | sizes = np.reshape(sizes, [1, 1, 1, -1, 1, 3])
36 | tile_size_shape = list(rets[0].shape)
37 | tile_size_shape[3] = 1
38 | sizes = np.tile(sizes, tile_size_shape)
39 | rets.insert(3, sizes)
40 | ret = np.concatenate(rets, axis=-1)
41 | return np.transpose(ret, [2, 1, 0, 3, 4, 5])
42 |
43 |
44 | def create_anchors_3d_range(feature_size,
45 | anchor_range,
46 | sizes=[1.6, 3.9, 1.56],
47 | rotations=[0, np.pi / 2],
48 | dtype=np.float32):
49 | """
50 | Args:
51 | feature_size: list [D, H, W](zyx)
52 | sizes: [N, 3] list of list or array, size of anchors, xyz
53 |
54 | Returns:
55 | anchors: [*feature_size, num_sizes, num_rots, 7] tensor.
56 | """
57 | anchor_range = np.array(anchor_range, dtype)
58 | z_centers = np.linspace(
59 | anchor_range[2], anchor_range[5], feature_size[0], dtype=dtype)
60 | y_centers = np.linspace(
61 | anchor_range[1], anchor_range[4], feature_size[1], dtype=dtype)
62 | x_centers = np.linspace(
63 | anchor_range[0], anchor_range[3], feature_size[2], dtype=dtype)
64 | sizes = np.reshape(np.array(sizes, dtype=dtype), [-1, 3])
65 | rotations = np.array(rotations, dtype=dtype)
66 | rets = np.meshgrid(
67 | x_centers, y_centers, z_centers, rotations, indexing='ij')
68 | tile_shape = [1] * 5
69 | tile_shape[-2] = int(sizes.shape[0])
70 | for i in range(len(rets)):
71 | rets[i] = np.tile(rets[i][..., np.newaxis, :], tile_shape)
72 | rets[i] = rets[i][..., np.newaxis] # for concat
73 | sizes = np.reshape(sizes, [1, 1, 1, -1, 1, 3])
74 | tile_size_shape = list(rets[0].shape)
75 | tile_size_shape[3] = 1
76 | sizes = np.tile(sizes, tile_size_shape)
77 | rets.insert(3, sizes)
78 | ret = np.concatenate(rets, axis=-1)
79 | return np.transpose(ret, [2, 1, 0, 3, 4, 5])
80 |
81 | class AnchorGeneratorStride:
82 | def __init__(self,
83 | sizes=[1.6, 3.9, 1.56],
84 | anchor_strides=[0.4, 0.4, 1.0],
85 | anchor_offsets=[0.2, -39.8, -1.78],
86 | rotations=[0, np.pi / 2],
87 | dtype=np.float32):
88 | self._sizes = sizes
89 | self._anchor_strides = anchor_strides
90 | self._anchor_offsets = anchor_offsets
91 | self._rotations = rotations
92 | self._dtype = dtype
93 |
94 | @property
95 | def num_anchors_per_localization(self):
96 | num_rot = len(self._rotations)
97 | num_size = np.array(self._sizes).reshape([-1, 3]).shape[0]
98 | return num_rot * num_size
99 |
100 | def __call__(self, feature_map_size):
101 | return create_anchors_3d_stride(
102 | feature_map_size, self._sizes, self._anchor_strides,
103 | self._anchor_offsets, self._rotations, self._dtype)
104 |
105 | class AnchorGeneratorRange:
106 | def __init__(self,
107 | anchor_ranges,
108 | sizes=[1.6, 3.9, 1.56],
109 | rotations=[0, np.pi / 2],
110 | dtype=np.float32):
111 | self._sizes = sizes
112 | self._anchor_ranges = anchor_ranges
113 | self._rotations = rotations
114 | self._dtype = dtype
115 |
116 | @property
117 | def num_anchors_per_localization(self):
118 | num_rot = len(self._rotations)
119 | num_size = np.array(self._sizes).reshape([-1, 3]).shape[0]
120 | return num_rot * num_size
121 |
122 | def __call__(self, feature_map_size):
123 | return create_anchors_3d_range(
124 | feature_map_size, self._anchor_ranges, self._sizes,
125 | self._rotations, self._dtype)
126 |
--------------------------------------------------------------------------------
/mmdet/core/anchor/anchor_generator.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | class AnchorGenerator(object):
5 |
6 | def __init__(self, base_size, scales, ratios, scale_major=True):
7 | self.base_size = base_size
8 | self.scales = torch.Tensor(scales)
9 | self.ratios = torch.Tensor(ratios)
10 | self.scale_major = scale_major
11 | self.base_anchors = self.gen_base_anchors()
12 |
13 | @property
14 | def num_base_anchors(self):
15 | return self.base_anchors.size(0)
16 |
17 | def gen_base_anchors(self):
18 | base_anchor = torch.Tensor(
19 | [0, 0, self.base_size - 1, self.base_size - 1])
20 |
21 | w = base_anchor[2] - base_anchor[0] + 1
22 | h = base_anchor[3] - base_anchor[1] + 1
23 | x_ctr = base_anchor[0] + 0.5 * (w - 1)
24 | y_ctr = base_anchor[1] + 0.5 * (h - 1)
25 |
26 | h_ratios = torch.sqrt(self.ratios)
27 | w_ratios = 1 / h_ratios
28 | if self.scale_major:
29 | ws = (w * w_ratios[:, None] * self.scales[None, :]).view(-1)
30 | hs = (h * h_ratios[:, None] * self.scales[None, :]).view(-1)
31 | else:
32 | ws = (w * self.scales[:, None] * w_ratios[None, :]).view(-1)
33 | hs = (h * self.scales[:, None] * h_ratios[None, :]).view(-1)
34 |
35 | base_anchors = torch.stack(
36 | [
37 | x_ctr - 0.5 * (ws - 1), y_ctr - 0.5 * (hs - 1),
38 | x_ctr + 0.5 * (ws - 1), y_ctr + 0.5 * (hs - 1)
39 | ],
40 | dim=-1).round()
41 |
42 | return base_anchors
43 |
44 | def _meshgrid(self, x, y, row_major=True):
45 | xx = x.repeat(len(y))
46 | yy = y.view(-1, 1).repeat(1, len(x)).view(-1)
47 | if row_major:
48 | return xx, yy
49 | else:
50 | return yy, xx
51 |
52 | def grid_anchors(self, featmap_size, stride=16, device='cuda'):
53 | base_anchors = self.base_anchors.to(device)
54 |
55 | feat_h, feat_w = featmap_size
56 | shift_x = torch.arange(0, feat_w, device=device) * stride
57 | shift_y = torch.arange(0, feat_h, device=device) * stride
58 | shift_xx, shift_yy = self._meshgrid(shift_x, shift_y)
59 | shifts = torch.stack([shift_xx, shift_yy, shift_xx, shift_yy], dim=-1)
60 | shifts = shifts.type_as(base_anchors)
61 | # first feat_w elements correspond to the first row of shifts
62 | # add A anchors (1, A, 4) to K shifts (K, 1, 4) to get
63 | # shifted anchors (K, A, 4), reshape to (K*A, 4)
64 |
65 | all_anchors = base_anchors[None, :, :] + shifts[:, None, :]
66 | all_anchors = all_anchors.view(-1, 4)
67 | # first A rows correspond to A anchors of (0, 0) in feature map,
68 | # then (0, 1), (0, 2), ...
69 | return all_anchors
70 |
71 | def valid_flags(self, featmap_size, valid_size, device='cuda'):
72 | feat_h, feat_w = featmap_size
73 | valid_h, valid_w = valid_size
74 | assert valid_h <= feat_h and valid_w <= feat_w
75 | valid_x = torch.zeros(feat_w, dtype=torch.uint8, device=device)
76 | valid_y = torch.zeros(feat_h, dtype=torch.uint8, device=device)
77 | valid_x[:valid_w] = 1
78 | valid_y[:valid_h] = 1
79 | valid_xx, valid_yy = self._meshgrid(valid_x, valid_y)
80 | valid = valid_xx & valid_yy
81 | valid = valid[:, None].expand(
82 | valid.size(0), self.num_base_anchors).contiguous().view(-1)
83 | return valid
84 |
--------------------------------------------------------------------------------
/mmdet/core/anchor/anchor_target.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from ..bbox import assign_and_sample, BBoxAssigner, SamplingResult, bbox2delta, rbbox3d2delta
3 |
4 | def anchor_target(flat_anchors,
5 | inside_flags,
6 | gt_bboxes,
7 | gt_labels,
8 | target_means,
9 | target_stds,
10 | cfg,
11 | cls_out_channels=1,
12 | sampling=True):
13 |
14 | # assign gt and sample anchors
15 |
16 | anchors = flat_anchors[inside_flags]
17 |
18 | if sampling:
19 | assign_result, sampling_result = assign_and_sample(
20 | anchors, gt_bboxes, None, None, cfg)
21 | else:
22 | bbox_assigner = BBoxAssigner(**cfg.assigner)
23 | assign_result = bbox_assigner.assign(anchors, gt_bboxes, None, gt_labels)
24 | pos_inds = torch.nonzero(
25 | assign_result.gt_inds > 0).squeeze(-1).unique()
26 | neg_inds = torch.nonzero(
27 | assign_result.gt_inds == 0).squeeze(-1).unique()
28 | gt_flags = anchors.new_zeros(anchors.shape[0], dtype=torch.uint8)
29 | sampling_result = SamplingResult(pos_inds, neg_inds, anchors,
30 | gt_bboxes, assign_result, gt_flags)
31 |
32 | num_valid_anchors = anchors.shape[0]
33 | bbox_targets = torch.zeros_like(anchors)
34 | bbox_weights = torch.zeros_like(anchors)
35 | labels = anchors.new_zeros(num_valid_anchors, dtype=torch.long)
36 | label_weights = anchors.new_zeros(num_valid_anchors, dtype=torch.float)
37 | dir_weights = anchors.new_zeros(num_valid_anchors, dtype=torch.float)
38 |
39 | pos_inds = sampling_result.pos_inds
40 | neg_inds = sampling_result.neg_inds
41 | if len(pos_inds) > 0:
42 | pos_bbox_targets = rbbox3d2delta(sampling_result.pos_bboxes,
43 | sampling_result.pos_gt_bboxes,
44 | target_means, target_stds)
45 | bbox_targets[pos_inds, :] = pos_bbox_targets
46 | bbox_weights[pos_inds, :] = 1.0
47 | dir_weights[pos_inds] = 1.
48 | if gt_labels is None:
49 | labels[pos_inds] = 1
50 | else:
51 | labels[pos_inds] = gt_labels[sampling_result.pos_assigned_gt_inds]
52 | if cfg.pos_weight <= 0:
53 | label_weights[pos_inds] = 1.0
54 | else:
55 | label_weights[pos_inds] = cfg.pos_weight
56 | if len(neg_inds) > 0:
57 | label_weights[neg_inds] = 1.0
58 |
59 | # map up to original set of anchors
60 | num_total_anchors = flat_anchors.shape[0]
61 | labels = unmap(labels, num_total_anchors, inside_flags)
62 | label_weights = unmap(label_weights, num_total_anchors, inside_flags)
63 | if cls_out_channels > 1:
64 | labels, label_weights = expand_binary_labels(labels, label_weights,
65 | cls_out_channels)
66 | bbox_targets = unmap(bbox_targets, num_total_anchors, inside_flags)
67 | bbox_weights = unmap(bbox_weights, num_total_anchors, inside_flags)
68 |
69 | dir_labels = get_direction_target(flat_anchors, bbox_targets)
70 | dir_weights = unmap(dir_weights, num_total_anchors, inside_flags)
71 |
72 | return (labels, label_weights, bbox_targets, bbox_weights, dir_labels, dir_weights, pos_inds,
73 | neg_inds)
74 |
75 |
76 | def expand_binary_labels(labels, label_weights, cls_out_channels):
77 | bin_labels = labels.new_full(
78 | (labels.size(0), cls_out_channels), 0, dtype=torch.float32)
79 | inds = torch.nonzero(labels >= 1).squeeze()
80 | if inds.numel() > 0:
81 | bin_labels[inds, labels[inds] - 1] = 1
82 | bin_label_weights = label_weights.view(-1, 1).expand(
83 | label_weights.size(0), cls_out_channels)
84 | return bin_labels, bin_label_weights
85 |
86 | def get_direction_target(anchors, reg_targets):
87 | anchors = anchors.view(-1, 7)
88 | rot_gt = reg_targets[:, -1] + anchors[:, -1]
89 | dir_cls_targets = (rot_gt > 0).long()
90 | return dir_cls_targets
91 |
92 | def anchor_inside_flags(flat_anchors, valid_flags, img_shape,
93 | allowed_border=0):
94 | img_h, img_w = img_shape[:2]
95 | if allowed_border >= 0:
96 | inside_flags = valid_flags & \
97 | (flat_anchors[:, 0] >= -allowed_border) & \
98 | (flat_anchors[:, 1] >= -allowed_border) & \
99 | (flat_anchors[:, 2] < img_w + allowed_border) & \
100 | (flat_anchors[:, 3] < img_h + allowed_border)
101 | else:
102 | inside_flags = valid_flags
103 | return inside_flags
104 |
105 |
106 | def unmap(data, count, inds, fill=0):
107 | """ Unmap a subset of item (data) back to the original set of items (of
108 | size count) """
109 | if data.dim() == 1:
110 | ret = data.new_full((count, ), fill)
111 | ret[inds] = data
112 | else:
113 | new_size = (count, ) + data.size()[1:]
114 | ret = data.new_full(new_size, fill)
115 | ret[inds, :] = data
116 | return ret
117 |
--------------------------------------------------------------------------------
/mmdet/core/bbox/__init__.py:
--------------------------------------------------------------------------------
1 | from .geometry import bbox_overlaps
2 | from .assignment import BBoxAssigner, AssignResult
3 | from .sampling import (BBoxSampler, SamplingResult, assign_and_sample,
4 | random_choice)
5 | from .transforms import (bbox2delta, delta2bbox, bbox_flip, bbox_mapping, rbbox3d2delta, delta2rbbox3d, add_sin_difference,
6 | bbox_mapping_back, bbox2roi, roi2bbox, bbox2result, rbbox2roi, kitti_bbox2results, tensor2points)
7 | from .bbox_target import bbox_target
8 |
9 | __all__ = [
10 | 'bbox_overlaps', 'BBoxAssigner', 'AssignResult', 'BBoxSampler',
11 | 'SamplingResult', 'assign_and_sample', 'random_choice', 'bbox2delta',
12 | 'delta2bbox', 'bbox_flip', 'bbox_mapping', 'bbox_mapping_back', 'bbox2roi',
13 | 'roi2bbox', 'bbox2result', 'bbox_target','rbbox3d2delta','delta2rbbox3d',
14 | 'rbbox2roi', 'kitti_bbox2results','add_sin_difference','tensor2points',
15 | ]
16 |
--------------------------------------------------------------------------------
/mmdet/core/bbox/bbox_target.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | from .transforms import bbox2delta, rbbox3d2delta
4 | from ..utils import multi_apply
5 |
6 |
7 | def bbox_target(pos_bboxes_list,
8 | neg_bboxes_list,
9 | pos_gt_bboxes_list,
10 | pos_gt_labels_list,
11 | cfg,
12 | reg_classes=1,
13 | target_means=[.0, .0, .0, .0],
14 | target_stds=[1.0, 1.0, 1.0, 1.0],
15 | concat=True):
16 | labels, label_weights, bbox_targets, bbox_weights = multi_apply(
17 | bbox_target_single,
18 | pos_bboxes_list,
19 | neg_bboxes_list,
20 | pos_gt_bboxes_list,
21 | pos_gt_labels_list,
22 | cfg=cfg,
23 | reg_classes=reg_classes,
24 | target_means=target_means,
25 | target_stds=target_stds)
26 |
27 | if concat:
28 | labels = torch.cat(labels, 0)
29 | label_weights = torch.cat(label_weights, 0)
30 | bbox_targets = torch.cat(bbox_targets, 0)
31 | bbox_weights = torch.cat(bbox_weights, 0)
32 | return labels, label_weights, bbox_targets, bbox_weights
33 |
34 |
35 | def bbox_target_single(pos_bboxes,
36 | neg_bboxes,
37 | pos_gt_bboxes,
38 | pos_gt_labels,
39 | cfg,
40 | reg_classes=1,
41 | target_means=[.0, .0, .0, .0],
42 | target_stds=[1.0, 1.0, 1.0, 1.0]):
43 | num_pos = pos_bboxes.size(0)
44 | num_neg = neg_bboxes.size(0)
45 | num_samples = num_pos + num_neg
46 | labels = pos_bboxes.new_zeros(num_samples, dtype=torch.long)
47 | label_weights = pos_bboxes.new_zeros(num_samples)
48 | bbox_targets = pos_bboxes.new_zeros(num_samples, 7)
49 | bbox_weights = pos_bboxes.new_zeros(num_samples, 7)
50 | if num_pos > 0:
51 | labels[:num_pos] = pos_gt_labels
52 | pos_weight = 1.0 if cfg.pos_weight <= 0 else cfg.pos_weight
53 | label_weights[:num_pos] = pos_weight
54 |
55 | pos_bbox_targets = rbbox3d2delta(pos_bboxes, pos_gt_bboxes, target_means,
56 | target_stds)
57 | bbox_targets[:num_pos, :] = pos_bbox_targets
58 | bbox_weights[:num_pos, :] = 1
59 | if num_neg > 0:
60 | label_weights[-num_neg:] = 1.0
61 | if reg_classes > 1:
62 | bbox_targets, bbox_weights = expand_target(bbox_targets, bbox_weights,
63 | labels, reg_classes)
64 |
65 | return labels, label_weights, bbox_targets, bbox_weights
66 |
67 |
68 | def expand_target(bbox_targets, bbox_weights, labels, num_classes):
69 | bbox_targets_expand = bbox_targets.new_zeros((bbox_targets.size(0),
70 | 7 * num_classes))
71 | bbox_weights_expand = bbox_weights.new_zeros((bbox_weights.size(0),
72 | 7 * num_classes))
73 | for i in torch.nonzero(labels > 0).squeeze(-1):
74 | start, end = labels[i] * 7, (labels[i] + 1) * 7
75 | bbox_targets_expand[i, start:end] = bbox_targets[i, :]
76 | bbox_weights_expand[i, start:end] = bbox_weights[i, :]
77 | return bbox_targets_expand, bbox_weights_expand
78 |
--------------------------------------------------------------------------------
/mmdet/core/bbox/geometry.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False):
5 | """Calculate overlap between two set of bboxes.
6 |
7 | If ``is_aligned`` is ``False``, then calculate the ious between each bbox
8 | of bboxes1 and bboxes2, otherwise the ious between each aligned pair of
9 | bboxes1 and bboxes2.
10 |
11 | Args:
12 | bboxes1 (Tensor): shape (m, 4)
13 | bboxes2 (Tensor): shape (n, 4), if is_aligned is ``True``, then m and n
14 | must be equal.
15 | mode (str): "iou" (intersection over union) or iof (intersection over
16 | foreground).
17 |
18 | Returns:
19 | ious(Tensor): shape (n, k) if is_aligned == False else shape (n, 1)
20 | """
21 |
22 | assert mode in ['iou', 'iof']
23 |
24 | rows = bboxes1.size(0)
25 | cols = bboxes2.size(0)
26 | if is_aligned:
27 | assert rows == cols
28 |
29 | if rows * cols == 0:
30 | return bboxes1.new(rows, 1) if is_aligned else bboxes1.new(rows, cols)
31 |
32 | if is_aligned:
33 | lt = torch.max(bboxes1[:, :2], bboxes2[:, :2]) # [rows, 2]
34 | rb = torch.min(bboxes1[:, 2:], bboxes2[:, 2:]) # [rows, 2]
35 |
36 | wh = (rb - lt + 1).clamp(min=0) # [rows, 2]
37 | overlap = wh[:, 0] * wh[:, 1]
38 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * (
39 | bboxes1[:, 3] - bboxes1[:, 1] + 1)
40 |
41 | if mode == 'iou':
42 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * (
43 | bboxes2[:, 3] - bboxes2[:, 1] + 1)
44 | ious = overlap / (area1 + area2 - overlap)
45 | else:
46 | ious = overlap / area1
47 | else:
48 | lt = torch.max(bboxes1[:, None, :2], bboxes2[:, :2]) # [rows, cols, 2]
49 | rb = torch.min(bboxes1[:, None, 2:], bboxes2[:, 2:]) # [rows, cols, 2]
50 |
51 | wh = (rb - lt + 1).clamp(min=0) # [rows, cols, 2]
52 | overlap = wh[:, :, 0] * wh[:, :, 1]
53 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * (
54 | bboxes1[:, 3] - bboxes1[:, 1] + 1)
55 |
56 | if mode == 'iou':
57 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * (
58 | bboxes2[:, 3] - bboxes2[:, 1] + 1)
59 | ious = overlap / (area1[:, None] + area2 - overlap)
60 | else:
61 | ious = overlap / (area1[:, None])
62 |
63 | return ious
64 |
--------------------------------------------------------------------------------
/mmdet/core/bbox3d/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skyhehe123/SA-SSD/2d75c973af65453186bd9242d7fa5e62dc44ec03/mmdet/core/bbox3d/__init__.py
--------------------------------------------------------------------------------
/mmdet/core/bbox3d/bbox3d_target.py:
--------------------------------------------------------------------------------
1 | from mmdet.core.bbox3d.target_ops import create_target_np
2 | from mmdet.core.bbox3d import region_similarity as regionSimilarity
3 | from mmdet.core.bbox3d import box_coders as boxCoders
4 |
5 | class TargetEncoder:
6 | def __init__(self,
7 | box_coders,
8 | region_similarity):
9 |
10 | self._similarity_fn = getattr(regionSimilarity, region_similarity)()
11 | self._box_coder = getattr(boxCoders, box_coders)()
12 |
13 | @property
14 | def box_coder(self):
15 | return self._box_coder
16 |
17 | def assign(self,
18 | anchors,
19 | gt_boxes,
20 | anchors_mask=None,
21 | gt_classes=None,
22 | pos_iou_thr=0.6,
23 | neg_iou_thr=0.45,
24 | positive_fraction=None,
25 | sample_size=512,
26 | ):
27 |
28 | return create_target_np(
29 | anchors,
30 | gt_boxes,
31 | anchors_mask,
32 | gt_classes,
33 | similarity_fn=self._similarity_fn,
34 | box_encoding_fn = self._box_coder.encode,
35 | matched_threshold=pos_iou_thr,
36 | unmatched_threshold=neg_iou_thr,
37 | positive_fraction=positive_fraction,
38 | rpn_batch_size=sample_size,
39 | norm_by_num_examples=False,
40 | box_code_size=self.box_coder.code_size)
41 |
42 |
--------------------------------------------------------------------------------
/mmdet/core/bbox3d/region_similarity.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 |
16 | """Region Similarity Calculators for BoxLists.
17 |
18 | Region Similarity Calculators compare a pairwise measure of similarity
19 | between the boxes in two BoxLists.
20 | """
21 |
22 | from mmdet.core.bbox3d.geometry import rbbox2d_to_near_bbox, iou_jit, distance_similarity
23 | from mmdet.core.post_processing.rotate_nms_gpu import rotate_iou_gpu, rotate_iou_gpu_eval
24 | import numba
25 |
26 | @numba.jit(nopython=True, parallel=True)
27 | def d3_box_overlap_kernel(boxes, qboxes, rinc, criterion=-1):
28 | N, K = boxes.shape[0], qboxes.shape[0]
29 | for i in range(N):
30 | for j in range(K):
31 | if rinc[i, j] > 0:
32 | iw = (min(boxes[i, 2], qboxes[j, 2]) - max(
33 | boxes[i, 2] - boxes[i, 5], qboxes[j, 2] - qboxes[j, 5]))
34 |
35 | if iw > 0:
36 | area1 = boxes[i, 3] * boxes[i, 4] * boxes[i, 5]
37 | area2 = qboxes[j, 3] * qboxes[j, 4] * qboxes[j, 5]
38 | inc = iw * rinc[i, j]
39 | if criterion == -1:
40 | ua = (area1 + area2 - inc)
41 | elif criterion == 0:
42 | ua = area1
43 | elif criterion == 1:
44 | ua = area2
45 | else:
46 | ua = 1.0
47 | rinc[i, j] = inc / ua
48 | else:
49 | rinc[i, j] = 0.0
50 |
51 | class RotateIou2dSimilarity(object):
52 | """Class to compute similarity based on Intersection over Union (IOU) metric.
53 |
54 | This class computes pairwise similarity between two BoxLists based on IOU.
55 | """
56 | def __call__(self, boxes1, boxes2):
57 | boxes1_rbv = boxes1[:, [0, 1, 3, 4, 6]]
58 | boxes2_rbv = boxes2[:, [0, 1, 3, 4, 6]]
59 | return rotate_iou_gpu(boxes1_rbv, boxes2_rbv)
60 |
61 | class RotateIou3dSimilarity(object):
62 | """Class to compute similarity based on Intersection over Union (IOU) metric.
63 |
64 | This class computes pairwise similarity between two BoxLists based on IOU.
65 | """
66 | def __call__(self, boxes1, boxes2):
67 | boxes1_rbv = boxes1[:, [0, 1, 3, 4, 6]]
68 | boxes2_rbv = boxes2[:, [0, 1, 3, 4, 6]]
69 | rinc = rotate_iou_gpu_eval(boxes1_rbv, boxes2_rbv, criterion=2)
70 | d3_box_overlap_kernel(boxes1, boxes2, rinc)
71 | return rinc
72 |
73 | class NearestIouSimilarity(object):
74 | """Class to compute similarity based on the squared distance metric.
75 |
76 | This class computes pairwise similarity between two BoxLists based on the
77 | negative squared distance metric.
78 | """
79 |
80 | def __call__(self, boxes1, boxes2):
81 | """Compute matrix of (negated) sq distances.
82 |
83 | Args:
84 | boxlist1: BoxList holding N boxes.
85 | boxlist2: BoxList holding M boxes.
86 |
87 | Returns:
88 | A tensor with shape [N, M] representing negated pairwise squared distance.
89 | """
90 | boxes1_rbv = boxes1[:, [0, 1, 3, 4, 6]]
91 | boxes2_rbv = boxes2[:, [0, 1, 3, 4, 6]]
92 | boxes1_bv = rbbox2d_to_near_bbox(boxes1_rbv)
93 | boxes2_bv = rbbox2d_to_near_bbox(boxes2_rbv)
94 | ret = iou_jit(boxes1_bv, boxes2_bv, eps=0.0)
95 | return ret
96 |
97 |
98 | class DistanceSimilarity(object):
99 | """Class to compute similarity based on Intersection over Area (IOA) metric.
100 |
101 | This class computes pairwise similarity between two BoxLists based on their
102 | pairwise intersections divided by the areas of second BoxLists.
103 | """
104 |
105 | def __init__(self, distance_norm, with_rotation=False, rotation_alpha=0.5):
106 | self._distance_norm = distance_norm
107 | self._with_rotation = with_rotation
108 | self._rotation_alpha = rotation_alpha
109 |
110 | def __call__(self, boxes1, boxes2):
111 | """Compute matrix of (negated) sq distances.
112 |
113 | Args:
114 | boxlist1: BoxList holding N boxes.
115 | boxlist2: BoxList holding M boxes.
116 |
117 | Returns:
118 | A tensor with shape [N, M] representing negated pairwise squared distance.
119 | """
120 | boxes1_rbv = boxes1[:, [0, 1, 3, 4, 6]]
121 | boxes2_rbv = boxes2[:, [0, 1, 3, 4, 6]]
122 | return distance_similarity(
123 | boxes1_rbv[..., [0, 1, -1]],
124 | boxes2_rbv[..., [0, 1, -1]],
125 | dist_norm=self._distance_norm,
126 | with_rotation=self._with_rotation,
127 | rot_alpha=self._rotation_alpha)
128 |
129 |
--------------------------------------------------------------------------------
/mmdet/core/evaluation/__init__.py:
--------------------------------------------------------------------------------
1 | from .class_names import (voc_classes, imagenet_det_classes,
2 | imagenet_vid_classes, coco_classes, dataset_aliases,
3 | get_classes)
4 | from .coco_utils import coco_eval, fast_eval_recall, results2json
5 | from .eval_hooks import (DistEvalHook, CocoDistEvalRecallHook,
6 | CocoDistEvalmAPHook,KittiEvalmAPHook, DistEvalmAPHook)
7 | from .mean_ap import average_precision, eval_map, print_map_summary
8 | from .recall import (eval_recalls, print_recall_summary, plot_num_recall,
9 | plot_iou_recall)
10 |
11 | __all__ = [
12 | 'voc_classes', 'imagenet_det_classes', 'imagenet_vid_classes',
13 | 'coco_classes', 'dataset_aliases', 'get_classes', 'coco_eval',
14 | 'fast_eval_recall', 'results2json', 'DistEvalHook',
15 | 'CocoDistEvalRecallHook', 'CocoDistEvalmAPHook', 'average_precision',
16 | 'eval_map', 'print_map_summary', 'eval_recalls', 'print_recall_summary',
17 | 'plot_num_recall', 'plot_iou_recall','KittiEvalmAPHook','DistEvalmAPHook'
18 | ]
19 |
--------------------------------------------------------------------------------
/mmdet/core/evaluation/bbox_overlaps.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | def bbox_overlaps(bboxes1, bboxes2, mode='iou'):
5 | """Calculate the ious between each bbox of bboxes1 and bboxes2.
6 |
7 | Args:
8 | bboxes1(ndarray): shape (n, 4)
9 | bboxes2(ndarray): shape (k, 4)
10 | mode(str): iou (intersection over union) or iof (intersection
11 | over foreground)
12 |
13 | Returns:
14 | ious(ndarray): shape (n, k)
15 | """
16 |
17 | assert mode in ['iou', 'iof']
18 |
19 | bboxes1 = bboxes1.astype(np.float32)
20 | bboxes2 = bboxes2.astype(np.float32)
21 | rows = bboxes1.shape[0]
22 | cols = bboxes2.shape[0]
23 | ious = np.zeros((rows, cols), dtype=np.float32)
24 | if rows * cols == 0:
25 | return ious
26 | exchange = False
27 | if bboxes1.shape[0] > bboxes2.shape[0]:
28 | bboxes1, bboxes2 = bboxes2, bboxes1
29 | ious = np.zeros((cols, rows), dtype=np.float32)
30 | exchange = True
31 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * (
32 | bboxes1[:, 3] - bboxes1[:, 1] + 1)
33 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * (
34 | bboxes2[:, 3] - bboxes2[:, 1] + 1)
35 | for i in range(bboxes1.shape[0]):
36 | x_start = np.maximum(bboxes1[i, 0], bboxes2[:, 0])
37 | y_start = np.maximum(bboxes1[i, 1], bboxes2[:, 1])
38 | x_end = np.minimum(bboxes1[i, 2], bboxes2[:, 2])
39 | y_end = np.minimum(bboxes1[i, 3], bboxes2[:, 3])
40 | overlap = np.maximum(x_end - x_start + 1, 0) * np.maximum(
41 | y_end - y_start + 1, 0)
42 | if mode == 'iou':
43 | union = area1[i] + area2 - overlap
44 | else:
45 | union = area1[i] if not exchange else area2
46 | ious[i, :] = overlap / union
47 | if exchange:
48 | ious = ious.T
49 | return ious
50 |
--------------------------------------------------------------------------------
/mmdet/core/evaluation/class_names.py:
--------------------------------------------------------------------------------
1 | import mmcv
2 |
3 |
4 | def voc_classes():
5 | return [
6 | 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat',
7 | 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person',
8 | 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'
9 | ]
10 |
11 |
12 | def imagenet_det_classes():
13 | return [
14 | 'accordion', 'airplane', 'ant', 'antelope', 'apple', 'armadillo',
15 | 'artichoke', 'axe', 'baby_bed', 'backpack', 'bagel', 'balance_beam',
16 | 'banana', 'band_aid', 'banjo', 'baseball', 'basketball', 'bathing_cap',
17 | 'beaker', 'bear', 'bee', 'bell_pepper', 'bench', 'bicycle', 'binder',
18 | 'bird', 'bookshelf', 'bow_tie', 'bow', 'bowl', 'brassiere', 'burrito',
19 | 'bus', 'butterfly', 'camel', 'can_opener', 'car', 'cart', 'cattle',
20 | 'cello', 'centipede', 'chain_saw', 'chair', 'chime', 'cocktail_shaker',
21 | 'coffee_maker', 'computer_keyboard', 'computer_mouse', 'corkscrew',
22 | 'cream', 'croquet_ball', 'crutch', 'cucumber', 'cup_or_mug', 'diaper',
23 | 'digital_clock', 'dishwasher', 'dog', 'domestic_cat', 'dragonfly',
24 | 'drum', 'dumbbell', 'electric_fan', 'elephant', 'face_powder', 'fig',
25 | 'filing_cabinet', 'flower_pot', 'flute', 'fox', 'french_horn', 'frog',
26 | 'frying_pan', 'giant_panda', 'goldfish', 'golf_ball', 'golfcart',
27 | 'guacamole', 'guitar', 'hair_dryer', 'hair_spray', 'hamburger',
28 | 'hammer', 'hamster', 'harmonica', 'harp', 'hat_with_a_wide_brim',
29 | 'head_cabbage', 'helmet', 'hippopotamus', 'horizontal_bar', 'horse',
30 | 'hotdog', 'iPod', 'isopod', 'jellyfish', 'koala_bear', 'ladle',
31 | 'ladybug', 'lamp', 'laptop', 'lemon', 'lion', 'lipstick', 'lizard',
32 | 'lobster', 'maillot', 'maraca', 'microphone', 'microwave', 'milk_can',
33 | 'miniskirt', 'monkey', 'motorcycle', 'mushroom', 'nail', 'neck_brace',
34 | 'oboe', 'orange', 'otter', 'pencil_box', 'pencil_sharpener', 'perfume',
35 | 'person', 'piano', 'pineapple', 'ping-pong_ball', 'pitcher', 'pizza',
36 | 'plastic_bag', 'plate_rack', 'pomegranate', 'popsicle', 'porcupine',
37 | 'power_drill', 'pretzel', 'printer', 'puck', 'punching_bag', 'purse',
38 | 'rabbit', 'racket', 'ray', 'red_panda', 'refrigerator',
39 | 'remote_control', 'rubber_eraser', 'rugby_ball', 'ruler',
40 | 'salt_or_pepper_shaker', 'saxophone', 'scorpion', 'screwdriver',
41 | 'seal', 'sheep', 'ski', 'skunk', 'snail', 'snake', 'snowmobile',
42 | 'snowplow', 'soap_dispenser', 'soccer_ball', 'sofa', 'spatula',
43 | 'squirrel', 'starfish', 'stethoscope', 'stove', 'strainer',
44 | 'strawberry', 'stretcher', 'sunglasses', 'swimming_trunks', 'swine',
45 | 'syringe', 'table', 'tape_player', 'tennis_ball', 'tick', 'tie',
46 | 'tiger', 'toaster', 'traffic_light', 'train', 'trombone', 'trumpet',
47 | 'turtle', 'tv_or_monitor', 'unicycle', 'vacuum', 'violin',
48 | 'volleyball', 'waffle_iron', 'washer', 'water_bottle', 'watercraft',
49 | 'whale', 'wine_bottle', 'zebra'
50 | ]
51 |
52 |
53 | def imagenet_vid_classes():
54 | return [
55 | 'airplane', 'antelope', 'bear', 'bicycle', 'bird', 'bus', 'car',
56 | 'cattle', 'dog', 'domestic_cat', 'elephant', 'fox', 'giant_panda',
57 | 'hamster', 'horse', 'lion', 'lizard', 'monkey', 'motorcycle', 'rabbit',
58 | 'red_panda', 'sheep', 'snake', 'squirrel', 'tiger', 'train', 'turtle',
59 | 'watercraft', 'whale', 'zebra'
60 | ]
61 |
62 |
63 | def coco_classes():
64 | return [
65 | 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
66 | 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
67 | 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
68 | 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella',
69 | 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
70 | 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard',
71 | 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork',
72 | 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
73 | 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
74 | 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
75 | 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
76 | 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
77 | 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
78 | ]
79 |
80 | def kitti_classes():
81 | return [
82 | 'car', 'pedestrians','cyclists'
83 | ]
84 |
85 | dataset_aliases = {
86 | 'voc': ['voc', 'pascal_voc', 'voc07', 'voc12'],
87 | 'imagenet_det': ['det', 'imagenet_det', 'ilsvrc_det'],
88 | 'imagenet_vid': ['vid', 'imagenet_vid', 'ilsvrc_vid'],
89 | 'coco': ['coco', 'mscoco', 'ms_coco'],
90 | 'kitti': ['kitti']
91 | }
92 |
93 |
94 | def get_classes(dataset):
95 | """Get class names of a dataset."""
96 | alias2name = {}
97 | for name, aliases in dataset_aliases.items():
98 | for alias in aliases:
99 | alias2name[alias] = name
100 |
101 | if mmcv.is_str(dataset):
102 | if dataset in alias2name:
103 | labels = eval(alias2name[dataset] + '_classes()')
104 | else:
105 | raise ValueError('Unrecognized dataset: {}'.format(dataset))
106 | else:
107 | raise TypeError('dataset must a str, but got {}'.format(type(dataset)))
108 | return labels
109 |
--------------------------------------------------------------------------------
/mmdet/core/evaluation/coco_utils.py:
--------------------------------------------------------------------------------
1 | import mmcv
2 | import numpy as np
3 | from pycocotools.coco import COCO
4 | from pycocotools.cocoeval import COCOeval
5 |
6 | from .recall import eval_recalls
7 |
8 |
9 | def coco_eval(result_file, result_types, coco, max_dets=(100, 300, 1000)):
10 | for res_type in result_types:
11 | assert res_type in [
12 | 'proposal', 'proposal_fast', 'bbox', 'segm', 'keypoints'
13 | ]
14 |
15 | if mmcv.is_str(coco):
16 | coco = COCO(coco)
17 | assert isinstance(coco, COCO)
18 |
19 | if result_types == ['proposal_fast']:
20 | ar = fast_eval_recall(result_file, coco, np.array(max_dets))
21 | for i, num in enumerate(max_dets):
22 | print('AR@{}\t= {:.4f}'.format(num, ar[i]))
23 | return
24 |
25 | assert result_file.endswith('.json')
26 | coco_dets = coco.loadRes(result_file)
27 |
28 | img_ids = coco.getImgIds()
29 | for res_type in result_types:
30 | iou_type = 'bbox' if res_type == 'proposal' else res_type
31 | cocoEval = COCOeval(coco, coco_dets, iou_type)
32 | cocoEval.params.imgIds = img_ids
33 | if res_type == 'proposal':
34 | cocoEval.params.useCats = 0
35 | cocoEval.params.maxDets = list(max_dets)
36 | cocoEval.evaluate()
37 | cocoEval.accumulate()
38 | cocoEval.summarize()
39 |
40 |
41 | def fast_eval_recall(results,
42 | coco,
43 | max_dets,
44 | iou_thrs=np.arange(0.5, 0.96, 0.05)):
45 | if mmcv.is_str(results):
46 | assert results.endswith('.pkl')
47 | results = mmcv.load(results)
48 | elif not isinstance(results, list):
49 | raise TypeError(
50 | 'results must be a list of numpy arrays or a filename, not {}'.
51 | format(type(results)))
52 |
53 | gt_bboxes = []
54 | img_ids = coco.getImgIds()
55 | for i in range(len(img_ids)):
56 | ann_ids = coco.getAnnIds(imgIds=img_ids[i])
57 | ann_info = coco.loadAnns(ann_ids)
58 | if len(ann_info) == 0:
59 | gt_bboxes.append(np.zeros((0, 4)))
60 | continue
61 | bboxes = []
62 | for ann in ann_info:
63 | if ann.get('ignore', False) or ann['iscrowd']:
64 | continue
65 | x1, y1, w, h = ann['bbox']
66 | bboxes.append([x1, y1, x1 + w - 1, y1 + h - 1])
67 | bboxes = np.array(bboxes, dtype=np.float32)
68 | if bboxes.shape[0] == 0:
69 | bboxes = np.zeros((0, 4))
70 | gt_bboxes.append(bboxes)
71 |
72 | recalls = eval_recalls(
73 | gt_bboxes, results, max_dets, iou_thrs, print_summary=False)
74 | ar = recalls.mean(axis=1)
75 | return ar
76 |
77 |
78 | def xyxy2xywh(bbox):
79 | _bbox = bbox.tolist()
80 | return [
81 | _bbox[0],
82 | _bbox[1],
83 | _bbox[2] - _bbox[0] + 1,
84 | _bbox[3] - _bbox[1] + 1,
85 | ]
86 |
87 |
88 | def proposal2json(dataset, results):
89 | json_results = []
90 | for idx in range(len(dataset)):
91 | img_id = dataset.img_ids[idx]
92 | bboxes = results[idx]
93 | for i in range(bboxes.shape[0]):
94 | data = dict()
95 | data['image_id'] = img_id
96 | data['bbox'] = xyxy2xywh(bboxes[i])
97 | data['score'] = float(bboxes[i][4])
98 | data['category_id'] = 1
99 | json_results.append(data)
100 | return json_results
101 |
102 |
103 | def det2json(dataset, results):
104 | json_results = []
105 | for idx in range(len(dataset)):
106 | img_id = dataset.img_ids[idx]
107 | result = results[idx]
108 | for label in range(len(result)):
109 | bboxes = result[label]
110 | for i in range(bboxes.shape[0]):
111 | data = dict()
112 | data['image_id'] = img_id
113 | data['bbox'] = xyxy2xywh(bboxes[i])
114 | data['score'] = float(bboxes[i][4])
115 | data['category_id'] = dataset.cat_ids[label]
116 | json_results.append(data)
117 | return json_results
118 |
119 |
120 | def segm2json(dataset, results):
121 | json_results = []
122 | for idx in range(len(dataset)):
123 | img_id = dataset.img_ids[idx]
124 | det, seg = results[idx]
125 | for label in range(len(det)):
126 | bboxes = det[label]
127 | segms = seg[label]
128 | for i in range(bboxes.shape[0]):
129 | data = dict()
130 | data['image_id'] = img_id
131 | data['bbox'] = xyxy2xywh(bboxes[i])
132 | data['score'] = float(bboxes[i][4])
133 | data['category_id'] = dataset.cat_ids[label]
134 | segms[i]['counts'] = segms[i]['counts'].decode()
135 | data['segmentation'] = segms[i]
136 | json_results.append(data)
137 | return json_results
138 |
139 |
140 | def results2json(dataset, results, out_file):
141 | if isinstance(results[0], list):
142 | json_results = det2json(dataset, results)
143 | elif isinstance(results[0], tuple):
144 | json_results = segm2json(dataset, results)
145 | elif isinstance(results[0], np.ndarray):
146 | json_results = proposal2json(dataset, results)
147 | else:
148 | raise TypeError('invalid type of results')
149 | mmcv.dump(json_results, out_file)
150 |
--------------------------------------------------------------------------------
/mmdet/core/evaluation/recall.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from terminaltables import AsciiTable
3 |
4 | from .bbox_overlaps import bbox_overlaps
5 |
6 |
7 | def _recalls(all_ious, proposal_nums, thrs):
8 |
9 | img_num = all_ious.shape[0]
10 | total_gt_num = sum([ious.shape[0] for ious in all_ious])
11 |
12 | _ious = np.zeros((proposal_nums.size, total_gt_num), dtype=np.float32)
13 | for k, proposal_num in enumerate(proposal_nums):
14 | tmp_ious = np.zeros(0)
15 | for i in range(img_num):
16 | ious = all_ious[i][:, :proposal_num].copy()
17 | gt_ious = np.zeros((ious.shape[0]))
18 | if ious.size == 0:
19 | tmp_ious = np.hstack((tmp_ious, gt_ious))
20 | continue
21 | for j in range(ious.shape[0]):
22 | gt_max_overlaps = ious.argmax(axis=1)
23 | max_ious = ious[np.arange(0, ious.shape[0]), gt_max_overlaps]
24 | gt_idx = max_ious.argmax()
25 | gt_ious[j] = max_ious[gt_idx]
26 | box_idx = gt_max_overlaps[gt_idx]
27 | ious[gt_idx, :] = -1
28 | ious[:, box_idx] = -1
29 | tmp_ious = np.hstack((tmp_ious, gt_ious))
30 | _ious[k, :] = tmp_ious
31 |
32 | _ious = np.fliplr(np.sort(_ious, axis=1))
33 | recalls = np.zeros((proposal_nums.size, thrs.size))
34 | for i, thr in enumerate(thrs):
35 | recalls[:, i] = (_ious >= thr).sum(axis=1) / float(total_gt_num)
36 |
37 | return recalls
38 |
39 |
40 | def set_recall_param(proposal_nums, iou_thrs):
41 | """Check proposal_nums and iou_thrs and set correct format.
42 | """
43 | if isinstance(proposal_nums, list):
44 | _proposal_nums = np.array(proposal_nums)
45 | elif isinstance(proposal_nums, int):
46 | _proposal_nums = np.array([proposal_nums])
47 | else:
48 | _proposal_nums = proposal_nums
49 |
50 | if iou_thrs is None:
51 | _iou_thrs = np.array([0.5])
52 | elif isinstance(iou_thrs, list):
53 | _iou_thrs = np.array(iou_thrs)
54 | elif isinstance(iou_thrs, float):
55 | _iou_thrs = np.array([iou_thrs])
56 | else:
57 | _iou_thrs = iou_thrs
58 |
59 | return _proposal_nums, _iou_thrs
60 |
61 |
62 | def eval_recalls(gts,
63 | proposals,
64 | proposal_nums=None,
65 | iou_thrs=None,
66 | print_summary=True):
67 | """Calculate recalls.
68 |
69 | Args:
70 | gts(list or ndarray): a list of arrays of shape (n, 4)
71 | proposals(list or ndarray): a list of arrays of shape (k, 4) or (k, 5)
72 | proposal_nums(int or list of int or ndarray): top N proposals
73 | thrs(float or list or ndarray): iou thresholds
74 |
75 | Returns:
76 | ndarray: recalls of different ious and proposal nums
77 | """
78 |
79 | img_num = len(gts)
80 | assert img_num == len(proposals)
81 |
82 | proposal_nums, iou_thrs = set_recall_param(proposal_nums, iou_thrs)
83 |
84 | all_ious = []
85 | for i in range(img_num):
86 | if proposals[i].ndim == 2 and proposals[i].shape[1] == 5:
87 | scores = proposals[i][:, 4]
88 | sort_idx = np.argsort(scores)[::-1]
89 | img_proposal = proposals[i][sort_idx, :]
90 | else:
91 | img_proposal = proposals[i]
92 | prop_num = min(img_proposal.shape[0], proposal_nums[-1])
93 | if gts[i] is None or gts[i].shape[0] == 0:
94 | ious = np.zeros((0, img_proposal.shape[0]), dtype=np.float32)
95 | else:
96 | ious = bbox_overlaps(gts[i], img_proposal[:prop_num, :4])
97 | all_ious.append(ious)
98 | all_ious = np.array(all_ious)
99 | recalls = _recalls(all_ious, proposal_nums, iou_thrs)
100 | if print_summary:
101 | print_recall_summary(recalls, proposal_nums, iou_thrs)
102 | return recalls
103 |
104 |
105 | def print_recall_summary(recalls,
106 | proposal_nums,
107 | iou_thrs,
108 | row_idxs=None,
109 | col_idxs=None):
110 | """Print recalls in a table.
111 |
112 | Args:
113 | recalls(ndarray): calculated from `bbox_recalls`
114 | proposal_nums(ndarray or list): top N proposals
115 | iou_thrs(ndarray or list): iou thresholds
116 | row_idxs(ndarray): which rows(proposal nums) to print
117 | col_idxs(ndarray): which cols(iou thresholds) to print
118 | """
119 | proposal_nums = np.array(proposal_nums, dtype=np.int32)
120 | iou_thrs = np.array(iou_thrs)
121 | if row_idxs is None:
122 | row_idxs = np.arange(proposal_nums.size)
123 | if col_idxs is None:
124 | col_idxs = np.arange(iou_thrs.size)
125 | row_header = [''] + iou_thrs[col_idxs].tolist()
126 | table_data = [row_header]
127 | for i, num in enumerate(proposal_nums[row_idxs]):
128 | row = [
129 | '{:.3f}'.format(val)
130 | for val in recalls[row_idxs[i], col_idxs].tolist()
131 | ]
132 | row.insert(0, num)
133 | table_data.append(row)
134 | table = AsciiTable(table_data)
135 | print(table.table)
136 |
137 |
138 | def plot_num_recall(recalls, proposal_nums):
139 | """Plot Proposal_num-Recalls curve.
140 |
141 | Args:
142 | recalls(ndarray or list): shape (k,)
143 | proposal_nums(ndarray or list): same shape as `recalls`
144 | """
145 | if isinstance(proposal_nums, np.ndarray):
146 | _proposal_nums = proposal_nums.tolist()
147 | else:
148 | _proposal_nums = proposal_nums
149 | if isinstance(recalls, np.ndarray):
150 | _recalls = recalls.tolist()
151 | else:
152 | _recalls = recalls
153 |
154 | import matplotlib.pyplot as plt
155 | f = plt.figure()
156 | plt.plot([0] + _proposal_nums, [0] + _recalls)
157 | plt.xlabel('Proposal num')
158 | plt.ylabel('Recall')
159 | plt.axis([0, proposal_nums.max(), 0, 1])
160 | f.show()
161 |
162 |
163 | def plot_iou_recall(recalls, iou_thrs):
164 | """Plot IoU-Recalls curve.
165 |
166 | Args:
167 | recalls(ndarray or list): shape (k,)
168 | iou_thrs(ndarray or list): same shape as `recalls`
169 | """
170 | if isinstance(iou_thrs, np.ndarray):
171 | _iou_thrs = iou_thrs.tolist()
172 | else:
173 | _iou_thrs = iou_thrs
174 | if isinstance(recalls, np.ndarray):
175 | _recalls = recalls.tolist()
176 | else:
177 | _recalls = recalls
178 |
179 | import matplotlib.pyplot as plt
180 | f = plt.figure()
181 | plt.plot(_iou_thrs + [1.0], _recalls + [0.])
182 | plt.xlabel('IoU')
183 | plt.ylabel('Recall')
184 | plt.axis([iou_thrs.min(), 1, 0, 1])
185 | f.show()
186 |
187 |
188 |
--------------------------------------------------------------------------------
/mmdet/core/loss/__init__.py:
--------------------------------------------------------------------------------
1 | from .losses import (weighted_nll_loss, weighted_cross_entropy,
2 | weighted_binary_cross_entropy, sigmoid_focal_loss,
3 | weighted_sigmoid_focal_loss, mask_cross_entropy,
4 | smooth_l1_loss, weighted_smoothl1, l1_loss, weighted_l1, accuracy)
5 |
6 | __all__ = [
7 | 'weighted_nll_loss', 'weighted_cross_entropy',
8 | 'weighted_binary_cross_entropy', 'sigmoid_focal_loss',
9 | 'weighted_sigmoid_focal_loss', 'mask_cross_entropy', 'smooth_l1_loss',
10 | 'weighted_smoothl1', 'l1_loss', 'weighted_l1', 'accuracy'
11 | ]
12 |
--------------------------------------------------------------------------------
/mmdet/core/loss/losses.py:
--------------------------------------------------------------------------------
1 | # TODO merge naive and weighted loss.
2 | import torch
3 | import torch.nn.functional as F
4 |
5 |
6 | def weighted_nll_loss(pred, label, weight, avg_factor=None):
7 | if avg_factor is None:
8 | avg_factor = max(torch.sum(weight > 0).float().item(), 1.)
9 | raw = F.nll_loss(pred, label, reduction='none')
10 | return torch.sum(raw * weight)[None] / avg_factor
11 |
12 |
13 | def weighted_cross_entropy(pred, label, weight, avg_factor=None, reduce=True):
14 | if avg_factor is None:
15 | avg_factor = max(torch.sum(weight > 0).float().item(), 1.)
16 | raw = F.cross_entropy(pred, label, reduction='none')
17 | if reduce:
18 | return torch.sum(raw * weight)[None] / avg_factor
19 | else:
20 | return raw * weight / avg_factor
21 |
22 |
23 | def weighted_binary_cross_entropy(pred, label, weight, avg_factor=None):
24 | if avg_factor is None:
25 | avg_factor = max(torch.sum(weight > 0).float().item(), 1.)
26 | return F.binary_cross_entropy_with_logits(
27 | pred, label.float(), weight.float(),
28 | reduction='sum')[None] / avg_factor
29 |
30 |
31 | def sigmoid_focal_loss(pred,
32 | target,
33 | weight,
34 | gamma=2.0,
35 | alpha=0.25,
36 | reduction='mean'):
37 | pred_sigmoid = pred.sigmoid()
38 | target = target.type_as(pred)
39 | pt = (1 - pred_sigmoid) * target + pred_sigmoid * (1 - target)
40 | weight = (alpha * target + (1 - alpha) * (1 - target)) * weight
41 | weight = weight * pt.pow(gamma)
42 | loss = F.binary_cross_entropy_with_logits(
43 | pred, target, reduction='none') * weight
44 | reduction_enum = F._Reduction.get_enum(reduction)
45 | # none: 0, mean:1, sum: 2
46 | if reduction_enum == 0:
47 | return loss
48 | elif reduction_enum == 1:
49 | return loss.mean()
50 | elif reduction_enum == 2:
51 | return loss.sum()
52 |
53 |
54 | def weighted_sigmoid_focal_loss(pred,
55 | target,
56 | weight,
57 | gamma=2.0,
58 | alpha=0.25,
59 | avg_factor=None,
60 | num_classes=80):
61 | if avg_factor is None:
62 | avg_factor = torch.sum(weight > 0).float().item() / num_classes + 1e-6
63 | return sigmoid_focal_loss(
64 | pred, target, weight, gamma=gamma, alpha=alpha,
65 | reduction='sum')[None] / avg_factor
66 |
67 |
68 | def mask_cross_entropy(pred, target, label):
69 | num_rois = pred.size()[0]
70 | inds = torch.arange(0, num_rois, dtype=torch.long, device=pred.device)
71 | pred_slice = pred[inds, label].squeeze(1)
72 | return F.binary_cross_entropy_with_logits(
73 | pred_slice, target, reduction='mean')[None]
74 |
75 |
76 | def smooth_l1_loss(pred, target, beta=1.0, reduction='mean'):
77 | assert beta > 0
78 | assert pred.size() == target.size() and target.numel() > 0
79 | diff = torch.abs(pred - target)
80 | loss = torch.where(diff < beta, 0.5 * diff * diff / beta,
81 | diff - 0.5 * beta)
82 | reduction_enum = F._Reduction.get_enum(reduction)
83 | # none: 0, mean:1, sum: 2
84 | if reduction_enum == 0:
85 | return loss
86 | elif reduction_enum == 1:
87 | return loss.sum() / pred.numel()
88 | elif reduction_enum == 2:
89 | return loss.sum()
90 |
91 |
92 | def weighted_smoothl1(pred, target, weight, beta=1.0, avg_factor=None):
93 | if avg_factor is None:
94 | avg_factor = torch.sum(weight > 0).float().item() / 4 + 1e-6
95 | loss = smooth_l1_loss(pred, target, beta, reduction='none')
96 | return torch.sum(loss * weight)[None] / avg_factor
97 |
98 | def l1_loss(pred, target, reduction='mean'):
99 | assert pred.size() == target.size() and target.numel() > 0
100 | loss = torch.abs(pred - target)
101 | reduction_enum = F._Reduction.get_enum(reduction)
102 | # none: 0, mean:1, sum: 2
103 | if reduction_enum == 0:
104 | return loss
105 | elif reduction_enum == 1:
106 | return loss.sum() / pred.numel()
107 | elif reduction_enum == 2:
108 | return loss.sum()
109 |
110 | def weighted_l1(pred, target, weight, avg_factor=None):
111 | if avg_factor is None:
112 | avg_factor = torch.sum(weight > 0).float().item() / 4 + 1e-6
113 | loss = l1_loss(pred, target, reduction='none')
114 | return torch.sum(loss * weight)[None] / avg_factor
115 |
116 | def accuracy(pred, target, topk=1):
117 | if isinstance(topk, int):
118 | topk = (topk, )
119 | return_single = True
120 | else:
121 | return_single = False
122 |
123 | maxk = max(topk)
124 | _, pred_label = pred.topk(maxk, 1, True, True)
125 | pred_label = pred_label.t()
126 | correct = pred_label.eq(target.view(1, -1).expand_as(pred_label))
127 |
128 | res = []
129 | for k in topk:
130 | correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)
131 | res.append(correct_k.mul_(100.0 / pred.size(0)))
132 | return res[0] if return_single else res
--------------------------------------------------------------------------------
/mmdet/core/mask/__init__.py:
--------------------------------------------------------------------------------
1 | from .utils import split_combined_polys
2 | from .mask_target import mask_target
3 |
4 | __all__ = ['split_combined_polys', 'mask_target']
5 |
--------------------------------------------------------------------------------
/mmdet/core/mask/mask_target.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | import mmcv
4 |
5 |
6 | def mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_masks_list,
7 | cfg):
8 | cfg_list = [cfg for _ in range(len(pos_proposals_list))]
9 | mask_targets = map(mask_target_single, pos_proposals_list,
10 | pos_assigned_gt_inds_list, gt_masks_list, cfg_list)
11 | mask_targets = torch.cat(list(mask_targets))
12 | return mask_targets
13 |
14 |
15 | def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_masks, cfg):
16 | mask_size = cfg.mask_size
17 | num_pos = pos_proposals.size(0)
18 | mask_targets = []
19 | if num_pos > 0:
20 | proposals_np = pos_proposals.cpu().numpy()
21 | pos_assigned_gt_inds = pos_assigned_gt_inds.cpu().numpy()
22 | for i in range(num_pos):
23 | gt_mask = gt_masks[pos_assigned_gt_inds[i]]
24 | bbox = proposals_np[i, :].astype(np.int32)
25 | x1, y1, x2, y2 = bbox
26 | w = np.maximum(x2 - x1 + 1, 1)
27 | h = np.maximum(y2 - y1 + 1, 1)
28 | # mask is uint8 both before and after resizing
29 | target = mmcv.imresize(gt_mask[y1:y1 + h, x1:x1 + w],
30 | (mask_size, mask_size))
31 | mask_targets.append(target)
32 | mask_targets = torch.from_numpy(np.stack(mask_targets)).float().to(
33 | pos_proposals.device)
34 | else:
35 | mask_targets = pos_proposals.new_zeros((0, mask_size, mask_size))
36 | return mask_targets
37 |
--------------------------------------------------------------------------------
/mmdet/core/mask/utils.py:
--------------------------------------------------------------------------------
1 | import mmcv
2 |
3 |
4 | def split_combined_polys(polys, poly_lens, polys_per_mask):
5 | """Split the combined 1-D polys into masks.
6 |
7 | A mask is represented as a list of polys, and a poly is represented as
8 | a 1-D array. In dataset, all masks are concatenated into a single 1-D
9 | tensor. Here we need to split the tensor into original representations.
10 |
11 | Args:
12 | polys (list): a list (length = image num) of 1-D tensors
13 | poly_lens (list): a list (length = image num) of poly length
14 | polys_per_mask (list): a list (length = image num) of poly number
15 | of each mask
16 |
17 | Returns:
18 | list: a list (length = image num) of list (length = mask num) of
19 | list (length = poly num) of numpy array
20 | """
21 | mask_polys_list = []
22 | for img_id in range(len(polys)):
23 | polys_single = polys[img_id]
24 | polys_lens_single = poly_lens[img_id].tolist()
25 | polys_per_mask_single = polys_per_mask[img_id].tolist()
26 |
27 | split_polys = mmcv.slice_list(polys_single, polys_lens_single)
28 | mask_polys = mmcv.slice_list(split_polys, polys_per_mask_single)
29 | mask_polys_list.append(mask_polys)
30 | return mask_polys_list
31 |
--------------------------------------------------------------------------------
/mmdet/core/point_cloud/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skyhehe123/SA-SSD/2d75c973af65453186bd9242d7fa5e62dc44ec03/mmdet/core/point_cloud/__init__.py
--------------------------------------------------------------------------------
/mmdet/core/point_cloud/voxel_generator.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from mmdet.ops.points_op import points_to_voxel
3 |
4 | class VoxelGenerator:
5 | def __init__(self,
6 | voxel_size,
7 | point_cloud_range,
8 | max_num_points,
9 | max_voxels=20000):
10 | point_cloud_range = np.array(point_cloud_range, dtype=np.float32)
11 | # [0, -40, -3, 70.4, 40, 1]
12 | voxel_size = np.array(voxel_size, dtype=np.float32)
13 | grid_size = (
14 | point_cloud_range[3:] - point_cloud_range[:3]) / voxel_size
15 | grid_size = np.round(grid_size).astype(np.int64)
16 | self._voxel_size = voxel_size
17 | self._point_cloud_range = point_cloud_range
18 | self._max_num_points = max_num_points
19 | self._max_voxels = max_voxels
20 | self._grid_size = grid_size
21 |
22 | def generate(self, points):
23 | return points_to_voxel(
24 | points, self._voxel_size, self._point_cloud_range,
25 | self._max_num_points, True, self._max_voxels)
26 |
27 | @property
28 | def voxel_size(self):
29 | return self._voxel_size
30 |
31 | @property
32 | def max_num_points_per_voxel(self):
33 | return self._max_num_points
34 |
35 | @property
36 | def point_cloud_range(self):
37 | return self._point_cloud_range
38 |
39 | @property
40 | def grid_size(self):
41 | return self._grid_size
42 |
43 |
44 |
--------------------------------------------------------------------------------
/mmdet/core/post_processing/__init__.py:
--------------------------------------------------------------------------------
1 | from .merge_augs import (merge_aug_proposals, merge_aug_bboxes,
2 | merge_aug_scores, merge_aug_masks)
3 | from .rotate_nms_gpu import rotate_nms_gpu
4 | __all__ = [
5 | 'merge_aug_proposals', 'merge_aug_bboxes',
6 | 'merge_aug_scores', 'merge_aug_masks','rotate_nms_gpu'
7 | ]
8 |
--------------------------------------------------------------------------------
/mmdet/core/post_processing/bbox_nms.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from mmdet.ops.iou3d.iou3d_utils import nms_gpu
3 |
4 | def rotate_nms_torch(rbboxes,
5 | scores,
6 | pre_max_size=None,
7 | post_max_size=None,
8 | iou_threshold=0.5):
9 | if pre_max_size is not None:
10 | num_keeped_scores = scores.shape[0]
11 | pre_max_size = min(num_keeped_scores, pre_max_size)
12 | scores, indices = torch.topk(scores, k=pre_max_size)
13 | rbboxes = rbboxes[indices]
14 |
15 | if len(rbboxes) == 0:
16 | keep = torch.empty((0,), dtype=torch.int64)
17 | else:
18 | ret = nms_gpu(rbboxes, scores, iou_threshold)
19 | keep = ret[:post_max_size]
20 |
21 | if keep.shape[0] == 0:
22 | return None
23 |
24 | if pre_max_size is not None:
25 | return indices[keep]
26 | else:
27 | return keep
--------------------------------------------------------------------------------
/mmdet/core/post_processing/merge_augs.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | import numpy as np
4 | from mmdet.core.bbox.transforms import bbox_mapping_back
5 |
6 |
7 | def merge_aug_proposals(aug_proposals, img_metas, rpn_test_cfg):
8 | """Merge augmented proposals (multiscale, flip, etc.)
9 |
10 | Args:
11 | aug_proposals (list[Tensor]): proposals from different testing
12 | schemes, shape (n, 5). Note that they are not rescaled to the
13 | original image size.
14 | img_metas (list[dict]): image info including "shape_scale" and "flip".
15 | rpn_test_cfg (dict): rpn test config.
16 |
17 | Returns:
18 | Tensor: shape (n, 4), proposals corresponding to original image scale.
19 | """
20 | recovered_proposals = []
21 | for proposals, img_info in zip(aug_proposals, img_metas):
22 | img_shape = img_info['img_shape']
23 | scale_factor = img_info['scale_factor']
24 | flip = img_info['flip']
25 | _proposals = proposals.clone()
26 | _proposals[:, :4] = bbox_mapping_back(_proposals[:, :4], img_shape,
27 | scale_factor, flip)
28 | recovered_proposals.append(_proposals)
29 | aug_proposals = torch.cat(recovered_proposals, dim=0)
30 | merged_proposals, _ = nms(aug_proposals, rpn_test_cfg.nms_thr)
31 | scores = merged_proposals[:, 4]
32 | _, order = scores.sort(0, descending=True)
33 | num = min(rpn_test_cfg.max_num, merged_proposals.shape[0])
34 | order = order[:num]
35 | merged_proposals = merged_proposals[order, :]
36 | return merged_proposals
37 |
38 |
39 | def merge_aug_bboxes(aug_bboxes, aug_scores, img_metas, rcnn_test_cfg):
40 | """Merge augmented detection bboxes and scores.
41 |
42 | Args:
43 | aug_bboxes (list[Tensor]): shape (n, 4*#class)
44 | aug_scores (list[Tensor] or None): shape (n, #class)
45 | img_shapes (list[Tensor]): shape (3, ).
46 | rcnn_test_cfg (dict): rcnn test config.
47 |
48 | Returns:
49 | tuple: (bboxes, scores)
50 | """
51 | recovered_bboxes = []
52 | for bboxes, img_info in zip(aug_bboxes, img_metas):
53 | img_shape = img_info[0]['img_shape']
54 | scale_factor = img_info[0]['scale_factor']
55 | flip = img_info[0]['flip']
56 | bboxes = bbox_mapping_back(bboxes, img_shape, scale_factor, flip)
57 | recovered_bboxes.append(bboxes)
58 | bboxes = torch.stack(recovered_bboxes).mean(dim=0)
59 | if aug_scores is None:
60 | return bboxes
61 | else:
62 | scores = torch.stack(aug_scores).mean(dim=0)
63 | return bboxes, scores
64 |
65 |
66 | def merge_aug_scores(aug_scores):
67 | """Merge augmented bbox scores."""
68 | if isinstance(aug_scores[0], torch.Tensor):
69 | return torch.mean(torch.stack(aug_scores), dim=0)
70 | else:
71 | return np.mean(aug_scores, axis=0)
72 |
73 |
74 | def merge_aug_masks(aug_masks, img_metas, rcnn_test_cfg, weights=None):
75 | """Merge augmented mask prediction.
76 |
77 | Args:
78 | aug_masks (list[ndarray]): shape (n, #class, h, w)
79 | img_shapes (list[ndarray]): shape (3, ).
80 | rcnn_test_cfg (dict): rcnn test config.
81 |
82 | Returns:
83 | tuple: (bboxes, scores)
84 | """
85 | recovered_masks = [
86 | mask if not img_info[0]['flip'] else mask[..., ::-1]
87 | for mask, img_info in zip(aug_masks, img_metas)
88 | ]
89 | if weights is None:
90 | merged_masks = np.mean(recovered_masks, axis=0)
91 | else:
92 | merged_masks = np.average(
93 | np.array(recovered_masks), axis=0, weights=np.array(weights))
94 | return merged_masks
95 |
--------------------------------------------------------------------------------
/mmdet/core/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .dist_utils import allreduce_grads, DistOptimizerHook
2 | from .misc import tensor2imgs, unmap, multi_apply
3 |
4 | __all__ = [
5 | 'allreduce_grads', 'DistOptimizerHook', 'tensor2imgs', 'unmap',
6 | 'multi_apply'
7 | ]
8 |
--------------------------------------------------------------------------------
/mmdet/core/utils/dist_utils.py:
--------------------------------------------------------------------------------
1 | from collections import OrderedDict
2 |
3 | import torch.distributed as dist
4 | from torch._utils import (_flatten_dense_tensors, _unflatten_dense_tensors,
5 | _take_tensors)
6 | from mmcv.runner import OptimizerHook
7 |
8 |
9 | def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1):
10 | if bucket_size_mb > 0:
11 | bucket_size_bytes = bucket_size_mb * 1024 * 1024
12 | buckets = _take_tensors(tensors, bucket_size_bytes)
13 | else:
14 | buckets = OrderedDict()
15 | for tensor in tensors:
16 | tp = tensor.type()
17 | if tp not in buckets:
18 | buckets[tp] = []
19 | buckets[tp].append(tensor)
20 | buckets = buckets.values()
21 |
22 | for bucket in buckets:
23 | flat_tensors = _flatten_dense_tensors(bucket)
24 | dist.all_reduce(flat_tensors)
25 | flat_tensors.div_(world_size)
26 | for tensor, synced in zip(
27 | bucket, _unflatten_dense_tensors(flat_tensors, bucket)):
28 | tensor.copy_(synced)
29 |
30 |
31 | def allreduce_grads(model, coalesce=True, bucket_size_mb=-1):
32 | grads = [
33 | param.grad.data for param in model.parameters()
34 | if param.requires_grad and param.grad is not None
35 | ]
36 | world_size = dist.get_world_size()
37 | if coalesce:
38 | _allreduce_coalesced(grads, world_size, bucket_size_mb)
39 | else:
40 | for tensor in grads:
41 | dist.all_reduce(tensor.div_(world_size))
42 |
43 |
44 | class DistOptimizerHook(OptimizerHook):
45 |
46 | def __init__(self, grad_clip=None, coalesce=True, bucket_size_mb=-1):
47 | self.grad_clip = grad_clip
48 | self.coalesce = coalesce
49 | self.bucket_size_mb = bucket_size_mb
50 |
51 | def after_train_iter(self, runner):
52 | runner.optimizer.zero_grad()
53 | runner.outputs['loss'].backward()
54 | allreduce_grads(runner.model, self.coalesce, self.bucket_size_mb)
55 | if self.grad_clip is not None:
56 | self.clip_grads(runner.model.parameters())
57 | runner.optimizer.step()
58 |
--------------------------------------------------------------------------------
/mmdet/core/utils/misc.py:
--------------------------------------------------------------------------------
1 | from functools import partial
2 | import mmcv
3 | import numpy as np
4 | from six.moves import map, zip
5 | import time
6 | import torch
7 |
8 | def tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True):
9 | num_imgs = tensor.size(0)
10 | mean = np.array(mean, dtype=np.float32)
11 | std = np.array(std, dtype=np.float32)
12 | imgs = []
13 | for img_id in range(num_imgs):
14 | img = tensor[img_id, ...].cpu().numpy().transpose(1, 2, 0)
15 | img = mmcv.imdenormalize(
16 | img, mean, std, to_bgr=to_rgb).astype(np.uint8)
17 | imgs.append(np.ascontiguousarray(img))
18 | return imgs
19 |
20 |
21 | def multi_apply(func, *args, **kwargs):
22 | pfunc = partial(func, **kwargs) if kwargs else func
23 | map_results = map(pfunc, *args)
24 | return tuple(map(list, zip(*map_results)))
25 |
26 |
27 | def unmap(data, count, inds, fill=0):
28 | """ Unmap a subset of item (data) back to the original set of items (of
29 | size count) """
30 | if data.dim() == 1:
31 | ret = data.new_full((count, ), fill)
32 | ret[inds] = data
33 | else:
34 | new_size = (count, ) + data.size()[1:]
35 | ret = data.new_full(new_size, fill)
36 | ret[inds, :] = data
37 | return ret
38 |
39 | class TimeCatcher(object):
40 | def __init__(self, show=True, cuda=True):
41 | self.show=show
42 | self.cuda = cuda
43 |
44 | def __enter__(self):
45 | if self.cuda:
46 | torch.cuda.synchronize()
47 | self.start = time.time()
48 | return self
49 |
50 | def __exit__(self, type, value, traceback):
51 | if self.cuda:
52 | torch.cuda.synchronize()
53 | self.end = time.time()
54 | ms = (self.end - self.start) * 1000
55 | if self.show:
56 | print("%.5f ms" % ms)
--------------------------------------------------------------------------------
/mmdet/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | from .custom import CustomDataset
2 | from .coco import CocoDataset
3 | from .loader import GroupSampler, DistributedGroupSampler, build_dataloader
4 | from .utils import to_tensor, random_scale, show_ann, get_dataset
5 | from .concat_dataset import ConcatDataset
6 | from .kitti import KittiLiDAR, KittiVideo
7 | from .voc import VOCDataset
8 | __all__ = [
9 | 'CustomDataset', 'CocoDataset', 'GroupSampler', 'DistributedGroupSampler',
10 | 'ConcatDataset', 'build_dataloader', 'to_tensor', 'random_scale',
11 | 'show_ann', 'get_dataset', 'KittiLiDAR','KittiVideo', 'VOCDataset'
12 | ]
13 |
--------------------------------------------------------------------------------
/mmdet/datasets/coco.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from pycocotools.coco import COCO
3 |
4 | from .custom import CustomDataset
5 |
6 |
7 | class CocoDataset(CustomDataset):
8 |
9 | def load_annotations(self, ann_file):
10 | self.coco = COCO(ann_file)
11 | self.cat_ids = self.coco.getCatIds()
12 | self.cat2label = {
13 | cat_id: i + 1
14 | for i, cat_id in enumerate(self.cat_ids)
15 | }
16 | self.img_ids = self.coco.getImgIds()
17 | img_infos = []
18 | for i in self.img_ids:
19 | info = self.coco.loadImgs([i])[0]
20 | info['filename'] = info['file_name']
21 | img_infos.append(info)
22 | return img_infos
23 |
24 | def get_ann_info(self, idx):
25 | img_id = self.img_infos[idx]['id']
26 | ann_ids = self.coco.getAnnIds(imgIds=[img_id])
27 | ann_info = self.coco.loadAnns(ann_ids)
28 | return self._parse_ann_info(ann_info)
29 |
30 | def _filter_imgs(self, min_size=32):
31 | """Filter images too small or without ground truths."""
32 | valid_inds = []
33 | ids_with_ann = set(_['image_id'] for _ in self.coco.anns.values())
34 | for i, img_info in enumerate(self.img_infos):
35 | if self.img_ids[i] not in ids_with_ann:
36 | continue
37 | if min(img_info['width'], img_info['height']) >= min_size:
38 | valid_inds.append(i)
39 | return valid_inds
40 |
41 | def _parse_ann_info(self, ann_info, with_mask=True):
42 | """Parse bbox and mask annotation.
43 |
44 | Args:
45 | ann_info (list[dict]): Annotation info of an image.
46 | with_mask (bool): Whether to parse mask annotations.
47 |
48 | Returns:
49 | dict: A dict containing the following keys: bboxes, bboxes_ignore,
50 | labels, masks, mask_polys, poly_lens.
51 | """
52 | gt_bboxes = []
53 | gt_labels = []
54 | gt_bboxes_ignore = []
55 | # Two formats are provided.
56 | # 1. mask: a binary map of the same size of the image.
57 | # 2. polys: each mask consists of one or several polys, each poly is a
58 | # list of float.
59 | if with_mask:
60 | gt_masks = []
61 | gt_mask_polys = []
62 | gt_poly_lens = []
63 | for i, ann in enumerate(ann_info):
64 | if ann.get('ignore', False):
65 | continue
66 | x1, y1, w, h = ann['bbox']
67 | if ann['area'] <= 0 or w < 1 or h < 1:
68 | continue
69 | bbox = [x1, y1, x1 + w - 1, y1 + h - 1]
70 | if ann['iscrowd']:
71 | gt_bboxes_ignore.append(bbox)
72 | else:
73 | gt_bboxes.append(bbox)
74 | gt_labels.append(self.cat2label[ann['category_id']])
75 | if with_mask:
76 | gt_masks.append(self.coco.annToMask(ann))
77 | mask_polys = [
78 | p for p in ann['segmentation'] if len(p) >= 6
79 | ] # valid polygons have >= 3 points (6 coordinates)
80 | poly_lens = [len(p) for p in mask_polys]
81 | gt_mask_polys.append(mask_polys)
82 | gt_poly_lens.extend(poly_lens)
83 | if gt_bboxes:
84 | gt_bboxes = np.array(gt_bboxes, dtype=np.float32)
85 | gt_labels = np.array(gt_labels, dtype=np.int64)
86 | else:
87 | gt_bboxes = np.zeros((0, 4), dtype=np.float32)
88 | gt_labels = np.array([], dtype=np.int64)
89 |
90 | if gt_bboxes_ignore:
91 | gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32)
92 | else:
93 | gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32)
94 |
95 | ann = dict(
96 | bboxes=gt_bboxes, labels=gt_labels, bboxes_ignore=gt_bboxes_ignore)
97 |
98 | if with_mask:
99 | ann['masks'] = gt_masks
100 | # poly format is not used in the current implementation
101 | ann['mask_polys'] = gt_mask_polys
102 | ann['poly_lens'] = gt_poly_lens
103 | return ann
104 |
--------------------------------------------------------------------------------
/mmdet/datasets/concat_dataset.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset
3 |
4 |
5 | class ConcatDataset(_ConcatDataset):
6 | """
7 | Same as torch.utils.data.dataset.ConcatDataset, but
8 | concat the group flag for image aspect ratio.
9 | """
10 | def __init__(self, datasets):
11 | """
12 | flag: Images with aspect ratio greater than 1 will be set as group 1,
13 | otherwise group 0.
14 | """
15 | super(ConcatDataset, self).__init__(datasets)
16 | if hasattr(datasets[0], 'flag'):
17 | flags = []
18 | for i in range(0, len(datasets)):
19 | flags.append(datasets[i].flag)
20 | self.flag = np.concatenate(flags)
21 |
--------------------------------------------------------------------------------
/mmdet/datasets/loader/__init__.py:
--------------------------------------------------------------------------------
1 | from .build_loader import build_dataloader
2 | from .sampler import GroupSampler, DistributedGroupSampler
3 |
4 | __all__ = [
5 | 'GroupSampler', 'DistributedGroupSampler', 'build_dataloader'
6 | ]
7 |
--------------------------------------------------------------------------------
/mmdet/datasets/loader/build_loader.py:
--------------------------------------------------------------------------------
1 | from functools import partial
2 |
3 | from mmcv.runner import get_dist_info
4 | from mmcv.parallel import collate
5 | from torch.utils.data import DataLoader
6 | from .sampler import GroupSampler, DistributedGroupSampler
7 |
8 | # https://github.com/pytorch/pytorch/issues/973
9 | import resource
10 | rlimit = resource.getrlimit(resource.RLIMIT_NOFILE)
11 | resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1]))
12 |
13 |
14 | def build_dataloader(dataset,
15 | imgs_per_gpu,
16 | workers_per_gpu,
17 | num_gpus=1,
18 | dist=True,
19 | **kwargs):
20 | if dist:
21 | rank, world_size = get_dist_info()
22 | sampler = DistributedGroupSampler(dataset, imgs_per_gpu, world_size,
23 | rank)
24 | batch_size = imgs_per_gpu
25 | num_workers = workers_per_gpu
26 | else:
27 | if not kwargs.get('shuffle', True):
28 | sampler = None
29 | else:
30 | sampler = GroupSampler(dataset, imgs_per_gpu)
31 | batch_size = num_gpus * imgs_per_gpu
32 | num_workers = num_gpus * workers_per_gpu
33 |
34 | data_loader = DataLoader(
35 | dataset,
36 | batch_size=batch_size,
37 | sampler=sampler,
38 | num_workers=num_workers,
39 | collate_fn=partial(collate, samples_per_gpu=imgs_per_gpu),
40 | pin_memory=False,
41 | **kwargs)
42 |
43 | return data_loader
44 |
--------------------------------------------------------------------------------
/mmdet/datasets/loader/sampler.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 |
3 | import math
4 | import torch
5 | import numpy as np
6 |
7 | from torch.distributed import get_world_size, get_rank
8 | from torch.utils.data.sampler import Sampler
9 |
10 |
11 | class GroupSampler(Sampler):
12 |
13 | def __init__(self, dataset, samples_per_gpu=1):
14 | assert hasattr(dataset, 'flag')
15 | self.dataset = dataset
16 | self.samples_per_gpu = samples_per_gpu
17 | self.flag = dataset.flag.astype(np.int64)
18 | self.group_sizes = np.bincount(self.flag)
19 | self.num_samples = 0
20 | for i, size in enumerate(self.group_sizes):
21 | self.num_samples += int(np.ceil(
22 | size / self.samples_per_gpu)) * self.samples_per_gpu
23 |
24 | def __iter__(self):
25 | indices = []
26 | for i, size in enumerate(self.group_sizes):
27 | if size == 0:
28 | continue
29 | indice = np.where(self.flag == i)[0]
30 | assert len(indice) == size
31 | np.random.shuffle(indice)
32 | num_extra = int(np.ceil(size / self.samples_per_gpu)
33 | ) * self.samples_per_gpu - len(indice)
34 | indice = np.concatenate([indice, indice[:num_extra]])
35 | indices.append(indice)
36 | indices = np.concatenate(indices)
37 | indices = [
38 | indices[i * self.samples_per_gpu:(i + 1) * self.samples_per_gpu]
39 | for i in np.random.permutation(
40 | range(len(indices) // self.samples_per_gpu))
41 | ]
42 | indices = np.concatenate(indices)
43 | indices = torch.from_numpy(indices).long()
44 | assert len(indices) == self.num_samples
45 | return iter(indices)
46 |
47 | def __len__(self):
48 | return self.num_samples
49 |
50 |
51 | class DistributedGroupSampler(Sampler):
52 | """Sampler that restricts data loading to a subset of the dataset.
53 | It is especially useful in conjunction with
54 | :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each
55 | process can pass a DistributedSampler instance as a DataLoader sampler,
56 | and load a subset of the original dataset that is exclusive to it.
57 | .. note::
58 | Dataset is assumed to be of constant size.
59 | Arguments:
60 | dataset: Dataset used for sampling.
61 | num_replicas (optional): Number of processes participating in
62 | distributed training.
63 | rank (optional): Rank of the current process within num_replicas.
64 | """
65 |
66 | def __init__(self,
67 | dataset,
68 | samples_per_gpu=1,
69 | num_replicas=None,
70 | rank=None):
71 | if num_replicas is None:
72 | num_replicas = get_world_size()
73 | if rank is None:
74 | rank = get_rank()
75 | self.dataset = dataset
76 | self.samples_per_gpu = samples_per_gpu
77 | self.num_replicas = num_replicas
78 | self.rank = rank
79 | self.epoch = 0
80 |
81 | assert hasattr(self.dataset, 'flag')
82 | self.flag = self.dataset.flag
83 | self.group_sizes = np.bincount(self.flag)
84 |
85 | self.num_samples = 0
86 | for i, j in enumerate(self.group_sizes):
87 | self.num_samples += int(
88 | math.ceil(self.group_sizes[i] * 1.0 / self.samples_per_gpu /
89 | self.num_replicas)) * self.samples_per_gpu
90 | self.total_size = self.num_samples * self.num_replicas
91 |
92 | def __iter__(self):
93 | # deterministically shuffle based on epoch
94 | g = torch.Generator()
95 | g.manual_seed(self.epoch)
96 |
97 | indices = []
98 | for i, size in enumerate(self.group_sizes):
99 | if size > 0:
100 | indice = np.where(self.flag == i)[0]
101 | assert len(indice) == size
102 | indice = indice[list(torch.randperm(int(size),
103 | generator=g))].tolist()
104 | extra = int(
105 | math.ceil(
106 | size * 1.0 / self.samples_per_gpu / self.num_replicas)
107 | ) * self.samples_per_gpu * self.num_replicas - len(indice)
108 | indice += indice[:extra]
109 | indices += indice
110 |
111 | assert len(indices) == self.total_size
112 |
113 | indices = [
114 | indices[j] for i in list(
115 | torch.randperm(
116 | len(indices) // self.samples_per_gpu, generator=g))
117 | for j in range(i * self.samples_per_gpu, (i + 1) *
118 | self.samples_per_gpu)
119 | ]
120 |
121 | # subsample
122 | offset = self.num_samples * self.rank
123 | indices = indices[offset:offset + self.num_samples]
124 | assert len(indices) == self.num_samples
125 |
126 | return iter(indices)
127 |
128 | def __len__(self):
129 | return self.num_samples
130 |
131 | def set_epoch(self, epoch):
132 | self.epoch = epoch
133 |
--------------------------------------------------------------------------------
/mmdet/datasets/transforms.py:
--------------------------------------------------------------------------------
1 | import mmcv
2 | import numpy as np
3 | import torch
4 |
5 | __all__ = ['ImageTransform', 'BboxTransform', 'MaskTransform', 'Numpy2Tensor']
6 |
7 |
8 | class ImageTransform(object):
9 | """Preprocess an image.
10 |
11 | 1. rescale the image to expected size
12 | 2. normalize the image
13 | 3. flip the image (if needed)
14 | 4. pad the image (if needed)
15 | 5. transpose to (c, h, w)
16 | """
17 |
18 | def __init__(self,
19 | mean=(0, 0, 0),
20 | std=(1, 1, 1),
21 | to_rgb=True,
22 | size_divisor=None):
23 | self.mean = np.array(mean, dtype=np.float32)
24 | self.std = np.array(std, dtype=np.float32)
25 | self.to_rgb = to_rgb
26 | self.size_divisor = size_divisor
27 |
28 | def __call__(self, img, scale, flip=False):
29 | img, scale_factor = mmcv.imrescale(img, scale, return_scale=True)
30 | img_shape = img.shape
31 | img = mmcv.imnormalize(img, self.mean, self.std, self.to_rgb)
32 | if flip:
33 | img = mmcv.imflip(img)
34 | if self.size_divisor is not None:
35 | img = mmcv.impad_to_multiple(img, self.size_divisor)
36 | pad_shape = img.shape
37 | else:
38 | pad_shape = img_shape
39 | img = img.transpose(2, 0, 1)
40 | return img, img_shape, pad_shape, scale_factor
41 |
42 |
43 | def bbox_flip(bboxes, img_shape):
44 | """Flip bboxes horizontally.
45 |
46 | Args:
47 | bboxes(ndarray): shape (..., 4*k)
48 | img_shape(tuple): (height, width)
49 | """
50 | assert bboxes.shape[-1] % 4 == 0
51 | w = img_shape[1]
52 | flipped = bboxes.copy()
53 | flipped[..., 0::4] = w - bboxes[..., 2::4] - 1
54 | flipped[..., 2::4] = w - bboxes[..., 0::4] - 1
55 | return flipped
56 |
57 |
58 | class BboxTransform(object):
59 | """Preprocess gt bboxes.
60 |
61 | 1. rescale bboxes according to image size
62 | 2. flip bboxes (if needed)
63 | 3. pad the first dimension to `max_num_gts`
64 | """
65 |
66 | def __init__(self, max_num_gts=None):
67 | self.max_num_gts = max_num_gts
68 |
69 | def __call__(self, bboxes, img_shape, scale_factor, flip=False):
70 | gt_bboxes = bboxes * scale_factor
71 | if flip:
72 | gt_bboxes = bbox_flip(gt_bboxes, img_shape)
73 | gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_shape[1])
74 | gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_shape[0])
75 | if self.max_num_gts is None:
76 | return gt_bboxes
77 | else:
78 | num_gts = gt_bboxes.shape[0]
79 | padded_bboxes = np.zeros((self.max_num_gts, 4), dtype=np.float32)
80 | padded_bboxes[:num_gts, :] = gt_bboxes
81 | return padded_bboxes
82 |
83 |
84 | class MaskTransform(object):
85 | """Preprocess masks.
86 |
87 | 1. resize masks to expected size and stack to a single array
88 | 2. flip the masks (if needed)
89 | 3. pad the masks (if needed)
90 | """
91 |
92 | def __call__(self, masks, pad_shape, scale_factor, flip=False):
93 | masks = [
94 | mmcv.imrescale(mask, scale_factor, interpolation='nearest')
95 | for mask in masks
96 | ]
97 | if flip:
98 | masks = [mask[:, ::-1] for mask in masks]
99 | padded_masks = [
100 | mmcv.impad(mask, pad_shape[:2], pad_val=0) for mask in masks
101 | ]
102 | padded_masks = np.stack(padded_masks, axis=0)
103 | return padded_masks
104 |
105 |
106 | class Numpy2Tensor(object):
107 |
108 | def __init__(self):
109 | pass
110 |
111 | def __call__(self, *args):
112 | if len(args) == 1:
113 | return torch.from_numpy(args[0])
114 | else:
115 | return tuple([torch.from_numpy(np.array(array)) for array in args])
--------------------------------------------------------------------------------
/mmdet/datasets/utils.py:
--------------------------------------------------------------------------------
1 | import copy
2 | from collections import Sequence
3 |
4 | import mmcv
5 | from mmcv.runner import obj_from_dict
6 | import torch
7 | from collections import defaultdict
8 | import matplotlib.pyplot as plt
9 | import numpy as np
10 | from .concat_dataset import ConcatDataset
11 | from .. import datasets
12 | from mmdet.core.point_cloud import voxel_generator
13 | from mmdet.core.point_cloud import point_augmentor
14 | from mmdet.core.bbox3d import bbox3d_target
15 | from mmdet.core.anchor import anchor3d_generator
16 | def to_tensor(data):
17 | """Convert objects of various python types to :obj:`torch.Tensor`.
18 |
19 | Supported types are: :class:`numpy.ndarray`, :class:`torch.Tensor`,
20 | :class:`Sequence`, :class:`int` and :class:`float`.
21 | """
22 | if isinstance(data, torch.Tensor):
23 | return data
24 | elif isinstance(data, np.ndarray):
25 | return torch.from_numpy(data)
26 | elif isinstance(data, Sequence) and not mmcv.is_str(data):
27 | return [to_tensor(d) for d in data]
28 | elif isinstance(data, int):
29 | return torch.LongTensor([data])
30 | elif isinstance(data, float):
31 | return torch.FloatTensor([data])
32 | elif data is None:
33 | return data
34 | else:
35 | raise TypeError('type {} cannot be converted to tensor.'.format(
36 | type(data)))
37 |
38 |
39 | def random_scale(img_scales, mode='range'):
40 | """Randomly select a scale from a list of scales or scale ranges.
41 |
42 | Args:
43 | img_scales (list[tuple]): Image scale or scale range.
44 | mode (str): "range" or "value".
45 |
46 | Returns:
47 | tuple: Sampled image scale.
48 | """
49 | num_scales = len(img_scales)
50 | if num_scales == 1: # fixed scale is specified
51 | img_scale = img_scales[0]
52 | elif num_scales == 2: # randomly sample a scale
53 | if mode == 'range':
54 | img_scale_long = [max(s) for s in img_scales]
55 | img_scale_short = [min(s) for s in img_scales]
56 | long_edge = np.random.randint(
57 | min(img_scale_long),
58 | max(img_scale_long) + 1)
59 | short_edge = np.random.randint(
60 | min(img_scale_short),
61 | max(img_scale_short) + 1)
62 | img_scale = (long_edge, short_edge)
63 | elif mode == 'value':
64 | img_scale = img_scales[np.random.randint(num_scales)]
65 | else:
66 | if mode != 'value':
67 | raise ValueError(
68 | 'Only "value" mode supports more than 2 image scales')
69 | img_scale = img_scales[np.random.randint(num_scales)]
70 | return img_scale
71 |
72 |
73 | def show_ann(coco, img, ann_info):
74 | plt.imshow(mmcv.bgr2rgb(img))
75 | plt.axis('off')
76 | coco.showAnns(ann_info)
77 | plt.show()
78 |
79 |
80 | def get_dataset(data_cfg):
81 |
82 | if isinstance(data_cfg['ann_file'], (list, tuple)):
83 | ann_files = data_cfg['ann_file']
84 | num_dset = len(ann_files)
85 | else:
86 | ann_files = [data_cfg['ann_file']]
87 | num_dset = 1
88 |
89 | if isinstance(data_cfg['img_prefix'], (list, tuple)):
90 | img_prefixes = data_cfg['img_prefix']
91 | else:
92 | img_prefixes = [data_cfg['img_prefix']] * num_dset
93 | assert len(img_prefixes) == num_dset
94 |
95 | if 'generator' in data_cfg.keys() and data_cfg['generator'] is not None:
96 | generator = obj_from_dict(data_cfg['generator'], voxel_generator)
97 | else:
98 | generator = None
99 |
100 | if 'augmentor' in data_cfg.keys() and data_cfg['augmentor'] is not None:
101 | augmentor = obj_from_dict(data_cfg['augmentor'], point_augmentor)
102 | else:
103 | augmentor = None
104 |
105 | if 'anchor_generator' in data_cfg.keys() and data_cfg['anchor_generator'] is not None:
106 | anchor_generator = {cls: obj_from_dict(cfg, anchor3d_generator) for cls, cfg in data_cfg['anchor_generator'].items()}
107 | else:
108 | anchor_generator = None
109 |
110 | dsets = []
111 | for i in range(num_dset):
112 | data_info = copy.deepcopy(data_cfg)
113 | data_info['ann_file'] = ann_files[i]
114 | data_info['img_prefix'] = img_prefixes[i]
115 | if generator is not None:
116 | data_info['generator'] = generator
117 | if anchor_generator is not None:
118 | data_info['anchor_generator'] = anchor_generator
119 | if augmentor is not None:
120 | data_info['augmentor'] = augmentor
121 | dset = obj_from_dict(data_info, datasets)
122 | dsets.append(dset)
123 | if len(dsets) > 1:
124 | dset = ConcatDataset(dsets)
125 | else:
126 | dset = dsets[0]
127 | return dset
128 |
129 | # def example_convert_to_torch(example, device=None) -> dict:
130 | # example_torch = {}
131 | # torch_names = [
132 | # 'img', 'voxels','coordinates',\
133 | # # 'anchors_mask','anchors',\
134 | # #'gt_labels','gt_bboxes','gt_bboxes_ignore',\
135 | # 'num_points', 'right', 'grid'
136 | # ]
137 | # for k, v in example.items():
138 | # if k in torch_names:
139 | # example_torch[k] = to_tensor(v)
140 | # else:
141 | # example_torch[k] = v
142 | #
143 | # return example_torch
144 |
145 | # def merge_second_batch(batch_list, samples_per_gpu=1, to_torch=True):
146 | # example_merged = defaultdict(list)
147 | # for example in batch_list:
148 | # for k, v in example.items():
149 | # example_merged[k].append(v)
150 | # ret = {}
151 | #
152 | # for key, elems in example_merged.items():
153 | # if key in [
154 | # 'voxels', 'num_points',
155 | # ]:
156 | # ret[key] = np.concatenate(elems, axis=0)
157 | # elif key == 'coordinates':
158 | # coors = []
159 | # for i, coor in enumerate(elems):
160 | # coor_pad = np.pad(
161 | # coor, ((0, 0), (1, 0)),
162 | # mode='constant',
163 | # constant_values=i)
164 | # coors.append(coor_pad)
165 | # ret[key] = np.concatenate(coors, axis=0)
166 | # elif key in [
167 | # 'img_meta', 'img_shape', 'calib', 'sample_idx', 'gt_labels', 'gt_bboxes','gt_bboxes_ignore'
168 | # ]:
169 | # ret[key] = elems
170 | # else:
171 | # ret[key] = np.stack(elems, axis=0)
172 | #
173 | # if to_torch:
174 | # ret = example_convert_to_torch(ret)
175 | # return ret
--------------------------------------------------------------------------------
/mmdet/datasets/voc.py:
--------------------------------------------------------------------------------
1 | from .xml_style import XMLDataset
2 |
3 |
4 | class VOCDataset(XMLDataset):
5 |
6 | CLASSES = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car',
7 | 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
8 | 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train',
9 | 'tvmonitor')
10 |
11 | def __init__(self, **kwargs):
12 | super(VOCDataset, self).__init__(**kwargs)
13 | if 'VOC2007' in self.img_prefix:
14 | self.year = 2007
15 | elif 'VOC2012' in self.img_prefix:
16 | self.year = 2012
17 | else:
18 | raise ValueError('Cannot infer dataset year from img_prefix')
--------------------------------------------------------------------------------
/mmdet/datasets/xml_style.py:
--------------------------------------------------------------------------------
1 | import os.path as osp
2 | import xml.etree.ElementTree as ET
3 |
4 | import mmcv
5 | import numpy as np
6 |
7 | from .custom import CustomDataset
8 |
9 |
10 | class XMLDataset(CustomDataset):
11 |
12 | def __init__(self, **kwargs):
13 | super(XMLDataset, self).__init__(**kwargs)
14 | self.cat2label = {cat: i + 1 for i, cat in enumerate(self.CLASSES)}
15 |
16 | def load_annotations(self, ann_file):
17 | img_infos = []
18 | img_ids = mmcv.list_from_file(ann_file)
19 | for img_id in img_ids:
20 | filename = 'JPEGImages/{}.jpg'.format(img_id)
21 | xml_path = osp.join(self.img_prefix, 'Annotations',
22 | '{}.xml'.format(img_id))
23 | tree = ET.parse(xml_path)
24 | root = tree.getroot()
25 | size = root.find('size')
26 | width = int(size.find('width').text)
27 | height = int(size.find('height').text)
28 | img_infos.append(
29 | dict(id=img_id, filename=filename, width=width, height=height))
30 | return img_infos
31 |
32 | def get_ann_info(self, idx):
33 | img_id = self.img_infos[idx]['id']
34 | xml_path = osp.join(self.img_prefix, 'Annotations',
35 | '{}.xml'.format(img_id))
36 | tree = ET.parse(xml_path)
37 | root = tree.getroot()
38 | bboxes = []
39 | labels = []
40 | bboxes_ignore = []
41 | labels_ignore = []
42 | for obj in root.findall('object'):
43 | name = obj.find('name').text
44 | label = self.cat2label[name]
45 | difficult = int(obj.find('difficult').text)
46 | bnd_box = obj.find('bndbox')
47 | bbox = [
48 | int(bnd_box.find('xmin').text),
49 | int(bnd_box.find('ymin').text),
50 | int(bnd_box.find('xmax').text),
51 | int(bnd_box.find('ymax').text)
52 | ]
53 | if difficult:
54 | bboxes_ignore.append(bbox)
55 | labels_ignore.append(label)
56 | else:
57 | bboxes.append(bbox)
58 | labels.append(label)
59 | if not bboxes:
60 | bboxes = np.zeros((0, 4))
61 | labels = np.zeros((0, ))
62 | else:
63 | bboxes = np.array(bboxes, ndmin=2) - 1
64 | labels = np.array(labels)
65 | if not bboxes_ignore:
66 | bboxes_ignore = np.zeros((0, 4))
67 | labels_ignore = np.zeros((0, ))
68 | else:
69 | bboxes_ignore = np.array(bboxes_ignore, ndmin=2) - 1
70 | labels_ignore = np.array(labels_ignore)
71 | ann = dict(
72 | bboxes=bboxes.astype(np.float32),
73 | labels=labels.astype(np.int64),
74 | bboxes_ignore=bboxes_ignore.astype(np.float32),
75 | labels_ignore=labels_ignore.astype(np.int64))
76 | return ann
--------------------------------------------------------------------------------
/mmdet/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .detectors import (BaseDetector,RPN)
2 | from .builder import (build_neck, build_rpn_head, build_roi_extractor,build_backbone,
3 | build_bbox_head, build_mask_head, build_detector)
4 |
5 | __all__ = [
6 | 'BaseDetector', 'RPN', 'build_backbone', 'build_neck', 'build_rpn_head',
7 | 'build_roi_extractor', 'build_bbox_head', 'build_mask_head',
8 | 'build_detector'
9 | ]
10 |
--------------------------------------------------------------------------------
/mmdet/models/backbones/__init__.py:
--------------------------------------------------------------------------------
1 | from .resnet import ResNet
2 | from .vxnet import *
3 | from .pillar import *
4 | __all__ = ['ResNet','VoxelFeatNet','SimpleVoxel', 'PillarFeatureNet', 'PointPillarsScatter']
5 |
--------------------------------------------------------------------------------
/mmdet/models/backbones/pillar.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 | from torch.nn import functional as F
4 | from ..utils import change_default_args, Empty, get_paddings_indicator
5 |
6 |
7 | class PFNLayer(nn.Module):
8 | def __init__(self,
9 | in_channels,
10 | out_channels,
11 | use_norm=True,
12 | last_layer=False):
13 | super(PFNLayer, self).__init__()
14 | self.name = 'PFNLayer'
15 | self.last_vfe = last_layer
16 | if not self.last_vfe:
17 | out_channels = out_channels // 2
18 | self.units = out_channels
19 |
20 | if use_norm:
21 | BatchNorm1d = change_default_args(eps=1e-3, momentum=0.01)(nn.BatchNorm1d)
22 | Linear = change_default_args(bias=False)(nn.Linear)
23 | else:
24 | BatchNorm1d = Empty
25 | Linear = change_default_args(bias=True)(nn.Linear)
26 |
27 | self.linear = Linear(in_channels, self.units)
28 | self.norm = BatchNorm1d(self.units)
29 |
30 | def forward(self, inputs):
31 |
32 | x = self.linear(inputs)
33 | x = self.norm(x.permute(0, 2, 1).contiguous()).permute(0, 2, 1).contiguous()
34 | x = F.relu(x)
35 | x_max = torch.max(x, dim=1, keepdim=True)[0]
36 |
37 | if self.last_vfe:
38 | return x_max
39 | else:
40 | x_repeat = x_max.repeat(1, inputs.shape[1], 1)
41 | x_concatenated = torch.cat([x, x_repeat], dim=2)
42 | return x_concatenated
43 |
44 |
45 | class PillarFeatureNet(nn.Module):
46 | def __init__(self,
47 | num_input_features=4,
48 | use_norm=True,
49 | num_filters=(64,),
50 | with_distance=False,
51 | voxel_size=(0.2, 0.2, 4),
52 | pc_range=(0, -40, -3, 70.4, 40, 1)
53 | ):
54 | super(PillarFeatureNet, self).__init__()
55 | self.name = 'PillarFeatureNet'
56 | assert len(num_filters) > 0
57 | num_input_features += 5
58 | if with_distance:
59 | num_input_features += 1
60 | self._with_distance = with_distance
61 |
62 | # Create PillarFeatureNet layers
63 | num_filters = [num_input_features] + list(num_filters)
64 | pfn_layers = []
65 | for i in range(len(num_filters) - 1):
66 | in_filters = num_filters[i]
67 | out_filters = num_filters[i + 1]
68 | if i < len(num_filters) - 2:
69 | last_layer = False
70 | else:
71 | last_layer = True
72 | pfn_layers.append(PFNLayer(in_filters, out_filters, use_norm, last_layer=last_layer))
73 | self.pfn_layers = nn.ModuleList(pfn_layers)
74 |
75 | # Need pillar (voxel) size and x/y offset in order to calculate pillar offset
76 | self.vx = voxel_size[0]
77 | self.vy = voxel_size[1]
78 | self.x_offset = self.vx / 2 + pc_range[0]
79 | self.y_offset = self.vy / 2 + pc_range[1]
80 |
81 | nx = int((pc_range[3] - pc_range[0]) / self.vx)
82 | ny = int((pc_range[4] - pc_range[1]) / self.vy)
83 | self.scatter = PointPillarsScatter(nx, ny)
84 |
85 | def forward(self, features, coors, num_voxels, batch_size):
86 |
87 | # Find distance of x, y, and z from cluster center
88 | points_mean = features[:, :, :3].sum(dim=1, keepdim=True) / num_voxels.type_as(features).view(-1, 1, 1)
89 | f_cluster = features[:, :, :3] - points_mean
90 |
91 | # Find distance of x, y, and z from pillar center
92 | f_center = torch.zeros_like(features[:, :, :2])
93 | f_center[:, :, 0] = features[:, :, 0] - (coors[:, 3].float().unsqueeze(1) * self.vx + self.x_offset)
94 | f_center[:, :, 1] = features[:, :, 1] - (coors[:, 2].float().unsqueeze(1) * self.vy + self.y_offset)
95 |
96 | # Combine together feature decorations
97 | features_ls = [features, f_cluster, f_center]
98 | if self._with_distance:
99 | points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True)
100 | features_ls.append(points_dist)
101 | features = torch.cat(features_ls, dim=-1)
102 |
103 | # The feature decorations were calculated without regard to whether pillar was empty. Need to ensure that
104 | # empty pillars remain set to zeros.
105 | voxel_count = features.shape[1]
106 | mask = get_paddings_indicator(num_voxels, voxel_count, axis=0)
107 | mask = torch.unsqueeze(mask, -1).type_as(features)
108 | features *= mask
109 |
110 | # Forward pass through PFNLayers
111 | for pfn in self.pfn_layers:
112 | features = pfn(features)
113 |
114 | return self.scatter(features.squeeze(), coors, batch_size)
115 |
116 |
117 | class PointPillarsScatter(nn.Module):
118 | def __init__(self,
119 | nx, ny,
120 | num_input_features=64):
121 | """
122 | Point Pillar's Scatter.
123 | Converts learned features from dense tensor to sparse pseudo image. This replaces SECOND's
124 | second.pytorch.voxelnet.SparseMiddleExtractor.
125 | :param output_shape: ([int]: 4). Required output shape of features.
126 | :param num_input_features: . Number of input features.
127 | """
128 | super(PointPillarsScatter, self).__init__()
129 | self.name = 'PointPillarsScatter'
130 | self.nx = nx
131 | self.ny = ny
132 | self.nchannels = num_input_features
133 |
134 | def forward(self, voxel_features, coords, batch_size):
135 | # batch_canvas will be the final output.
136 | batch_canvas = []
137 | for batch_itt in range(batch_size):
138 | # Create the canvas for this sample
139 | canvas = torch.zeros(self.nchannels, self.nx * self.ny, dtype=voxel_features.dtype,
140 | device=voxel_features.device)
141 |
142 | # Only include non-empty pillars
143 | batch_mask = coords[:, 0] == batch_itt
144 | this_coords = coords[batch_mask, :]
145 | indices = this_coords[:, 2] * self.nx + this_coords[:, 3]
146 | indices = indices.type(torch.long)
147 | voxels = voxel_features[batch_mask, :]
148 | voxels = voxels.t()
149 |
150 | # Now scatter the blob back to the canvas.
151 | canvas[:, indices] = voxels
152 |
153 | # Append to a list for later stacking.
154 | batch_canvas.append(canvas)
155 |
156 | # Stack to 3-dim tensor (batch-size, nchannels, nrows*ncols)
157 | batch_canvas = torch.stack(batch_canvas, 0)
158 |
159 | # Undo the column stacking to final 4-dim tensor
160 | batch_canvas = batch_canvas.view(batch_size, self.nchannels, self.ny, self.nx)
161 |
162 | return batch_canvas
163 |
--------------------------------------------------------------------------------
/mmdet/models/backbones/vxnet.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 | from torch.nn import functional as F
4 | from ..utils import change_default_args, Empty, get_paddings_indicator
5 |
6 |
7 | class VFELayer(nn.Module):
8 | def __init__(self, in_channels, out_channels, use_norm=True, name='vfe'):
9 | super(VFELayer, self).__init__()
10 | self.name = name
11 | self.units = int(out_channels / 2)
12 | if use_norm:
13 | BatchNorm1d = change_default_args(
14 | eps=1e-3, momentum=0.01)(nn.BatchNorm1d)
15 | Linear = change_default_args(bias=False)(nn.Linear)
16 | else:
17 | BatchNorm1d = Empty
18 | Linear = change_default_args(bias=True)(nn.Linear)
19 | self.linear = Linear(in_channels, self.units)
20 | self.norm = BatchNorm1d(self.units)
21 |
22 | def forward(self, inputs):
23 | # [K, T, 7] tensordot [7, units] = [K, T, units]
24 | voxel_count = inputs.shape[1]
25 | x = self.linear(inputs)
26 | x = self.norm(x.permute(0, 2, 1).contiguous()).permute(0, 2,
27 | 1).contiguous()
28 | pointwise = F.relu(x)
29 | # [K, T, units]
30 |
31 | aggregated = torch.max(pointwise, dim=1, keepdim=True)[0]
32 | # [K, 1, units]
33 | repeated = aggregated.repeat(1, voxel_count, 1)
34 |
35 | concatenated = torch.cat([pointwise, repeated], dim=2)
36 | # [K, T, 2 * units]
37 | return concatenated
38 |
39 | class VoxelFeatNet(nn.Module):
40 | def __init__(self,
41 | num_input_features=4,
42 | use_norm=True,
43 | num_filters=[32, 128],
44 | with_distance=False,
45 | name='VoxelFeatureExtractor'):
46 | super(VoxelFeatNet, self).__init__()
47 | self.name = name
48 | if use_norm:
49 | BatchNorm1d = change_default_args(
50 | eps=1e-3, momentum=0.01)(nn.BatchNorm1d)
51 | Linear = change_default_args(bias=False)(nn.Linear)
52 | else:
53 | BatchNorm1d = Empty
54 | Linear = change_default_args(bias=True)(nn.Linear)
55 | assert len(num_filters) == 2
56 | num_input_features += 3 # add mean features
57 | if with_distance:
58 | num_input_features += 1
59 | self._with_distance = with_distance
60 | self.vfe1 = VFELayer(num_input_features, num_filters[0], use_norm)
61 | self.vfe2 = VFELayer(num_filters[0], num_filters[1], use_norm)
62 | self.linear = Linear(num_filters[1], num_filters[1])
63 | # var_torch_init(self.linear.weight)
64 | # var_torch_init(self.linear.bias)
65 | self.norm = BatchNorm1d(num_filters[1])
66 |
67 | def init_weights(self, pretrained=None):
68 | pass
69 |
70 | def forward(self, features, num_voxels):
71 | # features: [concated_num_points, num_voxel_size, 3(4)]
72 | # num_voxels: [concated_num_points]
73 | points_mean = features[:, :, :3].sum(
74 | dim=1, keepdim=True) / num_voxels.type_as(features).view(-1, 1, 1)
75 | features_relative = features[:, :, :3] - points_mean
76 | if self._with_distance:
77 | points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True)
78 | features = torch.cat(
79 | [features, features_relative, points_dist], dim=-1)
80 | else:
81 | features = torch.cat([features, features_relative], dim=-1)
82 | voxel_count = features.shape[1]
83 | mask = get_paddings_indicator(num_voxels, voxel_count, axis=0)
84 | mask = torch.unsqueeze(mask, -1).type_as(features)
85 | # mask = features.max(dim=2, keepdim=True)[0] != 0
86 | x = self.vfe1(features)
87 | x *= mask
88 | x = self.vfe2(x)
89 | x *= mask
90 | x = self.linear(x)
91 | x = self.norm(x.permute(0, 2, 1).contiguous()).permute(0, 2,
92 | 1).contiguous()
93 | x = F.relu(x)
94 | x *= mask
95 | # x: [concated_num_points, num_voxel_size, 128]
96 | voxelwise = torch.max(x, dim=1)[0]
97 | return voxelwise
98 |
99 | class SimpleVoxel(nn.Module):
100 | def __init__(self,
101 | num_input_features=4,
102 | use_norm=True,
103 | num_filters=[32, 128],
104 | with_distance=False,
105 | name='VoxelFeatureExtractor'):
106 | super(SimpleVoxel, self).__init__()
107 | self.name = name
108 | self.num_input_features = num_input_features
109 |
110 | def forward(self, features, num_voxels):
111 | #return features
112 | # features: [concated_num_points, num_voxel_size, 3(4)]
113 | # num_voxels: [concated_num_points]
114 | points_mean = features[:, :, :self.num_input_features].sum(
115 | dim=1, keepdim=False) / num_voxels.type_as(features).view(-1, 1)
116 | return points_mean.contiguous()
117 |
118 |
119 |
--------------------------------------------------------------------------------
/mmdet/models/bbox_heads/__init__.py:
--------------------------------------------------------------------------------
1 | from .bbox_head import BBoxHead
2 | from .convfc_bbox_head import ConvFCBBoxHead, SharedFCBBoxHead
3 | from ..single_stage_heads import PSWarpHead
4 | __all__ = ['BBoxHead', 'ConvFCBBoxHead', 'SharedFCBBoxHead', 'PSWarpHead']
5 |
--------------------------------------------------------------------------------
/mmdet/models/builder.py:
--------------------------------------------------------------------------------
1 | from mmcv.runner import obj_from_dict
2 | from torch import nn
3 |
4 | from . import (backbones, necks, roi_extractors, rpn_heads, bbox_heads,
5 | mask_heads, single_stage_heads)
6 |
7 | __all__ = [
8 | 'build_backbone', 'build_neck', 'build_rpn_head', 'build_roi_extractor',
9 | 'build_bbox_head', 'build_mask_head', 'build_single_stage_head','build_detector',
10 | ]
11 |
12 |
13 | def _build_module(cfg, parrent=None, default_args=None):
14 | return cfg if isinstance(cfg, nn.Module) else obj_from_dict(
15 | cfg, parrent, default_args)
16 |
17 |
18 | def build(cfg, parrent=None, default_args=None):
19 | if isinstance(cfg, list):
20 | modules = [_build_module(cfg_, parrent, default_args) for cfg_ in cfg]
21 | return nn.Sequential(*modules)
22 | else:
23 | return _build_module(cfg, parrent, default_args)
24 |
25 |
26 | def build_backbone(cfg):
27 | return build(cfg, backbones)
28 |
29 |
30 | def build_neck(cfg):
31 | return build(cfg, necks)
32 |
33 |
34 | def build_rpn_head(cfg):
35 | return build(cfg, rpn_heads)
36 |
37 |
38 | def build_roi_extractor(cfg):
39 | return build(cfg, roi_extractors)
40 |
41 |
42 | def build_bbox_head(cfg):
43 | return build(cfg, bbox_heads)
44 |
45 |
46 | def build_mask_head(cfg):
47 | return build(cfg, mask_heads)
48 |
49 |
50 | def build_single_stage_head(cfg):
51 | return build(cfg, single_stage_heads)
52 |
53 |
54 | def build_detector(cfg, train_cfg=None, test_cfg=None):
55 | from . import detectors
56 | return build(cfg, detectors, dict(train_cfg=train_cfg, test_cfg=test_cfg))
57 |
--------------------------------------------------------------------------------
/mmdet/models/detectors/__init__.py:
--------------------------------------------------------------------------------
1 | from .base import BaseDetector
2 | from .single_stage import SingleStageDetector
3 | from .rpn import RPN
4 | from .pointpillars import PointPillars
5 |
6 | __all__ = [
7 | 'BaseDetector', 'SingleStageDetector', 'RPN', 'PointPillars',
8 | ]
9 |
--------------------------------------------------------------------------------
/mmdet/models/detectors/base.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from abc import ABCMeta, abstractmethod
3 |
4 | import mmcv
5 | import numpy as np
6 | import torch.nn as nn
7 |
8 | from mmdet.core import tensor2imgs, get_classes
9 |
10 |
11 | class BaseDetector(nn.Module):
12 | """Base class for detectors"""
13 |
14 | __metaclass__ = ABCMeta
15 |
16 | def __init__(self):
17 | super(BaseDetector, self).__init__()
18 |
19 | @property
20 | def with_neck(self):
21 | return hasattr(self, 'neck') and self.neck is not None
22 |
23 | @property
24 | def with_bbox(self):
25 | return hasattr(self, 'bbox_head') and self.bbox_head is not None
26 |
27 | @property
28 | def with_mask(self):
29 | return hasattr(self, 'mask_head') and self.mask_head is not None
30 |
31 | @abstractmethod
32 | def extract_feat(self, imgs):
33 | pass
34 |
35 | def extract_feats(self, imgs):
36 | assert isinstance(imgs, list)
37 | for img in imgs:
38 | yield self.extract_feat(img)
39 |
40 | @abstractmethod
41 | def forward_train(self, imgs, img_metas, **kwargs):
42 | pass
43 |
44 | @abstractmethod
45 | def simple_test(self, img, img_meta, **kwargs):
46 | pass
47 |
48 | @abstractmethod
49 | def aug_test(self, imgs, img_metas, **kwargs):
50 | pass
51 |
52 | def init_weights(self, pretrained=None):
53 | if pretrained is not None:
54 | logger = logging.getLogger()
55 | logger.info('load model from: {}'.format(pretrained))
56 |
57 | def forward_test(self, imgs, img_metas, **kwargs):
58 | for var, name in [(imgs, 'imgs'), (img_metas, 'img_metas')]:
59 | if not isinstance(var, list):
60 | raise TypeError('{} must be a list, but got {}'.format(
61 | name, type(var)))
62 |
63 | num_augs = len(imgs)
64 | if num_augs != len(img_metas):
65 | raise ValueError(
66 | 'num of augmentations ({}) != num of image meta ({})'.format(
67 | len(imgs), len(img_metas)))
68 | # TODO: remove the restriction of imgs_per_gpu == 1 when prepared
69 | imgs_per_gpu = imgs[0].size(0)
70 | assert imgs_per_gpu == 1
71 |
72 | if num_augs == 1:
73 | return self.simple_test(imgs[0], img_metas[0], **kwargs)
74 | else:
75 | return self.aug_test(imgs, img_metas, **kwargs)
76 |
77 | def forward(self, img, img_meta, return_loss=True, **kwargs):
78 | if return_loss:
79 | return self.forward_train(img, img_meta, **kwargs)
80 | else:
81 | return self.forward_test(img, img_meta, **kwargs)
82 |
83 | def show_result(self,
84 | data,
85 | result,
86 | img_norm_cfg,
87 | dataset='coco',
88 | score_thr=0.3):
89 | img_tensor = data['img'][0]
90 | img_metas = data['img_meta'][0].data[0]
91 | imgs = tensor2imgs(img_tensor, **img_norm_cfg)
92 | assert len(imgs) == len(img_metas)
93 |
94 | if isinstance(dataset, str):
95 | class_names = get_classes(dataset)
96 | elif isinstance(dataset, list):
97 | class_names = dataset
98 | else:
99 | raise TypeError('dataset must be a valid dataset name or a list'
100 | ' of class names, not {}'.format(type(dataset)))
101 |
102 | for img, img_meta in zip(imgs, img_metas):
103 | h, w, _ = img_meta['img_shape']
104 | img_show = img[:h, :w, :]
105 | labels = [
106 | np.full(bbox.shape[0], i, dtype=np.int32)
107 | for i, bbox in enumerate(result)
108 | ]
109 | labels = np.concatenate(labels)
110 | bboxes = np.vstack(result)
111 | mmcv.imshow_det_bboxes(
112 | img_show,
113 | bboxes,
114 | labels,
115 | class_names=class_names,
116 | score_thr=score_thr)
117 |
--------------------------------------------------------------------------------
/mmdet/models/detectors/pointpillars.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import torch
3 | from .. import builder
4 | from mmcv.runner import load_checkpoint
5 | from .base import BaseDetector
6 | import torch.nn.functional as F
7 |
8 | class PointPillars(BaseDetector):
9 |
10 | def __init__(self,
11 | backbone,
12 | neck,
13 | rpn_head=None,
14 | bbox_head=None,
15 | rcnn_head=None,
16 | train_cfg=None,
17 | test_cfg=None,
18 | pretrained=None):
19 | super(PointPillars, self).__init__()
20 | self.backbone = builder.build_backbone(backbone)
21 | self.neck = builder.build_neck(neck)
22 | self.bbox_head = builder.build_single_stage_head(bbox_head)
23 |
24 | if rpn_head is not None:
25 | self.rpn_head = builder.build_rpn_head(rpn_head)
26 |
27 | self.train_cfg = train_cfg
28 | self.test_cfg = test_cfg
29 |
30 | if rcnn_head is not None:
31 | self.rcnn_head = builder.build_bbox_head(rcnn_head)
32 |
33 | self.init_weights(pretrained=pretrained)
34 |
35 | @property
36 | def with_rpn(self):
37 | return hasattr(self, 'rpn_head') and self.rpn_head is not None
38 |
39 | def init_weights(self, pretrained=None):
40 | if isinstance(pretrained, str):
41 | logger = logging.getLogger()
42 | load_checkpoint(self, pretrained, strict=False, logger=logger)
43 | def freeze_layers(self, model):
44 | for param in model.parameters():
45 | param.requires_grad = False
46 |
47 | def merge_second_batch(self, batch_args):
48 | ret = {}
49 | for key, elems in batch_args.items():
50 | if key in [
51 | 'voxels', 'num_points',
52 | ]:
53 | ret[key] = torch.cat(elems, dim=0)
54 | elif key == 'coordinates':
55 | coors = []
56 | for i, coor in enumerate(elems):
57 | coor_pad = F.pad(
58 | coor, [1, 0, 0, 0],
59 | mode='constant',
60 | value=i)
61 | coors.append(coor_pad)
62 | ret[key] = torch.cat(coors, dim=0)
63 | elif key in [
64 | 'img_meta', 'gt_labels', 'gt_bboxes',
65 | ]:
66 | ret[key] = elems
67 | else:
68 | ret[key] = torch.stack(elems, dim=0)
69 | return ret
70 |
71 | def forward_train(self, img, img_meta, **kwargs):
72 |
73 | batch_size = len(img_meta)
74 | ret = self.merge_second_batch(kwargs)
75 |
76 | losses = dict()
77 |
78 | canvas = self.backbone(ret['voxels'], ret['coordinates'], ret['num_points'], batch_size)
79 |
80 | x = self.neck(canvas)
81 |
82 | bbox_outs = self.bbox_head(x)
83 | bbox_loss_inputs = bbox_outs + (ret['gt_bboxes'], ret['gt_labels'], ret['anchors'], ret['anchors_mask'], self.train_cfg)
84 | bbox_losses = self.bbox_head.loss(*bbox_loss_inputs)
85 | losses.update(bbox_losses)
86 |
87 | return losses
88 |
89 | def forward_test(self, img, img_meta, **kwargs):
90 |
91 | batch_size = len(img_meta)
92 | ret = self.merge_second_batch(kwargs)
93 | canvas = self.backbone(ret['voxels'], ret['coordinates'], ret['num_points'], batch_size)
94 | x = self.neck(canvas)
95 |
96 | rpn_outs = self.bbox_head.forward(x)
97 | proposal_inputs = rpn_outs + (ret['anchors'], ret['anchors_mask'], img_meta, self.test_cfg)
98 |
99 | return self.bbox_head.get_det_bboxes_nms(*proposal_inputs)
100 |
101 |
102 |
103 |
104 |
105 |
106 |
--------------------------------------------------------------------------------
/mmdet/models/detectors/rpn.py:
--------------------------------------------------------------------------------
1 | import mmcv
2 |
3 | from mmdet.core import tensor2imgs, bbox_mapping
4 | from .base import BaseDetector
5 | from .test_mixins import RPNTestMixin
6 | from .. import builder
7 |
8 |
9 | class RPN(BaseDetector, RPNTestMixin):
10 |
11 | def __init__(self,
12 | backbone,
13 | neck,
14 | rpn_head,
15 | train_cfg,
16 | test_cfg,
17 | pretrained=None):
18 | super(RPN, self).__init__()
19 | self.backbone = builder.build_backbone(backbone)
20 | self.neck = builder.build_neck(neck) if neck is not None else None
21 | self.rpn_head = builder.build_rpn_head(rpn_head)
22 | self.train_cfg = train_cfg
23 | self.test_cfg = test_cfg
24 | self.init_weights(pretrained=pretrained)
25 |
26 | def init_weights(self, pretrained=None):
27 | super(RPN, self).init_weights(pretrained)
28 | self.backbone.init_weights(pretrained=pretrained)
29 | if self.with_neck:
30 | self.neck.init_weights()
31 | self.rpn_head.init_weights()
32 |
33 | def extract_feat(self, img):
34 | x = self.backbone(img)
35 | if self.with_neck:
36 | x = self.neck(x)
37 | return x
38 |
39 | def forward_train(self, img, img_meta, gt_bboxes=None):
40 | if self.train_cfg.rpn.get('debug', False):
41 | self.rpn_head.debug_imgs = tensor2imgs(img)
42 |
43 | x = self.extract_feat(img)
44 | rpn_outs = self.rpn_head(x)
45 |
46 | rpn_loss_inputs = rpn_outs + (gt_bboxes, img_meta, self.train_cfg.rpn)
47 | losses = self.rpn_head.loss(*rpn_loss_inputs)
48 | return losses
49 |
50 | def simple_test(self, img, img_meta, rescale=False):
51 | x = self.extract_feat(img)
52 | proposal_list = self.simple_test_rpn(x, img_meta, self.test_cfg.rpn)
53 | if rescale:
54 | for proposals, meta in zip(proposal_list, img_meta):
55 | proposals[:, :4] /= meta['scale_factor']
56 | # TODO: remove this restriction
57 | return proposal_list[0].cpu().numpy()
58 |
59 | def aug_test(self, imgs, img_metas, rescale=False):
60 | proposal_list = self.aug_test_rpn(
61 | self.extract_feats(imgs), img_metas, self.test_cfg.rpn)
62 | if not rescale:
63 | for proposals, img_meta in zip(proposal_list, img_metas[0]):
64 | img_shape = img_meta['img_shape']
65 | scale_factor = img_meta['scale_factor']
66 | flip = img_meta['flip']
67 | proposals[:, :4] = bbox_mapping(proposals[:, :4], img_shape,
68 | scale_factor, flip)
69 | # TODO: remove this restriction
70 | return proposal_list[0].cpu().numpy()
71 |
72 | def show_result(self, data, result, img_norm_cfg):
73 | """Show RPN proposals on the image.
74 |
75 | Although we assume batch size is 1, this method supports arbitrary
76 | batch size.
77 | """
78 | img_tensor = data['img'][0]
79 | img_metas = data['img_meta'][0].data[0]
80 | imgs = tensor2imgs(img_tensor, **img_norm_cfg)
81 | assert len(imgs) == len(img_metas)
82 | for img, img_meta in zip(imgs, img_metas):
83 | h, w, _ = img_meta['img_shape']
84 | img_show = img[:h, :w, :]
85 | mmcv.imshow_bboxes(img_show, result, top_k=20)
86 |
--------------------------------------------------------------------------------
/mmdet/models/detectors/single_stage.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import logging
4 | from mmcv.runner import load_checkpoint
5 | from .base import BaseDetector
6 | from .test_mixins import RPNTestMixin, BBoxTestMixin, MaskTestMixin
7 | from .. import builder
8 | from mmdet.core import (assign_and_sample, bbox2roi, rbbox2roi, bbox2result, multi_apply, kitti_bbox2results,\
9 | tensor2points, delta2rbbox3d, weighted_binary_cross_entropy)
10 | import torch.nn.functional as F
11 |
12 |
13 | class SingleStageDetector(BaseDetector, RPNTestMixin, BBoxTestMixin,
14 | MaskTestMixin):
15 |
16 | def __init__(self,
17 | backbone,
18 | neck=None,
19 | bbox_head=None,
20 | extra_head=None,
21 | train_cfg=None,
22 | test_cfg=None,
23 | pretrained=None):
24 | super(SingleStageDetector, self).__init__()
25 | self.backbone = builder.build_backbone(backbone)
26 |
27 | if neck is not None:
28 | self.neck = builder.build_neck(neck)
29 | else:
30 | raise NotImplementedError
31 |
32 | if bbox_head is not None:
33 | self.rpn_head = builder.build_single_stage_head(bbox_head)
34 |
35 | if extra_head is not None:
36 | self.extra_head = builder.build_single_stage_head(extra_head)
37 |
38 | self.train_cfg = train_cfg
39 | self.test_cfg = test_cfg
40 |
41 | self.init_weights(pretrained)
42 |
43 | @property
44 | def with_rpn(self):
45 | return hasattr(self, 'rpn_head') and self.rpn_head is not None
46 |
47 | def init_weights(self, pretrained=None):
48 | if isinstance(pretrained, str):
49 | logger = logging.getLogger()
50 | load_checkpoint(self, pretrained, strict=False, logger=logger)
51 |
52 | def merge_second_batch(self, batch_args):
53 | ret = {}
54 | for key, elems in batch_args.items():
55 | if key in ['voxels', 'num_points', ]:
56 | ret[key] = torch.cat(elems, dim=0)
57 | elif key in ['coordinates', ]:
58 | coors = []
59 | for i, coor in enumerate(elems):
60 | coor_pad = F.pad(
61 | coor, [1, 0, 0, 0],
62 | mode='constant',
63 | value=i)
64 | coors.append(coor_pad)
65 | ret[key] = torch.cat(coors, dim=0)
66 | elif key in ['img_meta', 'gt_labels', 'gt_bboxes', 'gt_types', ]:
67 | ret[key] = elems
68 | else:
69 | if isinstance(elems, dict):
70 | ret[key] = {k: torch.stack(v, dim=0) for k, v in elems.items()}
71 | else:
72 | ret[key] = torch.stack(elems, dim=0)
73 | return ret
74 |
75 | def forward_train(self, img, img_meta, **kwargs):
76 |
77 | batch_size = len(img_meta)
78 |
79 | ret = self.merge_second_batch(kwargs)
80 |
81 | vx = self.backbone(ret['voxels'], ret['num_points'])
82 | x, conv6, point_misc = self.neck(vx, ret['coordinates'], batch_size, is_test=False)
83 |
84 | losses = dict()
85 |
86 | aux_loss = self.neck.aux_loss(*point_misc, gt_bboxes=ret['gt_bboxes'])
87 | losses.update(aux_loss)
88 |
89 | # RPN forward and loss
90 | if self.with_rpn:
91 | rpn_outs = self.rpn_head(x)
92 | rpn_loss_inputs = rpn_outs + (ret['gt_bboxes'], ret['gt_labels'], ret['gt_types'],\
93 | ret['anchors'], ret['anchors_mask'], self.train_cfg.rpn)
94 | rpn_losses = self.rpn_head.loss(*rpn_loss_inputs)
95 | losses.update(rpn_losses)
96 | guided_anchors, _ = self.rpn_head.get_guided_anchors(*rpn_outs, ret['anchors'],\
97 | ret['anchors_mask'], ret['gt_bboxes'], ret['gt_labels'], thr=self.train_cfg.rpn.anchor_thr)
98 | else:
99 | raise NotImplementedError
100 |
101 | # bbox head forward and loss
102 | if self.extra_head:
103 | bbox_score = self.extra_head(conv6, guided_anchors)
104 | refine_loss_inputs = (bbox_score, ret['gt_bboxes'], ret['gt_labels'], guided_anchors, self.train_cfg.extra)
105 | refine_losses = self.extra_head.loss(*refine_loss_inputs)
106 | losses.update(refine_losses)
107 |
108 | return losses
109 |
110 | def forward_test(self, img, img_meta, **kwargs):
111 |
112 | batch_size = len(img_meta)
113 |
114 | ret = self.merge_second_batch(kwargs)
115 |
116 | vx = self.backbone(ret['voxels'], ret['num_points'])
117 | (x, conv6) = self.neck(vx, ret['coordinates'], batch_size, is_test=True)
118 |
119 | rpn_outs = self.rpn_head.forward(x)
120 |
121 | guided_anchors, anchor_labels = self.rpn_head.get_guided_anchors(*rpn_outs, ret['anchors'], ret['anchors_mask'],
122 | None, None, thr=.1)
123 |
124 | bbox_score = self.extra_head(conv6, guided_anchors, is_test=True)
125 |
126 | det_bboxes, det_scores, det_labels = self.extra_head.get_rescore_bboxes(
127 | guided_anchors, bbox_score, anchor_labels, img_meta, self.test_cfg.extra)
128 |
129 | results = [kitti_bbox2results(*param, class_names=self.class_names) for param in zip(det_bboxes, det_scores, det_labels, img_meta)]
130 |
131 | return results
132 |
133 |
134 |
135 |
--------------------------------------------------------------------------------
/mmdet/models/detectors/test_mixins.py:
--------------------------------------------------------------------------------
1 | from mmdet.core import (bbox2roi, bbox_mapping, merge_aug_proposals,
2 | merge_aug_bboxes, merge_aug_masks)
3 |
4 | import numpy as np
5 |
6 | class RPNTestMixin(object):
7 |
8 | def simple_test_rpn(self, x, img_meta, rpn_test_cfg):
9 | rpn_outs = self.rpn_head(x)
10 | proposal_inputs = rpn_outs + (img_meta, rpn_test_cfg)
11 | proposal_list = self.rpn_head.get_proposals(*proposal_inputs)
12 | return proposal_list
13 |
14 | def aug_test_rpn(self, feats, img_metas, rpn_test_cfg):
15 | imgs_per_gpu = len(img_metas[0])
16 | aug_proposals = [[] for _ in range(imgs_per_gpu)]
17 | for x, img_meta in zip(feats, img_metas):
18 | proposal_list = self.simple_test_rpn(x, img_meta, rpn_test_cfg)
19 | for i, proposals in enumerate(proposal_list):
20 | aug_proposals[i].append(proposals)
21 | # after merging, proposals will be rescaled to the original image size
22 | merged_proposals = [
23 | merge_aug_proposals(proposals, img_meta, rpn_test_cfg)
24 | for proposals, img_meta in zip(aug_proposals, img_metas)
25 | ]
26 | return merged_proposals
27 |
28 |
29 | class BBoxTestMixin(object):
30 |
31 | def simple_test_bboxes(self,
32 | x,
33 | img_meta,
34 | proposals,
35 | rcnn_test_cfg,
36 | rescale=False):
37 | """Test only det bboxes without augmentation."""
38 | rois = bbox2roi(proposals)
39 | roi_feats = self.bbox_roi_extractor(
40 | x[:len(self.bbox_roi_extractor.featmap_strides)], rois)
41 | cls_score, bbox_pred = self.bbox_head(roi_feats)
42 | img_shape = img_meta[0]['img_shape']
43 | scale_factor = img_meta[0]['scale_factor']
44 | det_bboxes, det_labels = self.bbox_head.get_det_bboxes_nms(
45 | rois,
46 | cls_score,
47 | bbox_pred,
48 | img_shape,
49 | scale_factor,
50 | rescale=rescale,
51 | cfg=rcnn_test_cfg)
52 | return det_bboxes, det_labels
53 |
54 | def aug_test_bboxes(self, feats, img_metas, proposal_list, rcnn_test_cfg):
55 | aug_bboxes = []
56 | aug_scores = []
57 | for x, img_meta in zip(feats, img_metas):
58 | # only one image in the batch
59 | img_shape = img_meta[0]['img_shape']
60 | scale_factor = img_meta[0]['scale_factor']
61 | flip = img_meta[0]['flip']
62 | # TODO more flexible
63 | proposals = bbox_mapping(proposal_list[0][:, :4], img_shape,
64 | scale_factor, flip)
65 | rois = bbox2roi([proposals])
66 | # recompute feature maps to save GPU memory
67 | roi_feats = self.bbox_roi_extractor(
68 | x[:len(self.bbox_roi_extractor.featmap_strides)], rois)
69 | cls_score, bbox_pred = self.bbox_head(roi_feats)
70 | bboxes, scores = self.bbox_head.get_det_bboxes_nms(
71 | rois,
72 | cls_score,
73 | bbox_pred,
74 | img_shape,
75 | scale_factor,
76 | rescale=False,
77 | cfg=None)
78 | aug_bboxes.append(bboxes)
79 | aug_scores.append(scores)
80 | # after merging, bboxes will be rescaled to the original image size
81 | merged_bboxes, merged_scores = merge_aug_bboxes(
82 | aug_bboxes, aug_scores, img_metas, rcnn_test_cfg)
83 | det_bboxes, det_labels = multiclass_nms(
84 | merged_bboxes, merged_scores, rcnn_test_cfg.score_thr,
85 | rcnn_test_cfg.nms, rcnn_test_cfg.max_per_img)
86 | return det_bboxes, det_labels
87 |
88 | class MaskTestMixin(object):
89 |
90 | def simple_test_mask(self,
91 | x,
92 | img_meta,
93 | det_bboxes,
94 | det_labels,
95 | rescale=False):
96 | # image shape of the first image in the batch (only one)
97 | ori_shape = img_meta[0]['ori_shape']
98 | scale_factor = img_meta[0]['scale_factor']
99 | if det_bboxes.shape[0] == 0:
100 | segm_result = [[] for _ in range(self.mask_head.num_classes - 1)]
101 | else:
102 | # if det_bboxes is rescaled to the original image size, we need to
103 | # rescale it back to the testing scale to obtain RoIs.
104 | _bboxes = (det_bboxes[:, :4] * scale_factor
105 | if rescale else det_bboxes)
106 | mask_rois = bbox2roi([_bboxes])
107 | mask_feats = self.mask_roi_extractor(
108 | x[:len(self.mask_roi_extractor.featmap_strides)], mask_rois)
109 | mask_pred = self.mask_head(mask_feats)
110 | segm_result = self.mask_head.get_seg_masks(
111 | mask_pred, _bboxes, det_labels, self.test_cfg.rcnn, ori_shape,
112 | scale_factor, rescale)
113 | return segm_result
114 |
115 | def aug_test_mask(self, feats, img_metas, det_bboxes, det_labels):
116 | if det_bboxes.shape[0] == 0:
117 | segm_result = [[] for _ in range(self.mask_head.num_classes - 1)]
118 | else:
119 | aug_masks = []
120 | for x, img_meta in zip(feats, img_metas):
121 | img_shape = img_meta[0]['img_shape']
122 | scale_factor = img_meta[0]['scale_factor']
123 | flip = img_meta[0]['flip']
124 | _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape,
125 | scale_factor, flip)
126 | mask_rois = bbox2roi([_bboxes])
127 | mask_feats = self.mask_roi_extractor(
128 | x[:len(self.mask_roi_extractor.featmap_strides)],
129 | mask_rois)
130 | mask_pred = self.mask_head(mask_feats)
131 | # convert to numpy array to save memory
132 | aug_masks.append(mask_pred.sigmoid().cpu().numpy())
133 | merged_masks = merge_aug_masks(aug_masks, img_metas,
134 | self.test_cfg.rcnn)
135 |
136 | ori_shape = img_metas[0][0]['ori_shape']
137 | segm_result = self.mask_head.get_seg_masks(
138 | merged_masks,
139 | det_bboxes,
140 | det_labels,
141 | self.test_cfg.rcnn,
142 | ori_shape,
143 | scale_factor=1.0,
144 | rescale=False)
145 | return segm_result
146 |
--------------------------------------------------------------------------------
/mmdet/models/mask_heads/__init__.py:
--------------------------------------------------------------------------------
1 | from .fcn_mask_head import FCNMaskHead
2 |
3 | __all__ = ['FCNMaskHead']
4 |
--------------------------------------------------------------------------------
/mmdet/models/mask_heads/fcn_mask_head.py:
--------------------------------------------------------------------------------
1 | import mmcv
2 | import numpy as np
3 | import pycocotools.mask as mask_util
4 | import torch
5 | import torch.nn as nn
6 |
7 | from ..utils import ConvModule
8 | from mmdet.core import mask_cross_entropy, mask_target
9 |
10 |
11 | class FCNMaskHead(nn.Module):
12 |
13 | def __init__(self,
14 | num_convs=4,
15 | roi_feat_size=14,
16 | in_channels=256,
17 | conv_kernel_size=3,
18 | conv_out_channels=256,
19 | upsample_method='deconv',
20 | upsample_ratio=2,
21 | num_classes=81,
22 | class_agnostic=False,
23 | normalize=None):
24 | super(FCNMaskHead, self).__init__()
25 | if upsample_method not in [None, 'deconv', 'nearest', 'bilinear']:
26 | raise ValueError(
27 | 'Invalid upsample method {}, accepted methods '
28 | 'are "deconv", "nearest", "bilinear"'.format(upsample_method))
29 | self.num_convs = num_convs
30 | self.roi_feat_size = roi_feat_size # WARN: not used and reserved
31 | self.in_channels = in_channels
32 | self.conv_kernel_size = conv_kernel_size
33 | self.conv_out_channels = conv_out_channels
34 | self.upsample_method = upsample_method
35 | self.upsample_ratio = upsample_ratio
36 | self.num_classes = num_classes
37 | self.class_agnostic = class_agnostic
38 | self.normalize = normalize
39 | self.with_bias = normalize is None
40 |
41 | self.convs = nn.ModuleList()
42 | for i in range(self.num_convs):
43 | in_channels = (self.in_channels
44 | if i == 0 else self.conv_out_channels)
45 | padding = (self.conv_kernel_size - 1) // 2
46 | self.convs.append(
47 | ConvModule(
48 | in_channels,
49 | self.conv_out_channels,
50 | 3,
51 | padding=padding,
52 | normalize=normalize,
53 | bias=self.with_bias))
54 | if self.upsample_method is None:
55 | self.upsample = None
56 | elif self.upsample_method == 'deconv':
57 | self.upsample = nn.ConvTranspose2d(
58 | self.conv_out_channels,
59 | self.conv_out_channels,
60 | self.upsample_ratio,
61 | stride=self.upsample_ratio)
62 | else:
63 | self.upsample = nn.Upsample(
64 | scale_factor=self.upsample_ratio, mode=self.upsample_method)
65 |
66 | out_channels = 1 if self.class_agnostic else self.num_classes
67 | self.conv_logits = nn.Conv2d(self.conv_out_channels, out_channels, 1)
68 | self.relu = nn.ReLU(inplace=True)
69 | self.debug_imgs = None
70 |
71 | def init_weights(self):
72 | for m in [self.upsample, self.conv_logits]:
73 | if m is None:
74 | continue
75 | nn.init.kaiming_normal_(
76 | m.weight, mode='fan_out', nonlinearity='relu')
77 | nn.init.constant_(m.bias, 0)
78 |
79 | def forward(self, x):
80 | for conv in self.convs:
81 | x = conv(x)
82 | if self.upsample is not None:
83 | x = self.upsample(x)
84 | if self.upsample_method == 'deconv':
85 | x = self.relu(x)
86 | mask_pred = self.conv_logits(x)
87 | return mask_pred
88 |
89 | def get_target(self, sampling_results, gt_masks, rcnn_train_cfg):
90 | pos_proposals = [res.pos_bboxes for res in sampling_results]
91 | pos_assigned_gt_inds = [
92 | res.pos_assigned_gt_inds for res in sampling_results
93 | ]
94 | mask_targets = mask_target(pos_proposals, pos_assigned_gt_inds,
95 | gt_masks, rcnn_train_cfg)
96 | return mask_targets
97 |
98 | def loss(self, mask_pred, mask_targets, labels):
99 | loss = dict()
100 | loss_mask = mask_cross_entropy(mask_pred, mask_targets, labels)
101 | loss['loss_mask'] = loss_mask
102 | return loss
103 |
104 | def get_seg_masks(self, mask_pred, det_bboxes, det_labels, rcnn_test_cfg,
105 | ori_shape, scale_factor, rescale):
106 | """Get segmentation masks from mask_pred and bboxes.
107 |
108 | Args:
109 | mask_pred (Tensor or ndarray): shape (n, #class+1, h, w).
110 | For single-scale testing, mask_pred is the direct output of
111 | model, whose type is Tensor, while for multi-scale testing,
112 | it will be converted to numpy array outside of this method.
113 | det_bboxes (Tensor): shape (n, 4/5)
114 | det_labels (Tensor): shape (n, )
115 | img_shape (Tensor): shape (3, )
116 | rcnn_test_cfg (dict): rcnn testing config
117 | ori_shape: original image size
118 |
119 | Returns:
120 | list[list]: encoded masks
121 | """
122 | if isinstance(mask_pred, torch.Tensor):
123 | mask_pred = mask_pred.sigmoid().cpu().numpy()
124 | assert isinstance(mask_pred, np.ndarray)
125 |
126 | cls_segms = [[] for _ in range(self.num_classes - 1)]
127 | bboxes = det_bboxes.cpu().numpy()[:, :4]
128 | labels = det_labels.cpu().numpy() + 1
129 |
130 | if rescale:
131 | img_h, img_w = ori_shape[:2]
132 | else:
133 | img_h = np.round(ori_shape[0] * scale_factor).astype(np.int32)
134 | img_w = np.round(ori_shape[1] * scale_factor).astype(np.int32)
135 | scale_factor = 1.0
136 |
137 | for i in range(bboxes.shape[0]):
138 | bbox = (bboxes[i, :] / scale_factor).astype(np.int32)
139 | label = labels[i]
140 | w = max(bbox[2] - bbox[0] + 1, 1)
141 | h = max(bbox[3] - bbox[1] + 1, 1)
142 |
143 | if not self.class_agnostic:
144 | mask_pred_ = mask_pred[i, label, :, :]
145 | else:
146 | mask_pred_ = mask_pred[i, 0, :, :]
147 | im_mask = np.zeros((img_h, img_w), dtype=np.uint8)
148 |
149 | bbox_mask = mmcv.imresize(mask_pred_, (w, h))
150 | bbox_mask = (bbox_mask > rcnn_test_cfg.mask_thr_binary).astype(
151 | np.uint8)
152 | im_mask[bbox[1]:bbox[1] + h, bbox[0]:bbox[0] + w] = bbox_mask
153 | rle = mask_util.encode(
154 | np.array(im_mask[:, :, np.newaxis], order='F'))[0]
155 | cls_segms[label - 1].append(rle)
156 |
157 | return cls_segms
158 |
--------------------------------------------------------------------------------
/mmdet/models/necks/__init__.py:
--------------------------------------------------------------------------------
1 | from .fpn import FPN
2 | from .cmn import SpMiddleFHD
3 | from .rpn import RPN
4 | __all__ = ['FPN','SpMiddleFHD','RPN']
5 |
--------------------------------------------------------------------------------
/mmdet/models/necks/fpn.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch.nn.functional as F
3 | from ..utils import ConvModule
4 | from ..utils import xavier_init
5 |
6 |
7 | class FPN(nn.Module):
8 |
9 | def __init__(self,
10 | in_channels,
11 | out_channels,
12 | num_outs,
13 | start_level=0,
14 | end_level=-1,
15 | add_extra_convs=False,
16 | normalize=None,
17 | activation=None):
18 | super(FPN, self).__init__()
19 | assert isinstance(in_channels, list)
20 | self.in_channels = in_channels
21 | self.out_channels = out_channels
22 | self.num_ins = len(in_channels)
23 | self.num_outs = num_outs
24 | self.activation = activation
25 | self.with_bias = normalize is None
26 |
27 | if end_level == -1:
28 | self.backbone_end_level = self.num_ins
29 | assert num_outs >= self.num_ins - start_level
30 | else:
31 | # if end_level < inputs, no extra level is allowed
32 | self.backbone_end_level = end_level
33 | assert end_level <= len(in_channels)
34 | assert num_outs == end_level - start_level
35 | self.start_level = start_level
36 | self.end_level = end_level
37 | self.add_extra_convs = add_extra_convs
38 |
39 | self.lateral_convs = nn.ModuleList()
40 | self.fpn_convs = nn.ModuleList()
41 |
42 | for i in range(self.start_level, self.backbone_end_level):
43 | l_conv = ConvModule(
44 | in_channels[i],
45 | out_channels,
46 | 1,
47 | normalize=normalize,
48 | bias=self.with_bias,
49 | activation=self.activation,
50 | inplace=False)
51 | fpn_conv = ConvModule(
52 | out_channels,
53 | out_channels,
54 | 3,
55 | padding=1,
56 | normalize=normalize,
57 | bias=self.with_bias,
58 | activation=self.activation,
59 | inplace=False)
60 |
61 | self.lateral_convs.append(l_conv)
62 | self.fpn_convs.append(fpn_conv)
63 |
64 | # lvl_id = i - self.start_level
65 | # setattr(self, 'lateral_conv{}'.format(lvl_id), l_conv)
66 | # setattr(self, 'fpn_conv{}'.format(lvl_id), fpn_conv)
67 |
68 | # add extra conv layers (e.g., RetinaNet)
69 | extra_levels = num_outs - self.backbone_end_level + self.start_level
70 | if add_extra_convs and extra_levels >= 1:
71 | for i in range(extra_levels):
72 | in_channels = (self.in_channels[self.backbone_end_level - 1]
73 | if i == 0 else out_channels)
74 | extra_fpn_conv = ConvModule(
75 | in_channels,
76 | out_channels,
77 | 3,
78 | stride=2,
79 | padding=1,
80 | normalize=normalize,
81 | bias=self.with_bias,
82 | activation=self.activation,
83 | inplace=False)
84 | self.fpn_convs.append(extra_fpn_conv)
85 |
86 | # default init_weights for conv(msra) and norm in ConvModule
87 | def init_weights(self):
88 | for m in self.modules():
89 | if isinstance(m, nn.Conv2d):
90 | xavier_init(m, distribution='uniform')
91 |
92 | def forward(self, inputs):
93 | assert len(inputs) == len(self.in_channels)
94 |
95 | # build laterals
96 | laterals = [
97 | lateral_conv(inputs[i + self.start_level])
98 | for i, lateral_conv in enumerate(self.lateral_convs)
99 | ]
100 |
101 | # build top-down path
102 | used_backbone_levels = len(laterals)
103 | for i in range(used_backbone_levels - 1, 0, -1):
104 | laterals[i - 1] += F.interpolate(
105 | laterals[i], scale_factor=2, mode='nearest')
106 |
107 | # build outputs
108 | # part 1: from original levels
109 | outs = [
110 | self.fpn_convs[i](laterals[i]) for i in range(used_backbone_levels)
111 | ]
112 | # part 2: add extra levels
113 | if self.num_outs > len(outs):
114 | # use max pool to get more levels on top of outputs
115 | # (e.g., Faster R-CNN, Mask R-CNN)
116 | if not self.add_extra_convs:
117 | for i in range(self.num_outs - used_backbone_levels):
118 | outs.append(F.max_pool2d(outs[-1], 1, stride=2))
119 | # add conv layers on top of original feature maps (RetinaNet)
120 | else:
121 | orig = inputs[self.backbone_end_level - 1]
122 | outs.append(self.fpn_convs[used_backbone_levels](orig))
123 | for i in range(used_backbone_levels + 1, self.num_outs):
124 | # BUG: we should add relu before each extra conv
125 | outs.append(self.fpn_convs[i](outs[-1]))
126 | return tuple(outs)
127 |
--------------------------------------------------------------------------------
/mmdet/models/necks/rpn.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from torch import nn
3 | from ..utils import Empty, change_default_args, Sequential
4 | import torch
5 |
6 | class RPNBase(nn.Module):
7 | def __init__(self,
8 | use_norm=True,
9 | layer_nums=(3, 5, 5),
10 | layer_strides=(2, 2, 2),
11 | num_filters=(128, 128, 256),
12 | upsample_strides=(1, 2, 4),
13 | num_upsample_filters=(256, 256, 256),
14 | num_input_features=128):
15 |
16 | """upsample_strides support float: [0.25, 0.5, 1]
17 | if upsample_strides < 1, conv2d will be used instead of convtranspose2d.
18 | """
19 | super(RPNBase, self).__init__()
20 | self._layer_strides = layer_strides
21 | self._num_filters = num_filters
22 | self._layer_nums = layer_nums
23 | self._upsample_strides = upsample_strides
24 | self._num_upsample_filters = num_upsample_filters
25 | self._num_input_features = num_input_features
26 | self._use_norm = use_norm
27 |
28 | assert len(layer_strides) == len(layer_nums)
29 | assert len(num_filters) == len(layer_nums)
30 | assert len(num_upsample_filters) == len(upsample_strides)
31 | self._upsample_start_idx = len(layer_nums) - len(upsample_strides)
32 | must_equal_list = []
33 | for i in range(len(upsample_strides)):
34 | must_equal_list.append(upsample_strides[i] / np.prod(
35 | layer_strides[:i + self._upsample_start_idx + 1]))
36 | for val in must_equal_list:
37 | assert val == must_equal_list[0]
38 |
39 | if use_norm:
40 | BatchNorm2d = change_default_args(
41 | eps=1e-3, momentum=0.01)(nn.BatchNorm2d)
42 | Conv2d = change_default_args(bias=False)(nn.Conv2d)
43 | ConvTranspose2d = change_default_args(bias=False)(
44 | nn.ConvTranspose2d)
45 | else:
46 | BatchNorm2d = Empty
47 | Conv2d = change_default_args(bias=True)(nn.Conv2d)
48 | ConvTranspose2d = change_default_args(bias=True)(
49 | nn.ConvTranspose2d)
50 |
51 | in_filters = [num_input_features, *num_filters[:-1]]
52 | blocks = []
53 | deblocks = []
54 |
55 | for i, layer_num in enumerate(layer_nums):
56 | block, num_out_filters = self._make_layer(
57 | in_filters[i],
58 | num_filters[i],
59 | layer_num,
60 | stride=layer_strides[i])
61 | blocks.append(block)
62 | if i - self._upsample_start_idx >= 0:
63 | stride = upsample_strides[i - self._upsample_start_idx]
64 | if stride >= 1:
65 | stride = np.round(stride).astype(np.int64)
66 | deblock = nn.Sequential(
67 | ConvTranspose2d(
68 | num_out_filters,
69 | num_upsample_filters[i - self._upsample_start_idx],
70 | stride,
71 | stride=stride),
72 | BatchNorm2d(
73 | num_upsample_filters[i - self._upsample_start_idx]),
74 | nn.ReLU(),
75 | )
76 | else:
77 | stride = np.round(1 / stride).astype(np.int64)
78 | deblock = nn.Sequential(
79 | Conv2d(
80 | num_out_filters,
81 | num_upsample_filters[i - self._upsample_start_idx],
82 | stride,
83 | stride=stride),
84 | BatchNorm2d(
85 | num_upsample_filters[i - self._upsample_start_idx]),
86 | nn.ReLU(),
87 | )
88 | deblocks.append(deblock)
89 |
90 | self._num_out_filters = num_out_filters
91 | self.blocks = nn.ModuleList(blocks)
92 | self.deblocks = nn.ModuleList(deblocks)
93 |
94 | @property
95 | def downsample_factor(self):
96 | factor = np.prod(self._layer_strides)
97 | if len(self._upsample_strides) > 0:
98 | factor /= self._upsample_strides[-1]
99 | return factor
100 |
101 | def _make_layer(self, inplanes, planes, num_blocks, stride=1):
102 | raise NotImplementedError
103 |
104 | def forward(self, x):
105 | ups = []
106 | stage_outputs = []
107 | for i in range(len(self.blocks)):
108 | x = self.blocks[i](x)
109 | stage_outputs.append(x)
110 | if i - self._upsample_start_idx >= 0:
111 | ups.append(self.deblocks[i - self._upsample_start_idx](x))
112 |
113 | if len(ups) > 0:
114 | x = torch.cat(ups, dim=1)
115 |
116 | return x
117 |
118 | class RPN(RPNBase):
119 | def _make_layer(self, inplanes, planes, num_blocks, stride=1):
120 | if self._use_norm:
121 | BatchNorm2d = change_default_args(
122 | eps=1e-3, momentum=0.01)(nn.BatchNorm2d)
123 | Conv2d = change_default_args(bias=False)(nn.Conv2d)
124 | ConvTranspose2d = change_default_args(bias=False)(
125 | nn.ConvTranspose2d)
126 | else:
127 | BatchNorm2d = Empty
128 | Conv2d = change_default_args(bias=True)(nn.Conv2d)
129 | ConvTranspose2d = change_default_args(bias=True)(
130 | nn.ConvTranspose2d)
131 |
132 | block = Sequential(
133 | nn.ZeroPad2d(1),
134 | Conv2d(inplanes, planes, 3, stride=stride),
135 | BatchNorm2d(planes),
136 | nn.ReLU(),
137 | )
138 | for j in range(num_blocks):
139 | block.add(Conv2d(planes, planes, 3, padding=1))
140 | block.add(BatchNorm2d(planes))
141 | block.add(nn.ReLU())
142 |
143 | return block, planes
--------------------------------------------------------------------------------
/mmdet/models/roi_extractors/__init__.py:
--------------------------------------------------------------------------------
1 | from .single_level import SingleRoIExtractor
2 |
3 | __all__ = ['SingleRoIExtractor']
4 |
--------------------------------------------------------------------------------
/mmdet/models/roi_extractors/single_level.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | import torch
3 | import torch.nn as nn
4 | from mmdet.core import tensor2points
5 | from mmdet import ops
6 | import numpy as np
7 |
8 | class SingleRoIExtractor(nn.Module):
9 | """Extract RoI features from a single level feature map.
10 |
11 | If there are mulitple input feature levels, each RoI is mapped to a level
12 | according to its scale.
13 |
14 | Args:
15 | roi_layer (dict): Specify RoI layer type and arguments.
16 | out_channels (int): Output channels of RoI layers.
17 | featmap_strides (int): Strides of input feature maps.
18 | finest_scale (int): Scale threshold of mapping to level 0.
19 | """
20 |
21 | def __init__(self,
22 | roi_layer,
23 | out_channels,
24 | featmap_strides,
25 | finest_scale=56):
26 | super(SingleRoIExtractor, self).__init__()
27 | self.roi_layers = self.build_roi_layers(roi_layer, featmap_strides)
28 | self.out_channels = out_channels
29 | self.featmap_strides = featmap_strides
30 | self.finest_scale = finest_scale
31 |
32 | @property
33 | def num_inputs(self):
34 | """int: Input feature map levels."""
35 | return len(self.featmap_strides)
36 |
37 | def init_weights(self):
38 | pass
39 |
40 | def build_roi_layers(self, layer_cfg, featmap_strides):
41 | cfg = layer_cfg.copy()
42 | layer_type = cfg.pop('type')
43 | assert hasattr(ops, layer_type)
44 | layer_cls = getattr(ops, layer_type)
45 | roi_layers = nn.ModuleList(
46 | [layer_cls(spatial_scale=1 / s, **cfg) for s in featmap_strides])
47 | return roi_layers
48 |
49 | def map_roi_levels(self, rois, num_levels):
50 | """Map rois to corresponding feature levels by scales.
51 |
52 | - scale < finest_scale: level 0
53 | - finest_scale <= scale < finest_scale * 2: level 1
54 | - finest_scale * 2 <= scale < finest_scale * 4: level 2
55 | - scale >= finest_scale * 4: level 3
56 |
57 | Args:
58 | rois (Tensor): Input RoIs, shape (k, 5).
59 | num_levels (int): Total level number.
60 |
61 | Returns:
62 | Tensor: Level index (0-based) of each RoI, shape (k, )
63 | """
64 | scale = torch.sqrt(
65 | (rois[:, 3] - rois[:, 1] + 1) * (rois[:, 4] - rois[:, 2] + 1))
66 | target_lvls = torch.floor(torch.log2(scale / self.finest_scale + 1e-6))
67 | target_lvls = target_lvls.clamp(min=0, max=num_levels - 1).long()
68 | return target_lvls
69 |
70 | def forward(self, feats, rois):
71 | if len(feats) == 1:
72 | return self.roi_layers[0](feats[0], rois)
73 |
74 | out_size = self.roi_layers[0].out_size
75 | num_levels = len(feats)
76 | target_lvls = self.map_roi_levels(rois, num_levels)
77 | roi_feats = torch.cuda.FloatTensor(rois.size()[0], self.out_channels,
78 | out_size, out_size).fill_(0)
79 | for i in range(num_levels):
80 | inds = target_lvls == i
81 | if inds.any():
82 | rois_ = rois[inds, :]
83 | roi_feats_t = self.roi_layers[i](feats[i], rois_)
84 | roi_feats[inds] += roi_feats_t
85 | return roi_feats
86 |
87 |
88 |
89 |
--------------------------------------------------------------------------------
/mmdet/models/rpn_heads/__init__.py:
--------------------------------------------------------------------------------
1 | from .rpn_head import RPNHead
2 |
3 | __all__ = ['RPNHead']
4 |
--------------------------------------------------------------------------------
/mmdet/models/single_stage_heads/__init__.py:
--------------------------------------------------------------------------------
1 | from .retina_head import RetinaHead
2 | from .ssd_rotate_head import *
3 |
4 | __all__ = ['RetinaHead', "SSDRotateHead"]
5 |
--------------------------------------------------------------------------------
/mmdet/models/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .conv_module import ConvModule
2 | from .norm import build_norm_layer
3 | from .weight_init import (xavier_init, normal_init, uniform_init, kaiming_init,
4 | bias_init_with_prob)
5 | from .empty import Empty
6 | from .sequential import Sequential
7 | import inspect
8 | import torch
9 |
10 | def get_paddings_indicator(actual_num, max_num, axis=0):
11 | """Create boolean mask by actually number of a padded tensor.
12 | Args:
13 | actual_num ([type]): [description]
14 | max_num ([type]): [description]
15 | Returns:
16 | [type]: [description]
17 | """
18 |
19 | actual_num = torch.unsqueeze(actual_num, axis + 1)
20 | # tiled_actual_num: [N, M, 1]
21 | max_num_shape = [1] * len(actual_num.shape)
22 | max_num_shape[axis + 1] = -1
23 | max_num = torch.arange(
24 | max_num, dtype=torch.int, device=actual_num.device).view(max_num_shape)
25 | # tiled_actual_num: [[3,3,3,3,3], [4,4,4,4,4], [2,2,2,2,2]]
26 | # tiled_max_num: [[0,1,2,3,4], [0,1,2,3,4], [0,1,2,3,4]]
27 | paddings_indicator = actual_num.int() > max_num
28 | # paddings_indicator shape: [batch_size, max_num]
29 | return paddings_indicator
30 |
31 | def get_pos_to_kw_map(func):
32 | pos_to_kw = {}
33 | fsig = inspect.signature(func)
34 | pos = 0
35 | for name, info in fsig.parameters.items():
36 | if info.kind is info.POSITIONAL_OR_KEYWORD:
37 | pos_to_kw[pos] = name
38 | pos += 1
39 | return pos_to_kw
40 |
41 | def change_default_args(**kwargs):
42 | def layer_wrapper(layer_class):
43 | class DefaultArgLayer(layer_class):
44 | def __init__(self, *args, **kw):
45 | pos_to_kw = get_pos_to_kw_map(layer_class.__init__)
46 | kw_to_pos = {kw: pos for pos, kw in pos_to_kw.items()}
47 | for key, val in kwargs.items():
48 | if key not in kw and kw_to_pos[key] > len(args):
49 | kw[key] = val
50 | super().__init__(*args, **kw)
51 |
52 | return DefaultArgLayer
53 |
54 | return layer_wrapper
55 |
56 | def one_hot(tensor, depth, dim=-1, on_value=1.0, dtype=torch.float32):
57 | tensor_onehot = torch.zeros(
58 | *list(tensor.shape), depth, dtype=dtype, device=tensor.device)
59 | tensor_onehot.scatter_(dim, tensor.unsqueeze(dim).long(), on_value)
60 | return tensor_onehot
61 |
62 | __all__ = [
63 | 'ConvModule', 'build_norm_layer', 'xavier_init', 'normal_init',
64 | 'uniform_init', 'kaiming_init', 'bias_init_with_prob','Empty',
65 | 'change_default_args','Sequential','one_hot', 'get_paddings_indicator'
66 | ]
67 |
--------------------------------------------------------------------------------
/mmdet/models/utils/conv_module.py:
--------------------------------------------------------------------------------
1 | import warnings
2 |
3 | import torch.nn as nn
4 | from mmcv.cnn import kaiming_init, constant_init
5 |
6 | from .norm import build_norm_layer
7 |
8 | class ConvModule(nn.Module):
9 |
10 | def __init__(self,
11 | in_channels,
12 | out_channels,
13 | kernel_size,
14 | stride=1,
15 | padding=0,
16 | dilation=1,
17 | groups=1,
18 | bias=True,
19 | normalize=None,
20 | activation='relu',
21 | inplace=True,
22 | activate_last=True):
23 | super(ConvModule, self).__init__()
24 | self.with_norm = normalize is not None
25 | self.with_activatation = activation is not None
26 | self.with_bias = bias
27 | self.activation = activation
28 | self.activate_last = activate_last
29 |
30 | if self.with_norm and self.with_bias:
31 | warnings.warn('ConvModule has norm and bias at the same time')
32 |
33 | self.conv = nn.Conv2d(
34 | in_channels,
35 | out_channels,
36 | kernel_size,
37 | stride,
38 | padding,
39 | dilation,
40 | groups,
41 | bias=bias)
42 |
43 | self.in_channels = self.conv.in_channels
44 | self.out_channels = self.conv.out_channels
45 | self.kernel_size = self.conv.kernel_size
46 | self.stride = self.conv.stride
47 | self.padding = self.conv.padding
48 | self.dilation = self.conv.dilation
49 | self.transposed = self.conv.transposed
50 | self.output_padding = self.conv.output_padding
51 | self.groups = self.conv.groups
52 |
53 | if self.with_norm:
54 | norm_channels = out_channels if self.activate_last else in_channels
55 | self.norm = build_norm_layer(normalize, norm_channels)
56 |
57 | if self.with_activatation:
58 | assert activation in ['relu'], 'Only ReLU supported.'
59 | if self.activation == 'relu':
60 | self.activate = nn.ReLU(inplace=inplace)
61 |
62 | # Default using msra init
63 | self.init_weights()
64 |
65 | def init_weights(self):
66 | nonlinearity = 'relu' if self.activation is None else self.activation
67 | kaiming_init(self.conv, nonlinearity=nonlinearity)
68 | if self.with_norm:
69 | constant_init(self.norm, 1, bias=0)
70 |
71 | def forward(self, x, activate=True, norm=True):
72 | if self.activate_last:
73 | x = self.conv(x)
74 | if norm and self.with_norm:
75 | x = self.norm(x)
76 | if activate and self.with_activatation:
77 | x = self.activate(x)
78 | else:
79 | if norm and self.with_norm:
80 | x = self.norm(x)
81 | if activate and self.with_activatation:
82 | x = self.activate(x)
83 | x = self.conv(x)
84 | return x
85 |
86 |
87 |
--------------------------------------------------------------------------------
/mmdet/models/utils/empty.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 | class Empty(torch.nn.Module):
5 | def __init__(self, *args, **kwargs):
6 | super(Empty, self).__init__()
7 |
8 | def forward(self, *args, **kwargs):
9 | if len(args) == 1:
10 | return args[0]
11 | elif len(args) == 0:
12 | return None
13 | return args
--------------------------------------------------------------------------------
/mmdet/models/utils/norm.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 |
3 | norm_cfg = {'BN': nn.BatchNorm2d, 'SyncBN': None, 'GN': None}
4 |
5 |
6 | def build_norm_layer(cfg, num_features):
7 | assert isinstance(cfg, dict) and 'type' in cfg
8 | cfg_ = cfg.copy()
9 | cfg_.setdefault('eps', 1e-5)
10 | layer_type = cfg_.pop('type')
11 |
12 | if layer_type not in norm_cfg:
13 | raise KeyError('Unrecognized norm type {}'.format(layer_type))
14 | elif norm_cfg[layer_type] is None:
15 | raise NotImplementedError
16 |
17 | return norm_cfg[layer_type](num_features, **cfg_)
18 |
--------------------------------------------------------------------------------
/mmdet/models/utils/sequential.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from collections import OrderedDict
3 | class Sequential(torch.nn.Module):
4 | r"""A sequential container.
5 | Modules will be added to it in the order they are passed in the constructor.
6 | Alternatively, an ordered dict of modules can also be passed in.
7 |
8 | To make it easier to understand, given is a small example::
9 |
10 | # Example of using Sequential
11 | model = Sequential(
12 | nn.Conv2d(1,20,5),
13 | nn.ReLU(),
14 | nn.Conv2d(20,64,5),
15 | nn.ReLU()
16 | )
17 |
18 | # Example of using Sequential with OrderedDict
19 | model = Sequential(OrderedDict([
20 | ('conv1', nn.Conv2d(1,20,5)),
21 | ('relu1', nn.ReLU()),
22 | ('conv2', nn.Conv2d(20,64,5)),
23 | ('relu2', nn.ReLU())
24 | ]))
25 |
26 | # Example of using Sequential with kwargs(python 3.6+)
27 | model = Sequential(
28 | conv1=nn.Conv2d(1,20,5),
29 | relu1=nn.ReLU(),
30 | conv2=nn.Conv2d(20,64,5),
31 | relu2=nn.ReLU()
32 | )
33 | """
34 |
35 | def __init__(self, *args, **kwargs):
36 | super(Sequential, self).__init__()
37 | if len(args) == 1 and isinstance(args[0], OrderedDict):
38 | for key, module in args[0].items():
39 | self.add_module(key, module)
40 | else:
41 | for idx, module in enumerate(args):
42 | self.add_module(str(idx), module)
43 | for name, module in kwargs.items():
44 | if sys.version_info < (3, 6):
45 | raise ValueError("kwargs only supported in py36+")
46 | if name in self._modules:
47 | raise ValueError("name exists.")
48 | self.add_module(name, module)
49 |
50 | def __getitem__(self, idx):
51 | if not (-len(self) <= idx < len(self)):
52 | raise IndexError('index {} is out of range'.format(idx))
53 | if idx < 0:
54 | idx += len(self)
55 | it = iter(self._modules.values())
56 | for i in range(idx):
57 | next(it)
58 | return next(it)
59 |
60 | def __len__(self):
61 | return len(self._modules)
62 |
63 | def add(self, module, name=None):
64 | if name is None:
65 | name = str(len(self._modules))
66 | if name in self._modules:
67 | raise KeyError("name exists")
68 | self.add_module(name, module)
69 |
70 | def forward(self, input):
71 | # i = 0
72 | for module in self._modules.values():
73 | # print(i)
74 | input = module(input)
75 | # i += 1
76 | return input
--------------------------------------------------------------------------------
/mmdet/models/utils/weight_init.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch.nn as nn
3 |
4 |
5 | def xavier_init(module, gain=1, bias=0, distribution='normal'):
6 | assert distribution in ['uniform', 'normal']
7 | if distribution == 'uniform':
8 | nn.init.xavier_uniform_(module.weight, gain=gain)
9 | else:
10 | nn.init.xavier_normal_(module.weight, gain=gain)
11 | if hasattr(module, 'bias'):
12 | nn.init.constant_(module.bias, bias)
13 |
14 |
15 | def normal_init(module, mean=0, std=1, bias=0):
16 | nn.init.normal_(module.weight, mean, std)
17 | if hasattr(module, 'bias'):
18 | nn.init.constant_(module.bias, bias)
19 |
20 |
21 | def uniform_init(module, a=0, b=1, bias=0):
22 | nn.init.uniform_(module.weight, a, b)
23 | if hasattr(module, 'bias'):
24 | nn.init.constant_(module.bias, bias)
25 |
26 |
27 | def kaiming_init(module,
28 | mode='fan_out',
29 | nonlinearity='relu',
30 | bias=0,
31 | distribution='normal'):
32 | assert distribution in ['uniform', 'normal']
33 | if distribution == 'uniform':
34 | nn.init.kaiming_uniform_(
35 | module.weight, mode=mode, nonlinearity=nonlinearity)
36 | else:
37 | nn.init.kaiming_normal_(
38 | module.weight, mode=mode, nonlinearity=nonlinearity)
39 | if hasattr(module, 'bias'):
40 | nn.init.constant_(module.bias, bias)
41 |
42 |
43 | def bias_init_with_prob(prior_prob):
44 | """ initialize conv/fc bias value according to giving probablity"""
45 | bias_init = float(-np.log((1 - prior_prob) / prior_prob))
46 | return bias_init
47 |
--------------------------------------------------------------------------------
/mmdet/ops/__init__.py:
--------------------------------------------------------------------------------
1 | from .points_op import pts_in_boxes3d
2 | __all__ = ['pts_in_boxes3d']
3 |
--------------------------------------------------------------------------------
/mmdet/ops/iou3d/iou3d_utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import mmdet.ops.iou3d.iou3d_cuda as iou3d_cuda
3 | import math
4 |
5 | def limit_period(val, offset=0.5, period=math.pi):
6 | return val - torch.floor(val / period + offset) * period
7 |
8 | def boxes3d_to_near_torch(boxes3d):
9 | rboxes = boxes3d[:, [0, 1, 3, 4, 6]]
10 | """convert rotated bbox to nearest 'standing' or 'lying' bbox.
11 | Args:
12 | rboxes: [N, 5(x, y, xdim, ydim, rad)] rotated bboxes
13 | Returns:
14 | boxes_near: [N, 4(xmin, ymin, xmax, ymax)] nearest boxes
15 | """
16 | rots = rboxes[..., -1]
17 | rots_0_pi_div_2 = torch.abs(limit_period(rots, 0.5, math.pi))
18 | cond = (rots_0_pi_div_2 > math.pi / 4)[..., None]
19 | boxes_center = torch.where(cond, rboxes[:, [0, 1, 3, 2]], rboxes[:, :4])
20 | boxes_near = torch.cat([boxes_center[:, :2] - boxes_center[:, 2:] / 2, \
21 | boxes_center[:, :2] + boxes_center[:, 2:] / 2], dim=-1)
22 | return boxes_near
23 |
24 | def boxes_iou(bboxes1, bboxes2, mode='iou', eps=0.0):
25 | assert mode in ['iou', 'iof']
26 |
27 | rows = bboxes1.size(0)
28 | cols = bboxes2.size(0)
29 |
30 | if rows * cols == 0:
31 | return bboxes1.new(rows, cols)
32 |
33 | lt = torch.max(bboxes1[:, None, :2], bboxes2[:, :2]) # [rows, cols, 2]
34 | rb = torch.min(bboxes1[:, None, 2:], bboxes2[:, 2:]) # [rows, cols, 2]
35 | wh = (rb - lt + eps).clamp(min=0) # [rows, cols, 2]
36 | overlap = wh[:, :, 0] * wh[:, :, 1]
37 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + eps) * (
38 | bboxes1[:, 3] - bboxes1[:, 1] + eps)
39 | if mode == 'iou':
40 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + eps) * (
41 | bboxes2[:, 3] - bboxes2[:, 1] + eps)
42 | ious = overlap / (area1[:, None] + area2 - overlap)
43 | else:
44 | ious = overlap / (area1[:, None])
45 | return ious
46 |
47 | def boxes3d_to_bev_torch(boxes3d):
48 | """
49 | :param boxes3d: (N, 7) [x, y, z, h, w, l, ry]
50 | :return:
51 | boxes_bev: (N, 5) [x1, y1, x2, y2, ry]
52 | """
53 | boxes_bev = boxes3d.new(torch.Size((boxes3d.shape[0], 5)))
54 |
55 | cu, cv = boxes3d[:, 0], boxes3d[:, 1]
56 | half_l, half_w = boxes3d[:, 3] / 2, boxes3d[:, 4] / 2
57 | boxes_bev[:, 0], boxes_bev[:, 1] = cu - half_l, cv - half_w
58 | boxes_bev[:, 2], boxes_bev[:, 3] = cu + half_l, cv + half_w
59 | boxes_bev[:, 4] = boxes3d[:, 6]
60 | return boxes_bev
61 |
62 | def boxes_iou_bev(boxes_a, boxes_b):
63 | """
64 | :param boxes_a: (M, 5)
65 | :param boxes_b: (N, 5)
66 | :return:
67 | ans_iou: (M, N)
68 | """
69 | boxes_a_bev = boxes3d_to_bev_torch(boxes_a)
70 | boxes_b_bev = boxes3d_to_bev_torch(boxes_b)
71 |
72 | ans_iou = torch.cuda.FloatTensor(torch.Size((boxes_a_bev.shape[0], boxes_b_bev.shape[0]))).zero_()
73 |
74 | iou3d_cuda.boxes_iou_bev_gpu(boxes_a_bev.contiguous(), boxes_b_bev.contiguous(), ans_iou)
75 |
76 | return ans_iou
77 |
78 |
79 | def boxes_iou3d_gpu(boxes_a, boxes_b):
80 | """
81 | :param boxes_a: (N, 7) [x, y, z, h, w, l, ry]
82 | :param boxes_b: (M, 7) [x, y, z, h, w, l, ry]
83 | :return:
84 | ans_iou: (M, N)
85 | """
86 | boxes_a_bev = boxes3d_to_bev_torch(boxes_a)
87 | boxes_b_bev = boxes3d_to_bev_torch(boxes_b)
88 |
89 | # bev overlap
90 | overlaps_bev = torch.cuda.FloatTensor(torch.Size((boxes_a.shape[0], boxes_b.shape[0]))).zero_() # (N, M)
91 | iou3d_cuda.boxes_overlap_bev_gpu(boxes_a_bev.contiguous(), boxes_b_bev.contiguous(), overlaps_bev)
92 |
93 | # height overlap
94 | boxes_a_height_max = (boxes_a[:, 2] + boxes_a[:, 5]).view(-1, 1)
95 | boxes_a_height_min = boxes_a[:, 2].view(-1, 1)
96 | boxes_b_height_max = (boxes_b[:, 2] + boxes_b[:, 5]).view(1, -1)
97 | boxes_b_height_min = boxes_b[:, 2].view(1, -1)
98 |
99 | max_of_min = torch.max(boxes_a_height_min, boxes_b_height_min)
100 | min_of_max = torch.min(boxes_a_height_max, boxes_b_height_max)
101 | overlaps_h = torch.clamp(min_of_max - max_of_min, min=0)
102 |
103 | # 3d iou
104 | overlaps_3d = overlaps_bev * overlaps_h
105 |
106 | vol_a = (boxes_a[:, 3] * boxes_a[:, 4] * boxes_a[:, 5]).view(-1, 1)
107 | vol_b = (boxes_b[:, 3] * boxes_b[:, 4] * boxes_b[:, 5]).view(1, -1)
108 |
109 | iou3d = overlaps_3d / torch.clamp(vol_a + vol_b - overlaps_3d, min=1e-7)
110 |
111 | return iou3d
112 |
113 |
114 | def nms_gpu(boxes, scores, thresh):
115 | """
116 | :param boxes: (N, 5) [x1, y1, x2, y2, ry]
117 | :param scores: (N)
118 | :param thresh:
119 | :return:
120 | """
121 | # areas = (x2 - x1) * (y2 - y1)
122 | order = scores.sort(0, descending=True)[1]
123 |
124 | boxes = boxes[order].contiguous()
125 |
126 | keep = torch.LongTensor(boxes.size(0))
127 | num_out = iou3d_cuda.nms_gpu(boxes, keep, thresh)
128 | return order[keep[:num_out].cuda()].contiguous()
129 |
130 | def nms_normal_gpu(boxes, scores, thresh):
131 | """
132 | :param boxes: (N, 5) [x1, y1, x2, y2, ry]
133 | :param scores: (N)
134 | :param thresh:
135 | :return:
136 | """
137 | # areas = (x2 - x1) * (y2 - y1)
138 | order = scores.sort(0, descending=True)[1]
139 |
140 | boxes = boxes[order].contiguous()
141 |
142 | keep = torch.LongTensor(boxes.size(0))
143 | num_out = iou3d_cuda.nms_normal_gpu(boxes, keep, thresh)
144 | return order[keep[:num_out].cuda()].contiguous()
145 |
146 | class RotateIou2dSimilarity(object):
147 | """Class to compute similarity based on Intersection over Union (IOU) metric.
148 |
149 | This class computes pairwise similarity between two BoxLists based on IOU.
150 | """
151 | def __call__(self, boxes1, boxes2):
152 | return boxes_iou_bev(boxes1, boxes2)
153 |
154 | class RotateIou3dSimilarity(object):
155 | """Class to compute similarity based on Intersection over Union (IOU) metric.
156 |
157 | This class computes pairwise similarity between two BoxLists based on IOU.
158 | """
159 | def __call__(self, boxes1, boxes2):
160 | return boxes_iou3d_gpu(boxes1, boxes2)
161 |
162 |
163 | class NearestIouSimilarity(object):
164 | """Class to compute similarity based on the squared distance metric.
165 |
166 | This class computes pairwise similarity between two BoxLists based on the
167 | negative squared distance metric.
168 | """
169 |
170 | def __call__(self, boxes1, boxes2):
171 | """Compute matrix of (negated) sq distances.
172 |
173 | Args:
174 | boxlist1: BoxList holding N boxes.
175 | boxlist2: BoxList holding M boxes.
176 |
177 | Returns:
178 | A tensor with shape [N, M] representing negated pairwise squared distance.
179 | """
180 |
181 | boxes1_near = boxes3d_to_near_torch(boxes1)
182 | boxes2_near = boxes3d_to_near_torch(boxes2)
183 | return boxes_iou(boxes1_near, boxes2_near)
184 |
185 | if __name__ == '__main__':
186 | pass
--------------------------------------------------------------------------------
/mmdet/ops/iou3d/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension
3 |
4 | setup(
5 | name='iou3d',
6 | ext_modules=[
7 | CUDAExtension('iou3d_cuda', [
8 | 'src/iou3d.cpp',
9 | 'src/iou3d_kernel.cu',
10 | ],
11 | extra_compile_args={'cxx': ['-g'],
12 | 'nvcc': ['-O2']})
13 | ],
14 | cmdclass={'build_ext': BuildExtension})
15 |
--------------------------------------------------------------------------------
/mmdet/ops/pointnet2/pointnet2_utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.autograd import Variable
3 | from torch.autograd import Function
4 | import torch.nn as nn
5 | from typing import Tuple
6 |
7 | import mmdet.ops.pointnet2.pointnet2_cuda as pointnet2
8 |
9 | class ThreeNN(Function):
10 |
11 | @staticmethod
12 | def forward(ctx, unknown: torch.Tensor, known: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
13 | """
14 | Find the three nearest neighbors of unknown in known
15 | :param ctx:
16 | :param unknown: (N, 3)
17 | :param known: (M, 3)
18 | :return:
19 | dist: (N, 3) l2 distance to the three nearest neighbors
20 | idx: (N, 3) index of 3 nearest neighbors
21 | """
22 | assert unknown.is_contiguous()
23 | assert known.is_contiguous()
24 |
25 | N, _ = unknown.size()
26 | m = known.size(0)
27 | dist2 = torch.cuda.FloatTensor(N, 3)
28 | idx = torch.cuda.IntTensor(N, 3)
29 |
30 | pointnet2.three_nn_wrapper(N, m, unknown, known, dist2, idx)
31 | return torch.sqrt(dist2), idx
32 |
33 | @staticmethod
34 | def backward(ctx, a=None, b=None):
35 | return None, None
36 |
37 |
38 | three_nn = ThreeNN.apply
39 |
40 |
41 | class ThreeInterpolate(Function):
42 |
43 | @staticmethod
44 | def forward(ctx, features: torch.Tensor, idx: torch.Tensor, weight: torch.Tensor) -> torch.Tensor:
45 | """
46 | Performs weight linear interpolation on 3 features
47 | :param ctx:
48 | :param features: (M, C) Features descriptors to be interpolated from
49 | :param idx: (n, 3) three nearest neighbors of the target features in features
50 | :param weight: (n, 3) weights
51 | :return:
52 | output: (N, C) tensor of the interpolated features
53 | """
54 | assert features.is_contiguous()
55 | assert idx.is_contiguous()
56 | assert weight.is_contiguous()
57 |
58 | m, c = features.size()
59 | n = idx.size(0)
60 | ctx.three_interpolate_for_backward = (idx, weight, m)
61 | output = torch.cuda.FloatTensor(n, c)
62 |
63 | pointnet2.three_interpolate_wrapper(c, m, n, features, idx, weight, output)
64 | return output
65 |
66 | @staticmethod
67 | def backward(ctx, grad_out: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
68 | """
69 | :param ctx:
70 | :param grad_out: (N, C) tensor with gradients of outputs
71 | :return:
72 | grad_features: (M, C) tensor with gradients of features
73 | None:
74 | None:
75 | """
76 | idx, weight, m = ctx.three_interpolate_for_backward
77 | n, c = grad_out.size()
78 |
79 | grad_features = Variable(torch.cuda.FloatTensor(m, c).zero_())
80 | grad_out_data = grad_out.data.contiguous()
81 |
82 | pointnet2.three_interpolate_grad_wrapper( c, n, m, grad_out_data, idx, weight, grad_features.data)
83 | return grad_features, None, None
84 |
85 |
86 | three_interpolate = ThreeInterpolate.apply
87 |
88 |
89 |
--------------------------------------------------------------------------------
/mmdet/ops/pointnet2/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension
3 |
4 | setup(
5 | name='pointnet2',
6 | ext_modules=[
7 | CUDAExtension('pointnet2_cuda', [
8 | 'src/pointnet2_api.cpp',
9 | 'src/interpolate.cpp',
10 | 'src/interpolate_gpu.cu',
11 | ],
12 | extra_compile_args={'cxx': ['-g'],
13 | 'nvcc': ['-O2']})
14 | ],
15 | cmdclass={'build_ext': BuildExtension}
16 | )
17 |
--------------------------------------------------------------------------------
/mmdet/ops/pointnet2/src/cuda_utils.h:
--------------------------------------------------------------------------------
1 | #ifndef _CUDA_UTILS_H
2 | #define _CUDA_UTILS_H
3 |
4 | #include
5 |
6 | #define TOTAL_THREADS 1024
7 | #define THREADS_PER_BLOCK 256
8 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
9 |
10 | inline int opt_n_threads(int work_size) {
11 | const int pow_2 = std::log(static_cast(work_size)) / std::log(2.0);
12 |
13 | return max(min(1 << pow_2, TOTAL_THREADS), 1);
14 | }
15 | #endif
16 |
--------------------------------------------------------------------------------
/mmdet/ops/pointnet2/src/interpolate.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include
6 | #include
7 | #include
8 | #include
9 | #include "interpolate_gpu.h"
10 |
11 | extern THCState *state;
12 |
13 | void three_nn_wrapper_fast(int n, int m, at::Tensor unknown_tensor,
14 | at::Tensor known_tensor, at::Tensor dist2_tensor, at::Tensor idx_tensor) {
15 | const float *unknown = unknown_tensor.data();
16 | const float *known = known_tensor.data();
17 | float *dist2 = dist2_tensor.data();
18 | int *idx = idx_tensor.data();
19 |
20 | cudaStream_t stream = THCState_getCurrentStream(state);
21 | three_nn_kernel_launcher_fast(n, m, unknown, known, dist2, idx, stream);
22 | }
23 |
24 |
25 | void three_interpolate_wrapper_fast(int c, int m, int n,
26 | at::Tensor points_tensor,
27 | at::Tensor idx_tensor,
28 | at::Tensor weight_tensor,
29 | at::Tensor out_tensor) {
30 |
31 | const float *points = points_tensor.data();
32 | const float *weight = weight_tensor.data();
33 | float *out = out_tensor.data();
34 | const int *idx = idx_tensor.data();
35 |
36 | cudaStream_t stream = THCState_getCurrentStream(state);
37 | three_interpolate_kernel_launcher_fast(c, m, n, points, idx, weight, out, stream);
38 | }
39 |
40 | void three_interpolate_grad_wrapper_fast(int c, int n, int m,
41 | at::Tensor grad_out_tensor,
42 | at::Tensor idx_tensor,
43 | at::Tensor weight_tensor,
44 | at::Tensor grad_points_tensor) {
45 |
46 | const float *grad_out = grad_out_tensor.data();
47 | const float *weight = weight_tensor.data();
48 | float *grad_points = grad_points_tensor.data();
49 | const int *idx = idx_tensor.data();
50 |
51 | cudaStream_t stream = THCState_getCurrentStream(state);
52 | three_interpolate_grad_kernel_launcher_fast(c, n, m, grad_out, idx, weight, grad_points, stream);
53 | }
--------------------------------------------------------------------------------
/mmdet/ops/pointnet2/src/interpolate_gpu.cu:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 |
5 | #include "cuda_utils.h"
6 | #include "interpolate_gpu.h"
7 |
8 |
9 | __global__ void three_nn_kernel_fast(int n, int m, const float *__restrict__ unknown,
10 | const float *__restrict__ known, float *__restrict__ dist2, int *__restrict__ idx) {
11 | // unknown: (N, 4)
12 | // known: (M, 4)
13 | // output:
14 | // dist2: (N, 3)
15 | // idx: (N, 3)
16 |
17 |
18 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
19 | if (pt_idx >= n) return;
20 |
21 | unknown += pt_idx * 4;
22 |
23 | dist2 += pt_idx * 3;
24 | idx += pt_idx * 3;
25 |
26 | float ub = unknown[0];
27 | float ux = unknown[1];
28 | float uy = unknown[2];
29 | float uz = unknown[3];
30 |
31 | double best1 = 1e40, best2 = 1e40, best3 = 1e40;
32 | int besti1 = 0, besti2 = 0, besti3 = 0;
33 | for (int k = 0; k < m; ++k) {
34 | float b = known[k * 4 + 0]; //batch number
35 | if (b!=ub)
36 | continue;
37 | float x = known[k * 4 + 1];
38 | float y = known[k * 4 + 2];
39 | float z = known[k * 4 + 3];
40 | float d = (ux - x) * (ux - x) + (uy - y) * (uy - y) + (uz - z) * (uz - z);
41 | if (d < best1) {
42 | best3 = best2; besti3 = besti2;
43 | best2 = best1; besti2 = besti1;
44 | best1 = d; besti1 = k;
45 | }
46 | else if (d < best2) {
47 | best3 = best2; besti3 = besti2;
48 | best2 = d; besti2 = k;
49 | }
50 | else if (d < best3) {
51 | best3 = d; besti3 = k;
52 | }
53 | }
54 | dist2[0] = best1; dist2[1] = best2; dist2[2] = best3;
55 | idx[0] = besti1; idx[1] = besti2; idx[2] = besti3;
56 | }
57 |
58 | void three_nn_kernel_launcher_fast(int n, int m, const float *unknown,
59 | const float *known, float *dist2, int *idx, cudaStream_t stream) {
60 | // unknown: (N, 4)
61 | // known: (M, 4)
62 | // output:
63 | // dist2: (N, 3)
64 | // idx: (N, 3)
65 |
66 | cudaError_t err;
67 | dim3 blocks(DIVUP(n, THREADS_PER_BLOCK)); // blockIdx.x(col), blockIdx.y(row)
68 | dim3 threads(THREADS_PER_BLOCK);
69 |
70 | three_nn_kernel_fast<<>>(n, m, unknown, known, dist2, idx);
71 |
72 | err = cudaGetLastError();
73 | if (cudaSuccess != err) {
74 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
75 | exit(-1);
76 | }
77 | }
78 |
79 |
80 | __global__ void three_interpolate_kernel_fast(int c, int m, int n, const float *__restrict__ points,
81 | const int *__restrict__ idx, const float *__restrict__ weight, float *__restrict__ out) {
82 | // points: (M, C)
83 | // idx: (N, 3)
84 | // weight: (N, 3)
85 | // output:
86 | // out: (N, C)
87 |
88 |
89 | int c_idx = blockIdx.y;
90 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
91 |
92 | if (c_idx >= c || pt_idx >= n) return;
93 |
94 | weight += pt_idx * 3;
95 | //points += c_idx * m;
96 |
97 | idx += pt_idx * 3;
98 |
99 | out += pt_idx * c;
100 |
101 | out[c_idx] = weight[0] * points[idx[0] * c + c_idx] + weight[1] * points[idx[1] * c + c_idx] + weight[2] * points[idx[2] * c + c_idx];
102 | }
103 |
104 | void three_interpolate_kernel_launcher_fast(int c, int m, int n,
105 | const float *points, const int *idx, const float *weight, float *out, cudaStream_t stream) {
106 | // points: (M, C)
107 | // idx: (N, 3)
108 | // weight: (N, 3)
109 | // output:
110 | // out: (N, C)
111 |
112 | cudaError_t err;
113 | dim3 blocks(DIVUP(n, THREADS_PER_BLOCK), c); // blockIdx.x(col), blockIdx.y(row)
114 | dim3 threads(THREADS_PER_BLOCK);
115 | three_interpolate_kernel_fast<<>>(c, m, n, points, idx, weight, out);
116 |
117 | err = cudaGetLastError();
118 | if (cudaSuccess != err) {
119 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
120 | exit(-1);
121 | }
122 | }
123 |
124 | __global__ void three_interpolate_grad_kernel_fast(int c, int n, int m, const float *__restrict__ grad_out,
125 | const int *__restrict__ idx, const float *__restrict__ weight, float *__restrict__ grad_points) {
126 | // grad_out: (N, C)
127 | // weight: (N, 3)
128 | // idx: (N, 3)
129 | // output:
130 | // grad_points: (M, C)
131 |
132 |
133 | int c_idx = blockIdx.y;
134 | int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
135 |
136 | if (c_idx >= c || pt_idx >= n) return;
137 |
138 | grad_out += pt_idx * c + c_idx;
139 | weight += pt_idx * 3;
140 | //grad_points += c_idx * m;
141 | idx += pt_idx * 3;
142 |
143 | atomicAdd(grad_points + idx[0] * c + c_idx, grad_out[0] * weight[0]);
144 | atomicAdd(grad_points + idx[1] * c + c_idx, grad_out[0] * weight[1]);
145 | atomicAdd(grad_points + idx[2] * c + c_idx, grad_out[0] * weight[2]);
146 | }
147 |
148 | void three_interpolate_grad_kernel_launcher_fast(int c, int n, int m, const float *grad_out,
149 | const int *idx, const float *weight, float *grad_points, cudaStream_t stream) {
150 | // grad_out: (N, C)
151 | // weight: (N, 3)
152 | // idx: (N, 3)
153 | // output:
154 | // grad_points: (M, C)
155 |
156 | cudaError_t err;
157 | dim3 blocks(DIVUP(n, THREADS_PER_BLOCK), c); // blockIdx.x(col), blockIdx.y(row)
158 | dim3 threads(THREADS_PER_BLOCK);
159 | three_interpolate_grad_kernel_fast<<>>(c, n, m, grad_out, idx, weight, grad_points);
160 |
161 | err = cudaGetLastError();
162 | if (cudaSuccess != err) {
163 | fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
164 | exit(-1);
165 | }
166 | }
--------------------------------------------------------------------------------
/mmdet/ops/pointnet2/src/interpolate_gpu.h:
--------------------------------------------------------------------------------
1 | #ifndef _INTERPOLATE_GPU_H
2 | #define _INTERPOLATE_GPU_H
3 |
4 | #include
5 | #include
6 | #include
7 | #include
8 |
9 |
10 | void three_nn_wrapper_fast(int n, int m, at::Tensor unknown_tensor,
11 | at::Tensor known_tensor, at::Tensor dist2_tensor, at::Tensor idx_tensor);
12 |
13 | void three_nn_kernel_launcher_fast(int n, int m, const float *unknown,
14 | const float *known, float *dist2, int *idx, cudaStream_t stream);
15 |
16 |
17 | void three_interpolate_wrapper_fast(int c, int m, int n, at::Tensor points_tensor,
18 | at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor out_tensor);
19 |
20 | void three_interpolate_kernel_launcher_fast(int c, int m, int n,
21 | const float *points, const int *idx, const float *weight, float *out, cudaStream_t stream);
22 |
23 |
24 | void three_interpolate_grad_wrapper_fast(int c, int n, int m, at::Tensor grad_out_tensor,
25 | at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_points_tensor);
26 |
27 | void three_interpolate_grad_kernel_launcher_fast(int c, int n, int m, const float *grad_out,
28 | const int *idx, const float *weight, float *grad_points, cudaStream_t stream);
29 |
30 | #endif
31 |
--------------------------------------------------------------------------------
/mmdet/ops/pointnet2/src/pointnet2_api.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 |
4 | #include "interpolate_gpu.h"
5 |
6 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
7 | m.def("three_nn_wrapper", &three_nn_wrapper_fast, "three_nn_wrapper_fast");
8 | m.def("three_interpolate_wrapper", &three_interpolate_wrapper_fast, "three_interpolate_wrapper_fast");
9 | m.def("three_interpolate_grad_wrapper", &three_interpolate_grad_wrapper_fast, "three_interpolate_grad_wrapper_fast");
10 | }
11 |
--------------------------------------------------------------------------------
/mmdet/ops/points_op/__init__.py:
--------------------------------------------------------------------------------
1 | from .points_ops import *
2 | from mmdet.ops.points_op import points_op_cpu
3 | import torch
4 |
5 | def pts_in_boxes3d(pts, boxes3d):
6 | N = len(pts)
7 | M = len(boxes3d)
8 | pts_in_flag = torch.IntTensor(M, N).fill_(0)
9 | reg_target = torch.FloatTensor(N, 3).fill_(0)
10 | points_op_cpu.pts_in_boxes3d(pts.contiguous(), boxes3d.contiguous(), pts_in_flag, reg_target)
11 | return pts_in_flag, reg_target
12 |
13 |
--------------------------------------------------------------------------------
/mmdet/ops/points_op/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 | from torch.utils.cpp_extension import BuildExtension, CppExtension, CUDAExtension
3 |
4 | class get_pybind_include(object):
5 | """Helper class to determine the pybind11 include path
6 | The purpose of this class is to postpone importing pybind11
7 | until it is actually installed, so that the ``get_include()``
8 | method can be invoked. """
9 |
10 | def __init__(self, user=False):
11 | self.user = user
12 |
13 | def __str__(self):
14 | import pybind11
15 | return pybind11.get_include(self.user)
16 |
17 | ext_modules = [
18 | CppExtension(
19 | name='points_op_cpu',
20 | sources = ['src/points_op.cpp'],
21 | extra_compile_args=['-g'],
22 | include_dirs=[
23 | # Path to pybind11 headers
24 | get_pybind_include(),
25 | get_pybind_include(user=True)
26 | ],
27 | ),
28 | ]
29 |
30 | setup(
31 | name='cpplib',
32 | ext_modules=ext_modules,
33 | cmdclass={
34 | 'build_ext': BuildExtension
35 | })
36 |
37 |
--------------------------------------------------------------------------------
/mmdet/version.py:
--------------------------------------------------------------------------------
1 | # GENERATED VERSION FILE
2 | # TIME: Thu Mar 7 20:30:16 2019
3 |
4 | __version__ = '0.5.4+a6ee053'
5 | short_version = '0.5.4'
6 |
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | ## SA-SSD: Structure Aware Single-stage 3D Object Detection from Point Cloud (CVPR 2020) [\[paper\]](https://www4.comp.polyu.edu.hk/~cslzhang/paper/SA-SSD.pdf)
2 | Currently 1st place in KITTI BEV and 3rd in KITTI 3D. The detector can run at 25 FPS.
3 |
4 | **Authors**: [Chenhang He](https://github.com/skyhehe123), [Zeng Hui](https://github.com/HuiZeng), Jianqiang Huang, Xiansheng Hua, [Lei Zhang](https://www4.comp.polyu.edu.hk/~cslzhang/).
5 |
6 | ## Updates
7 | 2020-04-13: Add one_cycle (with Adam) training as default scheduler.
8 |
9 | 2020-08-04: Multi-class training is supported. (The multi-class traning is not well tuned and will slightly deteriote the performance of model with single class training (i.e. each class has a individual model), please find the bellow AP@(11 recall points) for your reference.)
10 | ```
11 | Car AP@0.70, 0.70, 0.70:
12 | bbox AP:98.96, 90.06, 89.52
13 | bev AP:90.59, 88.43, 87.49
14 | 3d AP:89.69, 79.41, 78.33
15 | aos AP:98.94, 89.89, 89.19
16 | Car AP@0.70, 0.50, 0.50:
17 | bbox AP:98.96, 90.06, 89.52
18 | bev AP:98.99, 90.13, 89.68
19 | 3d AP:98.97, 90.10, 89.63
20 | aos AP:98.94, 89.89, 89.19
21 |
22 | Pedestrian AP@0.50, 0.50, 0.50:
23 | bbox AP:62.88, 60.26, 53.58
24 | bev AP:58.52, 50.29, 44.10
25 | 3d AP:55.75, 48.01, 41.94
26 | aos AP:58.57, 55.19, 49.07
27 | Pedestrian AP@0.50, 0.25, 0.25:
28 | bbox AP:62.88, 60.26, 53.58
29 | bev AP:71.34, 62.80, 55.64
30 | 3d AP:71.33, 62.76, 55.60
31 | aos AP:58.57, 55.19, 49.07
32 |
33 | Cyclist AP@0.50, 0.50, 0.50:
34 | bbox AP:87.25, 73.74, 67.84
35 | bev AP:85.40, 70.48, 64.59
36 | 3d AP:82.80, 63.37, 61.60
37 | aos AP:86.93, 73.26, 67.41
38 | Cyclist AP@0.50, 0.25, 0.25:
39 | bbox AP:87.25, 73.74, 67.84
40 | bev AP:86.78, 71.55, 65.85
41 | 3d AP:86.78, 71.54, 65.85
42 | aos AP:86.93, 73.26, 67.41
43 | ```
44 |
45 | ## Demo
46 | [](https://www.youtube.com/watch?v=jrAb3ts4tAs)
47 |
48 | # Introduction
49 | 
50 | Current single-stage detectors are efficient by progressively downscaling the 3D point clouds in a fully convolutional manner. However, the downscaled features inevitably lose spatial information and cannot make full use of the structure information of 3D point cloud, degrading their localization precision. In this work, we propose to improve the localization precision of single-stage detectors by explicitly leveraging the structure information of 3D point cloud. Specifically, we design an auxiliary network which converts the convolutional features in the backbone network back to point-level representations. The auxiliary network is jointly optimized, by two point-level supervisions, to guide the convolutional features in the backbone network to be aware of the object structure. The auxiliary network can be detached after training and therefore introduces no extra computation in the inference stage. Besides, considering that single-stage detectors suffer from the discordance between the predicted bounding boxes and corresponding classification confidences, we develop an efficient part-sensitive warping operation to align the confidences to the predicted bounding boxes.
51 |
52 | # Dependencies
53 | - `python3.5+`
54 | - `pytorch` (tested on 1.1.0)
55 | - `opencv`
56 | - `shapely`
57 | - `mayavi`
58 | - `spconv` (v1.0)
59 |
60 | # Installation
61 | 1. Clone this repository.
62 | 2. Compile C++/CUDA modules in mmdet/ops by running the following command at each directory, e.g.
63 | ```bash
64 | $ cd mmdet/ops/points_op
65 | $ python3 setup.py build_ext --inplace
66 | ```
67 | 3. Setup following Environment variables, you may add them to ~/.bashrc:
68 | ```bash
69 | export NUMBAPRO_CUDA_DRIVER=/usr/lib/x86_64-linux-gnu/libcuda.so
70 | export NUMBAPRO_NVVM=/usr/local/cuda/nvvm/lib64/libnvvm.so
71 | export NUMBAPRO_LIBDEVICE=/usr/local/cuda/nvvm/libdevice
72 | export LD_LIBRARY_PATH=/home/billyhe/anaconda3/lib/python3.7/site-packages/spconv;
73 | ```
74 |
75 | # Data Preparation
76 | 1. Download the 3D KITTI detection dataset from [here](http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d). Data to download include:
77 | * Velodyne point clouds (29 GB): input data to VoxelNet
78 | * Training labels of object data set (5 MB): input label to VoxelNet
79 | * Camera calibration matrices of object data set (16 MB): for visualization of predictions
80 | * Left color images of object data set (12 GB): for visualization of predictions
81 |
82 | 2. Create cropped point cloud and sample pool for data augmentation, please refer to [SECOND](https://github.com/traveller59/second.pytorch).
83 | ```bash
84 | $ python3 tools/create_data.py
85 | ```
86 |
87 | 3. Split the training set into training and validation set according to the protocol [here](https://xiaozhichen.github.io/files/mv3d/imagesets.tar.gz).
88 | ```plain
89 | └── DATA_DIR
90 | ├── training <-- training data
91 | | ├── image_2
92 | | ├── label_2
93 | | ├── velodyne
94 | | └── velodyne_reduced
95 | └── testing <--- testing data
96 | | ├── image_2
97 | | ├── label_2
98 | | ├── velodyne
99 | | └── velodyne_reduced
100 | ```
101 |
102 | # Pretrained Model
103 | You can download the pretrained model [here](https://drive.google.com/file/d/1WJnJDMOeNKszdZH3P077wKXcoty7XOUb/view?usp=sharing),
104 | which is trained on the train split (3712 samples) and evaluated on the val split (3769 samples) and test split (7518 samples).
105 | The performance (using 40 recall poisitions) on validation set is as follows:
106 | ```
107 | Car AP@0.70, 0.70, 0.70:
108 | bbox AP:99.12, 96.09, 93.61
109 | bev AP:96.55, 92.79, 90.32
110 | 3d AP:93.13, 84.54, 81.71
111 | ```
112 | # Train
113 | To train the SA-SSD with single GPU, run the following command:
114 | ```
115 | cd mmdet/tools
116 | python3 train.py ../configs/car_cfg.py
117 | ```
118 | To train the SA-SSD with multiple GPUs, run the following command:
119 | ```
120 | bash dist_train.sh
121 | ```
122 | # Eval
123 | To evaluate the model, run the following command:
124 | ```
125 | cd mmdet/tools
126 | python3 test.py ../configs/car_cfg.py ../saved_model_vehicle/epoch_50.pth
127 | ```
128 | ## Citation
129 | If you find this work useful in your research, please consider cite:
130 | ```
131 | @inproceedings{he2020sassd,
132 | title={Structure Aware Single-stage 3D Object Detection from Point Cloud},
133 | author={He, Chenhang and Zeng, Hui and Huang, Jianqiang and Hua, Xian-Sheng and Zhang, Lei},
134 | booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
135 | year={2020}
136 | }
137 | ```
138 |
139 | ## Acknowledgement
140 | The code is devloped based on mmdetection, some part of codes are borrowed from SECOND and PointRCNN.
141 | * [mmdetection](https://github.com/open-mmlab/mmdetection)
142 | * [mmcv](https://github.com/open-mmlab/mmcv)
143 | * [second.pytorch](https://github.com/traveller59/second.pytorch)
144 | * [PointRCNN](https://github.com/sshaoshuai/PointRCNN)
145 |
146 |
147 |
--------------------------------------------------------------------------------
/tools/dist_train.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | PYTHON=${PYTHON:-"python"}
4 |
5 | $PYTHON -m torch.distributed.launch --nproc_per_node=2 $(dirname "$0")/train.py ../configs/car_cfg.py --launcher pytorch ${@:3}
6 |
--------------------------------------------------------------------------------
/tools/env.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 | import random
4 | import time
5 | import numpy as np
6 | import torch
7 | import torch.distributed as dist
8 | import torch.multiprocessing as mp
9 | from mmcv.runner import get_dist_info
10 |
11 |
12 | def init_dist(launcher, backend='nccl', **kwargs):
13 | if mp.get_start_method(allow_none=True) is None:
14 | mp.set_start_method('spawn')
15 | if launcher == 'pytorch':
16 | _init_dist_pytorch(backend, **kwargs)
17 | elif launcher == 'mpi':
18 | _init_dist_mpi(backend, **kwargs)
19 | elif launcher == 'slurm':
20 | _init_dist_slurm(backend, **kwargs)
21 | else:
22 | raise ValueError('Invalid launcher type: {}'.format(launcher))
23 |
24 |
25 | def _init_dist_pytorch(backend, **kwargs):
26 | # TODO: use local_rank instead of rank % num_gpus
27 | rank = int(os.environ['RANK'])
28 | num_gpus = torch.cuda.device_count()
29 | torch.cuda.set_device(rank % num_gpus)
30 | dist.init_process_group(backend=backend, **kwargs)
31 |
32 |
33 | def _init_dist_mpi(backend, **kwargs):
34 | raise NotImplementedError
35 |
36 |
37 | def _init_dist_slurm(backend, **kwargs):
38 | raise NotImplementedError
39 |
40 |
41 | def set_random_seed(seed):
42 | random.seed(seed)
43 | np.random.seed(seed)
44 | torch.manual_seed(seed)
45 | torch.cuda.manual_seed_all(seed)
46 |
47 |
48 | def get_root_logger(work_dir):
49 | logging.basicConfig(
50 | format='%(asctime)s - %(levelname)s - %(message)s',
51 | level=logging.INFO)
52 |
53 | logger = logging.getLogger()
54 | rank, _ = get_dist_info()
55 | if rank != 0:
56 | logger.setLevel('ERROR')
57 |
58 | filename = '{}.log'.format(time.strftime('%Y%m%d_%H%M%S', time.localtime()))
59 | log_file = os.path.join(work_dir, filename)
60 | file_handler = logging.FileHandler(log_file, 'w')
61 | file_handler.setLevel(logging.INFO)
62 | logger.addHandler(file_handler)
63 |
64 | return logger
65 |
--------------------------------------------------------------------------------
/tools/test.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import sys
3 | sys.path.append('/home/billyhe/SA-SSD')
4 | import torch
5 | import mmcv
6 | from mmcv.runner import load_checkpoint, parallel_test
7 | from mmcv.parallel import scatter, collate, MMDataParallel
8 | from mmdet.core.evaluation.kitti_eval import get_official_eval_result
9 | from mmdet.core import results2json, coco_eval
10 | from mmdet.datasets import build_dataloader
11 | from mmdet.models import build_detector, detectors
12 | import tools.kitti_common as kitti
13 | import numpy as np
14 | import torch.utils.data
15 | import os
16 | from tools.train_utils import load_params_from_file
17 | from mmdet.datasets import utils
18 |
19 | def single_test(model, data_loader, saveto=None, class_names=['Car']):
20 | template = '{} ' + ' '.join(['{:.4f}' for _ in range(15)]) + '\n'
21 | if saveto is not None:
22 | mmcv.mkdir_or_exist(saveto)
23 |
24 | model.eval()
25 | annos = []
26 |
27 | prog_bar = mmcv.ProgressBar(len(data_loader))
28 |
29 | for i, data in enumerate(data_loader):
30 | with torch.no_grad():
31 | results = model(return_loss=False, **data)
32 | annos+=results
33 | # image_shape = (375,1242)
34 | # for re in results:
35 | # img_idx = re['image_idx']
36 | # if re['bbox'] is not None:
37 | # box2d = re['bbox']
38 | # box3d = re['box3d_camera']
39 | # labels = re['label_preds']
40 | # scores = re['scores']
41 | # alphas = re['alphas']
42 | # anno = kitti.get_start_result_anno()
43 | # num_example = 0
44 | # for bbox2d, bbox3d, label, score, alpha in zip(box2d, box3d, labels, scores, alphas):
45 | # if bbox2d[0] > image_shape[1] or bbox2d[1] > image_shape[0]:
46 | # continue
47 | # if bbox2d[2] < 0 or bbox2d[3] < 0:
48 | # continue
49 | # bbox2d[2:] = np.minimum(bbox2d[2:], image_shape[::-1])
50 | # bbox2d[:2] = np.maximum(bbox2d[:2], [0, 0])
51 | # anno["name"].append(class_names[int(label)])
52 | # anno["truncated"].append(0.0)
53 | # anno["occluded"].append(0)
54 | # # anno["alpha"].append(-10)
55 | # anno["alpha"].append(alpha)
56 | # anno["bbox"].append(bbox2d)
57 | # # anno["dimensions"].append(np.array([-1,-1,-1]))
58 | # anno["dimensions"].append(bbox3d[[3, 4, 5]])
59 | # # anno["location"].append(np.array([-1000,-1000,-1000]))
60 | # anno["location"].append(bbox3d[:3])
61 | # # anno["rotation_y"].append(-10)
62 | # anno["rotation_y"].append(bbox3d[6])
63 | # anno["score"].append(score)
64 | # num_example += 1
65 | # if num_example != 0:
66 | # if saveto is not None:
67 | # of_path = os.path.join(saveto, '%06d.txt' % img_idx)
68 | # with open(of_path, 'w+') as f:
69 | # for name, bbox, dim, loc, ry, score, alpha in zip(anno['name'], anno["bbox"], \
70 | # anno["dimensions"], anno["location"], anno["rotation_y"], anno["score"],anno["alpha"]):
71 | # line = template.format(name, 0, 0, alpha, *bbox, *dim[[1,2,0]], *loc, ry, score)
72 | # f.write(line)
73 | #
74 | # anno = {n: np.stack(v) for n, v in anno.items()}
75 | # annos.append(anno)
76 | # else:
77 | # if saveto is not None:
78 | # of_path = os.path.join(saveto, '%06d.txt' % img_idx)
79 | # f = open(of_path, 'w+')
80 | # f.close()
81 | # annos.append(kitti.empty_result_anno())
82 | # else:
83 | # if saveto is not None:
84 | # of_path = os.path.join(saveto, '%06d.txt' % img_idx)
85 | # f = open(of_path, 'w+')
86 | # f.close()
87 | # annos.append(kitti.empty_result_anno())
88 | #
89 | # num_example = annos[-1]["name"].shape[0]
90 | # annos[-1]["image_idx"] = np.array(
91 | # [img_idx] * num_example, dtype=np.int64)
92 | prog_bar.update()
93 |
94 | return annos
95 |
96 |
97 | def _data_func(data, device_id):
98 | data = scatter(collate([data], samples_per_gpu=1), [device_id])[0]
99 | return dict(return_loss=False, rescale=True, **data)
100 |
101 |
102 | def parse_args():
103 | parser = argparse.ArgumentParser(description='MMDet test detector')
104 | parser.add_argument('config', help='test config file path')
105 | parser.add_argument('checkpoint', help='checkpoint file')
106 | parser.add_argument(
107 | '--gpus', default=1, type=int, help='GPU number used for testing')
108 | parser.add_argument(
109 | '--proc_per_gpu',
110 | default=1,
111 | type=int,
112 | help='Number of processes per GPU')
113 | parser.add_argument('--out', help='output result file')
114 | parser.add_argument(
115 | '--eval',
116 | type=str,
117 | nargs='+',
118 | choices=['proposal', 'proposal_fast', 'bbox', 'segm', 'keypoints'],
119 | help='eval types')
120 | parser.add_argument('--show', action='store_true', help='show results')
121 | args = parser.parse_args()
122 | return args
123 |
124 |
125 | def main():
126 | args = parse_args()
127 |
128 | cfg = mmcv.Config.fromfile(args.config)
129 | cfg.model.pretrained = None
130 |
131 | dataset = utils.get_dataset(cfg.data.val)
132 | class_names = cfg.data.val.class_names
133 |
134 | if args.gpus == 1:
135 | model = build_detector(
136 | cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
137 |
138 | print("Evaluate on", cfg.data.val.class_names)
139 | setattr(model, 'class_names', class_names)
140 |
141 | #load_checkpoint(model, args.checkpoint)
142 | model = MMDataParallel(model, device_ids=[0])
143 | load_params_from_file(model, args.checkpoint)
144 | data_loader = build_dataloader(
145 | dataset,
146 | 1,
147 | cfg.data.workers_per_gpu,
148 | num_gpus=1,
149 | shuffle=False,
150 | dist=False)
151 | outputs = single_test(model, data_loader, args.out)
152 | else:
153 | NotImplementedError
154 |
155 | # kitti evaluation
156 | gt_annos = kitti.get_label_annos(dataset.label_prefix, dataset.sample_ids)
157 | result = get_official_eval_result(gt_annos, outputs, current_classes=class_names)
158 | print(result)
159 |
160 |
161 | if __name__ == '__main__':
162 | main()
163 |
--------------------------------------------------------------------------------
/tools/train.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | import argparse
3 | import sys
4 | sys.path.append('/home/billyhe/SA-SSD')
5 | from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
6 | from mmdet.datasets import build_dataloader
7 | from tools.env import get_root_logger, init_dist, set_random_seed
8 | from tools.train_utils import train_model
9 | import pathlib
10 | from mmcv import Config
11 | from mmdet.datasets import get_dataset
12 | from mmdet.models import build_detector
13 | from tools.train_utils.optimization import build_optimizer, build_scheduler
14 |
15 | def parse_args():
16 | parser = argparse.ArgumentParser(description='Train a detector')
17 | parser.add_argument('config', help='train config file path')
18 | parser.add_argument('--work_dir', help='the dir to save logs and models')
19 | parser.add_argument(
20 | '--validate',
21 | action='store_true',
22 | help='whether to evaluate the checkpoint during training')
23 | parser.add_argument(
24 | '--gpus',
25 | type=int,
26 | default=1,
27 | help='number of gpus to use '
28 | '(only applicable to non-distributed training)')
29 | parser.add_argument('--seed', type=int, default=0, help='random seed')
30 | parser.add_argument(
31 | '--launcher',
32 | choices=['none', 'pytorch', 'slurm', 'mpi'],
33 | default='none',
34 | help='job launcher')
35 | parser.add_argument('--local_rank', type=int, default=0)
36 | parser.add_argument('--max_ckpt_save_num', type=int, default=10)
37 |
38 | args = parser.parse_args()
39 |
40 | return args
41 |
42 |
43 |
44 | def main():
45 |
46 | args = parse_args()
47 |
48 | cfg = Config.fromfile(args.config)
49 |
50 | if args.work_dir is not None:
51 | cfg.work_dir = args.work_dir
52 |
53 | pathlib.Path(cfg.work_dir).mkdir(parents=True, exist_ok=True)
54 |
55 | cfg.gpus = args.gpus
56 |
57 | # init distributed env first, since logger depends on the dist info.
58 | if args.launcher == 'none':
59 | distributed = False
60 | else:
61 | distributed = True
62 | init_dist(args.launcher, **cfg.dist_params)
63 |
64 | # init logger before other steps
65 | logger = get_root_logger(cfg.work_dir)
66 |
67 | logger.info('Distributed training: {}'.format(distributed))
68 |
69 | # set random seeds
70 | if args.seed is not None:
71 | logger.info('Set random seed to {}'.format(args.seed))
72 | set_random_seed(args.seed)
73 |
74 | model = build_detector(
75 | cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
76 |
77 | if distributed:
78 | model = MMDistributedDataParallel(model.cuda())
79 | else:
80 | model = MMDataParallel(model, device_ids=range(cfg.gpus)).cuda()
81 |
82 | train_dataset = get_dataset(cfg.data.train)
83 |
84 | optimizer = build_optimizer(model, cfg.optimizer)
85 |
86 | train_loader = build_dataloader(
87 | train_dataset,
88 | cfg.data.imgs_per_gpu,
89 | cfg.data.workers_per_gpu,
90 | dist=distributed)
91 |
92 | start_epoch = it = 0
93 | last_epoch = -1
94 |
95 | lr_scheduler, lr_warmup_scheduler = build_scheduler(
96 | optimizer, total_iters_each_epoch=len(train_loader), total_epochs=cfg.total_epochs,
97 | last_epoch=last_epoch, optim_cfg=cfg.optimizer, lr_cfg=cfg.lr_config
98 | )
99 | # -----------------------start training---------------------------
100 | logger.info('**********************Start training**********************')
101 |
102 | train_model(
103 | model,
104 | optimizer,
105 | train_loader,
106 | lr_scheduler=lr_scheduler,
107 | optim_cfg=cfg.optimizer,
108 | start_epoch=start_epoch,
109 | total_epochs=cfg.total_epochs,
110 | start_iter=it,
111 | rank=args.local_rank,
112 | logger = logger,
113 | ckpt_save_dir=cfg.work_dir,
114 | lr_warmup_scheduler=lr_warmup_scheduler,
115 | ckpt_save_interval=cfg.checkpoint_config.interval,
116 | max_ckpt_save_num=args.max_ckpt_save_num,
117 | log_interval = cfg.log_config.interval
118 | )
119 |
120 | logger.info('**********************End training**********************')
121 |
122 |
123 |
124 |
125 |
126 | if __name__ == '__main__':
127 | main()
128 |
--------------------------------------------------------------------------------
/tools/train_utils/optimization/__init__.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.optim as optim
4 | from functools import partial
5 | from .fastai_optim import OptimWrapper
6 | from .learning_schedules_fastai import OneCycle, CosineWarmupLR
7 |
8 |
9 | def build_optimizer(model, optim_cfg):
10 | if optim_cfg.type == 'adam':
11 | optimizer = optim.Adam(model.parameters(), lr=optim_cfg.lr, weight_decay=optim_cfg.weight_decay)
12 | elif optim_cfg.type == 'sgd':
13 | optimizer = optim.SGD(
14 | model.parameters(), lr=optim_cfg.lr, weight_decay=optim_cfg.weight_decay,
15 | momentum=optim_cfg.momentum
16 | )
17 | elif optim_cfg.type == 'adam_onecycle':
18 | def children(m: nn.Module):
19 | return list(m.children())
20 |
21 | def num_children(m: nn.Module) -> int:
22 | return len(children(m))
23 |
24 | flatten_model = lambda m: sum(map(flatten_model, m.children()), []) if num_children(m) else [m]
25 | get_layer_groups = lambda m: [nn.Sequential(*flatten_model(m))]
26 |
27 | optimizer_func = partial(optim.Adam, betas=(0.9, 0.99))
28 | optimizer = OptimWrapper.create(
29 | optimizer_func, optim_cfg.lr, get_layer_groups(model), wd=optim_cfg.weight_decay, true_wd=True, bn_wd=True
30 | )
31 | else:
32 | raise NotImplementedError
33 |
34 | return optimizer
35 |
36 |
37 | def build_scheduler(optimizer, total_iters_each_epoch, total_epochs, last_epoch, optim_cfg, lr_cfg):
38 |
39 | lr_warmup_scheduler = None
40 | total_steps = total_iters_each_epoch * total_epochs
41 |
42 | if lr_cfg.policy == 'onecycle':
43 | lr_scheduler = OneCycle(
44 | optimizer, total_steps, optim_cfg.lr, list(lr_cfg.moms), lr_cfg.div_factor, lr_cfg.pct_start
45 | )
46 |
47 | elif lr_cfg.policy == 'cosine':
48 | lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, total_steps, last_epoch=last_epoch)
49 |
50 | elif lr_cfg.policy == 'step':
51 |
52 | lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, lr_cfg.step, last_epoch=last_epoch)
53 |
54 | else:
55 | raise NotImplementedError
56 |
57 | if 'warmup' in lr_cfg:
58 | lr_warmup_scheduler = CosineWarmupLR(
59 | optimizer, T_max=lr_cfg.warmup_iters,
60 | eta_min=optim_cfg.lr * lr_cfg.warmup_ratio
61 | )
62 |
63 | return lr_scheduler, lr_warmup_scheduler
64 |
--------------------------------------------------------------------------------
/tools/train_utils/optimization/learning_schedules_fastai.py:
--------------------------------------------------------------------------------
1 | # This file is modified from https://github.com/traveller59/second.pytorch
2 |
3 | import numpy as np
4 | import math
5 | from functools import partial
6 | import torch.optim.lr_scheduler as lr_sched
7 | from .fastai_optim import OptimWrapper
8 |
9 |
10 | class LRSchedulerStep(object):
11 | def __init__(self, fai_optimizer: OptimWrapper, total_step, lr_phases,
12 | mom_phases):
13 | # if not isinstance(fai_optimizer, OptimWrapper):
14 | # raise TypeError('{} is not a fastai OptimWrapper'.format(
15 | # type(fai_optimizer).__name__))
16 | self.optimizer = fai_optimizer
17 | self.total_step = total_step
18 | self.lr_phases = []
19 |
20 | for i, (start, lambda_func) in enumerate(lr_phases):
21 | if len(self.lr_phases) != 0:
22 | assert self.lr_phases[-1][0] < start
23 | if isinstance(lambda_func, str):
24 | lambda_func = eval(lambda_func)
25 | if i < len(lr_phases) - 1:
26 | self.lr_phases.append((int(start * total_step), int(lr_phases[i + 1][0] * total_step), lambda_func))
27 | else:
28 | self.lr_phases.append((int(start * total_step), total_step, lambda_func))
29 | assert self.lr_phases[0][0] == 0
30 | self.mom_phases = []
31 | for i, (start, lambda_func) in enumerate(mom_phases):
32 | if len(self.mom_phases) != 0:
33 | assert self.mom_phases[-1][0] < start
34 | if isinstance(lambda_func, str):
35 | lambda_func = eval(lambda_func)
36 | if i < len(mom_phases) - 1:
37 | self.mom_phases.append((int(start * total_step), int(mom_phases[i + 1][0] * total_step), lambda_func))
38 | else:
39 | self.mom_phases.append((int(start * total_step), total_step, lambda_func))
40 | assert self.mom_phases[0][0] == 0
41 |
42 | def step(self, step):
43 | for start, end, func in self.lr_phases:
44 | if step >= start:
45 | self.optimizer.lr = func((step - start) / (end - start))
46 | for start, end, func in self.mom_phases:
47 | if step >= start:
48 | self.optimizer.mom = func((step - start) / (end - start))
49 |
50 |
51 | def annealing_cos(start, end, pct):
52 | # print(pct, start, end)
53 | "Cosine anneal from `start` to `end` as pct goes from 0.0 to 1.0."
54 | cos_out = np.cos(np.pi * pct) + 1
55 | return end + (start - end) / 2 * cos_out
56 |
57 |
58 | class OneCycle(LRSchedulerStep):
59 | def __init__(self, fai_optimizer, total_step, lr_max, moms, div_factor,
60 | pct_start):
61 | self.lr_max = lr_max
62 | self.moms = moms
63 | self.div_factor = div_factor
64 | self.pct_start = pct_start
65 | a1 = int(total_step * self.pct_start)
66 | a2 = total_step - a1
67 | low_lr = self.lr_max / self.div_factor
68 | lr_phases = ((0, partial(annealing_cos, low_lr, self.lr_max)),
69 | (self.pct_start,
70 | partial(annealing_cos, self.lr_max, low_lr / 1e4)))
71 | mom_phases = ((0, partial(annealing_cos, *self.moms)),
72 | (self.pct_start, partial(annealing_cos,
73 | *self.moms[::-1])))
74 | fai_optimizer.lr, fai_optimizer.mom = low_lr, self.moms[0]
75 | super().__init__(fai_optimizer, total_step, lr_phases, mom_phases)
76 |
77 |
78 | class CosineWarmupLR(lr_sched._LRScheduler):
79 | def __init__(self, optimizer, T_max, eta_min=0, last_epoch=-1):
80 | self.T_max = T_max
81 | self.eta_min = eta_min
82 | super(CosineWarmupLR, self).__init__(optimizer, last_epoch)
83 |
84 | def get_lr(self):
85 | return [self.eta_min + (base_lr - self.eta_min) *
86 | (1 - math.cos(math.pi * self.last_epoch / self.T_max)) / 2
87 | for base_lr in self.base_lrs]
88 |
89 |
90 | class FakeOptim:
91 | def __init__(self):
92 | self.lr = 0
93 | self.mom = 0
94 |
95 |
96 | if __name__ == "__main__":
97 | import matplotlib.pyplot as plt
98 |
99 | opt = FakeOptim() # 3e-3, wd=0.4, div_factor=10
100 | schd = OneCycle(opt, 100, 3e-3, (0.95, 0.85), 10.0, 0.1)
101 |
102 | lrs = []
103 | moms = []
104 | for i in range(100):
105 | schd.step(i)
106 | lrs.append(opt.lr)
107 | moms.append(opt.mom)
108 | plt.plot(lrs)
109 | # plt.plot(moms)
110 | plt.show()
111 | plt.plot(moms)
112 | plt.show()
113 |
--------------------------------------------------------------------------------