├── .gitignore
├── .idea
    ├── .gitignore
    ├── SA-SSD.iml
    ├── inspectionProfiles
    │   └── profiles_settings.xml
    ├── misc.xml
    ├── modules.xml
    └── vcs.xml
├── configs
    ├── car_cfg.py
    └── multi_cfg.py
├── doc
    ├── hqdefault.jpg
    └── model.png
├── mmdet
    ├── __init__.py
    ├── core
    │   ├── __init__.py
    │   ├── anchor
    │   │   ├── __init__.py
    │   │   ├── anchor3d_generator.py
    │   │   ├── anchor_generator.py
    │   │   └── anchor_target.py
    │   ├── bbox
    │   │   ├── __init__.py
    │   │   ├── assignment.py
    │   │   ├── bbox_target.py
    │   │   ├── geometry.py
    │   │   ├── sampling.py
    │   │   └── transforms.py
    │   ├── bbox3d
    │   │   ├── __init__.py
    │   │   ├── bbox3d_target.py
    │   │   ├── box_coders.py
    │   │   ├── geometry.py
    │   │   ├── region_similarity.py
    │   │   └── target_ops.py
    │   ├── evaluation
    │   │   ├── __init__.py
    │   │   ├── bbox_overlaps.py
    │   │   ├── class_names.py
    │   │   ├── coco_utils.py
    │   │   ├── eval_hooks.py
    │   │   ├── kitti_eval.py
    │   │   ├── mean_ap.py
    │   │   └── recall.py
    │   ├── loss
    │   │   ├── __init__.py
    │   │   └── losses.py
    │   ├── mask
    │   │   ├── __init__.py
    │   │   ├── mask_target.py
    │   │   └── utils.py
    │   ├── point_cloud
    │   │   ├── __init__.py
    │   │   ├── point_augmentor.py
    │   │   └── voxel_generator.py
    │   ├── post_processing
    │   │   ├── __init__.py
    │   │   ├── bbox_nms.py
    │   │   ├── merge_augs.py
    │   │   └── rotate_nms_gpu.py
    │   └── utils
    │   │   ├── __init__.py
    │   │   ├── dist_utils.py
    │   │   └── misc.py
    ├── datasets
    │   ├── __init__.py
    │   ├── coco.py
    │   ├── concat_dataset.py
    │   ├── custom.py
    │   ├── kitti.py
    │   ├── kitti_utils.py
    │   ├── loader
    │   │   ├── __init__.py
    │   │   ├── build_loader.py
    │   │   └── sampler.py
    │   ├── transforms.py
    │   ├── utils.py
    │   ├── voc.py
    │   └── xml_style.py
    ├── models
    │   ├── __init__.py
    │   ├── backbones
    │   │   ├── __init__.py
    │   │   ├── pillar.py
    │   │   ├── resnet.py
    │   │   └── vxnet.py
    │   ├── bbox_heads
    │   │   ├── __init__.py
    │   │   ├── bbox_head.py
    │   │   └── convfc_bbox_head.py
    │   ├── builder.py
    │   ├── detectors
    │   │   ├── __init__.py
    │   │   ├── base.py
    │   │   ├── pointpillars.py
    │   │   ├── rpn.py
    │   │   ├── single_stage.py
    │   │   └── test_mixins.py
    │   ├── mask_heads
    │   │   ├── __init__.py
    │   │   └── fcn_mask_head.py
    │   ├── necks
    │   │   ├── __init__.py
    │   │   ├── cmn.py
    │   │   ├── fpn.py
    │   │   └── rpn.py
    │   ├── roi_extractors
    │   │   ├── __init__.py
    │   │   └── single_level.py
    │   ├── rpn_heads
    │   │   ├── __init__.py
    │   │   └── rpn_head.py
    │   ├── single_stage_heads
    │   │   ├── __init__.py
    │   │   ├── retina_head.py
    │   │   └── ssd_rotate_head.py
    │   └── utils
    │   │   ├── __init__.py
    │   │   ├── conv_module.py
    │   │   ├── empty.py
    │   │   ├── norm.py
    │   │   ├── sequential.py
    │   │   └── weight_init.py
    ├── ops
    │   ├── __init__.py
    │   ├── iou3d
    │   │   ├── iou3d_utils.py
    │   │   ├── setup.py
    │   │   └── src
    │   │   │   ├── iou3d.cpp
    │   │   │   └── iou3d_kernel.cu
    │   ├── pointnet2
    │   │   ├── pointnet2_utils.py
    │   │   ├── setup.py
    │   │   └── src
    │   │   │   ├── cuda_utils.h
    │   │   │   ├── interpolate.cpp
    │   │   │   ├── interpolate_gpu.cu
    │   │   │   ├── interpolate_gpu.h
    │   │   │   └── pointnet2_api.cpp
    │   └── points_op
    │   │   ├── __init__.py
    │   │   ├── points_ops.py
    │   │   ├── setup.py
    │   │   └── src
    │   │       └── points_op.cpp
    └── version.py
├── readme.md
└── tools
    ├── create_data.py
    ├── dist_train.sh
    ├── env.py
    ├── kitti_common.py
    ├── test.py
    ├── train.py
    └── train_utils
        ├── __init__.py
        └── optimization
            ├── __init__.py
            ├── fastai_optim.py
            └── learning_schedules_fastai.py


/.gitignore:
--------------------------------------------------------------------------------
 1 | # Compiled source #
 2 | ###################
 3 | *.com
 4 | *.class
 5 | *.dll
 6 | *.exe
 7 | *.o
 8 | *.so
 9 | *.pyc
10 | 
11 | # Packages #
12 | ############
13 | # it's better to unpack these files and commit the raw source
14 | # git has its own built in compression methods
15 | *.7z
16 | *.dmg
17 | *.gz
18 | *.iso
19 | *.jar
20 | *.rar
21 | *.tar
22 | *.zip
23 | 
24 | 
25 | # Specific directory #
26 | saved_model_vehicle/
27 | 


--------------------------------------------------------------------------------
/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /workspace.xml


--------------------------------------------------------------------------------
/.idea/SA-SSD.iml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <module type="PYTHON_MODULE" version="4">
 3 |   <component name="NewModuleRootManager">
 4 |     <content url="file://$MODULE_DIR$" />
 5 |     <orderEntry type="jdk" jdkName="Python 3.7 (SA-SSD)" jdkType="Python SDK" />
 6 |     <orderEntry type="sourceFolder" forTests="false" />
 7 |   </component>
 8 |   <component name="PyDocumentationSettings">
 9 |     <option name="format" value="PLAIN" />
10 |     <option name="myDocStringFormat" value="Plain" />
11 |   </component>
12 |   <component name="TestRunnerService">
13 |     <option name="PROJECT_TEST_RUNNER" value="pytest" />
14 |   </component>
15 | </module>


--------------------------------------------------------------------------------
/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
1 | <component name="InspectionProjectProfileManager">
2 |   <settings>
3 |     <option name="USE_PROJECT_PROFILE" value="false" />
4 |     <version value="1.0" />
5 |   </settings>
6 | </component>


--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="JavaScriptSettings">
4 |     <option name="languageLevel" value="ES6" />
5 |   </component>
6 |   <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7 (SA-SSD)" project-jdk-type="Python SDK" />
7 | </project>


--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="ProjectModuleManager">
4 |     <modules>
5 |       <module fileurl="file://$PROJECT_DIR$/.idea/SA-SSD.iml" filepath="$PROJECT_DIR$/.idea/SA-SSD.iml" />
6 |     </modules>
7 |   </component>
8 | </project>


--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="VcsDirectoryMappings">
4 |     <mapping directory="$PROJECT_DIR$" vcs="Git" />
5 |   </component>
6 | </project>


--------------------------------------------------------------------------------
/doc/hqdefault.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skyhehe123/SA-SSD/2d75c973af65453186bd9242d7fa5e62dc44ec03/doc/hqdefault.jpg


--------------------------------------------------------------------------------
/doc/model.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skyhehe123/SA-SSD/2d75c973af65453186bd9242d7fa5e62dc44ec03/doc/model.png


--------------------------------------------------------------------------------
/mmdet/__init__.py:
--------------------------------------------------------------------------------
1 | from .version import __version__, short_version
2 | 
3 | __all__ = ['__version__', 'short_version']
4 | 


--------------------------------------------------------------------------------
/mmdet/core/__init__.py:
--------------------------------------------------------------------------------
1 | from .anchor import *  # noqa: F401, F403
2 | from .bbox import *  # noqa: F401, F403
3 | from .mask import *  # noqa: F401, F403
4 | from .loss import *  # noqa: F401, F403
5 | from .evaluation import *  # noqa: F401, F403
6 | from .post_processing import *  # noqa: F401, F403
7 | from .utils import *  # noqa: F401, F403
8 | 


--------------------------------------------------------------------------------
/mmdet/core/anchor/__init__.py:
--------------------------------------------------------------------------------
1 | from .anchor_generator import AnchorGenerator
2 | from .anchor_target import anchor_target
3 | 
4 | __all__ = ['AnchorGenerator', 'anchor_target']
5 | 


--------------------------------------------------------------------------------
/mmdet/core/anchor/anchor3d_generator.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | 
  3 | def create_anchors_3d_stride(feature_size,
  4 |                              sizes=[1.6, 3.9, 1.56],
  5 |                              anchor_strides=[0.4, 0.4, 0.0],
  6 |                              anchor_offsets=[0.2, -39.8, -1.78],
  7 |                              rotations=[0, np.pi / 2],
  8 |                              dtype=np.float32):
  9 |     """
 10 |     Args:
 11 |         feature_size: list [D, H, W](zyx)
 12 |         sizes: [N, 3] list of list or array, size of anchors, xyz
 13 | 
 14 |     Returns:
 15 |         anchors: [*feature_size, num_sizes, num_rots, 7] tensor.
 16 |     """
 17 |     # almost 2x faster than v1
 18 |     x_stride, y_stride, z_stride = anchor_strides
 19 |     x_offset, y_offset, z_offset = anchor_offsets
 20 |     z_centers = np.arange(feature_size[0], dtype=dtype)
 21 |     y_centers = np.arange(feature_size[1], dtype=dtype)
 22 |     x_centers = np.arange(feature_size[2], dtype=dtype)
 23 |     z_centers = z_centers * z_stride + z_offset
 24 |     y_centers = y_centers * y_stride + y_offset
 25 |     x_centers = x_centers * x_stride + x_offset
 26 |     sizes = np.reshape(np.array(sizes, dtype=dtype), [-1, 3])
 27 |     rotations = np.array(rotations, dtype=dtype)
 28 |     rets = np.meshgrid(
 29 |         x_centers, y_centers, z_centers, rotations, indexing='ij')
 30 |     tile_shape = [1] * 5
 31 |     tile_shape[-2] = int(sizes.shape[0])
 32 |     for i in range(len(rets)):
 33 |         rets[i] = np.tile(rets[i][..., np.newaxis, :], tile_shape)
 34 |         rets[i] = rets[i][..., np.newaxis]  # for concat
 35 |     sizes = np.reshape(sizes, [1, 1, 1, -1, 1, 3])
 36 |     tile_size_shape = list(rets[0].shape)
 37 |     tile_size_shape[3] = 1
 38 |     sizes = np.tile(sizes, tile_size_shape)
 39 |     rets.insert(3, sizes)
 40 |     ret = np.concatenate(rets, axis=-1)
 41 |     return np.transpose(ret, [2, 1, 0, 3, 4, 5])
 42 | 
 43 | 
 44 | def create_anchors_3d_range(feature_size,
 45 |                             anchor_range,
 46 |                             sizes=[1.6, 3.9, 1.56],
 47 |                             rotations=[0, np.pi / 2],
 48 |                             dtype=np.float32):
 49 |     """
 50 |     Args:
 51 |         feature_size: list [D, H, W](zyx)
 52 |         sizes: [N, 3] list of list or array, size of anchors, xyz
 53 | 
 54 |     Returns:
 55 |         anchors: [*feature_size, num_sizes, num_rots, 7] tensor.
 56 |     """
 57 |     anchor_range = np.array(anchor_range, dtype)
 58 |     z_centers = np.linspace(
 59 |         anchor_range[2], anchor_range[5], feature_size[0], dtype=dtype)
 60 |     y_centers = np.linspace(
 61 |         anchor_range[1], anchor_range[4], feature_size[1], dtype=dtype)
 62 |     x_centers = np.linspace(
 63 |         anchor_range[0], anchor_range[3], feature_size[2], dtype=dtype)
 64 |     sizes = np.reshape(np.array(sizes, dtype=dtype), [-1, 3])
 65 |     rotations = np.array(rotations, dtype=dtype)
 66 |     rets = np.meshgrid(
 67 |         x_centers, y_centers, z_centers, rotations, indexing='ij')
 68 |     tile_shape = [1] * 5
 69 |     tile_shape[-2] = int(sizes.shape[0])
 70 |     for i in range(len(rets)):
 71 |         rets[i] = np.tile(rets[i][..., np.newaxis, :], tile_shape)
 72 |         rets[i] = rets[i][..., np.newaxis]  # for concat
 73 |     sizes = np.reshape(sizes, [1, 1, 1, -1, 1, 3])
 74 |     tile_size_shape = list(rets[0].shape)
 75 |     tile_size_shape[3] = 1
 76 |     sizes = np.tile(sizes, tile_size_shape)
 77 |     rets.insert(3, sizes)
 78 |     ret = np.concatenate(rets, axis=-1)
 79 |     return np.transpose(ret, [2, 1, 0, 3, 4, 5])
 80 | 
 81 | class AnchorGeneratorStride:
 82 |     def __init__(self,
 83 |                  sizes=[1.6, 3.9, 1.56],
 84 |                  anchor_strides=[0.4, 0.4, 1.0],
 85 |                  anchor_offsets=[0.2, -39.8, -1.78],
 86 |                  rotations=[0, np.pi / 2],
 87 |                  dtype=np.float32):
 88 |         self._sizes = sizes
 89 |         self._anchor_strides = anchor_strides
 90 |         self._anchor_offsets = anchor_offsets
 91 |         self._rotations = rotations
 92 |         self._dtype = dtype
 93 | 
 94 |     @property
 95 |     def num_anchors_per_localization(self):
 96 |         num_rot = len(self._rotations)
 97 |         num_size = np.array(self._sizes).reshape([-1, 3]).shape[0]
 98 |         return num_rot * num_size
 99 | 
100 |     def __call__(self, feature_map_size):
101 |         return create_anchors_3d_stride(
102 |             feature_map_size, self._sizes, self._anchor_strides,
103 |             self._anchor_offsets, self._rotations, self._dtype)
104 | 
105 | class AnchorGeneratorRange:
106 |     def __init__(self,
107 |                  anchor_ranges,
108 |                  sizes=[1.6, 3.9, 1.56],
109 |                  rotations=[0, np.pi / 2],
110 |                  dtype=np.float32):
111 |         self._sizes = sizes
112 |         self._anchor_ranges = anchor_ranges
113 |         self._rotations = rotations
114 |         self._dtype = dtype
115 | 
116 |     @property
117 |     def num_anchors_per_localization(self):
118 |         num_rot = len(self._rotations)
119 |         num_size = np.array(self._sizes).reshape([-1, 3]).shape[0]
120 |         return num_rot * num_size
121 | 
122 |     def __call__(self, feature_map_size):
123 |         return create_anchors_3d_range(
124 |             feature_map_size, self._anchor_ranges, self._sizes,
125 |             self._rotations, self._dtype)
126 | 


--------------------------------------------------------------------------------
/mmdet/core/anchor/anchor_generator.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | 
 4 | class AnchorGenerator(object):
 5 | 
 6 |     def __init__(self, base_size, scales, ratios, scale_major=True):
 7 |         self.base_size = base_size
 8 |         self.scales = torch.Tensor(scales)
 9 |         self.ratios = torch.Tensor(ratios)
10 |         self.scale_major = scale_major
11 |         self.base_anchors = self.gen_base_anchors()
12 | 
13 |     @property
14 |     def num_base_anchors(self):
15 |         return self.base_anchors.size(0)
16 | 
17 |     def gen_base_anchors(self):
18 |         base_anchor = torch.Tensor(
19 |             [0, 0, self.base_size - 1, self.base_size - 1])
20 | 
21 |         w = base_anchor[2] - base_anchor[0] + 1
22 |         h = base_anchor[3] - base_anchor[1] + 1
23 |         x_ctr = base_anchor[0] + 0.5 * (w - 1)
24 |         y_ctr = base_anchor[1] + 0.5 * (h - 1)
25 | 
26 |         h_ratios = torch.sqrt(self.ratios)
27 |         w_ratios = 1 / h_ratios
28 |         if self.scale_major:
29 |             ws = (w * w_ratios[:, None] * self.scales[None, :]).view(-1)
30 |             hs = (h * h_ratios[:, None] * self.scales[None, :]).view(-1)
31 |         else:
32 |             ws = (w * self.scales[:, None] * w_ratios[None, :]).view(-1)
33 |             hs = (h * self.scales[:, None] * h_ratios[None, :]).view(-1)
34 | 
35 |         base_anchors = torch.stack(
36 |             [
37 |                 x_ctr - 0.5 * (ws - 1), y_ctr - 0.5 * (hs - 1),
38 |                 x_ctr + 0.5 * (ws - 1), y_ctr + 0.5 * (hs - 1)
39 |             ],
40 |             dim=-1).round()
41 | 
42 |         return base_anchors
43 | 
44 |     def _meshgrid(self, x, y, row_major=True):
45 |         xx = x.repeat(len(y))
46 |         yy = y.view(-1, 1).repeat(1, len(x)).view(-1)
47 |         if row_major:
48 |             return xx, yy
49 |         else:
50 |             return yy, xx
51 | 
52 |     def grid_anchors(self, featmap_size, stride=16, device='cuda'):
53 |         base_anchors = self.base_anchors.to(device)
54 | 
55 |         feat_h, feat_w = featmap_size
56 |         shift_x = torch.arange(0, feat_w, device=device) * stride
57 |         shift_y = torch.arange(0, feat_h, device=device) * stride
58 |         shift_xx, shift_yy = self._meshgrid(shift_x, shift_y)
59 |         shifts = torch.stack([shift_xx, shift_yy, shift_xx, shift_yy], dim=-1)
60 |         shifts = shifts.type_as(base_anchors)
61 |         # first feat_w elements correspond to the first row of shifts
62 |         # add A anchors (1, A, 4) to K shifts (K, 1, 4) to get
63 |         # shifted anchors (K, A, 4), reshape to (K*A, 4)
64 | 
65 |         all_anchors = base_anchors[None, :, :] + shifts[:, None, :]
66 |         all_anchors = all_anchors.view(-1, 4)
67 |         # first A rows correspond to A anchors of (0, 0) in feature map,
68 |         # then (0, 1), (0, 2), ...
69 |         return all_anchors
70 | 
71 |     def valid_flags(self, featmap_size, valid_size, device='cuda'):
72 |         feat_h, feat_w = featmap_size
73 |         valid_h, valid_w = valid_size
74 |         assert valid_h <= feat_h and valid_w <= feat_w
75 |         valid_x = torch.zeros(feat_w, dtype=torch.uint8, device=device)
76 |         valid_y = torch.zeros(feat_h, dtype=torch.uint8, device=device)
77 |         valid_x[:valid_w] = 1
78 |         valid_y[:valid_h] = 1
79 |         valid_xx, valid_yy = self._meshgrid(valid_x, valid_y)
80 |         valid = valid_xx & valid_yy
81 |         valid = valid[:, None].expand(
82 |             valid.size(0), self.num_base_anchors).contiguous().view(-1)
83 |         return valid
84 | 


--------------------------------------------------------------------------------
/mmdet/core/anchor/anchor_target.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | from ..bbox import assign_and_sample, BBoxAssigner, SamplingResult, bbox2delta, rbbox3d2delta
  3 | 
  4 | def anchor_target(flat_anchors,
  5 |                   inside_flags,
  6 |                   gt_bboxes,
  7 |                   gt_labels,
  8 |                   target_means,
  9 |                   target_stds,
 10 |                   cfg,
 11 |                   cls_out_channels=1,
 12 |                   sampling=True):
 13 | 
 14 |     # assign gt and sample anchors
 15 | 
 16 |     anchors = flat_anchors[inside_flags]
 17 | 
 18 |     if sampling:
 19 |         assign_result, sampling_result = assign_and_sample(
 20 |             anchors, gt_bboxes, None, None, cfg)
 21 |     else:
 22 |         bbox_assigner = BBoxAssigner(**cfg.assigner)
 23 |         assign_result = bbox_assigner.assign(anchors, gt_bboxes, None, gt_labels)
 24 |         pos_inds = torch.nonzero(
 25 |             assign_result.gt_inds > 0).squeeze(-1).unique()
 26 |         neg_inds = torch.nonzero(
 27 |             assign_result.gt_inds == 0).squeeze(-1).unique()
 28 |         gt_flags = anchors.new_zeros(anchors.shape[0], dtype=torch.uint8)
 29 |         sampling_result = SamplingResult(pos_inds, neg_inds, anchors,
 30 |                                          gt_bboxes, assign_result, gt_flags)
 31 | 
 32 |     num_valid_anchors = anchors.shape[0]
 33 |     bbox_targets = torch.zeros_like(anchors)
 34 |     bbox_weights = torch.zeros_like(anchors)
 35 |     labels = anchors.new_zeros(num_valid_anchors, dtype=torch.long)
 36 |     label_weights = anchors.new_zeros(num_valid_anchors, dtype=torch.float)
 37 |     dir_weights = anchors.new_zeros(num_valid_anchors, dtype=torch.float)
 38 | 
 39 |     pos_inds = sampling_result.pos_inds
 40 |     neg_inds = sampling_result.neg_inds
 41 |     if len(pos_inds) > 0:
 42 |         pos_bbox_targets = rbbox3d2delta(sampling_result.pos_bboxes,
 43 |                                       sampling_result.pos_gt_bboxes,
 44 |                                       target_means, target_stds)
 45 |         bbox_targets[pos_inds, :] = pos_bbox_targets
 46 |         bbox_weights[pos_inds, :] = 1.0
 47 |         dir_weights[pos_inds] = 1.
 48 |         if gt_labels is None:
 49 |             labels[pos_inds] = 1
 50 |         else:
 51 |             labels[pos_inds] = gt_labels[sampling_result.pos_assigned_gt_inds]
 52 |         if cfg.pos_weight <= 0:
 53 |             label_weights[pos_inds] = 1.0
 54 |         else:
 55 |             label_weights[pos_inds] = cfg.pos_weight
 56 |     if len(neg_inds) > 0:
 57 |         label_weights[neg_inds] = 1.0
 58 | 
 59 |     # map up to original set of anchors
 60 |     num_total_anchors = flat_anchors.shape[0]
 61 |     labels = unmap(labels, num_total_anchors, inside_flags)
 62 |     label_weights = unmap(label_weights, num_total_anchors, inside_flags)
 63 |     if cls_out_channels > 1:
 64 |         labels, label_weights = expand_binary_labels(labels, label_weights,
 65 |                                                      cls_out_channels)
 66 |     bbox_targets = unmap(bbox_targets, num_total_anchors, inside_flags)
 67 |     bbox_weights = unmap(bbox_weights, num_total_anchors, inside_flags)
 68 | 
 69 |     dir_labels = get_direction_target(flat_anchors, bbox_targets)
 70 |     dir_weights = unmap(dir_weights, num_total_anchors, inside_flags)
 71 | 
 72 |     return (labels, label_weights, bbox_targets, bbox_weights, dir_labels, dir_weights, pos_inds,
 73 |             neg_inds)
 74 | 
 75 | 
 76 | def expand_binary_labels(labels, label_weights, cls_out_channels):
 77 |     bin_labels = labels.new_full(
 78 |         (labels.size(0), cls_out_channels), 0, dtype=torch.float32)
 79 |     inds = torch.nonzero(labels >= 1).squeeze()
 80 |     if inds.numel() > 0:
 81 |         bin_labels[inds, labels[inds] - 1] = 1
 82 |     bin_label_weights = label_weights.view(-1, 1).expand(
 83 |         label_weights.size(0), cls_out_channels)
 84 |     return bin_labels, bin_label_weights
 85 | 
 86 | def get_direction_target(anchors, reg_targets):
 87 |     anchors = anchors.view(-1, 7)
 88 |     rot_gt = reg_targets[:, -1] + anchors[:, -1]
 89 |     dir_cls_targets = (rot_gt > 0).long()
 90 |     return dir_cls_targets
 91 | 
 92 | def anchor_inside_flags(flat_anchors, valid_flags, img_shape,
 93 |                         allowed_border=0):
 94 |     img_h, img_w = img_shape[:2]
 95 |     if allowed_border >= 0:
 96 |         inside_flags = valid_flags & \
 97 |             (flat_anchors[:, 0] >= -allowed_border) & \
 98 |             (flat_anchors[:, 1] >= -allowed_border) & \
 99 |             (flat_anchors[:, 2] < img_w + allowed_border) & \
100 |             (flat_anchors[:, 3] < img_h + allowed_border)
101 |     else:
102 |         inside_flags = valid_flags
103 |     return inside_flags
104 | 
105 | 
106 | def unmap(data, count, inds, fill=0):
107 |     """ Unmap a subset of item (data) back to the original set of items (of
108 |     size count) """
109 |     if data.dim() == 1:
110 |         ret = data.new_full((count, ), fill)
111 |         ret[inds] = data
112 |     else:
113 |         new_size = (count, ) + data.size()[1:]
114 |         ret = data.new_full(new_size, fill)
115 |         ret[inds, :] = data
116 |     return ret
117 | 


--------------------------------------------------------------------------------
/mmdet/core/bbox/__init__.py:
--------------------------------------------------------------------------------
 1 | from .geometry import bbox_overlaps
 2 | from .assignment import BBoxAssigner, AssignResult
 3 | from .sampling import (BBoxSampler, SamplingResult, assign_and_sample,
 4 |                        random_choice)
 5 | from .transforms import (bbox2delta, delta2bbox, bbox_flip, bbox_mapping, rbbox3d2delta, delta2rbbox3d, add_sin_difference,
 6 |                          bbox_mapping_back, bbox2roi, roi2bbox, bbox2result, rbbox2roi, kitti_bbox2results, tensor2points)
 7 | from .bbox_target import bbox_target
 8 | 
 9 | __all__ = [
10 |     'bbox_overlaps', 'BBoxAssigner', 'AssignResult', 'BBoxSampler',
11 |     'SamplingResult', 'assign_and_sample', 'random_choice', 'bbox2delta',
12 |     'delta2bbox', 'bbox_flip', 'bbox_mapping', 'bbox_mapping_back', 'bbox2roi',
13 |     'roi2bbox', 'bbox2result', 'bbox_target','rbbox3d2delta','delta2rbbox3d',
14 |     'rbbox2roi', 'kitti_bbox2results','add_sin_difference','tensor2points',
15 | ]
16 | 


--------------------------------------------------------------------------------
/mmdet/core/bbox/bbox_target.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | from .transforms import bbox2delta, rbbox3d2delta
 4 | from ..utils import multi_apply
 5 | 
 6 | 
 7 | def bbox_target(pos_bboxes_list,
 8 |                 neg_bboxes_list,
 9 |                 pos_gt_bboxes_list,
10 |                 pos_gt_labels_list,
11 |                 cfg,
12 |                 reg_classes=1,
13 |                 target_means=[.0, .0, .0, .0],
14 |                 target_stds=[1.0, 1.0, 1.0, 1.0],
15 |                 concat=True):
16 |     labels, label_weights, bbox_targets, bbox_weights = multi_apply(
17 |         bbox_target_single,
18 |         pos_bboxes_list,
19 |         neg_bboxes_list,
20 |         pos_gt_bboxes_list,
21 |         pos_gt_labels_list,
22 |         cfg=cfg,
23 |         reg_classes=reg_classes,
24 |         target_means=target_means,
25 |         target_stds=target_stds)
26 | 
27 |     if concat:
28 |         labels = torch.cat(labels, 0)
29 |         label_weights = torch.cat(label_weights, 0)
30 |         bbox_targets = torch.cat(bbox_targets, 0)
31 |         bbox_weights = torch.cat(bbox_weights, 0)
32 |     return labels, label_weights, bbox_targets, bbox_weights
33 | 
34 | 
35 | def bbox_target_single(pos_bboxes,
36 |                        neg_bboxes,
37 |                        pos_gt_bboxes,
38 |                        pos_gt_labels,
39 |                        cfg,
40 |                        reg_classes=1,
41 |                        target_means=[.0, .0, .0, .0],
42 |                        target_stds=[1.0, 1.0, 1.0, 1.0]):
43 |     num_pos = pos_bboxes.size(0)
44 |     num_neg = neg_bboxes.size(0)
45 |     num_samples = num_pos + num_neg
46 |     labels = pos_bboxes.new_zeros(num_samples, dtype=torch.long)
47 |     label_weights = pos_bboxes.new_zeros(num_samples)
48 |     bbox_targets = pos_bboxes.new_zeros(num_samples, 7)
49 |     bbox_weights = pos_bboxes.new_zeros(num_samples, 7)
50 |     if num_pos > 0:
51 |         labels[:num_pos] = pos_gt_labels
52 |         pos_weight = 1.0 if cfg.pos_weight <= 0 else cfg.pos_weight
53 |         label_weights[:num_pos] = pos_weight
54 | 
55 |         pos_bbox_targets = rbbox3d2delta(pos_bboxes, pos_gt_bboxes, target_means,
56 |                                       target_stds)
57 |         bbox_targets[:num_pos, :] = pos_bbox_targets
58 |         bbox_weights[:num_pos, :] = 1
59 |     if num_neg > 0:
60 |         label_weights[-num_neg:] = 1.0
61 |     if reg_classes > 1:
62 |         bbox_targets, bbox_weights = expand_target(bbox_targets, bbox_weights,
63 |                                                    labels, reg_classes)
64 | 
65 |     return labels, label_weights, bbox_targets, bbox_weights
66 | 
67 | 
68 | def expand_target(bbox_targets, bbox_weights, labels, num_classes):
69 |     bbox_targets_expand = bbox_targets.new_zeros((bbox_targets.size(0),
70 |                                                   7 * num_classes))
71 |     bbox_weights_expand = bbox_weights.new_zeros((bbox_weights.size(0),
72 |                                                   7 * num_classes))
73 |     for i in torch.nonzero(labels > 0).squeeze(-1):
74 |         start, end = labels[i] * 7, (labels[i] + 1) * 7
75 |         bbox_targets_expand[i, start:end] = bbox_targets[i, :]
76 |         bbox_weights_expand[i, start:end] = bbox_weights[i, :]
77 |     return bbox_targets_expand, bbox_weights_expand
78 | 


--------------------------------------------------------------------------------
/mmdet/core/bbox/geometry.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | 
 4 | def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False):
 5 |     """Calculate overlap between two set of bboxes.
 6 | 
 7 |     If ``is_aligned`` is ``False``, then calculate the ious between each bbox
 8 |     of bboxes1 and bboxes2, otherwise the ious between each aligned pair of
 9 |     bboxes1 and bboxes2.
10 | 
11 |     Args:
12 |         bboxes1 (Tensor): shape (m, 4)
13 |         bboxes2 (Tensor): shape (n, 4), if is_aligned is ``True``, then m and n
14 |             must be equal.
15 |         mode (str): "iou" (intersection over union) or iof (intersection over
16 |             foreground).
17 | 
18 |     Returns:
19 |         ious(Tensor): shape (n, k) if is_aligned == False else shape (n, 1)
20 |     """
21 | 
22 |     assert mode in ['iou', 'iof']
23 | 
24 |     rows = bboxes1.size(0)
25 |     cols = bboxes2.size(0)
26 |     if is_aligned:
27 |         assert rows == cols
28 | 
29 |     if rows * cols == 0:
30 |         return bboxes1.new(rows, 1) if is_aligned else bboxes1.new(rows, cols)
31 | 
32 |     if is_aligned:
33 |         lt = torch.max(bboxes1[:, :2], bboxes2[:, :2])  # [rows, 2]
34 |         rb = torch.min(bboxes1[:, 2:], bboxes2[:, 2:])  # [rows, 2]
35 | 
36 |         wh = (rb - lt + 1).clamp(min=0)  # [rows, 2]
37 |         overlap = wh[:, 0] * wh[:, 1]
38 |         area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * (
39 |             bboxes1[:, 3] - bboxes1[:, 1] + 1)
40 | 
41 |         if mode == 'iou':
42 |             area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * (
43 |                 bboxes2[:, 3] - bboxes2[:, 1] + 1)
44 |             ious = overlap / (area1 + area2 - overlap)
45 |         else:
46 |             ious = overlap / area1
47 |     else:
48 |         lt = torch.max(bboxes1[:, None, :2], bboxes2[:, :2])  # [rows, cols, 2]
49 |         rb = torch.min(bboxes1[:, None, 2:], bboxes2[:, 2:])  # [rows, cols, 2]
50 | 
51 |         wh = (rb - lt + 1).clamp(min=0)  # [rows, cols, 2]
52 |         overlap = wh[:, :, 0] * wh[:, :, 1]
53 |         area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * (
54 |             bboxes1[:, 3] - bboxes1[:, 1] + 1)
55 | 
56 |         if mode == 'iou':
57 |             area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * (
58 |                 bboxes2[:, 3] - bboxes2[:, 1] + 1)
59 |             ious = overlap / (area1[:, None] + area2 - overlap)
60 |         else:
61 |             ious = overlap / (area1[:, None])
62 | 
63 |     return ious
64 | 


--------------------------------------------------------------------------------
/mmdet/core/bbox3d/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skyhehe123/SA-SSD/2d75c973af65453186bd9242d7fa5e62dc44ec03/mmdet/core/bbox3d/__init__.py


--------------------------------------------------------------------------------
/mmdet/core/bbox3d/bbox3d_target.py:
--------------------------------------------------------------------------------
 1 | from mmdet.core.bbox3d.target_ops import create_target_np
 2 | from mmdet.core.bbox3d import region_similarity as regionSimilarity
 3 | from mmdet.core.bbox3d import box_coders as boxCoders
 4 | 
 5 | class TargetEncoder:
 6 |     def __init__(self,
 7 |                  box_coders,
 8 |                  region_similarity):
 9 | 
10 |         self._similarity_fn = getattr(regionSimilarity, region_similarity)()
11 |         self._box_coder = getattr(boxCoders, box_coders)()
12 | 
13 |     @property
14 |     def box_coder(self):
15 |         return self._box_coder
16 | 
17 |     def assign(self,
18 |                anchors,
19 |                gt_boxes,
20 |                anchors_mask=None,
21 |                gt_classes=None,
22 |                pos_iou_thr=0.6,
23 |                neg_iou_thr=0.45,
24 |                positive_fraction=None,
25 |                sample_size=512,
26 |                ):
27 | 
28 |         return create_target_np(
29 |             anchors,
30 |             gt_boxes,
31 |             anchors_mask,
32 |             gt_classes,
33 |             similarity_fn=self._similarity_fn,
34 |             box_encoding_fn = self._box_coder.encode,
35 |             matched_threshold=pos_iou_thr,
36 |             unmatched_threshold=neg_iou_thr,
37 |             positive_fraction=positive_fraction,
38 |             rpn_batch_size=sample_size,
39 |             norm_by_num_examples=False,
40 |             box_code_size=self.box_coder.code_size)
41 | 
42 | 


--------------------------------------------------------------------------------
/mmdet/core/bbox3d/region_similarity.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | # ==============================================================================
 15 | 
 16 | """Region Similarity Calculators for BoxLists.
 17 | 
 18 | Region Similarity Calculators compare a pairwise measure of similarity
 19 | between the boxes in two BoxLists.
 20 | """
 21 | 
 22 | from mmdet.core.bbox3d.geometry import rbbox2d_to_near_bbox, iou_jit, distance_similarity
 23 | from mmdet.core.post_processing.rotate_nms_gpu import rotate_iou_gpu, rotate_iou_gpu_eval
 24 | import numba
 25 | 
 26 | @numba.jit(nopython=True, parallel=True)
 27 | def d3_box_overlap_kernel(boxes, qboxes, rinc, criterion=-1):
 28 |     N, K = boxes.shape[0], qboxes.shape[0]
 29 |     for i in range(N):
 30 |         for j in range(K):
 31 |             if rinc[i, j] > 0:
 32 |                 iw = (min(boxes[i, 2], qboxes[j, 2]) - max(
 33 |                     boxes[i, 2] - boxes[i, 5], qboxes[j, 2] - qboxes[j, 5]))
 34 | 
 35 |                 if iw > 0:
 36 |                     area1 = boxes[i, 3] * boxes[i, 4] * boxes[i, 5]
 37 |                     area2 = qboxes[j, 3] * qboxes[j, 4] * qboxes[j, 5]
 38 |                     inc = iw * rinc[i, j]
 39 |                     if criterion == -1:
 40 |                         ua = (area1 + area2 - inc)
 41 |                     elif criterion == 0:
 42 |                         ua = area1
 43 |                     elif criterion == 1:
 44 |                         ua = area2
 45 |                     else:
 46 |                         ua = 1.0
 47 |                     rinc[i, j] = inc / ua
 48 |                 else:
 49 |                     rinc[i, j] = 0.0
 50 | 
 51 | class RotateIou2dSimilarity(object):
 52 |     """Class to compute similarity based on Intersection over Union (IOU) metric.
 53 | 
 54 |     This class computes pairwise similarity between two BoxLists based on IOU.
 55 |     """
 56 |     def __call__(self, boxes1, boxes2):
 57 |         boxes1_rbv = boxes1[:, [0, 1, 3, 4, 6]]
 58 |         boxes2_rbv = boxes2[:, [0, 1, 3, 4, 6]]
 59 |         return rotate_iou_gpu(boxes1_rbv, boxes2_rbv)
 60 | 
 61 | class RotateIou3dSimilarity(object):
 62 |     """Class to compute similarity based on Intersection over Union (IOU) metric.
 63 | 
 64 |     This class computes pairwise similarity between two BoxLists based on IOU.
 65 |     """
 66 |     def __call__(self, boxes1, boxes2):
 67 |         boxes1_rbv = boxes1[:, [0, 1, 3, 4, 6]]
 68 |         boxes2_rbv = boxes2[:, [0, 1, 3, 4, 6]]
 69 |         rinc = rotate_iou_gpu_eval(boxes1_rbv, boxes2_rbv, criterion=2)
 70 |         d3_box_overlap_kernel(boxes1, boxes2, rinc)
 71 |         return rinc
 72 | 
 73 | class NearestIouSimilarity(object):
 74 |     """Class to compute similarity based on the squared distance metric.
 75 | 
 76 |     This class computes pairwise similarity between two BoxLists based on the
 77 |     negative squared distance metric.
 78 |     """
 79 | 
 80 |     def __call__(self, boxes1, boxes2):
 81 |         """Compute matrix of (negated) sq distances.
 82 | 
 83 |         Args:
 84 |           boxlist1: BoxList holding N boxes.
 85 |           boxlist2: BoxList holding M boxes.
 86 | 
 87 |         Returns:
 88 |           A tensor with shape [N, M] representing negated pairwise squared distance.
 89 |         """
 90 |         boxes1_rbv = boxes1[:, [0, 1, 3, 4, 6]]
 91 |         boxes2_rbv = boxes2[:, [0, 1, 3, 4, 6]]
 92 |         boxes1_bv = rbbox2d_to_near_bbox(boxes1_rbv)
 93 |         boxes2_bv = rbbox2d_to_near_bbox(boxes2_rbv)
 94 |         ret = iou_jit(boxes1_bv, boxes2_bv, eps=0.0)
 95 |         return ret
 96 | 
 97 | 
 98 | class DistanceSimilarity(object):
 99 |     """Class to compute similarity based on Intersection over Area (IOA) metric.
100 | 
101 |     This class computes pairwise similarity between two BoxLists based on their
102 |     pairwise intersections divided by the areas of second BoxLists.
103 |     """
104 | 
105 |     def __init__(self, distance_norm, with_rotation=False, rotation_alpha=0.5):
106 |         self._distance_norm = distance_norm
107 |         self._with_rotation = with_rotation
108 |         self._rotation_alpha = rotation_alpha
109 | 
110 |     def __call__(self, boxes1, boxes2):
111 |         """Compute matrix of (negated) sq distances.
112 | 
113 |         Args:
114 |           boxlist1: BoxList holding N boxes.
115 |           boxlist2: BoxList holding M boxes.
116 | 
117 |         Returns:
118 |           A tensor with shape [N, M] representing negated pairwise squared distance.
119 |         """
120 |         boxes1_rbv = boxes1[:, [0, 1, 3, 4, 6]]
121 |         boxes2_rbv = boxes2[:, [0, 1, 3, 4, 6]]
122 |         return distance_similarity(
123 |             boxes1_rbv[..., [0, 1, -1]],
124 |             boxes2_rbv[..., [0, 1, -1]],
125 |             dist_norm=self._distance_norm,
126 |             with_rotation=self._with_rotation,
127 |             rot_alpha=self._rotation_alpha)
128 | 
129 | 


--------------------------------------------------------------------------------
/mmdet/core/evaluation/__init__.py:
--------------------------------------------------------------------------------
 1 | from .class_names import (voc_classes, imagenet_det_classes,
 2 |                           imagenet_vid_classes, coco_classes, dataset_aliases,
 3 |                           get_classes)
 4 | from .coco_utils import coco_eval, fast_eval_recall, results2json
 5 | from .eval_hooks import (DistEvalHook, CocoDistEvalRecallHook,
 6 |                          CocoDistEvalmAPHook,KittiEvalmAPHook, DistEvalmAPHook)
 7 | from .mean_ap import average_precision, eval_map, print_map_summary
 8 | from .recall import (eval_recalls, print_recall_summary, plot_num_recall,
 9 |                      plot_iou_recall)
10 | 
11 | __all__ = [
12 |     'voc_classes', 'imagenet_det_classes', 'imagenet_vid_classes',
13 |     'coco_classes', 'dataset_aliases', 'get_classes', 'coco_eval',
14 |     'fast_eval_recall', 'results2json', 'DistEvalHook',
15 |     'CocoDistEvalRecallHook', 'CocoDistEvalmAPHook', 'average_precision',
16 |     'eval_map', 'print_map_summary', 'eval_recalls', 'print_recall_summary',
17 |     'plot_num_recall', 'plot_iou_recall','KittiEvalmAPHook','DistEvalmAPHook'
18 | ]
19 | 


--------------------------------------------------------------------------------
/mmdet/core/evaluation/bbox_overlaps.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | 
 4 | def bbox_overlaps(bboxes1, bboxes2, mode='iou'):
 5 |     """Calculate the ious between each bbox of bboxes1 and bboxes2.
 6 | 
 7 |     Args:
 8 |         bboxes1(ndarray): shape (n, 4)
 9 |         bboxes2(ndarray): shape (k, 4)
10 |         mode(str): iou (intersection over union) or iof (intersection
11 |             over foreground)
12 | 
13 |     Returns:
14 |         ious(ndarray): shape (n, k)
15 |     """
16 | 
17 |     assert mode in ['iou', 'iof']
18 | 
19 |     bboxes1 = bboxes1.astype(np.float32)
20 |     bboxes2 = bboxes2.astype(np.float32)
21 |     rows = bboxes1.shape[0]
22 |     cols = bboxes2.shape[0]
23 |     ious = np.zeros((rows, cols), dtype=np.float32)
24 |     if rows * cols == 0:
25 |         return ious
26 |     exchange = False
27 |     if bboxes1.shape[0] > bboxes2.shape[0]:
28 |         bboxes1, bboxes2 = bboxes2, bboxes1
29 |         ious = np.zeros((cols, rows), dtype=np.float32)
30 |         exchange = True
31 |     area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * (
32 |         bboxes1[:, 3] - bboxes1[:, 1] + 1)
33 |     area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * (
34 |         bboxes2[:, 3] - bboxes2[:, 1] + 1)
35 |     for i in range(bboxes1.shape[0]):
36 |         x_start = np.maximum(bboxes1[i, 0], bboxes2[:, 0])
37 |         y_start = np.maximum(bboxes1[i, 1], bboxes2[:, 1])
38 |         x_end = np.minimum(bboxes1[i, 2], bboxes2[:, 2])
39 |         y_end = np.minimum(bboxes1[i, 3], bboxes2[:, 3])
40 |         overlap = np.maximum(x_end - x_start + 1, 0) * np.maximum(
41 |             y_end - y_start + 1, 0)
42 |         if mode == 'iou':
43 |             union = area1[i] + area2 - overlap
44 |         else:
45 |             union = area1[i] if not exchange else area2
46 |         ious[i, :] = overlap / union
47 |     if exchange:
48 |         ious = ious.T
49 |     return ious
50 | 


--------------------------------------------------------------------------------
/mmdet/core/evaluation/class_names.py:
--------------------------------------------------------------------------------
  1 | import mmcv
  2 | 
  3 | 
  4 | def voc_classes():
  5 |     return [
  6 |         'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat',
  7 |         'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person',
  8 |         'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'
  9 |     ]
 10 | 
 11 | 
 12 | def imagenet_det_classes():
 13 |     return [
 14 |         'accordion', 'airplane', 'ant', 'antelope', 'apple', 'armadillo',
 15 |         'artichoke', 'axe', 'baby_bed', 'backpack', 'bagel', 'balance_beam',
 16 |         'banana', 'band_aid', 'banjo', 'baseball', 'basketball', 'bathing_cap',
 17 |         'beaker', 'bear', 'bee', 'bell_pepper', 'bench', 'bicycle', 'binder',
 18 |         'bird', 'bookshelf', 'bow_tie', 'bow', 'bowl', 'brassiere', 'burrito',
 19 |         'bus', 'butterfly', 'camel', 'can_opener', 'car', 'cart', 'cattle',
 20 |         'cello', 'centipede', 'chain_saw', 'chair', 'chime', 'cocktail_shaker',
 21 |         'coffee_maker', 'computer_keyboard', 'computer_mouse', 'corkscrew',
 22 |         'cream', 'croquet_ball', 'crutch', 'cucumber', 'cup_or_mug', 'diaper',
 23 |         'digital_clock', 'dishwasher', 'dog', 'domestic_cat', 'dragonfly',
 24 |         'drum', 'dumbbell', 'electric_fan', 'elephant', 'face_powder', 'fig',
 25 |         'filing_cabinet', 'flower_pot', 'flute', 'fox', 'french_horn', 'frog',
 26 |         'frying_pan', 'giant_panda', 'goldfish', 'golf_ball', 'golfcart',
 27 |         'guacamole', 'guitar', 'hair_dryer', 'hair_spray', 'hamburger',
 28 |         'hammer', 'hamster', 'harmonica', 'harp', 'hat_with_a_wide_brim',
 29 |         'head_cabbage', 'helmet', 'hippopotamus', 'horizontal_bar', 'horse',
 30 |         'hotdog', 'iPod', 'isopod', 'jellyfish', 'koala_bear', 'ladle',
 31 |         'ladybug', 'lamp', 'laptop', 'lemon', 'lion', 'lipstick', 'lizard',
 32 |         'lobster', 'maillot', 'maraca', 'microphone', 'microwave', 'milk_can',
 33 |         'miniskirt', 'monkey', 'motorcycle', 'mushroom', 'nail', 'neck_brace',
 34 |         'oboe', 'orange', 'otter', 'pencil_box', 'pencil_sharpener', 'perfume',
 35 |         'person', 'piano', 'pineapple', 'ping-pong_ball', 'pitcher', 'pizza',
 36 |         'plastic_bag', 'plate_rack', 'pomegranate', 'popsicle', 'porcupine',
 37 |         'power_drill', 'pretzel', 'printer', 'puck', 'punching_bag', 'purse',
 38 |         'rabbit', 'racket', 'ray', 'red_panda', 'refrigerator',
 39 |         'remote_control', 'rubber_eraser', 'rugby_ball', 'ruler',
 40 |         'salt_or_pepper_shaker', 'saxophone', 'scorpion', 'screwdriver',
 41 |         'seal', 'sheep', 'ski', 'skunk', 'snail', 'snake', 'snowmobile',
 42 |         'snowplow', 'soap_dispenser', 'soccer_ball', 'sofa', 'spatula',
 43 |         'squirrel', 'starfish', 'stethoscope', 'stove', 'strainer',
 44 |         'strawberry', 'stretcher', 'sunglasses', 'swimming_trunks', 'swine',
 45 |         'syringe', 'table', 'tape_player', 'tennis_ball', 'tick', 'tie',
 46 |         'tiger', 'toaster', 'traffic_light', 'train', 'trombone', 'trumpet',
 47 |         'turtle', 'tv_or_monitor', 'unicycle', 'vacuum', 'violin',
 48 |         'volleyball', 'waffle_iron', 'washer', 'water_bottle', 'watercraft',
 49 |         'whale', 'wine_bottle', 'zebra'
 50 |     ]
 51 | 
 52 | 
 53 | def imagenet_vid_classes():
 54 |     return [
 55 |         'airplane', 'antelope', 'bear', 'bicycle', 'bird', 'bus', 'car',
 56 |         'cattle', 'dog', 'domestic_cat', 'elephant', 'fox', 'giant_panda',
 57 |         'hamster', 'horse', 'lion', 'lizard', 'monkey', 'motorcycle', 'rabbit',
 58 |         'red_panda', 'sheep', 'snake', 'squirrel', 'tiger', 'train', 'turtle',
 59 |         'watercraft', 'whale', 'zebra'
 60 |     ]
 61 | 
 62 | 
 63 | def coco_classes():
 64 |     return [
 65 |         'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
 66 |         'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
 67 |         'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
 68 |         'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella',
 69 |         'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
 70 |         'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard',
 71 |         'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork',
 72 |         'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
 73 |         'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
 74 |         'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
 75 |         'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
 76 |         'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
 77 |         'scissors', 'teddy bear', 'hair drier', 'toothbrush'
 78 |     ]
 79 | 
 80 | def kitti_classes():
 81 |     return [
 82 |         'car', 'pedestrians','cyclists'
 83 |     ]
 84 | 
 85 | dataset_aliases = {
 86 |     'voc': ['voc', 'pascal_voc', 'voc07', 'voc12'],
 87 |     'imagenet_det': ['det', 'imagenet_det', 'ilsvrc_det'],
 88 |     'imagenet_vid': ['vid', 'imagenet_vid', 'ilsvrc_vid'],
 89 |     'coco': ['coco', 'mscoco', 'ms_coco'],
 90 |     'kitti': ['kitti']
 91 | }
 92 | 
 93 | 
 94 | def get_classes(dataset):
 95 |     """Get class names of a dataset."""
 96 |     alias2name = {}
 97 |     for name, aliases in dataset_aliases.items():
 98 |         for alias in aliases:
 99 |             alias2name[alias] = name
100 | 
101 |     if mmcv.is_str(dataset):
102 |         if dataset in alias2name:
103 |             labels = eval(alias2name[dataset] + '_classes()')
104 |         else:
105 |             raise ValueError('Unrecognized dataset: {}'.format(dataset))
106 |     else:
107 |         raise TypeError('dataset must a str, but got {}'.format(type(dataset)))
108 |     return labels
109 | 


--------------------------------------------------------------------------------
/mmdet/core/evaluation/coco_utils.py:
--------------------------------------------------------------------------------
  1 | import mmcv
  2 | import numpy as np
  3 | from pycocotools.coco import COCO
  4 | from pycocotools.cocoeval import COCOeval
  5 | 
  6 | from .recall import eval_recalls
  7 | 
  8 | 
  9 | def coco_eval(result_file, result_types, coco, max_dets=(100, 300, 1000)):
 10 |     for res_type in result_types:
 11 |         assert res_type in [
 12 |             'proposal', 'proposal_fast', 'bbox', 'segm', 'keypoints'
 13 |         ]
 14 | 
 15 |     if mmcv.is_str(coco):
 16 |         coco = COCO(coco)
 17 |     assert isinstance(coco, COCO)
 18 | 
 19 |     if result_types == ['proposal_fast']:
 20 |         ar = fast_eval_recall(result_file, coco, np.array(max_dets))
 21 |         for i, num in enumerate(max_dets):
 22 |             print('AR@{}\t= {:.4f}'.format(num, ar[i]))
 23 |         return
 24 | 
 25 |     assert result_file.endswith('.json')
 26 |     coco_dets = coco.loadRes(result_file)
 27 | 
 28 |     img_ids = coco.getImgIds()
 29 |     for res_type in result_types:
 30 |         iou_type = 'bbox' if res_type == 'proposal' else res_type
 31 |         cocoEval = COCOeval(coco, coco_dets, iou_type)
 32 |         cocoEval.params.imgIds = img_ids
 33 |         if res_type == 'proposal':
 34 |             cocoEval.params.useCats = 0
 35 |             cocoEval.params.maxDets = list(max_dets)
 36 |         cocoEval.evaluate()
 37 |         cocoEval.accumulate()
 38 |         cocoEval.summarize()
 39 | 
 40 | 
 41 | def fast_eval_recall(results,
 42 |                      coco,
 43 |                      max_dets,
 44 |                      iou_thrs=np.arange(0.5, 0.96, 0.05)):
 45 |     if mmcv.is_str(results):
 46 |         assert results.endswith('.pkl')
 47 |         results = mmcv.load(results)
 48 |     elif not isinstance(results, list):
 49 |         raise TypeError(
 50 |             'results must be a list of numpy arrays or a filename, not {}'.
 51 |             format(type(results)))
 52 | 
 53 |     gt_bboxes = []
 54 |     img_ids = coco.getImgIds()
 55 |     for i in range(len(img_ids)):
 56 |         ann_ids = coco.getAnnIds(imgIds=img_ids[i])
 57 |         ann_info = coco.loadAnns(ann_ids)
 58 |         if len(ann_info) == 0:
 59 |             gt_bboxes.append(np.zeros((0, 4)))
 60 |             continue
 61 |         bboxes = []
 62 |         for ann in ann_info:
 63 |             if ann.get('ignore', False) or ann['iscrowd']:
 64 |                 continue
 65 |             x1, y1, w, h = ann['bbox']
 66 |             bboxes.append([x1, y1, x1 + w - 1, y1 + h - 1])
 67 |         bboxes = np.array(bboxes, dtype=np.float32)
 68 |         if bboxes.shape[0] == 0:
 69 |             bboxes = np.zeros((0, 4))
 70 |         gt_bboxes.append(bboxes)
 71 | 
 72 |     recalls = eval_recalls(
 73 |         gt_bboxes, results, max_dets, iou_thrs, print_summary=False)
 74 |     ar = recalls.mean(axis=1)
 75 |     return ar
 76 | 
 77 | 
 78 | def xyxy2xywh(bbox):
 79 |     _bbox = bbox.tolist()
 80 |     return [
 81 |         _bbox[0],
 82 |         _bbox[1],
 83 |         _bbox[2] - _bbox[0] + 1,
 84 |         _bbox[3] - _bbox[1] + 1,
 85 |     ]
 86 | 
 87 | 
 88 | def proposal2json(dataset, results):
 89 |     json_results = []
 90 |     for idx in range(len(dataset)):
 91 |         img_id = dataset.img_ids[idx]
 92 |         bboxes = results[idx]
 93 |         for i in range(bboxes.shape[0]):
 94 |             data = dict()
 95 |             data['image_id'] = img_id
 96 |             data['bbox'] = xyxy2xywh(bboxes[i])
 97 |             data['score'] = float(bboxes[i][4])
 98 |             data['category_id'] = 1
 99 |             json_results.append(data)
100 |     return json_results
101 | 
102 | 
103 | def det2json(dataset, results):
104 |     json_results = []
105 |     for idx in range(len(dataset)):
106 |         img_id = dataset.img_ids[idx]
107 |         result = results[idx]
108 |         for label in range(len(result)):
109 |             bboxes = result[label]
110 |             for i in range(bboxes.shape[0]):
111 |                 data = dict()
112 |                 data['image_id'] = img_id
113 |                 data['bbox'] = xyxy2xywh(bboxes[i])
114 |                 data['score'] = float(bboxes[i][4])
115 |                 data['category_id'] = dataset.cat_ids[label]
116 |                 json_results.append(data)
117 |     return json_results
118 | 
119 | 
120 | def segm2json(dataset, results):
121 |     json_results = []
122 |     for idx in range(len(dataset)):
123 |         img_id = dataset.img_ids[idx]
124 |         det, seg = results[idx]
125 |         for label in range(len(det)):
126 |             bboxes = det[label]
127 |             segms = seg[label]
128 |             for i in range(bboxes.shape[0]):
129 |                 data = dict()
130 |                 data['image_id'] = img_id
131 |                 data['bbox'] = xyxy2xywh(bboxes[i])
132 |                 data['score'] = float(bboxes[i][4])
133 |                 data['category_id'] = dataset.cat_ids[label]
134 |                 segms[i]['counts'] = segms[i]['counts'].decode()
135 |                 data['segmentation'] = segms[i]
136 |                 json_results.append(data)
137 |     return json_results
138 | 
139 | 
140 | def results2json(dataset, results, out_file):
141 |     if isinstance(results[0], list):
142 |         json_results = det2json(dataset, results)
143 |     elif isinstance(results[0], tuple):
144 |         json_results = segm2json(dataset, results)
145 |     elif isinstance(results[0], np.ndarray):
146 |         json_results = proposal2json(dataset, results)
147 |     else:
148 |         raise TypeError('invalid type of results')
149 |     mmcv.dump(json_results, out_file)
150 | 


--------------------------------------------------------------------------------
/mmdet/core/evaluation/recall.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from terminaltables import AsciiTable
  3 | 
  4 | from .bbox_overlaps import bbox_overlaps
  5 | 
  6 | 
  7 | def _recalls(all_ious, proposal_nums, thrs):
  8 | 
  9 |     img_num = all_ious.shape[0]
 10 |     total_gt_num = sum([ious.shape[0] for ious in all_ious])
 11 | 
 12 |     _ious = np.zeros((proposal_nums.size, total_gt_num), dtype=np.float32)
 13 |     for k, proposal_num in enumerate(proposal_nums):
 14 |         tmp_ious = np.zeros(0)
 15 |         for i in range(img_num):
 16 |             ious = all_ious[i][:, :proposal_num].copy()
 17 |             gt_ious = np.zeros((ious.shape[0]))
 18 |             if ious.size == 0:
 19 |                 tmp_ious = np.hstack((tmp_ious, gt_ious))
 20 |                 continue
 21 |             for j in range(ious.shape[0]):
 22 |                 gt_max_overlaps = ious.argmax(axis=1)
 23 |                 max_ious = ious[np.arange(0, ious.shape[0]), gt_max_overlaps]
 24 |                 gt_idx = max_ious.argmax()
 25 |                 gt_ious[j] = max_ious[gt_idx]
 26 |                 box_idx = gt_max_overlaps[gt_idx]
 27 |                 ious[gt_idx, :] = -1
 28 |                 ious[:, box_idx] = -1
 29 |             tmp_ious = np.hstack((tmp_ious, gt_ious))
 30 |         _ious[k, :] = tmp_ious
 31 | 
 32 |     _ious = np.fliplr(np.sort(_ious, axis=1))
 33 |     recalls = np.zeros((proposal_nums.size, thrs.size))
 34 |     for i, thr in enumerate(thrs):
 35 |         recalls[:, i] = (_ious >= thr).sum(axis=1) / float(total_gt_num)
 36 | 
 37 |     return recalls
 38 | 
 39 | 
 40 | def set_recall_param(proposal_nums, iou_thrs):
 41 |     """Check proposal_nums and iou_thrs and set correct format.
 42 |     """
 43 |     if isinstance(proposal_nums, list):
 44 |         _proposal_nums = np.array(proposal_nums)
 45 |     elif isinstance(proposal_nums, int):
 46 |         _proposal_nums = np.array([proposal_nums])
 47 |     else:
 48 |         _proposal_nums = proposal_nums
 49 | 
 50 |     if iou_thrs is None:
 51 |         _iou_thrs = np.array([0.5])
 52 |     elif isinstance(iou_thrs, list):
 53 |         _iou_thrs = np.array(iou_thrs)
 54 |     elif isinstance(iou_thrs, float):
 55 |         _iou_thrs = np.array([iou_thrs])
 56 |     else:
 57 |         _iou_thrs = iou_thrs
 58 | 
 59 |     return _proposal_nums, _iou_thrs
 60 | 
 61 | 
 62 | def eval_recalls(gts,
 63 |                  proposals,
 64 |                  proposal_nums=None,
 65 |                  iou_thrs=None,
 66 |                  print_summary=True):
 67 |     """Calculate recalls.
 68 | 
 69 |     Args:
 70 |         gts(list or ndarray): a list of arrays of shape (n, 4)
 71 |         proposals(list or ndarray): a list of arrays of shape (k, 4) or (k, 5)
 72 |         proposal_nums(int or list of int or ndarray): top N proposals
 73 |         thrs(float or list or ndarray): iou thresholds
 74 | 
 75 |     Returns:
 76 |         ndarray: recalls of different ious and proposal nums
 77 |     """
 78 | 
 79 |     img_num = len(gts)
 80 |     assert img_num == len(proposals)
 81 | 
 82 |     proposal_nums, iou_thrs = set_recall_param(proposal_nums, iou_thrs)
 83 | 
 84 |     all_ious = []
 85 |     for i in range(img_num):
 86 |         if proposals[i].ndim == 2 and proposals[i].shape[1] == 5:
 87 |             scores = proposals[i][:, 4]
 88 |             sort_idx = np.argsort(scores)[::-1]
 89 |             img_proposal = proposals[i][sort_idx, :]
 90 |         else:
 91 |             img_proposal = proposals[i]
 92 |         prop_num = min(img_proposal.shape[0], proposal_nums[-1])
 93 |         if gts[i] is None or gts[i].shape[0] == 0:
 94 |             ious = np.zeros((0, img_proposal.shape[0]), dtype=np.float32)
 95 |         else:
 96 |             ious = bbox_overlaps(gts[i], img_proposal[:prop_num, :4])
 97 |         all_ious.append(ious)
 98 |     all_ious = np.array(all_ious)
 99 |     recalls = _recalls(all_ious, proposal_nums, iou_thrs)
100 |     if print_summary:
101 |         print_recall_summary(recalls, proposal_nums, iou_thrs)
102 |     return recalls
103 | 
104 | 
105 | def print_recall_summary(recalls,
106 |                          proposal_nums,
107 |                          iou_thrs,
108 |                          row_idxs=None,
109 |                          col_idxs=None):
110 |     """Print recalls in a table.
111 | 
112 |     Args:
113 |         recalls(ndarray): calculated from `bbox_recalls`
114 |         proposal_nums(ndarray or list): top N proposals
115 |         iou_thrs(ndarray or list): iou thresholds
116 |         row_idxs(ndarray): which rows(proposal nums) to print
117 |         col_idxs(ndarray): which cols(iou thresholds) to print
118 |     """
119 |     proposal_nums = np.array(proposal_nums, dtype=np.int32)
120 |     iou_thrs = np.array(iou_thrs)
121 |     if row_idxs is None:
122 |         row_idxs = np.arange(proposal_nums.size)
123 |     if col_idxs is None:
124 |         col_idxs = np.arange(iou_thrs.size)
125 |     row_header = [''] + iou_thrs[col_idxs].tolist()
126 |     table_data = [row_header]
127 |     for i, num in enumerate(proposal_nums[row_idxs]):
128 |         row = [
129 |             '{:.3f}'.format(val)
130 |             for val in recalls[row_idxs[i], col_idxs].tolist()
131 |         ]
132 |         row.insert(0, num)
133 |         table_data.append(row)
134 |     table = AsciiTable(table_data)
135 |     print(table.table)
136 | 
137 | 
138 | def plot_num_recall(recalls, proposal_nums):
139 |     """Plot Proposal_num-Recalls curve.
140 | 
141 |     Args:
142 |         recalls(ndarray or list): shape (k,)
143 |         proposal_nums(ndarray or list): same shape as `recalls`
144 |     """
145 |     if isinstance(proposal_nums, np.ndarray):
146 |         _proposal_nums = proposal_nums.tolist()
147 |     else:
148 |         _proposal_nums = proposal_nums
149 |     if isinstance(recalls, np.ndarray):
150 |         _recalls = recalls.tolist()
151 |     else:
152 |         _recalls = recalls
153 | 
154 |     import matplotlib.pyplot as plt
155 |     f = plt.figure()
156 |     plt.plot([0] + _proposal_nums, [0] + _recalls)
157 |     plt.xlabel('Proposal num')
158 |     plt.ylabel('Recall')
159 |     plt.axis([0, proposal_nums.max(), 0, 1])
160 |     f.show()
161 | 
162 | 
163 | def plot_iou_recall(recalls, iou_thrs):
164 |     """Plot IoU-Recalls curve.
165 | 
166 |     Args:
167 |         recalls(ndarray or list): shape (k,)
168 |         iou_thrs(ndarray or list): same shape as `recalls`
169 |     """
170 |     if isinstance(iou_thrs, np.ndarray):
171 |         _iou_thrs = iou_thrs.tolist()
172 |     else:
173 |         _iou_thrs = iou_thrs
174 |     if isinstance(recalls, np.ndarray):
175 |         _recalls = recalls.tolist()
176 |     else:
177 |         _recalls = recalls
178 | 
179 |     import matplotlib.pyplot as plt
180 |     f = plt.figure()
181 |     plt.plot(_iou_thrs + [1.0], _recalls + [0.])
182 |     plt.xlabel('IoU')
183 |     plt.ylabel('Recall')
184 |     plt.axis([iou_thrs.min(), 1, 0, 1])
185 |     f.show()
186 | 
187 | 
188 | 


--------------------------------------------------------------------------------
/mmdet/core/loss/__init__.py:
--------------------------------------------------------------------------------
 1 | from .losses import (weighted_nll_loss, weighted_cross_entropy,
 2 |                      weighted_binary_cross_entropy, sigmoid_focal_loss,
 3 |                      weighted_sigmoid_focal_loss, mask_cross_entropy,
 4 |                      smooth_l1_loss, weighted_smoothl1, l1_loss, weighted_l1, accuracy)
 5 | 
 6 | __all__ = [
 7 |     'weighted_nll_loss', 'weighted_cross_entropy',
 8 |     'weighted_binary_cross_entropy', 'sigmoid_focal_loss',
 9 |     'weighted_sigmoid_focal_loss', 'mask_cross_entropy', 'smooth_l1_loss',
10 |     'weighted_smoothl1', 'l1_loss', 'weighted_l1', 'accuracy'
11 | ]
12 | 


--------------------------------------------------------------------------------
/mmdet/core/loss/losses.py:
--------------------------------------------------------------------------------
  1 | # TODO merge naive and weighted loss.
  2 | import torch
  3 | import torch.nn.functional as F
  4 | 
  5 | 
  6 | def weighted_nll_loss(pred, label, weight, avg_factor=None):
  7 |     if avg_factor is None:
  8 |         avg_factor = max(torch.sum(weight > 0).float().item(), 1.)
  9 |     raw = F.nll_loss(pred, label, reduction='none')
 10 |     return torch.sum(raw * weight)[None] / avg_factor
 11 | 
 12 | 
 13 | def weighted_cross_entropy(pred, label, weight, avg_factor=None, reduce=True):
 14 |     if avg_factor is None:
 15 |         avg_factor = max(torch.sum(weight > 0).float().item(), 1.)
 16 |     raw = F.cross_entropy(pred, label, reduction='none')
 17 |     if reduce:
 18 |         return torch.sum(raw * weight)[None] / avg_factor
 19 |     else:
 20 |         return raw * weight / avg_factor
 21 | 
 22 | 
 23 | def weighted_binary_cross_entropy(pred, label, weight, avg_factor=None):
 24 |     if avg_factor is None:
 25 |         avg_factor = max(torch.sum(weight > 0).float().item(), 1.)
 26 |     return F.binary_cross_entropy_with_logits(
 27 |         pred, label.float(), weight.float(),
 28 |         reduction='sum')[None] / avg_factor
 29 | 
 30 | 
 31 | def sigmoid_focal_loss(pred,
 32 |                        target,
 33 |                        weight,
 34 |                        gamma=2.0,
 35 |                        alpha=0.25,
 36 |                        reduction='mean'):
 37 |     pred_sigmoid = pred.sigmoid()
 38 |     target = target.type_as(pred)
 39 |     pt = (1 - pred_sigmoid) * target + pred_sigmoid * (1 - target)
 40 |     weight = (alpha * target + (1 - alpha) * (1 - target)) * weight
 41 |     weight = weight * pt.pow(gamma)
 42 |     loss = F.binary_cross_entropy_with_logits(
 43 |         pred, target, reduction='none') * weight
 44 |     reduction_enum = F._Reduction.get_enum(reduction)
 45 |     # none: 0, mean:1, sum: 2
 46 |     if reduction_enum == 0:
 47 |         return loss
 48 |     elif reduction_enum == 1:
 49 |         return loss.mean()
 50 |     elif reduction_enum == 2:
 51 |         return loss.sum()
 52 | 
 53 | 
 54 | def weighted_sigmoid_focal_loss(pred,
 55 |                                 target,
 56 |                                 weight,
 57 |                                 gamma=2.0,
 58 |                                 alpha=0.25,
 59 |                                 avg_factor=None,
 60 |                                 num_classes=80):
 61 |     if avg_factor is None:
 62 |         avg_factor = torch.sum(weight > 0).float().item() / num_classes + 1e-6
 63 |     return sigmoid_focal_loss(
 64 |         pred, target, weight, gamma=gamma, alpha=alpha,
 65 |         reduction='sum')[None] / avg_factor
 66 | 
 67 | 
 68 | def mask_cross_entropy(pred, target, label):
 69 |     num_rois = pred.size()[0]
 70 |     inds = torch.arange(0, num_rois, dtype=torch.long, device=pred.device)
 71 |     pred_slice = pred[inds, label].squeeze(1)
 72 |     return F.binary_cross_entropy_with_logits(
 73 |         pred_slice, target, reduction='mean')[None]
 74 | 
 75 | 
 76 | def smooth_l1_loss(pred, target, beta=1.0, reduction='mean'):
 77 |     assert beta > 0
 78 |     assert pred.size() == target.size() and target.numel() > 0
 79 |     diff = torch.abs(pred - target)
 80 |     loss = torch.where(diff < beta, 0.5 * diff * diff / beta,
 81 |                        diff - 0.5 * beta)
 82 |     reduction_enum = F._Reduction.get_enum(reduction)
 83 |     # none: 0, mean:1, sum: 2
 84 |     if reduction_enum == 0:
 85 |         return loss
 86 |     elif reduction_enum == 1:
 87 |         return loss.sum() / pred.numel()
 88 |     elif reduction_enum == 2:
 89 |         return loss.sum()
 90 | 
 91 | 
 92 | def weighted_smoothl1(pred, target, weight, beta=1.0, avg_factor=None):
 93 |     if avg_factor is None:
 94 |         avg_factor = torch.sum(weight > 0).float().item() / 4 + 1e-6
 95 |     loss = smooth_l1_loss(pred, target, beta, reduction='none')
 96 |     return torch.sum(loss * weight)[None] / avg_factor
 97 | 
 98 | def l1_loss(pred, target, reduction='mean'):
 99 |     assert pred.size() == target.size() and target.numel() > 0
100 |     loss = torch.abs(pred - target)
101 |     reduction_enum = F._Reduction.get_enum(reduction)
102 |     # none: 0, mean:1, sum: 2
103 |     if reduction_enum == 0:
104 |         return loss
105 |     elif reduction_enum == 1:
106 |         return loss.sum() / pred.numel()
107 |     elif reduction_enum == 2:
108 |         return loss.sum()
109 | 
110 | def weighted_l1(pred, target, weight, avg_factor=None):
111 |     if avg_factor is None:
112 |         avg_factor = torch.sum(weight > 0).float().item() / 4 + 1e-6
113 |     loss = l1_loss(pred, target, reduction='none')
114 |     return torch.sum(loss * weight)[None] / avg_factor
115 | 
116 | def accuracy(pred, target, topk=1):
117 |     if isinstance(topk, int):
118 |         topk = (topk, )
119 |         return_single = True
120 |     else:
121 |         return_single = False
122 | 
123 |     maxk = max(topk)
124 |     _, pred_label = pred.topk(maxk, 1, True, True)
125 |     pred_label = pred_label.t()
126 |     correct = pred_label.eq(target.view(1, -1).expand_as(pred_label))
127 | 
128 |     res = []
129 |     for k in topk:
130 |         correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)
131 |         res.append(correct_k.mul_(100.0 / pred.size(0)))
132 |     return res[0] if return_single else res


--------------------------------------------------------------------------------
/mmdet/core/mask/__init__.py:
--------------------------------------------------------------------------------
1 | from .utils import split_combined_polys
2 | from .mask_target import mask_target
3 | 
4 | __all__ = ['split_combined_polys', 'mask_target']
5 | 


--------------------------------------------------------------------------------
/mmdet/core/mask/mask_target.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import numpy as np
 3 | import mmcv
 4 | 
 5 | 
 6 | def mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_masks_list,
 7 |                 cfg):
 8 |     cfg_list = [cfg for _ in range(len(pos_proposals_list))]
 9 |     mask_targets = map(mask_target_single, pos_proposals_list,
10 |                        pos_assigned_gt_inds_list, gt_masks_list, cfg_list)
11 |     mask_targets = torch.cat(list(mask_targets))
12 |     return mask_targets
13 | 
14 | 
15 | def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_masks, cfg):
16 |     mask_size = cfg.mask_size
17 |     num_pos = pos_proposals.size(0)
18 |     mask_targets = []
19 |     if num_pos > 0:
20 |         proposals_np = pos_proposals.cpu().numpy()
21 |         pos_assigned_gt_inds = pos_assigned_gt_inds.cpu().numpy()
22 |         for i in range(num_pos):
23 |             gt_mask = gt_masks[pos_assigned_gt_inds[i]]
24 |             bbox = proposals_np[i, :].astype(np.int32)
25 |             x1, y1, x2, y2 = bbox
26 |             w = np.maximum(x2 - x1 + 1, 1)
27 |             h = np.maximum(y2 - y1 + 1, 1)
28 |             # mask is uint8 both before and after resizing
29 |             target = mmcv.imresize(gt_mask[y1:y1 + h, x1:x1 + w],
30 |                                    (mask_size, mask_size))
31 |             mask_targets.append(target)
32 |         mask_targets = torch.from_numpy(np.stack(mask_targets)).float().to(
33 |             pos_proposals.device)
34 |     else:
35 |         mask_targets = pos_proposals.new_zeros((0, mask_size, mask_size))
36 |     return mask_targets
37 | 


--------------------------------------------------------------------------------
/mmdet/core/mask/utils.py:
--------------------------------------------------------------------------------
 1 | import mmcv
 2 | 
 3 | 
 4 | def split_combined_polys(polys, poly_lens, polys_per_mask):
 5 |     """Split the combined 1-D polys into masks.
 6 | 
 7 |     A mask is represented as a list of polys, and a poly is represented as
 8 |     a 1-D array. In dataset, all masks are concatenated into a single 1-D
 9 |     tensor. Here we need to split the tensor into original representations.
10 | 
11 |     Args:
12 |         polys (list): a list (length = image num) of 1-D tensors
13 |         poly_lens (list): a list (length = image num) of poly length
14 |         polys_per_mask (list): a list (length = image num) of poly number
15 |             of each mask
16 | 
17 |     Returns:
18 |         list: a list (length = image num) of list (length = mask num) of
19 |             list (length = poly num) of numpy array
20 |     """
21 |     mask_polys_list = []
22 |     for img_id in range(len(polys)):
23 |         polys_single = polys[img_id]
24 |         polys_lens_single = poly_lens[img_id].tolist()
25 |         polys_per_mask_single = polys_per_mask[img_id].tolist()
26 | 
27 |         split_polys = mmcv.slice_list(polys_single, polys_lens_single)
28 |         mask_polys = mmcv.slice_list(split_polys, polys_per_mask_single)
29 |         mask_polys_list.append(mask_polys)
30 |     return mask_polys_list
31 | 


--------------------------------------------------------------------------------
/mmdet/core/point_cloud/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/skyhehe123/SA-SSD/2d75c973af65453186bd9242d7fa5e62dc44ec03/mmdet/core/point_cloud/__init__.py


--------------------------------------------------------------------------------
/mmdet/core/point_cloud/voxel_generator.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from mmdet.ops.points_op import points_to_voxel
 3 | 
 4 | class VoxelGenerator:
 5 |     def __init__(self,
 6 |                  voxel_size,
 7 |                  point_cloud_range,
 8 |                  max_num_points,
 9 |                  max_voxels=20000):
10 |         point_cloud_range = np.array(point_cloud_range, dtype=np.float32)
11 |         # [0, -40, -3, 70.4, 40, 1]
12 |         voxel_size = np.array(voxel_size, dtype=np.float32)
13 |         grid_size = (
14 |             point_cloud_range[3:] - point_cloud_range[:3]) / voxel_size
15 |         grid_size = np.round(grid_size).astype(np.int64)
16 |         self._voxel_size = voxel_size
17 |         self._point_cloud_range = point_cloud_range
18 |         self._max_num_points = max_num_points
19 |         self._max_voxels = max_voxels
20 |         self._grid_size = grid_size
21 | 
22 |     def generate(self, points):
23 |         return points_to_voxel(
24 |             points, self._voxel_size, self._point_cloud_range,
25 |             self._max_num_points, True, self._max_voxels)
26 | 
27 |     @property
28 |     def voxel_size(self):
29 |         return self._voxel_size
30 | 
31 |     @property
32 |     def max_num_points_per_voxel(self):
33 |         return self._max_num_points
34 | 
35 |     @property
36 |     def point_cloud_range(self):
37 |         return self._point_cloud_range
38 | 
39 |     @property
40 |     def grid_size(self):
41 |         return self._grid_size
42 | 
43 | 
44 | 


--------------------------------------------------------------------------------
/mmdet/core/post_processing/__init__.py:
--------------------------------------------------------------------------------
1 | from .merge_augs import (merge_aug_proposals, merge_aug_bboxes,
2 |                          merge_aug_scores, merge_aug_masks)
3 | from .rotate_nms_gpu import rotate_nms_gpu
4 | __all__ = [
5 |     'merge_aug_proposals', 'merge_aug_bboxes',
6 |     'merge_aug_scores', 'merge_aug_masks','rotate_nms_gpu'
7 | ]
8 | 


--------------------------------------------------------------------------------
/mmdet/core/post_processing/bbox_nms.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from mmdet.ops.iou3d.iou3d_utils import nms_gpu
 3 | 
 4 | def rotate_nms_torch(rbboxes,
 5 |                      scores,
 6 |                      pre_max_size=None,
 7 |                      post_max_size=None,
 8 |                      iou_threshold=0.5):
 9 |     if pre_max_size is not None:
10 |         num_keeped_scores = scores.shape[0]
11 |         pre_max_size = min(num_keeped_scores, pre_max_size)
12 |         scores, indices = torch.topk(scores, k=pre_max_size)
13 |         rbboxes = rbboxes[indices]
14 | 
15 |     if len(rbboxes) == 0:
16 |         keep = torch.empty((0,), dtype=torch.int64)
17 |     else:
18 |         ret = nms_gpu(rbboxes, scores, iou_threshold)
19 |         keep = ret[:post_max_size]
20 | 
21 |     if keep.shape[0] == 0:
22 |         return None
23 | 
24 |     if pre_max_size is not None:
25 |         return indices[keep]
26 |     else:
27 |         return keep


--------------------------------------------------------------------------------
/mmdet/core/post_processing/merge_augs.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | import numpy as np
 4 | from mmdet.core.bbox.transforms import bbox_mapping_back
 5 | 
 6 | 
 7 | def merge_aug_proposals(aug_proposals, img_metas, rpn_test_cfg):
 8 |     """Merge augmented proposals (multiscale, flip, etc.)
 9 | 
10 |     Args:
11 |         aug_proposals (list[Tensor]): proposals from different testing
12 |             schemes, shape (n, 5). Note that they are not rescaled to the
13 |             original image size.
14 |         img_metas (list[dict]): image info including "shape_scale" and "flip".
15 |         rpn_test_cfg (dict): rpn test config.
16 | 
17 |     Returns:
18 |         Tensor: shape (n, 4), proposals corresponding to original image scale.
19 |     """
20 |     recovered_proposals = []
21 |     for proposals, img_info in zip(aug_proposals, img_metas):
22 |         img_shape = img_info['img_shape']
23 |         scale_factor = img_info['scale_factor']
24 |         flip = img_info['flip']
25 |         _proposals = proposals.clone()
26 |         _proposals[:, :4] = bbox_mapping_back(_proposals[:, :4], img_shape,
27 |                                               scale_factor, flip)
28 |         recovered_proposals.append(_proposals)
29 |     aug_proposals = torch.cat(recovered_proposals, dim=0)
30 |     merged_proposals, _ = nms(aug_proposals, rpn_test_cfg.nms_thr)
31 |     scores = merged_proposals[:, 4]
32 |     _, order = scores.sort(0, descending=True)
33 |     num = min(rpn_test_cfg.max_num, merged_proposals.shape[0])
34 |     order = order[:num]
35 |     merged_proposals = merged_proposals[order, :]
36 |     return merged_proposals
37 | 
38 | 
39 | def merge_aug_bboxes(aug_bboxes, aug_scores, img_metas, rcnn_test_cfg):
40 |     """Merge augmented detection bboxes and scores.
41 | 
42 |     Args:
43 |         aug_bboxes (list[Tensor]): shape (n, 4*#class)
44 |         aug_scores (list[Tensor] or None): shape (n, #class)
45 |         img_shapes (list[Tensor]): shape (3, ).
46 |         rcnn_test_cfg (dict): rcnn test config.
47 | 
48 |     Returns:
49 |         tuple: (bboxes, scores)
50 |     """
51 |     recovered_bboxes = []
52 |     for bboxes, img_info in zip(aug_bboxes, img_metas):
53 |         img_shape = img_info[0]['img_shape']
54 |         scale_factor = img_info[0]['scale_factor']
55 |         flip = img_info[0]['flip']
56 |         bboxes = bbox_mapping_back(bboxes, img_shape, scale_factor, flip)
57 |         recovered_bboxes.append(bboxes)
58 |     bboxes = torch.stack(recovered_bboxes).mean(dim=0)
59 |     if aug_scores is None:
60 |         return bboxes
61 |     else:
62 |         scores = torch.stack(aug_scores).mean(dim=0)
63 |         return bboxes, scores
64 | 
65 | 
66 | def merge_aug_scores(aug_scores):
67 |     """Merge augmented bbox scores."""
68 |     if isinstance(aug_scores[0], torch.Tensor):
69 |         return torch.mean(torch.stack(aug_scores), dim=0)
70 |     else:
71 |         return np.mean(aug_scores, axis=0)
72 | 
73 | 
74 | def merge_aug_masks(aug_masks, img_metas, rcnn_test_cfg, weights=None):
75 |     """Merge augmented mask prediction.
76 | 
77 |     Args:
78 |         aug_masks (list[ndarray]): shape (n, #class, h, w)
79 |         img_shapes (list[ndarray]): shape (3, ).
80 |         rcnn_test_cfg (dict): rcnn test config.
81 | 
82 |     Returns:
83 |         tuple: (bboxes, scores)
84 |     """
85 |     recovered_masks = [
86 |         mask if not img_info[0]['flip'] else mask[..., ::-1]
87 |         for mask, img_info in zip(aug_masks, img_metas)
88 |     ]
89 |     if weights is None:
90 |         merged_masks = np.mean(recovered_masks, axis=0)
91 |     else:
92 |         merged_masks = np.average(
93 |             np.array(recovered_masks), axis=0, weights=np.array(weights))
94 |     return merged_masks
95 | 


--------------------------------------------------------------------------------
/mmdet/core/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .dist_utils import allreduce_grads, DistOptimizerHook
2 | from .misc import tensor2imgs, unmap, multi_apply
3 | 
4 | __all__ = [
5 |     'allreduce_grads', 'DistOptimizerHook', 'tensor2imgs', 'unmap',
6 |     'multi_apply'
7 | ]
8 | 


--------------------------------------------------------------------------------
/mmdet/core/utils/dist_utils.py:
--------------------------------------------------------------------------------
 1 | from collections import OrderedDict
 2 | 
 3 | import torch.distributed as dist
 4 | from torch._utils import (_flatten_dense_tensors, _unflatten_dense_tensors,
 5 |                           _take_tensors)
 6 | from mmcv.runner import OptimizerHook
 7 | 
 8 | 
 9 | def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1):
10 |     if bucket_size_mb > 0:
11 |         bucket_size_bytes = bucket_size_mb * 1024 * 1024
12 |         buckets = _take_tensors(tensors, bucket_size_bytes)
13 |     else:
14 |         buckets = OrderedDict()
15 |         for tensor in tensors:
16 |             tp = tensor.type()
17 |             if tp not in buckets:
18 |                 buckets[tp] = []
19 |             buckets[tp].append(tensor)
20 |         buckets = buckets.values()
21 | 
22 |     for bucket in buckets:
23 |         flat_tensors = _flatten_dense_tensors(bucket)
24 |         dist.all_reduce(flat_tensors)
25 |         flat_tensors.div_(world_size)
26 |         for tensor, synced in zip(
27 |                 bucket, _unflatten_dense_tensors(flat_tensors, bucket)):
28 |             tensor.copy_(synced)
29 | 
30 | 
31 | def allreduce_grads(model, coalesce=True, bucket_size_mb=-1):
32 |     grads = [
33 |         param.grad.data for param in model.parameters()
34 |         if param.requires_grad and param.grad is not None
35 |     ]
36 |     world_size = dist.get_world_size()
37 |     if coalesce:
38 |         _allreduce_coalesced(grads, world_size, bucket_size_mb)
39 |     else:
40 |         for tensor in grads:
41 |             dist.all_reduce(tensor.div_(world_size))
42 | 
43 | 
44 | class DistOptimizerHook(OptimizerHook):
45 | 
46 |     def __init__(self, grad_clip=None, coalesce=True, bucket_size_mb=-1):
47 |         self.grad_clip = grad_clip
48 |         self.coalesce = coalesce
49 |         self.bucket_size_mb = bucket_size_mb
50 | 
51 |     def after_train_iter(self, runner):
52 |         runner.optimizer.zero_grad()
53 |         runner.outputs['loss'].backward()
54 |         allreduce_grads(runner.model, self.coalesce, self.bucket_size_mb)
55 |         if self.grad_clip is not None:
56 |             self.clip_grads(runner.model.parameters())
57 |         runner.optimizer.step()
58 | 


--------------------------------------------------------------------------------
/mmdet/core/utils/misc.py:
--------------------------------------------------------------------------------
 1 | from functools import partial
 2 | import mmcv
 3 | import numpy as np
 4 | from six.moves import map, zip
 5 | import time
 6 | import torch
 7 | 
 8 | def tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True):
 9 |     num_imgs = tensor.size(0)
10 |     mean = np.array(mean, dtype=np.float32)
11 |     std = np.array(std, dtype=np.float32)
12 |     imgs = []
13 |     for img_id in range(num_imgs):
14 |         img = tensor[img_id, ...].cpu().numpy().transpose(1, 2, 0)
15 |         img = mmcv.imdenormalize(
16 |             img, mean, std, to_bgr=to_rgb).astype(np.uint8)
17 |         imgs.append(np.ascontiguousarray(img))
18 |     return imgs
19 | 
20 | 
21 | def multi_apply(func, *args, **kwargs):
22 |     pfunc = partial(func, **kwargs) if kwargs else func
23 |     map_results = map(pfunc, *args)
24 |     return tuple(map(list, zip(*map_results)))
25 | 
26 | 
27 | def unmap(data, count, inds, fill=0):
28 |     """ Unmap a subset of item (data) back to the original set of items (of
29 |     size count) """
30 |     if data.dim() == 1:
31 |         ret = data.new_full((count, ), fill)
32 |         ret[inds] = data
33 |     else:
34 |         new_size = (count, ) + data.size()[1:]
35 |         ret = data.new_full(new_size, fill)
36 |         ret[inds, :] = data
37 |     return ret
38 | 
39 | class TimeCatcher(object):
40 |     def __init__(self, show=True, cuda=True):
41 |         self.show=show
42 |         self.cuda = cuda
43 | 
44 |     def __enter__(self):
45 |         if self.cuda:
46 |             torch.cuda.synchronize()
47 |         self.start = time.time()
48 |         return self
49 | 
50 |     def __exit__(self, type, value, traceback):
51 |         if self.cuda:
52 |             torch.cuda.synchronize()
53 |         self.end = time.time()
54 |         ms = (self.end - self.start) * 1000
55 |         if self.show:
56 |             print("%.5f ms" % ms)


--------------------------------------------------------------------------------
/mmdet/datasets/__init__.py:
--------------------------------------------------------------------------------
 1 | from .custom import CustomDataset
 2 | from .coco import CocoDataset
 3 | from .loader import GroupSampler, DistributedGroupSampler, build_dataloader
 4 | from .utils import to_tensor, random_scale, show_ann, get_dataset
 5 | from .concat_dataset import ConcatDataset
 6 | from .kitti import KittiLiDAR, KittiVideo
 7 | from .voc import VOCDataset
 8 | __all__ = [
 9 |     'CustomDataset', 'CocoDataset', 'GroupSampler', 'DistributedGroupSampler',
10 |     'ConcatDataset', 'build_dataloader', 'to_tensor', 'random_scale',
11 |     'show_ann', 'get_dataset', 'KittiLiDAR','KittiVideo', 'VOCDataset'
12 | ]
13 | 


--------------------------------------------------------------------------------
/mmdet/datasets/coco.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from pycocotools.coco import COCO
  3 | 
  4 | from .custom import CustomDataset
  5 | 
  6 | 
  7 | class CocoDataset(CustomDataset):
  8 | 
  9 |     def load_annotations(self, ann_file):
 10 |         self.coco = COCO(ann_file)
 11 |         self.cat_ids = self.coco.getCatIds()
 12 |         self.cat2label = {
 13 |             cat_id: i + 1
 14 |             for i, cat_id in enumerate(self.cat_ids)
 15 |         }
 16 |         self.img_ids = self.coco.getImgIds()
 17 |         img_infos = []
 18 |         for i in self.img_ids:
 19 |             info = self.coco.loadImgs([i])[0]
 20 |             info['filename'] = info['file_name']
 21 |             img_infos.append(info)
 22 |         return img_infos
 23 | 
 24 |     def get_ann_info(self, idx):
 25 |         img_id = self.img_infos[idx]['id']
 26 |         ann_ids = self.coco.getAnnIds(imgIds=[img_id])
 27 |         ann_info = self.coco.loadAnns(ann_ids)
 28 |         return self._parse_ann_info(ann_info)
 29 | 
 30 |     def _filter_imgs(self, min_size=32):
 31 |         """Filter images too small or without ground truths."""
 32 |         valid_inds = []
 33 |         ids_with_ann = set(_['image_id'] for _ in self.coco.anns.values())
 34 |         for i, img_info in enumerate(self.img_infos):
 35 |             if self.img_ids[i] not in ids_with_ann:
 36 |                 continue
 37 |             if min(img_info['width'], img_info['height']) >= min_size:
 38 |                 valid_inds.append(i)
 39 |         return valid_inds
 40 | 
 41 |     def _parse_ann_info(self, ann_info, with_mask=True):
 42 |         """Parse bbox and mask annotation.
 43 | 
 44 |         Args:
 45 |             ann_info (list[dict]): Annotation info of an image.
 46 |             with_mask (bool): Whether to parse mask annotations.
 47 | 
 48 |         Returns:
 49 |             dict: A dict containing the following keys: bboxes, bboxes_ignore,
 50 |                 labels, masks, mask_polys, poly_lens.
 51 |         """
 52 |         gt_bboxes = []
 53 |         gt_labels = []
 54 |         gt_bboxes_ignore = []
 55 |         # Two formats are provided.
 56 |         # 1. mask: a binary map of the same size of the image.
 57 |         # 2. polys: each mask consists of one or several polys, each poly is a
 58 |         # list of float.
 59 |         if with_mask:
 60 |             gt_masks = []
 61 |             gt_mask_polys = []
 62 |             gt_poly_lens = []
 63 |         for i, ann in enumerate(ann_info):
 64 |             if ann.get('ignore', False):
 65 |                 continue
 66 |             x1, y1, w, h = ann['bbox']
 67 |             if ann['area'] <= 0 or w < 1 or h < 1:
 68 |                 continue
 69 |             bbox = [x1, y1, x1 + w - 1, y1 + h - 1]
 70 |             if ann['iscrowd']:
 71 |                 gt_bboxes_ignore.append(bbox)
 72 |             else:
 73 |                 gt_bboxes.append(bbox)
 74 |                 gt_labels.append(self.cat2label[ann['category_id']])
 75 |             if with_mask:
 76 |                 gt_masks.append(self.coco.annToMask(ann))
 77 |                 mask_polys = [
 78 |                     p for p in ann['segmentation'] if len(p) >= 6
 79 |                 ]  # valid polygons have >= 3 points (6 coordinates)
 80 |                 poly_lens = [len(p) for p in mask_polys]
 81 |                 gt_mask_polys.append(mask_polys)
 82 |                 gt_poly_lens.extend(poly_lens)
 83 |         if gt_bboxes:
 84 |             gt_bboxes = np.array(gt_bboxes, dtype=np.float32)
 85 |             gt_labels = np.array(gt_labels, dtype=np.int64)
 86 |         else:
 87 |             gt_bboxes = np.zeros((0, 4), dtype=np.float32)
 88 |             gt_labels = np.array([], dtype=np.int64)
 89 | 
 90 |         if gt_bboxes_ignore:
 91 |             gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32)
 92 |         else:
 93 |             gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32)
 94 | 
 95 |         ann = dict(
 96 |             bboxes=gt_bboxes, labels=gt_labels, bboxes_ignore=gt_bboxes_ignore)
 97 | 
 98 |         if with_mask:
 99 |             ann['masks'] = gt_masks
100 |             # poly format is not used in the current implementation
101 |             ann['mask_polys'] = gt_mask_polys
102 |             ann['poly_lens'] = gt_poly_lens
103 |         return ann
104 | 


--------------------------------------------------------------------------------
/mmdet/datasets/concat_dataset.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset
 3 | 
 4 | 
 5 | class ConcatDataset(_ConcatDataset):
 6 |     """
 7 |     Same as torch.utils.data.dataset.ConcatDataset, but
 8 |     concat the group flag for image aspect ratio.
 9 |     """
10 |     def __init__(self, datasets):
11 |         """
12 |         flag: Images with aspect ratio greater than 1 will be set as group 1,
13 |               otherwise group 0.
14 |         """
15 |         super(ConcatDataset, self).__init__(datasets)
16 |         if hasattr(datasets[0], 'flag'):
17 |             flags = []
18 |             for i in range(0, len(datasets)):
19 |                 flags.append(datasets[i].flag)
20 |             self.flag = np.concatenate(flags)
21 | 


--------------------------------------------------------------------------------
/mmdet/datasets/loader/__init__.py:
--------------------------------------------------------------------------------
1 | from .build_loader import build_dataloader
2 | from .sampler import GroupSampler, DistributedGroupSampler
3 | 
4 | __all__ = [
5 |     'GroupSampler', 'DistributedGroupSampler', 'build_dataloader'
6 | ]
7 | 


--------------------------------------------------------------------------------
/mmdet/datasets/loader/build_loader.py:
--------------------------------------------------------------------------------
 1 | from functools import partial
 2 | 
 3 | from mmcv.runner import get_dist_info
 4 | from mmcv.parallel import collate
 5 | from torch.utils.data import DataLoader
 6 | from .sampler import GroupSampler, DistributedGroupSampler
 7 | 
 8 | # https://github.com/pytorch/pytorch/issues/973
 9 | import resource
10 | rlimit = resource.getrlimit(resource.RLIMIT_NOFILE)
11 | resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1]))
12 | 
13 | 
14 | def build_dataloader(dataset,
15 |                      imgs_per_gpu,
16 |                      workers_per_gpu,
17 |                      num_gpus=1,
18 |                      dist=True,
19 |                      **kwargs):
20 |     if dist:
21 |         rank, world_size = get_dist_info()
22 |         sampler = DistributedGroupSampler(dataset, imgs_per_gpu, world_size,
23 |                                           rank)
24 |         batch_size = imgs_per_gpu
25 |         num_workers = workers_per_gpu
26 |     else:
27 |         if not kwargs.get('shuffle', True):
28 |             sampler = None
29 |         else:
30 |             sampler = GroupSampler(dataset, imgs_per_gpu)
31 |         batch_size = num_gpus * imgs_per_gpu
32 |         num_workers = num_gpus * workers_per_gpu
33 | 
34 |     data_loader = DataLoader(
35 |         dataset,
36 |         batch_size=batch_size,
37 |         sampler=sampler,
38 |         num_workers=num_workers,
39 |         collate_fn=partial(collate, samples_per_gpu=imgs_per_gpu),
40 |         pin_memory=False,
41 |         **kwargs)
42 | 
43 |     return data_loader
44 | 


--------------------------------------------------------------------------------
/mmdet/datasets/loader/sampler.py:
--------------------------------------------------------------------------------
  1 | from __future__ import division
  2 | 
  3 | import math
  4 | import torch
  5 | import numpy as np
  6 | 
  7 | from torch.distributed import get_world_size, get_rank
  8 | from torch.utils.data.sampler import Sampler
  9 | 
 10 | 
 11 | class GroupSampler(Sampler):
 12 | 
 13 |     def __init__(self, dataset, samples_per_gpu=1):
 14 |         assert hasattr(dataset, 'flag')
 15 |         self.dataset = dataset
 16 |         self.samples_per_gpu = samples_per_gpu
 17 |         self.flag = dataset.flag.astype(np.int64)
 18 |         self.group_sizes = np.bincount(self.flag)
 19 |         self.num_samples = 0
 20 |         for i, size in enumerate(self.group_sizes):
 21 |             self.num_samples += int(np.ceil(
 22 |                 size / self.samples_per_gpu)) * self.samples_per_gpu
 23 | 
 24 |     def __iter__(self):
 25 |         indices = []
 26 |         for i, size in enumerate(self.group_sizes):
 27 |             if size == 0:
 28 |                 continue
 29 |             indice = np.where(self.flag == i)[0]
 30 |             assert len(indice) == size
 31 |             np.random.shuffle(indice)
 32 |             num_extra = int(np.ceil(size / self.samples_per_gpu)
 33 |                             ) * self.samples_per_gpu - len(indice)
 34 |             indice = np.concatenate([indice, indice[:num_extra]])
 35 |             indices.append(indice)
 36 |         indices = np.concatenate(indices)
 37 |         indices = [
 38 |             indices[i * self.samples_per_gpu:(i + 1) * self.samples_per_gpu]
 39 |             for i in np.random.permutation(
 40 |                 range(len(indices) // self.samples_per_gpu))
 41 |         ]
 42 |         indices = np.concatenate(indices)
 43 |         indices = torch.from_numpy(indices).long()
 44 |         assert len(indices) == self.num_samples
 45 |         return iter(indices)
 46 | 
 47 |     def __len__(self):
 48 |         return self.num_samples
 49 | 
 50 | 
 51 | class DistributedGroupSampler(Sampler):
 52 |     """Sampler that restricts data loading to a subset of the dataset.
 53 |     It is especially useful in conjunction with
 54 |     :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each
 55 |     process can pass a DistributedSampler instance as a DataLoader sampler,
 56 |     and load a subset of the original dataset that is exclusive to it.
 57 |     .. note::
 58 |         Dataset is assumed to be of constant size.
 59 |     Arguments:
 60 |         dataset: Dataset used for sampling.
 61 |         num_replicas (optional): Number of processes participating in
 62 |             distributed training.
 63 |         rank (optional): Rank of the current process within num_replicas.
 64 |     """
 65 | 
 66 |     def __init__(self,
 67 |                  dataset,
 68 |                  samples_per_gpu=1,
 69 |                  num_replicas=None,
 70 |                  rank=None):
 71 |         if num_replicas is None:
 72 |             num_replicas = get_world_size()
 73 |         if rank is None:
 74 |             rank = get_rank()
 75 |         self.dataset = dataset
 76 |         self.samples_per_gpu = samples_per_gpu
 77 |         self.num_replicas = num_replicas
 78 |         self.rank = rank
 79 |         self.epoch = 0
 80 | 
 81 |         assert hasattr(self.dataset, 'flag')
 82 |         self.flag = self.dataset.flag
 83 |         self.group_sizes = np.bincount(self.flag)
 84 | 
 85 |         self.num_samples = 0
 86 |         for i, j in enumerate(self.group_sizes):
 87 |             self.num_samples += int(
 88 |                 math.ceil(self.group_sizes[i] * 1.0 / self.samples_per_gpu /
 89 |                           self.num_replicas)) * self.samples_per_gpu
 90 |         self.total_size = self.num_samples * self.num_replicas
 91 | 
 92 |     def __iter__(self):
 93 |         # deterministically shuffle based on epoch
 94 |         g = torch.Generator()
 95 |         g.manual_seed(self.epoch)
 96 | 
 97 |         indices = []
 98 |         for i, size in enumerate(self.group_sizes):
 99 |             if size > 0:
100 |                 indice = np.where(self.flag == i)[0]
101 |                 assert len(indice) == size
102 |                 indice = indice[list(torch.randperm(int(size),
103 |                                                     generator=g))].tolist()
104 |                 extra = int(
105 |                     math.ceil(
106 |                         size * 1.0 / self.samples_per_gpu / self.num_replicas)
107 |                 ) * self.samples_per_gpu * self.num_replicas - len(indice)
108 |                 indice += indice[:extra]
109 |                 indices += indice
110 | 
111 |         assert len(indices) == self.total_size
112 | 
113 |         indices = [
114 |             indices[j] for i in list(
115 |                 torch.randperm(
116 |                     len(indices) // self.samples_per_gpu, generator=g))
117 |             for j in range(i * self.samples_per_gpu, (i + 1) *
118 |                            self.samples_per_gpu)
119 |         ]
120 | 
121 |         # subsample
122 |         offset = self.num_samples * self.rank
123 |         indices = indices[offset:offset + self.num_samples]
124 |         assert len(indices) == self.num_samples
125 | 
126 |         return iter(indices)
127 | 
128 |     def __len__(self):
129 |         return self.num_samples
130 | 
131 |     def set_epoch(self, epoch):
132 |         self.epoch = epoch
133 | 


--------------------------------------------------------------------------------
/mmdet/datasets/transforms.py:
--------------------------------------------------------------------------------
  1 | import mmcv
  2 | import numpy as np
  3 | import torch
  4 | 
  5 | __all__ = ['ImageTransform', 'BboxTransform', 'MaskTransform', 'Numpy2Tensor']
  6 | 
  7 | 
  8 | class ImageTransform(object):
  9 |     """Preprocess an image.
 10 | 
 11 |     1. rescale the image to expected size
 12 |     2. normalize the image
 13 |     3. flip the image (if needed)
 14 |     4. pad the image (if needed)
 15 |     5. transpose to (c, h, w)
 16 |     """
 17 | 
 18 |     def __init__(self,
 19 |                  mean=(0, 0, 0),
 20 |                  std=(1, 1, 1),
 21 |                  to_rgb=True,
 22 |                  size_divisor=None):
 23 |         self.mean = np.array(mean, dtype=np.float32)
 24 |         self.std = np.array(std, dtype=np.float32)
 25 |         self.to_rgb = to_rgb
 26 |         self.size_divisor = size_divisor
 27 | 
 28 |     def __call__(self, img, scale, flip=False):
 29 |         img, scale_factor = mmcv.imrescale(img, scale, return_scale=True)
 30 |         img_shape = img.shape
 31 |         img = mmcv.imnormalize(img, self.mean, self.std, self.to_rgb)
 32 |         if flip:
 33 |             img = mmcv.imflip(img)
 34 |         if self.size_divisor is not None:
 35 |             img = mmcv.impad_to_multiple(img, self.size_divisor)
 36 |             pad_shape = img.shape
 37 |         else:
 38 |             pad_shape = img_shape
 39 |         img = img.transpose(2, 0, 1)
 40 |         return img, img_shape, pad_shape, scale_factor
 41 | 
 42 | 
 43 | def bbox_flip(bboxes, img_shape):
 44 |     """Flip bboxes horizontally.
 45 | 
 46 |     Args:
 47 |         bboxes(ndarray): shape (..., 4*k)
 48 |         img_shape(tuple): (height, width)
 49 |     """
 50 |     assert bboxes.shape[-1] % 4 == 0
 51 |     w = img_shape[1]
 52 |     flipped = bboxes.copy()
 53 |     flipped[..., 0::4] = w - bboxes[..., 2::4] - 1
 54 |     flipped[..., 2::4] = w - bboxes[..., 0::4] - 1
 55 |     return flipped
 56 | 
 57 | 
 58 | class BboxTransform(object):
 59 |     """Preprocess gt bboxes.
 60 | 
 61 |     1. rescale bboxes according to image size
 62 |     2. flip bboxes (if needed)
 63 |     3. pad the first dimension to `max_num_gts`
 64 |     """
 65 | 
 66 |     def __init__(self, max_num_gts=None):
 67 |         self.max_num_gts = max_num_gts
 68 | 
 69 |     def __call__(self, bboxes, img_shape, scale_factor, flip=False):
 70 |         gt_bboxes = bboxes * scale_factor
 71 |         if flip:
 72 |             gt_bboxes = bbox_flip(gt_bboxes, img_shape)
 73 |         gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_shape[1])
 74 |         gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_shape[0])
 75 |         if self.max_num_gts is None:
 76 |             return gt_bboxes
 77 |         else:
 78 |             num_gts = gt_bboxes.shape[0]
 79 |             padded_bboxes = np.zeros((self.max_num_gts, 4), dtype=np.float32)
 80 |             padded_bboxes[:num_gts, :] = gt_bboxes
 81 |             return padded_bboxes
 82 | 
 83 | 
 84 | class MaskTransform(object):
 85 |     """Preprocess masks.
 86 | 
 87 |     1. resize masks to expected size and stack to a single array
 88 |     2. flip the masks (if needed)
 89 |     3. pad the masks (if needed)
 90 |     """
 91 | 
 92 |     def __call__(self, masks, pad_shape, scale_factor, flip=False):
 93 |         masks = [
 94 |             mmcv.imrescale(mask, scale_factor, interpolation='nearest')
 95 |             for mask in masks
 96 |         ]
 97 |         if flip:
 98 |             masks = [mask[:, ::-1] for mask in masks]
 99 |         padded_masks = [
100 |             mmcv.impad(mask, pad_shape[:2], pad_val=0) for mask in masks
101 |         ]
102 |         padded_masks = np.stack(padded_masks, axis=0)
103 |         return padded_masks
104 | 
105 | 
106 | class Numpy2Tensor(object):
107 | 
108 |     def __init__(self):
109 |         pass
110 | 
111 |     def __call__(self, *args):
112 |         if len(args) == 1:
113 |             return torch.from_numpy(args[0])
114 |         else:
115 |             return tuple([torch.from_numpy(np.array(array)) for array in args])


--------------------------------------------------------------------------------
/mmdet/datasets/utils.py:
--------------------------------------------------------------------------------
  1 | import copy
  2 | from collections import Sequence
  3 | 
  4 | import mmcv
  5 | from mmcv.runner import obj_from_dict
  6 | import torch
  7 | from collections import defaultdict
  8 | import matplotlib.pyplot as plt
  9 | import numpy as np
 10 | from .concat_dataset import ConcatDataset
 11 | from .. import datasets
 12 | from mmdet.core.point_cloud import voxel_generator
 13 | from mmdet.core.point_cloud import point_augmentor
 14 | from mmdet.core.bbox3d import bbox3d_target
 15 | from mmdet.core.anchor import anchor3d_generator
 16 | def to_tensor(data):
 17 |     """Convert objects of various python types to :obj:`torch.Tensor`.
 18 | 
 19 |     Supported types are: :class:`numpy.ndarray`, :class:`torch.Tensor`,
 20 |     :class:`Sequence`, :class:`int` and :class:`float`.
 21 |     """
 22 |     if isinstance(data, torch.Tensor):
 23 |         return data
 24 |     elif isinstance(data, np.ndarray):
 25 |         return torch.from_numpy(data)
 26 |     elif isinstance(data, Sequence) and not mmcv.is_str(data):
 27 |         return [to_tensor(d) for d in data]
 28 |     elif isinstance(data, int):
 29 |         return torch.LongTensor([data])
 30 |     elif isinstance(data, float):
 31 |         return torch.FloatTensor([data])
 32 |     elif data is None:
 33 |         return data
 34 |     else:
 35 |         raise TypeError('type {} cannot be converted to tensor.'.format(
 36 |             type(data)))
 37 | 
 38 | 
 39 | def random_scale(img_scales, mode='range'):
 40 |     """Randomly select a scale from a list of scales or scale ranges.
 41 | 
 42 |     Args:
 43 |         img_scales (list[tuple]): Image scale or scale range.
 44 |         mode (str): "range" or "value".
 45 | 
 46 |     Returns:
 47 |         tuple: Sampled image scale.
 48 |     """
 49 |     num_scales = len(img_scales)
 50 |     if num_scales == 1:  # fixed scale is specified
 51 |         img_scale = img_scales[0]
 52 |     elif num_scales == 2:  # randomly sample a scale
 53 |         if mode == 'range':
 54 |             img_scale_long = [max(s) for s in img_scales]
 55 |             img_scale_short = [min(s) for s in img_scales]
 56 |             long_edge = np.random.randint(
 57 |                 min(img_scale_long),
 58 |                 max(img_scale_long) + 1)
 59 |             short_edge = np.random.randint(
 60 |                 min(img_scale_short),
 61 |                 max(img_scale_short) + 1)
 62 |             img_scale = (long_edge, short_edge)
 63 |         elif mode == 'value':
 64 |             img_scale = img_scales[np.random.randint(num_scales)]
 65 |     else:
 66 |         if mode != 'value':
 67 |             raise ValueError(
 68 |                 'Only "value" mode supports more than 2 image scales')
 69 |         img_scale = img_scales[np.random.randint(num_scales)]
 70 |     return img_scale
 71 | 
 72 | 
 73 | def show_ann(coco, img, ann_info):
 74 |     plt.imshow(mmcv.bgr2rgb(img))
 75 |     plt.axis('off')
 76 |     coco.showAnns(ann_info)
 77 |     plt.show()
 78 | 
 79 | 
 80 | def get_dataset(data_cfg):
 81 | 
 82 |     if isinstance(data_cfg['ann_file'], (list, tuple)):
 83 |         ann_files = data_cfg['ann_file']
 84 |         num_dset = len(ann_files)
 85 |     else:
 86 |         ann_files = [data_cfg['ann_file']]
 87 |         num_dset = 1
 88 | 
 89 |     if isinstance(data_cfg['img_prefix'], (list, tuple)):
 90 |         img_prefixes = data_cfg['img_prefix']
 91 |     else:
 92 |         img_prefixes = [data_cfg['img_prefix']] * num_dset
 93 |     assert len(img_prefixes) == num_dset
 94 | 
 95 |     if 'generator' in data_cfg.keys() and data_cfg['generator'] is not None:
 96 |         generator = obj_from_dict(data_cfg['generator'], voxel_generator)
 97 |     else:
 98 |         generator = None
 99 | 
100 |     if 'augmentor' in data_cfg.keys() and data_cfg['augmentor'] is not None:
101 |         augmentor = obj_from_dict(data_cfg['augmentor'], point_augmentor)
102 |     else:
103 |         augmentor = None
104 | 
105 |     if 'anchor_generator' in data_cfg.keys() and data_cfg['anchor_generator'] is not None:
106 |         anchor_generator = {cls: obj_from_dict(cfg, anchor3d_generator) for cls, cfg in data_cfg['anchor_generator'].items()}
107 |     else:
108 |         anchor_generator = None
109 | 
110 |     dsets = []
111 |     for i in range(num_dset):
112 |         data_info = copy.deepcopy(data_cfg)
113 |         data_info['ann_file'] = ann_files[i]
114 |         data_info['img_prefix'] = img_prefixes[i]
115 |         if generator is not None:
116 |             data_info['generator'] = generator
117 |         if anchor_generator is not None:
118 |             data_info['anchor_generator'] = anchor_generator
119 |         if augmentor is not None:
120 |             data_info['augmentor'] = augmentor
121 |         dset = obj_from_dict(data_info, datasets)
122 |         dsets.append(dset)
123 |     if len(dsets) > 1:
124 |         dset = ConcatDataset(dsets)
125 |     else:
126 |         dset = dsets[0]
127 |     return dset
128 | 
129 | # def example_convert_to_torch(example, device=None) -> dict:
130 | #     example_torch = {}
131 | #     torch_names = [
132 | #         'img', 'voxels','coordinates',\
133 | #         # 'anchors_mask','anchors',\
134 | #         #'gt_labels','gt_bboxes','gt_bboxes_ignore',\
135 | #         'num_points', 'right', 'grid'
136 | #     ]
137 | #     for k, v in example.items():
138 | #         if k in torch_names:
139 | #             example_torch[k] = to_tensor(v)
140 | #         else:
141 | #             example_torch[k] = v
142 | #
143 | #     return example_torch
144 | 
145 | # def merge_second_batch(batch_list, samples_per_gpu=1, to_torch=True):
146 | #     example_merged = defaultdict(list)
147 | #     for example in batch_list:
148 | #         for k, v in example.items():
149 | #             example_merged[k].append(v)
150 | #     ret = {}
151 | #
152 | #     for key, elems in example_merged.items():
153 | #         if key in [
154 | #             'voxels', 'num_points',
155 | #         ]:
156 | #             ret[key] = np.concatenate(elems, axis=0)
157 | #         elif key == 'coordinates':
158 | #             coors = []
159 | #             for i, coor in enumerate(elems):
160 | #                 coor_pad = np.pad(
161 | #                     coor, ((0, 0), (1, 0)),
162 | #                     mode='constant',
163 | #                     constant_values=i)
164 | #                 coors.append(coor_pad)
165 | #             ret[key] = np.concatenate(coors, axis=0)
166 | #         elif key in [
167 | #             'img_meta', 'img_shape', 'calib', 'sample_idx', 'gt_labels', 'gt_bboxes','gt_bboxes_ignore'
168 | #         ]:
169 | #             ret[key] = elems
170 | #         else:
171 | #             ret[key] = np.stack(elems, axis=0)
172 | #
173 | #     if to_torch:
174 | #         ret = example_convert_to_torch(ret)
175 | #     return ret


--------------------------------------------------------------------------------
/mmdet/datasets/voc.py:
--------------------------------------------------------------------------------
 1 | from .xml_style import XMLDataset
 2 | 
 3 | 
 4 | class VOCDataset(XMLDataset):
 5 | 
 6 |     CLASSES = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car',
 7 |                'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
 8 |                'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train',
 9 |                'tvmonitor')
10 | 
11 |     def __init__(self, **kwargs):
12 |         super(VOCDataset, self).__init__(**kwargs)
13 |         if 'VOC2007' in self.img_prefix:
14 |             self.year = 2007
15 |         elif 'VOC2012' in self.img_prefix:
16 |             self.year = 2012
17 |         else:
18 |             raise ValueError('Cannot infer dataset year from img_prefix')


--------------------------------------------------------------------------------
/mmdet/datasets/xml_style.py:
--------------------------------------------------------------------------------
 1 | import os.path as osp
 2 | import xml.etree.ElementTree as ET
 3 | 
 4 | import mmcv
 5 | import numpy as np
 6 | 
 7 | from .custom import CustomDataset
 8 | 
 9 | 
10 | class XMLDataset(CustomDataset):
11 | 
12 |     def __init__(self, **kwargs):
13 |         super(XMLDataset, self).__init__(**kwargs)
14 |         self.cat2label = {cat: i + 1 for i, cat in enumerate(self.CLASSES)}
15 | 
16 |     def load_annotations(self, ann_file):
17 |         img_infos = []
18 |         img_ids = mmcv.list_from_file(ann_file)
19 |         for img_id in img_ids:
20 |             filename = 'JPEGImages/{}.jpg'.format(img_id)
21 |             xml_path = osp.join(self.img_prefix, 'Annotations',
22 |                                 '{}.xml'.format(img_id))
23 |             tree = ET.parse(xml_path)
24 |             root = tree.getroot()
25 |             size = root.find('size')
26 |             width = int(size.find('width').text)
27 |             height = int(size.find('height').text)
28 |             img_infos.append(
29 |                 dict(id=img_id, filename=filename, width=width, height=height))
30 |         return img_infos
31 | 
32 |     def get_ann_info(self, idx):
33 |         img_id = self.img_infos[idx]['id']
34 |         xml_path = osp.join(self.img_prefix, 'Annotations',
35 |                             '{}.xml'.format(img_id))
36 |         tree = ET.parse(xml_path)
37 |         root = tree.getroot()
38 |         bboxes = []
39 |         labels = []
40 |         bboxes_ignore = []
41 |         labels_ignore = []
42 |         for obj in root.findall('object'):
43 |             name = obj.find('name').text
44 |             label = self.cat2label[name]
45 |             difficult = int(obj.find('difficult').text)
46 |             bnd_box = obj.find('bndbox')
47 |             bbox = [
48 |                 int(bnd_box.find('xmin').text),
49 |                 int(bnd_box.find('ymin').text),
50 |                 int(bnd_box.find('xmax').text),
51 |                 int(bnd_box.find('ymax').text)
52 |             ]
53 |             if difficult:
54 |                 bboxes_ignore.append(bbox)
55 |                 labels_ignore.append(label)
56 |             else:
57 |                 bboxes.append(bbox)
58 |                 labels.append(label)
59 |         if not bboxes:
60 |             bboxes = np.zeros((0, 4))
61 |             labels = np.zeros((0, ))
62 |         else:
63 |             bboxes = np.array(bboxes, ndmin=2) - 1
64 |             labels = np.array(labels)
65 |         if not bboxes_ignore:
66 |             bboxes_ignore = np.zeros((0, 4))
67 |             labels_ignore = np.zeros((0, ))
68 |         else:
69 |             bboxes_ignore = np.array(bboxes_ignore, ndmin=2) - 1
70 |             labels_ignore = np.array(labels_ignore)
71 |         ann = dict(
72 |             bboxes=bboxes.astype(np.float32),
73 |             labels=labels.astype(np.int64),
74 |             bboxes_ignore=bboxes_ignore.astype(np.float32),
75 |             labels_ignore=labels_ignore.astype(np.int64))
76 |         return ann


--------------------------------------------------------------------------------
/mmdet/models/__init__.py:
--------------------------------------------------------------------------------
 1 | from .detectors import (BaseDetector,RPN)
 2 | from .builder import (build_neck, build_rpn_head, build_roi_extractor,build_backbone,
 3 |                       build_bbox_head, build_mask_head, build_detector)
 4 | 
 5 | __all__ = [
 6 |     'BaseDetector', 'RPN', 'build_backbone', 'build_neck', 'build_rpn_head',
 7 |     'build_roi_extractor', 'build_bbox_head', 'build_mask_head',
 8 |     'build_detector'
 9 | ]
10 | 


--------------------------------------------------------------------------------
/mmdet/models/backbones/__init__.py:
--------------------------------------------------------------------------------
1 | from .resnet import ResNet
2 | from .vxnet import *
3 | from .pillar import *
4 | __all__ = ['ResNet','VoxelFeatNet','SimpleVoxel', 'PillarFeatureNet', 'PointPillarsScatter']
5 | 


--------------------------------------------------------------------------------
/mmdet/models/backbones/pillar.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | from torch import nn
  3 | from torch.nn import functional as F
  4 | from ..utils import change_default_args, Empty, get_paddings_indicator
  5 | 
  6 | 
  7 | class PFNLayer(nn.Module):
  8 |     def __init__(self,
  9 |                  in_channels,
 10 |                  out_channels,
 11 |                  use_norm=True,
 12 |                  last_layer=False):
 13 |         super(PFNLayer, self).__init__()
 14 |         self.name = 'PFNLayer'
 15 |         self.last_vfe = last_layer
 16 |         if not self.last_vfe:
 17 |             out_channels = out_channels // 2
 18 |         self.units = out_channels
 19 | 
 20 |         if use_norm:
 21 |             BatchNorm1d = change_default_args(eps=1e-3, momentum=0.01)(nn.BatchNorm1d)
 22 |             Linear = change_default_args(bias=False)(nn.Linear)
 23 |         else:
 24 |             BatchNorm1d = Empty
 25 |             Linear = change_default_args(bias=True)(nn.Linear)
 26 | 
 27 |         self.linear = Linear(in_channels, self.units)
 28 |         self.norm = BatchNorm1d(self.units)
 29 | 
 30 |     def forward(self, inputs):
 31 | 
 32 |         x = self.linear(inputs)
 33 |         x = self.norm(x.permute(0, 2, 1).contiguous()).permute(0, 2, 1).contiguous()
 34 |         x = F.relu(x)
 35 |         x_max = torch.max(x, dim=1, keepdim=True)[0]
 36 | 
 37 |         if self.last_vfe:
 38 |             return x_max
 39 |         else:
 40 |             x_repeat = x_max.repeat(1, inputs.shape[1], 1)
 41 |             x_concatenated = torch.cat([x, x_repeat], dim=2)
 42 |             return x_concatenated
 43 | 
 44 | 
 45 | class PillarFeatureNet(nn.Module):
 46 |     def __init__(self,
 47 |                  num_input_features=4,
 48 |                  use_norm=True,
 49 |                  num_filters=(64,),
 50 |                  with_distance=False,
 51 |                  voxel_size=(0.2, 0.2, 4),
 52 |                  pc_range=(0, -40, -3, 70.4, 40, 1)
 53 |                  ):
 54 |         super(PillarFeatureNet, self).__init__()
 55 |         self.name = 'PillarFeatureNet'
 56 |         assert len(num_filters) > 0
 57 |         num_input_features += 5
 58 |         if with_distance:
 59 |             num_input_features += 1
 60 |         self._with_distance = with_distance
 61 | 
 62 |         # Create PillarFeatureNet layers
 63 |         num_filters = [num_input_features] + list(num_filters)
 64 |         pfn_layers = []
 65 |         for i in range(len(num_filters) - 1):
 66 |             in_filters = num_filters[i]
 67 |             out_filters = num_filters[i + 1]
 68 |             if i < len(num_filters) - 2:
 69 |                 last_layer = False
 70 |             else:
 71 |                 last_layer = True
 72 |             pfn_layers.append(PFNLayer(in_filters, out_filters, use_norm, last_layer=last_layer))
 73 |         self.pfn_layers = nn.ModuleList(pfn_layers)
 74 | 
 75 |         # Need pillar (voxel) size and x/y offset in order to calculate pillar offset
 76 |         self.vx = voxel_size[0]
 77 |         self.vy = voxel_size[1]
 78 |         self.x_offset = self.vx / 2 + pc_range[0]
 79 |         self.y_offset = self.vy / 2 + pc_range[1]
 80 | 
 81 |         nx = int((pc_range[3] - pc_range[0]) / self.vx)
 82 |         ny = int((pc_range[4] - pc_range[1]) / self.vy)
 83 |         self.scatter = PointPillarsScatter(nx, ny)
 84 | 
 85 |     def forward(self, features, coors, num_voxels, batch_size):
 86 | 
 87 |         # Find distance of x, y, and z from cluster center
 88 |         points_mean = features[:, :, :3].sum(dim=1, keepdim=True) / num_voxels.type_as(features).view(-1, 1, 1)
 89 |         f_cluster = features[:, :, :3] - points_mean
 90 | 
 91 |         # Find distance of x, y, and z from pillar center
 92 |         f_center = torch.zeros_like(features[:, :, :2])
 93 |         f_center[:, :, 0] = features[:, :, 0] - (coors[:, 3].float().unsqueeze(1) * self.vx + self.x_offset)
 94 |         f_center[:, :, 1] = features[:, :, 1] - (coors[:, 2].float().unsqueeze(1) * self.vy + self.y_offset)
 95 | 
 96 |         # Combine together feature decorations
 97 |         features_ls = [features, f_cluster, f_center]
 98 |         if self._with_distance:
 99 |             points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True)
100 |             features_ls.append(points_dist)
101 |         features = torch.cat(features_ls, dim=-1)
102 | 
103 |         # The feature decorations were calculated without regard to whether pillar was empty. Need to ensure that
104 |         # empty pillars remain set to zeros.
105 |         voxel_count = features.shape[1]
106 |         mask = get_paddings_indicator(num_voxels, voxel_count, axis=0)
107 |         mask = torch.unsqueeze(mask, -1).type_as(features)
108 |         features *= mask
109 | 
110 |         # Forward pass through PFNLayers
111 |         for pfn in self.pfn_layers:
112 |             features = pfn(features)
113 | 
114 |         return self.scatter(features.squeeze(), coors, batch_size)
115 | 
116 | 
117 | class PointPillarsScatter(nn.Module):
118 |     def __init__(self,
119 |                  nx, ny,
120 |                  num_input_features=64):
121 |         """
122 |         Point Pillar's Scatter.
123 |         Converts learned features from dense tensor to sparse pseudo image. This replaces SECOND's
124 |         second.pytorch.voxelnet.SparseMiddleExtractor.
125 |         :param output_shape: ([int]: 4). Required output shape of features.
126 |         :param num_input_features: <int>. Number of input features.
127 |         """
128 |         super(PointPillarsScatter, self).__init__()
129 |         self.name = 'PointPillarsScatter'
130 |         self.nx = nx
131 |         self.ny = ny
132 |         self.nchannels = num_input_features
133 | 
134 |     def forward(self, voxel_features, coords, batch_size):
135 |         # batch_canvas will be the final output.
136 |         batch_canvas = []
137 |         for batch_itt in range(batch_size):
138 |             # Create the canvas for this sample
139 |             canvas = torch.zeros(self.nchannels, self.nx * self.ny, dtype=voxel_features.dtype,
140 |                                  device=voxel_features.device)
141 | 
142 |             # Only include non-empty pillars
143 |             batch_mask = coords[:, 0] == batch_itt
144 |             this_coords = coords[batch_mask, :]
145 |             indices = this_coords[:, 2] * self.nx + this_coords[:, 3]
146 |             indices = indices.type(torch.long)
147 |             voxels = voxel_features[batch_mask, :]
148 |             voxels = voxels.t()
149 | 
150 |             # Now scatter the blob back to the canvas.
151 |             canvas[:, indices] = voxels
152 | 
153 |             # Append to a list for later stacking.
154 |             batch_canvas.append(canvas)
155 | 
156 |         # Stack to 3-dim tensor (batch-size, nchannels, nrows*ncols)
157 |         batch_canvas = torch.stack(batch_canvas, 0)
158 | 
159 |         # Undo the column stacking to final 4-dim tensor
160 |         batch_canvas = batch_canvas.view(batch_size, self.nchannels, self.ny, self.nx)
161 | 
162 |         return batch_canvas
163 | 


--------------------------------------------------------------------------------
/mmdet/models/backbones/vxnet.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | from torch import nn
  3 | from torch.nn import functional as F
  4 | from ..utils import change_default_args, Empty, get_paddings_indicator
  5 | 
  6 | 
  7 | class VFELayer(nn.Module):
  8 |     def __init__(self, in_channels, out_channels, use_norm=True, name='vfe'):
  9 |         super(VFELayer, self).__init__()
 10 |         self.name = name
 11 |         self.units = int(out_channels / 2)
 12 |         if use_norm:
 13 |             BatchNorm1d = change_default_args(
 14 |                 eps=1e-3, momentum=0.01)(nn.BatchNorm1d)
 15 |             Linear = change_default_args(bias=False)(nn.Linear)
 16 |         else:
 17 |             BatchNorm1d = Empty
 18 |             Linear = change_default_args(bias=True)(nn.Linear)
 19 |         self.linear = Linear(in_channels, self.units)
 20 |         self.norm = BatchNorm1d(self.units)
 21 | 
 22 |     def forward(self, inputs):
 23 |         # [K, T, 7] tensordot [7, units] = [K, T, units]
 24 |         voxel_count = inputs.shape[1]
 25 |         x = self.linear(inputs)
 26 |         x = self.norm(x.permute(0, 2, 1).contiguous()).permute(0, 2,
 27 |                                                                1).contiguous()
 28 |         pointwise = F.relu(x)
 29 |         # [K, T, units]
 30 | 
 31 |         aggregated = torch.max(pointwise, dim=1, keepdim=True)[0]
 32 |         # [K, 1, units]
 33 |         repeated = aggregated.repeat(1, voxel_count, 1)
 34 | 
 35 |         concatenated = torch.cat([pointwise, repeated], dim=2)
 36 |         # [K, T, 2 * units]
 37 |         return concatenated
 38 | 
 39 | class VoxelFeatNet(nn.Module):
 40 |     def __init__(self,
 41 |                  num_input_features=4,
 42 |                  use_norm=True,
 43 |                  num_filters=[32, 128],
 44 |                  with_distance=False,
 45 |                  name='VoxelFeatureExtractor'):
 46 |         super(VoxelFeatNet, self).__init__()
 47 |         self.name = name
 48 |         if use_norm:
 49 |             BatchNorm1d = change_default_args(
 50 |                 eps=1e-3, momentum=0.01)(nn.BatchNorm1d)
 51 |             Linear = change_default_args(bias=False)(nn.Linear)
 52 |         else:
 53 |             BatchNorm1d = Empty
 54 |             Linear = change_default_args(bias=True)(nn.Linear)
 55 |         assert len(num_filters) == 2
 56 |         num_input_features += 3  # add mean features
 57 |         if with_distance:
 58 |             num_input_features += 1
 59 |         self._with_distance = with_distance
 60 |         self.vfe1 = VFELayer(num_input_features, num_filters[0], use_norm)
 61 |         self.vfe2 = VFELayer(num_filters[0], num_filters[1], use_norm)
 62 |         self.linear = Linear(num_filters[1], num_filters[1])
 63 |         # var_torch_init(self.linear.weight)
 64 |         # var_torch_init(self.linear.bias)
 65 |         self.norm = BatchNorm1d(num_filters[1])
 66 | 
 67 |     def init_weights(self, pretrained=None):
 68 |         pass
 69 | 
 70 |     def forward(self, features, num_voxels):
 71 |         # features: [concated_num_points, num_voxel_size, 3(4)]
 72 |         # num_voxels: [concated_num_points]
 73 |         points_mean = features[:, :, :3].sum(
 74 |             dim=1, keepdim=True) / num_voxels.type_as(features).view(-1, 1, 1)
 75 |         features_relative = features[:, :, :3] - points_mean
 76 |         if self._with_distance:
 77 |             points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True)
 78 |             features = torch.cat(
 79 |                 [features, features_relative, points_dist], dim=-1)
 80 |         else:
 81 |             features = torch.cat([features, features_relative], dim=-1)
 82 |         voxel_count = features.shape[1]
 83 |         mask = get_paddings_indicator(num_voxels, voxel_count, axis=0)
 84 |         mask = torch.unsqueeze(mask, -1).type_as(features)
 85 |         # mask = features.max(dim=2, keepdim=True)[0] != 0
 86 |         x = self.vfe1(features)
 87 |         x *= mask
 88 |         x = self.vfe2(x)
 89 |         x *= mask
 90 |         x = self.linear(x)
 91 |         x = self.norm(x.permute(0, 2, 1).contiguous()).permute(0, 2,
 92 |                                                                1).contiguous()
 93 |         x = F.relu(x)
 94 |         x *= mask
 95 |         # x: [concated_num_points, num_voxel_size, 128]
 96 |         voxelwise = torch.max(x, dim=1)[0]
 97 |         return voxelwise
 98 | 
 99 | class SimpleVoxel(nn.Module):
100 |     def __init__(self,
101 |                  num_input_features=4,
102 |                  use_norm=True,
103 |                  num_filters=[32, 128],
104 |                  with_distance=False,
105 |                  name='VoxelFeatureExtractor'):
106 |         super(SimpleVoxel, self).__init__()
107 |         self.name = name
108 |         self.num_input_features = num_input_features
109 | 
110 |     def forward(self, features, num_voxels):
111 |         #return features
112 |         # features: [concated_num_points, num_voxel_size, 3(4)]
113 |         # num_voxels: [concated_num_points]
114 |         points_mean = features[:, :, :self.num_input_features].sum(
115 |             dim=1, keepdim=False) / num_voxels.type_as(features).view(-1, 1)
116 |         return points_mean.contiguous()
117 | 
118 | 
119 | 


--------------------------------------------------------------------------------
/mmdet/models/bbox_heads/__init__.py:
--------------------------------------------------------------------------------
1 | from .bbox_head import BBoxHead
2 | from .convfc_bbox_head import ConvFCBBoxHead, SharedFCBBoxHead
3 | from ..single_stage_heads import PSWarpHead
4 | __all__ = ['BBoxHead', 'ConvFCBBoxHead', 'SharedFCBBoxHead', 'PSWarpHead']
5 | 


--------------------------------------------------------------------------------
/mmdet/models/builder.py:
--------------------------------------------------------------------------------
 1 | from mmcv.runner import obj_from_dict
 2 | from torch import nn
 3 | 
 4 | from . import (backbones, necks, roi_extractors, rpn_heads, bbox_heads,
 5 |                mask_heads, single_stage_heads)
 6 | 
 7 | __all__ = [
 8 |     'build_backbone', 'build_neck', 'build_rpn_head', 'build_roi_extractor',
 9 |     'build_bbox_head', 'build_mask_head', 'build_single_stage_head','build_detector',
10 | ]
11 | 
12 | 
13 | def _build_module(cfg, parrent=None, default_args=None):
14 |     return cfg if isinstance(cfg, nn.Module) else obj_from_dict(
15 |         cfg, parrent, default_args)
16 | 
17 | 
18 | def build(cfg, parrent=None, default_args=None):
19 |     if isinstance(cfg, list):
20 |         modules = [_build_module(cfg_, parrent, default_args) for cfg_ in cfg]
21 |         return nn.Sequential(*modules)
22 |     else:
23 |         return _build_module(cfg, parrent, default_args)
24 | 
25 | 
26 | def build_backbone(cfg):
27 |     return build(cfg, backbones)
28 | 
29 | 
30 | def build_neck(cfg):
31 |     return build(cfg, necks)
32 | 
33 | 
34 | def build_rpn_head(cfg):
35 |     return build(cfg, rpn_heads)
36 | 
37 | 
38 | def build_roi_extractor(cfg):
39 |     return build(cfg, roi_extractors)
40 | 
41 | 
42 | def build_bbox_head(cfg):
43 |     return build(cfg, bbox_heads)
44 | 
45 | 
46 | def build_mask_head(cfg):
47 |     return build(cfg, mask_heads)
48 | 
49 | 
50 | def build_single_stage_head(cfg):
51 |     return build(cfg, single_stage_heads)
52 | 
53 | 
54 | def build_detector(cfg, train_cfg=None, test_cfg=None):
55 |     from . import detectors
56 |     return build(cfg, detectors, dict(train_cfg=train_cfg, test_cfg=test_cfg))
57 | 


--------------------------------------------------------------------------------
/mmdet/models/detectors/__init__.py:
--------------------------------------------------------------------------------
1 | from .base import BaseDetector
2 | from .single_stage import SingleStageDetector
3 | from .rpn import RPN
4 | from .pointpillars import PointPillars
5 | 
6 | __all__ = [
7 |     'BaseDetector', 'SingleStageDetector', 'RPN', 'PointPillars',
8 | ]
9 | 


--------------------------------------------------------------------------------
/mmdet/models/detectors/base.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | from abc import ABCMeta, abstractmethod
  3 | 
  4 | import mmcv
  5 | import numpy as np
  6 | import torch.nn as nn
  7 | 
  8 | from mmdet.core import tensor2imgs, get_classes
  9 | 
 10 | 
 11 | class BaseDetector(nn.Module):
 12 |     """Base class for detectors"""
 13 | 
 14 |     __metaclass__ = ABCMeta
 15 | 
 16 |     def __init__(self):
 17 |         super(BaseDetector, self).__init__()
 18 | 
 19 |     @property
 20 |     def with_neck(self):
 21 |         return hasattr(self, 'neck') and self.neck is not None
 22 | 
 23 |     @property
 24 |     def with_bbox(self):
 25 |         return hasattr(self, 'bbox_head') and self.bbox_head is not None
 26 | 
 27 |     @property
 28 |     def with_mask(self):
 29 |         return hasattr(self, 'mask_head') and self.mask_head is not None
 30 | 
 31 |     @abstractmethod
 32 |     def extract_feat(self, imgs):
 33 |         pass
 34 | 
 35 |     def extract_feats(self, imgs):
 36 |         assert isinstance(imgs, list)
 37 |         for img in imgs:
 38 |             yield self.extract_feat(img)
 39 | 
 40 |     @abstractmethod
 41 |     def forward_train(self, imgs, img_metas, **kwargs):
 42 |         pass
 43 | 
 44 |     @abstractmethod
 45 |     def simple_test(self, img, img_meta, **kwargs):
 46 |         pass
 47 | 
 48 |     @abstractmethod
 49 |     def aug_test(self, imgs, img_metas, **kwargs):
 50 |         pass
 51 | 
 52 |     def init_weights(self, pretrained=None):
 53 |         if pretrained is not None:
 54 |             logger = logging.getLogger()
 55 |             logger.info('load model from: {}'.format(pretrained))
 56 | 
 57 |     def forward_test(self, imgs, img_metas, **kwargs):
 58 |         for var, name in [(imgs, 'imgs'), (img_metas, 'img_metas')]:
 59 |             if not isinstance(var, list):
 60 |                 raise TypeError('{} must be a list, but got {}'.format(
 61 |                     name, type(var)))
 62 | 
 63 |         num_augs = len(imgs)
 64 |         if num_augs != len(img_metas):
 65 |             raise ValueError(
 66 |                 'num of augmentations ({}) != num of image meta ({})'.format(
 67 |                     len(imgs), len(img_metas)))
 68 |         # TODO: remove the restriction of imgs_per_gpu == 1 when prepared
 69 |         imgs_per_gpu = imgs[0].size(0)
 70 |         assert imgs_per_gpu == 1
 71 | 
 72 |         if num_augs == 1:
 73 |             return self.simple_test(imgs[0], img_metas[0], **kwargs)
 74 |         else:
 75 |             return self.aug_test(imgs, img_metas, **kwargs)
 76 | 
 77 |     def forward(self, img, img_meta, return_loss=True, **kwargs):
 78 |         if return_loss:
 79 |             return self.forward_train(img, img_meta, **kwargs)
 80 |         else:
 81 |             return self.forward_test(img, img_meta, **kwargs)
 82 | 
 83 |     def show_result(self,
 84 |                     data,
 85 |                     result,
 86 |                     img_norm_cfg,
 87 |                     dataset='coco',
 88 |                     score_thr=0.3):
 89 |         img_tensor = data['img'][0]
 90 |         img_metas = data['img_meta'][0].data[0]
 91 |         imgs = tensor2imgs(img_tensor, **img_norm_cfg)
 92 |         assert len(imgs) == len(img_metas)
 93 | 
 94 |         if isinstance(dataset, str):
 95 |             class_names = get_classes(dataset)
 96 |         elif isinstance(dataset, list):
 97 |             class_names = dataset
 98 |         else:
 99 |             raise TypeError('dataset must be a valid dataset name or a list'
100 |                             ' of class names, not {}'.format(type(dataset)))
101 | 
102 |         for img, img_meta in zip(imgs, img_metas):
103 |             h, w, _ = img_meta['img_shape']
104 |             img_show = img[:h, :w, :]
105 |             labels = [
106 |                 np.full(bbox.shape[0], i, dtype=np.int32)
107 |                 for i, bbox in enumerate(result)
108 |             ]
109 |             labels = np.concatenate(labels)
110 |             bboxes = np.vstack(result)
111 |             mmcv.imshow_det_bboxes(
112 |                 img_show,
113 |                 bboxes,
114 |                 labels,
115 |                 class_names=class_names,
116 |                 score_thr=score_thr)
117 | 


--------------------------------------------------------------------------------
/mmdet/models/detectors/pointpillars.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import torch
  3 | from .. import builder
  4 | from mmcv.runner import load_checkpoint
  5 | from .base import BaseDetector
  6 | import torch.nn.functional as F
  7 | 
  8 | class PointPillars(BaseDetector):
  9 | 
 10 |     def __init__(self,
 11 |                  backbone,
 12 |                  neck,
 13 |                  rpn_head=None,
 14 |                  bbox_head=None,
 15 |                  rcnn_head=None,
 16 |                  train_cfg=None,
 17 |                  test_cfg=None,
 18 |                  pretrained=None):
 19 |         super(PointPillars, self).__init__()
 20 |         self.backbone = builder.build_backbone(backbone)
 21 |         self.neck = builder.build_neck(neck)
 22 |         self.bbox_head = builder.build_single_stage_head(bbox_head)
 23 | 
 24 |         if rpn_head is not None:
 25 |             self.rpn_head = builder.build_rpn_head(rpn_head)
 26 | 
 27 |         self.train_cfg = train_cfg
 28 |         self.test_cfg = test_cfg
 29 | 
 30 |         if rcnn_head is not None:
 31 |             self.rcnn_head = builder.build_bbox_head(rcnn_head)
 32 | 
 33 |         self.init_weights(pretrained=pretrained)
 34 | 
 35 |     @property
 36 |     def with_rpn(self):
 37 |         return hasattr(self, 'rpn_head') and self.rpn_head is not None
 38 | 
 39 |     def init_weights(self, pretrained=None):
 40 |         if isinstance(pretrained, str):
 41 |             logger = logging.getLogger()
 42 |             load_checkpoint(self, pretrained, strict=False, logger=logger)
 43 |     def freeze_layers(self, model):
 44 |         for param in model.parameters():
 45 |             param.requires_grad = False
 46 | 
 47 |     def merge_second_batch(self, batch_args):
 48 |         ret = {}
 49 |         for key, elems in batch_args.items():
 50 |             if key in [
 51 |                 'voxels', 'num_points',
 52 |             ]:
 53 |                 ret[key] = torch.cat(elems, dim=0)
 54 |             elif key == 'coordinates':
 55 |                 coors = []
 56 |                 for i, coor in enumerate(elems):
 57 |                     coor_pad = F.pad(
 58 |                         coor, [1, 0, 0, 0],
 59 |                         mode='constant',
 60 |                         value=i)
 61 |                     coors.append(coor_pad)
 62 |                 ret[key] = torch.cat(coors, dim=0)
 63 |             elif key in [
 64 |                 'img_meta', 'gt_labels', 'gt_bboxes',
 65 |             ]:
 66 |                 ret[key] = elems
 67 |             else:
 68 |                 ret[key] = torch.stack(elems, dim=0)
 69 |         return ret
 70 | 
 71 |     def forward_train(self, img, img_meta, **kwargs):
 72 | 
 73 |         batch_size = len(img_meta)
 74 |         ret = self.merge_second_batch(kwargs)
 75 | 
 76 |         losses = dict()
 77 | 
 78 |         canvas = self.backbone(ret['voxels'], ret['coordinates'], ret['num_points'], batch_size)
 79 | 
 80 |         x = self.neck(canvas)
 81 | 
 82 |         bbox_outs = self.bbox_head(x)
 83 |         bbox_loss_inputs = bbox_outs + (ret['gt_bboxes'], ret['gt_labels'], ret['anchors'], ret['anchors_mask'], self.train_cfg)
 84 |         bbox_losses = self.bbox_head.loss(*bbox_loss_inputs)
 85 |         losses.update(bbox_losses)
 86 | 
 87 |         return losses
 88 | 
 89 |     def forward_test(self, img, img_meta, **kwargs):
 90 | 
 91 |         batch_size = len(img_meta)
 92 |         ret = self.merge_second_batch(kwargs)
 93 |         canvas = self.backbone(ret['voxels'], ret['coordinates'], ret['num_points'], batch_size)
 94 |         x = self.neck(canvas)
 95 | 
 96 |         rpn_outs = self.bbox_head.forward(x)
 97 |         proposal_inputs = rpn_outs + (ret['anchors'], ret['anchors_mask'], img_meta, self.test_cfg)
 98 | 
 99 |         return self.bbox_head.get_det_bboxes_nms(*proposal_inputs)
100 | 
101 | 
102 | 
103 | 
104 | 
105 | 
106 | 


--------------------------------------------------------------------------------
/mmdet/models/detectors/rpn.py:
--------------------------------------------------------------------------------
 1 | import mmcv
 2 | 
 3 | from mmdet.core import tensor2imgs, bbox_mapping
 4 | from .base import BaseDetector
 5 | from .test_mixins import RPNTestMixin
 6 | from .. import builder
 7 | 
 8 | 
 9 | class RPN(BaseDetector, RPNTestMixin):
10 | 
11 |     def __init__(self,
12 |                  backbone,
13 |                  neck,
14 |                  rpn_head,
15 |                  train_cfg,
16 |                  test_cfg,
17 |                  pretrained=None):
18 |         super(RPN, self).__init__()
19 |         self.backbone = builder.build_backbone(backbone)
20 |         self.neck = builder.build_neck(neck) if neck is not None else None
21 |         self.rpn_head = builder.build_rpn_head(rpn_head)
22 |         self.train_cfg = train_cfg
23 |         self.test_cfg = test_cfg
24 |         self.init_weights(pretrained=pretrained)
25 | 
26 |     def init_weights(self, pretrained=None):
27 |         super(RPN, self).init_weights(pretrained)
28 |         self.backbone.init_weights(pretrained=pretrained)
29 |         if self.with_neck:
30 |             self.neck.init_weights()
31 |         self.rpn_head.init_weights()
32 | 
33 |     def extract_feat(self, img):
34 |         x = self.backbone(img)
35 |         if self.with_neck:
36 |             x = self.neck(x)
37 |         return x
38 | 
39 |     def forward_train(self, img, img_meta, gt_bboxes=None):
40 |         if self.train_cfg.rpn.get('debug', False):
41 |             self.rpn_head.debug_imgs = tensor2imgs(img)
42 | 
43 |         x = self.extract_feat(img)
44 |         rpn_outs = self.rpn_head(x)
45 | 
46 |         rpn_loss_inputs = rpn_outs + (gt_bboxes, img_meta, self.train_cfg.rpn)
47 |         losses = self.rpn_head.loss(*rpn_loss_inputs)
48 |         return losses
49 | 
50 |     def simple_test(self, img, img_meta, rescale=False):
51 |         x = self.extract_feat(img)
52 |         proposal_list = self.simple_test_rpn(x, img_meta, self.test_cfg.rpn)
53 |         if rescale:
54 |             for proposals, meta in zip(proposal_list, img_meta):
55 |                 proposals[:, :4] /= meta['scale_factor']
56 |         # TODO: remove this restriction
57 |         return proposal_list[0].cpu().numpy()
58 | 
59 |     def aug_test(self, imgs, img_metas, rescale=False):
60 |         proposal_list = self.aug_test_rpn(
61 |             self.extract_feats(imgs), img_metas, self.test_cfg.rpn)
62 |         if not rescale:
63 |             for proposals, img_meta in zip(proposal_list, img_metas[0]):
64 |                 img_shape = img_meta['img_shape']
65 |                 scale_factor = img_meta['scale_factor']
66 |                 flip = img_meta['flip']
67 |                 proposals[:, :4] = bbox_mapping(proposals[:, :4], img_shape,
68 |                                                 scale_factor, flip)
69 |         # TODO: remove this restriction
70 |         return proposal_list[0].cpu().numpy()
71 | 
72 |     def show_result(self, data, result, img_norm_cfg):
73 |         """Show RPN proposals on the image.
74 | 
75 |         Although we assume batch size is 1, this method supports arbitrary
76 |         batch size.
77 |         """
78 |         img_tensor = data['img'][0]
79 |         img_metas = data['img_meta'][0].data[0]
80 |         imgs = tensor2imgs(img_tensor, **img_norm_cfg)
81 |         assert len(imgs) == len(img_metas)
82 |         for img, img_meta in zip(imgs, img_metas):
83 |             h, w, _ = img_meta['img_shape']
84 |             img_show = img[:h, :w, :]
85 |             mmcv.imshow_bboxes(img_show, result, top_k=20)
86 | 


--------------------------------------------------------------------------------
/mmdet/models/detectors/single_stage.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import logging
  4 | from mmcv.runner import load_checkpoint
  5 | from .base import BaseDetector
  6 | from .test_mixins import RPNTestMixin, BBoxTestMixin, MaskTestMixin
  7 | from .. import builder
  8 | from mmdet.core import (assign_and_sample, bbox2roi, rbbox2roi, bbox2result, multi_apply, kitti_bbox2results,\
  9 |                         tensor2points, delta2rbbox3d, weighted_binary_cross_entropy)
 10 | import torch.nn.functional as F
 11 | 
 12 | 
 13 | class SingleStageDetector(BaseDetector, RPNTestMixin, BBoxTestMixin,
 14 |                        MaskTestMixin):
 15 | 
 16 |     def __init__(self,
 17 |                  backbone,
 18 |                  neck=None,
 19 |                  bbox_head=None,
 20 |                  extra_head=None,
 21 |                  train_cfg=None,
 22 |                  test_cfg=None,
 23 |                  pretrained=None):
 24 |         super(SingleStageDetector, self).__init__()
 25 |         self.backbone = builder.build_backbone(backbone)
 26 | 
 27 |         if neck is not None:
 28 |             self.neck = builder.build_neck(neck)
 29 |         else:
 30 |             raise NotImplementedError
 31 | 
 32 |         if bbox_head is not None:
 33 |             self.rpn_head = builder.build_single_stage_head(bbox_head)
 34 | 
 35 |         if extra_head is not None:
 36 |             self.extra_head = builder.build_single_stage_head(extra_head)
 37 | 
 38 |         self.train_cfg = train_cfg
 39 |         self.test_cfg = test_cfg
 40 | 
 41 |         self.init_weights(pretrained)
 42 | 
 43 |     @property
 44 |     def with_rpn(self):
 45 |         return hasattr(self, 'rpn_head') and self.rpn_head is not None
 46 | 
 47 |     def init_weights(self, pretrained=None):
 48 |         if isinstance(pretrained, str):
 49 |             logger = logging.getLogger()
 50 |             load_checkpoint(self, pretrained, strict=False, logger=logger)
 51 | 
 52 |     def merge_second_batch(self, batch_args):
 53 |         ret = {}
 54 |         for key, elems in batch_args.items():
 55 |             if key in ['voxels', 'num_points', ]:
 56 |                 ret[key] = torch.cat(elems, dim=0)
 57 |             elif key in ['coordinates', ]:
 58 |                 coors = []
 59 |                 for i, coor in enumerate(elems):
 60 |                     coor_pad = F.pad(
 61 |                         coor, [1, 0, 0, 0],
 62 |                         mode='constant',
 63 |                         value=i)
 64 |                     coors.append(coor_pad)
 65 |                 ret[key] = torch.cat(coors, dim=0)
 66 |             elif key in ['img_meta', 'gt_labels', 'gt_bboxes', 'gt_types', ]:
 67 |                 ret[key] = elems
 68 |             else:
 69 |                 if isinstance(elems, dict):
 70 |                     ret[key] = {k: torch.stack(v, dim=0) for k, v in elems.items()}
 71 |                 else:
 72 |                     ret[key] = torch.stack(elems, dim=0)
 73 |         return ret
 74 | 
 75 |     def forward_train(self, img, img_meta, **kwargs):
 76 | 
 77 |         batch_size = len(img_meta)
 78 | 
 79 |         ret = self.merge_second_batch(kwargs)
 80 | 
 81 |         vx = self.backbone(ret['voxels'], ret['num_points'])
 82 |         x, conv6, point_misc = self.neck(vx, ret['coordinates'], batch_size, is_test=False)
 83 | 
 84 |         losses = dict()
 85 | 
 86 |         aux_loss = self.neck.aux_loss(*point_misc, gt_bboxes=ret['gt_bboxes'])
 87 |         losses.update(aux_loss)
 88 | 
 89 |         # RPN forward and loss
 90 |         if self.with_rpn:
 91 |             rpn_outs = self.rpn_head(x)
 92 |             rpn_loss_inputs = rpn_outs + (ret['gt_bboxes'], ret['gt_labels'], ret['gt_types'],\
 93 |                             ret['anchors'], ret['anchors_mask'], self.train_cfg.rpn)
 94 |             rpn_losses = self.rpn_head.loss(*rpn_loss_inputs)
 95 |             losses.update(rpn_losses)
 96 |             guided_anchors, _ = self.rpn_head.get_guided_anchors(*rpn_outs, ret['anchors'],\
 97 |                         ret['anchors_mask'], ret['gt_bboxes'], ret['gt_labels'], thr=self.train_cfg.rpn.anchor_thr)
 98 |         else:
 99 |             raise NotImplementedError
100 | 
101 |         # bbox head forward and loss
102 |         if self.extra_head:
103 |             bbox_score = self.extra_head(conv6, guided_anchors)
104 |             refine_loss_inputs = (bbox_score, ret['gt_bboxes'], ret['gt_labels'], guided_anchors, self.train_cfg.extra)
105 |             refine_losses = self.extra_head.loss(*refine_loss_inputs)
106 |             losses.update(refine_losses)
107 | 
108 |         return losses
109 | 
110 |     def forward_test(self, img, img_meta, **kwargs):
111 | 
112 |         batch_size = len(img_meta)
113 | 
114 |         ret = self.merge_second_batch(kwargs)
115 | 
116 |         vx = self.backbone(ret['voxels'], ret['num_points'])
117 |         (x, conv6) = self.neck(vx, ret['coordinates'], batch_size, is_test=True)
118 | 
119 |         rpn_outs = self.rpn_head.forward(x)
120 | 
121 |         guided_anchors, anchor_labels = self.rpn_head.get_guided_anchors(*rpn_outs, ret['anchors'], ret['anchors_mask'],
122 |                                                                        None, None, thr=.1)
123 | 
124 |         bbox_score = self.extra_head(conv6, guided_anchors, is_test=True)
125 | 
126 |         det_bboxes, det_scores, det_labels = self.extra_head.get_rescore_bboxes(
127 |             guided_anchors, bbox_score, anchor_labels, img_meta, self.test_cfg.extra)
128 | 
129 |         results = [kitti_bbox2results(*param, class_names=self.class_names) for param in zip(det_bboxes, det_scores, det_labels, img_meta)]
130 | 
131 |         return results
132 | 
133 | 
134 | 
135 | 


--------------------------------------------------------------------------------
/mmdet/models/detectors/test_mixins.py:
--------------------------------------------------------------------------------
  1 | from mmdet.core import (bbox2roi, bbox_mapping, merge_aug_proposals,
  2 |                         merge_aug_bboxes, merge_aug_masks)
  3 | 
  4 | import numpy as np
  5 | 
  6 | class RPNTestMixin(object):
  7 | 
  8 |     def simple_test_rpn(self, x, img_meta, rpn_test_cfg):
  9 |         rpn_outs = self.rpn_head(x)
 10 |         proposal_inputs = rpn_outs + (img_meta, rpn_test_cfg)
 11 |         proposal_list = self.rpn_head.get_proposals(*proposal_inputs)
 12 |         return proposal_list
 13 | 
 14 |     def aug_test_rpn(self, feats, img_metas, rpn_test_cfg):
 15 |         imgs_per_gpu = len(img_metas[0])
 16 |         aug_proposals = [[] for _ in range(imgs_per_gpu)]
 17 |         for x, img_meta in zip(feats, img_metas):
 18 |             proposal_list = self.simple_test_rpn(x, img_meta, rpn_test_cfg)
 19 |             for i, proposals in enumerate(proposal_list):
 20 |                 aug_proposals[i].append(proposals)
 21 |         # after merging, proposals will be rescaled to the original image size
 22 |         merged_proposals = [
 23 |             merge_aug_proposals(proposals, img_meta, rpn_test_cfg)
 24 |             for proposals, img_meta in zip(aug_proposals, img_metas)
 25 |         ]
 26 |         return merged_proposals
 27 | 
 28 | 
 29 | class BBoxTestMixin(object):
 30 | 
 31 |     def simple_test_bboxes(self,
 32 |                            x,
 33 |                            img_meta,
 34 |                            proposals,
 35 |                            rcnn_test_cfg,
 36 |                            rescale=False):
 37 |         """Test only det bboxes without augmentation."""
 38 |         rois = bbox2roi(proposals)
 39 |         roi_feats = self.bbox_roi_extractor(
 40 |             x[:len(self.bbox_roi_extractor.featmap_strides)], rois)
 41 |         cls_score, bbox_pred = self.bbox_head(roi_feats)
 42 |         img_shape = img_meta[0]['img_shape']
 43 |         scale_factor = img_meta[0]['scale_factor']
 44 |         det_bboxes, det_labels = self.bbox_head.get_det_bboxes_nms(
 45 |             rois,
 46 |             cls_score,
 47 |             bbox_pred,
 48 |             img_shape,
 49 |             scale_factor,
 50 |             rescale=rescale,
 51 |             cfg=rcnn_test_cfg)
 52 |         return det_bboxes, det_labels
 53 | 
 54 |     def aug_test_bboxes(self, feats, img_metas, proposal_list, rcnn_test_cfg):
 55 |         aug_bboxes = []
 56 |         aug_scores = []
 57 |         for x, img_meta in zip(feats, img_metas):
 58 |             # only one image in the batch
 59 |             img_shape = img_meta[0]['img_shape']
 60 |             scale_factor = img_meta[0]['scale_factor']
 61 |             flip = img_meta[0]['flip']
 62 |             # TODO more flexible
 63 |             proposals = bbox_mapping(proposal_list[0][:, :4], img_shape,
 64 |                                      scale_factor, flip)
 65 |             rois = bbox2roi([proposals])
 66 |             # recompute feature maps to save GPU memory
 67 |             roi_feats = self.bbox_roi_extractor(
 68 |                 x[:len(self.bbox_roi_extractor.featmap_strides)], rois)
 69 |             cls_score, bbox_pred = self.bbox_head(roi_feats)
 70 |             bboxes, scores = self.bbox_head.get_det_bboxes_nms(
 71 |                 rois,
 72 |                 cls_score,
 73 |                 bbox_pred,
 74 |                 img_shape,
 75 |                 scale_factor,
 76 |                 rescale=False,
 77 |                 cfg=None)
 78 |             aug_bboxes.append(bboxes)
 79 |             aug_scores.append(scores)
 80 |         # after merging, bboxes will be rescaled to the original image size
 81 |         merged_bboxes, merged_scores = merge_aug_bboxes(
 82 |             aug_bboxes, aug_scores, img_metas, rcnn_test_cfg)
 83 |         det_bboxes, det_labels = multiclass_nms(
 84 |             merged_bboxes, merged_scores, rcnn_test_cfg.score_thr,
 85 |             rcnn_test_cfg.nms, rcnn_test_cfg.max_per_img)
 86 |         return det_bboxes, det_labels
 87 | 
 88 | class MaskTestMixin(object):
 89 | 
 90 |     def simple_test_mask(self,
 91 |                          x,
 92 |                          img_meta,
 93 |                          det_bboxes,
 94 |                          det_labels,
 95 |                          rescale=False):
 96 |         # image shape of the first image in the batch (only one)
 97 |         ori_shape = img_meta[0]['ori_shape']
 98 |         scale_factor = img_meta[0]['scale_factor']
 99 |         if det_bboxes.shape[0] == 0:
100 |             segm_result = [[] for _ in range(self.mask_head.num_classes - 1)]
101 |         else:
102 |             # if det_bboxes is rescaled to the original image size, we need to
103 |             # rescale it back to the testing scale to obtain RoIs.
104 |             _bboxes = (det_bboxes[:, :4] * scale_factor
105 |                        if rescale else det_bboxes)
106 |             mask_rois = bbox2roi([_bboxes])
107 |             mask_feats = self.mask_roi_extractor(
108 |                 x[:len(self.mask_roi_extractor.featmap_strides)], mask_rois)
109 |             mask_pred = self.mask_head(mask_feats)
110 |             segm_result = self.mask_head.get_seg_masks(
111 |                 mask_pred, _bboxes, det_labels, self.test_cfg.rcnn, ori_shape,
112 |                 scale_factor, rescale)
113 |         return segm_result
114 | 
115 |     def aug_test_mask(self, feats, img_metas, det_bboxes, det_labels):
116 |         if det_bboxes.shape[0] == 0:
117 |             segm_result = [[] for _ in range(self.mask_head.num_classes - 1)]
118 |         else:
119 |             aug_masks = []
120 |             for x, img_meta in zip(feats, img_metas):
121 |                 img_shape = img_meta[0]['img_shape']
122 |                 scale_factor = img_meta[0]['scale_factor']
123 |                 flip = img_meta[0]['flip']
124 |                 _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape,
125 |                                        scale_factor, flip)
126 |                 mask_rois = bbox2roi([_bboxes])
127 |                 mask_feats = self.mask_roi_extractor(
128 |                     x[:len(self.mask_roi_extractor.featmap_strides)],
129 |                     mask_rois)
130 |                 mask_pred = self.mask_head(mask_feats)
131 |                 # convert to numpy array to save memory
132 |                 aug_masks.append(mask_pred.sigmoid().cpu().numpy())
133 |             merged_masks = merge_aug_masks(aug_masks, img_metas,
134 |                                            self.test_cfg.rcnn)
135 | 
136 |             ori_shape = img_metas[0][0]['ori_shape']
137 |             segm_result = self.mask_head.get_seg_masks(
138 |                 merged_masks,
139 |                 det_bboxes,
140 |                 det_labels,
141 |                 self.test_cfg.rcnn,
142 |                 ori_shape,
143 |                 scale_factor=1.0,
144 |                 rescale=False)
145 |         return segm_result
146 | 


--------------------------------------------------------------------------------
/mmdet/models/mask_heads/__init__.py:
--------------------------------------------------------------------------------
1 | from .fcn_mask_head import FCNMaskHead
2 | 
3 | __all__ = ['FCNMaskHead']
4 | 


--------------------------------------------------------------------------------
/mmdet/models/mask_heads/fcn_mask_head.py:
--------------------------------------------------------------------------------
  1 | import mmcv
  2 | import numpy as np
  3 | import pycocotools.mask as mask_util
  4 | import torch
  5 | import torch.nn as nn
  6 | 
  7 | from ..utils import ConvModule
  8 | from mmdet.core import mask_cross_entropy, mask_target
  9 | 
 10 | 
 11 | class FCNMaskHead(nn.Module):
 12 | 
 13 |     def __init__(self,
 14 |                  num_convs=4,
 15 |                  roi_feat_size=14,
 16 |                  in_channels=256,
 17 |                  conv_kernel_size=3,
 18 |                  conv_out_channels=256,
 19 |                  upsample_method='deconv',
 20 |                  upsample_ratio=2,
 21 |                  num_classes=81,
 22 |                  class_agnostic=False,
 23 |                  normalize=None):
 24 |         super(FCNMaskHead, self).__init__()
 25 |         if upsample_method not in [None, 'deconv', 'nearest', 'bilinear']:
 26 |             raise ValueError(
 27 |                 'Invalid upsample method {}, accepted methods '
 28 |                 'are "deconv", "nearest", "bilinear"'.format(upsample_method))
 29 |         self.num_convs = num_convs
 30 |         self.roi_feat_size = roi_feat_size  # WARN: not used and reserved
 31 |         self.in_channels = in_channels
 32 |         self.conv_kernel_size = conv_kernel_size
 33 |         self.conv_out_channels = conv_out_channels
 34 |         self.upsample_method = upsample_method
 35 |         self.upsample_ratio = upsample_ratio
 36 |         self.num_classes = num_classes
 37 |         self.class_agnostic = class_agnostic
 38 |         self.normalize = normalize
 39 |         self.with_bias = normalize is None
 40 | 
 41 |         self.convs = nn.ModuleList()
 42 |         for i in range(self.num_convs):
 43 |             in_channels = (self.in_channels
 44 |                            if i == 0 else self.conv_out_channels)
 45 |             padding = (self.conv_kernel_size - 1) // 2
 46 |             self.convs.append(
 47 |                 ConvModule(
 48 |                     in_channels,
 49 |                     self.conv_out_channels,
 50 |                     3,
 51 |                     padding=padding,
 52 |                     normalize=normalize,
 53 |                     bias=self.with_bias))
 54 |         if self.upsample_method is None:
 55 |             self.upsample = None
 56 |         elif self.upsample_method == 'deconv':
 57 |             self.upsample = nn.ConvTranspose2d(
 58 |                 self.conv_out_channels,
 59 |                 self.conv_out_channels,
 60 |                 self.upsample_ratio,
 61 |                 stride=self.upsample_ratio)
 62 |         else:
 63 |             self.upsample = nn.Upsample(
 64 |                 scale_factor=self.upsample_ratio, mode=self.upsample_method)
 65 | 
 66 |         out_channels = 1 if self.class_agnostic else self.num_classes
 67 |         self.conv_logits = nn.Conv2d(self.conv_out_channels, out_channels, 1)
 68 |         self.relu = nn.ReLU(inplace=True)
 69 |         self.debug_imgs = None
 70 | 
 71 |     def init_weights(self):
 72 |         for m in [self.upsample, self.conv_logits]:
 73 |             if m is None:
 74 |                 continue
 75 |             nn.init.kaiming_normal_(
 76 |                 m.weight, mode='fan_out', nonlinearity='relu')
 77 |             nn.init.constant_(m.bias, 0)
 78 | 
 79 |     def forward(self, x):
 80 |         for conv in self.convs:
 81 |             x = conv(x)
 82 |         if self.upsample is not None:
 83 |             x = self.upsample(x)
 84 |             if self.upsample_method == 'deconv':
 85 |                 x = self.relu(x)
 86 |         mask_pred = self.conv_logits(x)
 87 |         return mask_pred
 88 | 
 89 |     def get_target(self, sampling_results, gt_masks, rcnn_train_cfg):
 90 |         pos_proposals = [res.pos_bboxes for res in sampling_results]
 91 |         pos_assigned_gt_inds = [
 92 |             res.pos_assigned_gt_inds for res in sampling_results
 93 |         ]
 94 |         mask_targets = mask_target(pos_proposals, pos_assigned_gt_inds,
 95 |                                    gt_masks, rcnn_train_cfg)
 96 |         return mask_targets
 97 | 
 98 |     def loss(self, mask_pred, mask_targets, labels):
 99 |         loss = dict()
100 |         loss_mask = mask_cross_entropy(mask_pred, mask_targets, labels)
101 |         loss['loss_mask'] = loss_mask
102 |         return loss
103 | 
104 |     def get_seg_masks(self, mask_pred, det_bboxes, det_labels, rcnn_test_cfg,
105 |                       ori_shape, scale_factor, rescale):
106 |         """Get segmentation masks from mask_pred and bboxes.
107 | 
108 |         Args:
109 |             mask_pred (Tensor or ndarray): shape (n, #class+1, h, w).
110 |                 For single-scale testing, mask_pred is the direct output of
111 |                 model, whose type is Tensor, while for multi-scale testing,
112 |                 it will be converted to numpy array outside of this method.
113 |             det_bboxes (Tensor): shape (n, 4/5)
114 |             det_labels (Tensor): shape (n, )
115 |             img_shape (Tensor): shape (3, )
116 |             rcnn_test_cfg (dict): rcnn testing config
117 |             ori_shape: original image size
118 | 
119 |         Returns:
120 |             list[list]: encoded masks
121 |         """
122 |         if isinstance(mask_pred, torch.Tensor):
123 |             mask_pred = mask_pred.sigmoid().cpu().numpy()
124 |         assert isinstance(mask_pred, np.ndarray)
125 | 
126 |         cls_segms = [[] for _ in range(self.num_classes - 1)]
127 |         bboxes = det_bboxes.cpu().numpy()[:, :4]
128 |         labels = det_labels.cpu().numpy() + 1
129 | 
130 |         if rescale:
131 |             img_h, img_w = ori_shape[:2]
132 |         else:
133 |             img_h = np.round(ori_shape[0] * scale_factor).astype(np.int32)
134 |             img_w = np.round(ori_shape[1] * scale_factor).astype(np.int32)
135 |             scale_factor = 1.0
136 | 
137 |         for i in range(bboxes.shape[0]):
138 |             bbox = (bboxes[i, :] / scale_factor).astype(np.int32)
139 |             label = labels[i]
140 |             w = max(bbox[2] - bbox[0] + 1, 1)
141 |             h = max(bbox[3] - bbox[1] + 1, 1)
142 | 
143 |             if not self.class_agnostic:
144 |                 mask_pred_ = mask_pred[i, label, :, :]
145 |             else:
146 |                 mask_pred_ = mask_pred[i, 0, :, :]
147 |             im_mask = np.zeros((img_h, img_w), dtype=np.uint8)
148 | 
149 |             bbox_mask = mmcv.imresize(mask_pred_, (w, h))
150 |             bbox_mask = (bbox_mask > rcnn_test_cfg.mask_thr_binary).astype(
151 |                 np.uint8)
152 |             im_mask[bbox[1]:bbox[1] + h, bbox[0]:bbox[0] + w] = bbox_mask
153 |             rle = mask_util.encode(
154 |                 np.array(im_mask[:, :, np.newaxis], order='F'))[0]
155 |             cls_segms[label - 1].append(rle)
156 | 
157 |         return cls_segms
158 | 


--------------------------------------------------------------------------------
/mmdet/models/necks/__init__.py:
--------------------------------------------------------------------------------
1 | from .fpn import FPN
2 | from .cmn import SpMiddleFHD
3 | from .rpn import RPN
4 | __all__ = ['FPN','SpMiddleFHD','RPN']
5 | 


--------------------------------------------------------------------------------
/mmdet/models/necks/fpn.py:
--------------------------------------------------------------------------------
  1 | import torch.nn as nn
  2 | import torch.nn.functional as F
  3 | from ..utils import ConvModule
  4 | from ..utils import xavier_init
  5 | 
  6 | 
  7 | class FPN(nn.Module):
  8 | 
  9 |     def __init__(self,
 10 |                  in_channels,
 11 |                  out_channels,
 12 |                  num_outs,
 13 |                  start_level=0,
 14 |                  end_level=-1,
 15 |                  add_extra_convs=False,
 16 |                  normalize=None,
 17 |                  activation=None):
 18 |         super(FPN, self).__init__()
 19 |         assert isinstance(in_channels, list)
 20 |         self.in_channels = in_channels
 21 |         self.out_channels = out_channels
 22 |         self.num_ins = len(in_channels)
 23 |         self.num_outs = num_outs
 24 |         self.activation = activation
 25 |         self.with_bias = normalize is None
 26 | 
 27 |         if end_level == -1:
 28 |             self.backbone_end_level = self.num_ins
 29 |             assert num_outs >= self.num_ins - start_level
 30 |         else:
 31 |             # if end_level < inputs, no extra level is allowed
 32 |             self.backbone_end_level = end_level
 33 |             assert end_level <= len(in_channels)
 34 |             assert num_outs == end_level - start_level
 35 |         self.start_level = start_level
 36 |         self.end_level = end_level
 37 |         self.add_extra_convs = add_extra_convs
 38 | 
 39 |         self.lateral_convs = nn.ModuleList()
 40 |         self.fpn_convs = nn.ModuleList()
 41 | 
 42 |         for i in range(self.start_level, self.backbone_end_level):
 43 |             l_conv = ConvModule(
 44 |                 in_channels[i],
 45 |                 out_channels,
 46 |                 1,
 47 |                 normalize=normalize,
 48 |                 bias=self.with_bias,
 49 |                 activation=self.activation,
 50 |                 inplace=False)
 51 |             fpn_conv = ConvModule(
 52 |                 out_channels,
 53 |                 out_channels,
 54 |                 3,
 55 |                 padding=1,
 56 |                 normalize=normalize,
 57 |                 bias=self.with_bias,
 58 |                 activation=self.activation,
 59 |                 inplace=False)
 60 | 
 61 |             self.lateral_convs.append(l_conv)
 62 |             self.fpn_convs.append(fpn_conv)
 63 | 
 64 |             # lvl_id = i - self.start_level
 65 |             # setattr(self, 'lateral_conv{}'.format(lvl_id), l_conv)
 66 |             # setattr(self, 'fpn_conv{}'.format(lvl_id), fpn_conv)
 67 | 
 68 |         # add extra conv layers (e.g., RetinaNet)
 69 |         extra_levels = num_outs - self.backbone_end_level + self.start_level
 70 |         if add_extra_convs and extra_levels >= 1:
 71 |             for i in range(extra_levels):
 72 |                 in_channels = (self.in_channels[self.backbone_end_level - 1]
 73 |                                if i == 0 else out_channels)
 74 |                 extra_fpn_conv = ConvModule(
 75 |                     in_channels,
 76 |                     out_channels,
 77 |                     3,
 78 |                     stride=2,
 79 |                     padding=1,
 80 |                     normalize=normalize,
 81 |                     bias=self.with_bias,
 82 |                     activation=self.activation,
 83 |                     inplace=False)
 84 |                 self.fpn_convs.append(extra_fpn_conv)
 85 | 
 86 |     # default init_weights for conv(msra) and norm in ConvModule
 87 |     def init_weights(self):
 88 |         for m in self.modules():
 89 |             if isinstance(m, nn.Conv2d):
 90 |                 xavier_init(m, distribution='uniform')
 91 | 
 92 |     def forward(self, inputs):
 93 |         assert len(inputs) == len(self.in_channels)
 94 | 
 95 |         # build laterals
 96 |         laterals = [
 97 |             lateral_conv(inputs[i + self.start_level])
 98 |             for i, lateral_conv in enumerate(self.lateral_convs)
 99 |         ]
100 | 
101 |         # build top-down path
102 |         used_backbone_levels = len(laterals)
103 |         for i in range(used_backbone_levels - 1, 0, -1):
104 |             laterals[i - 1] += F.interpolate(
105 |                 laterals[i], scale_factor=2, mode='nearest')
106 | 
107 |         # build outputs
108 |         # part 1: from original levels
109 |         outs = [
110 |             self.fpn_convs[i](laterals[i]) for i in range(used_backbone_levels)
111 |         ]
112 |         # part 2: add extra levels
113 |         if self.num_outs > len(outs):
114 |             # use max pool to get more levels on top of outputs
115 |             # (e.g., Faster R-CNN, Mask R-CNN)
116 |             if not self.add_extra_convs:
117 |                 for i in range(self.num_outs - used_backbone_levels):
118 |                     outs.append(F.max_pool2d(outs[-1], 1, stride=2))
119 |             # add conv layers on top of original feature maps (RetinaNet)
120 |             else:
121 |                 orig = inputs[self.backbone_end_level - 1]
122 |                 outs.append(self.fpn_convs[used_backbone_levels](orig))
123 |                 for i in range(used_backbone_levels + 1, self.num_outs):
124 |                     # BUG: we should add relu before each extra conv
125 |                     outs.append(self.fpn_convs[i](outs[-1]))
126 |         return tuple(outs)
127 | 


--------------------------------------------------------------------------------
/mmdet/models/necks/rpn.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from torch import nn
  3 | from ..utils import Empty, change_default_args, Sequential
  4 | import torch
  5 | 
  6 | class RPNBase(nn.Module):
  7 |     def __init__(self,
  8 |                  use_norm=True,
  9 |                  layer_nums=(3, 5, 5),
 10 |                  layer_strides=(2, 2, 2),
 11 |                  num_filters=(128, 128, 256),
 12 |                  upsample_strides=(1, 2, 4),
 13 |                  num_upsample_filters=(256, 256, 256),
 14 |                  num_input_features=128):
 15 | 
 16 |         """upsample_strides support float: [0.25, 0.5, 1]
 17 |         if upsample_strides < 1, conv2d will be used instead of convtranspose2d.
 18 |         """
 19 |         super(RPNBase, self).__init__()
 20 |         self._layer_strides = layer_strides
 21 |         self._num_filters = num_filters
 22 |         self._layer_nums = layer_nums
 23 |         self._upsample_strides = upsample_strides
 24 |         self._num_upsample_filters = num_upsample_filters
 25 |         self._num_input_features = num_input_features
 26 |         self._use_norm = use_norm
 27 | 
 28 |         assert len(layer_strides) == len(layer_nums)
 29 |         assert len(num_filters) == len(layer_nums)
 30 |         assert len(num_upsample_filters) == len(upsample_strides)
 31 |         self._upsample_start_idx = len(layer_nums) - len(upsample_strides)
 32 |         must_equal_list = []
 33 |         for i in range(len(upsample_strides)):
 34 |             must_equal_list.append(upsample_strides[i] / np.prod(
 35 |                 layer_strides[:i + self._upsample_start_idx + 1]))
 36 |         for val in must_equal_list:
 37 |             assert val == must_equal_list[0]
 38 | 
 39 |         if use_norm:
 40 |             BatchNorm2d = change_default_args(
 41 |                 eps=1e-3, momentum=0.01)(nn.BatchNorm2d)
 42 |             Conv2d = change_default_args(bias=False)(nn.Conv2d)
 43 |             ConvTranspose2d = change_default_args(bias=False)(
 44 |                 nn.ConvTranspose2d)
 45 |         else:
 46 |             BatchNorm2d = Empty
 47 |             Conv2d = change_default_args(bias=True)(nn.Conv2d)
 48 |             ConvTranspose2d = change_default_args(bias=True)(
 49 |                 nn.ConvTranspose2d)
 50 | 
 51 |         in_filters = [num_input_features, *num_filters[:-1]]
 52 |         blocks = []
 53 |         deblocks = []
 54 | 
 55 |         for i, layer_num in enumerate(layer_nums):
 56 |             block, num_out_filters = self._make_layer(
 57 |                 in_filters[i],
 58 |                 num_filters[i],
 59 |                 layer_num,
 60 |                 stride=layer_strides[i])
 61 |             blocks.append(block)
 62 |             if i - self._upsample_start_idx >= 0:
 63 |                 stride = upsample_strides[i - self._upsample_start_idx]
 64 |                 if stride >= 1:
 65 |                     stride = np.round(stride).astype(np.int64)
 66 |                     deblock = nn.Sequential(
 67 |                         ConvTranspose2d(
 68 |                             num_out_filters,
 69 |                             num_upsample_filters[i - self._upsample_start_idx],
 70 |                             stride,
 71 |                             stride=stride),
 72 |                         BatchNorm2d(
 73 |                             num_upsample_filters[i - self._upsample_start_idx]),
 74 |                         nn.ReLU(),
 75 |                     )
 76 |                 else:
 77 |                     stride = np.round(1 / stride).astype(np.int64)
 78 |                     deblock = nn.Sequential(
 79 |                         Conv2d(
 80 |                             num_out_filters,
 81 |                             num_upsample_filters[i - self._upsample_start_idx],
 82 |                             stride,
 83 |                             stride=stride),
 84 |                         BatchNorm2d(
 85 |                             num_upsample_filters[i - self._upsample_start_idx]),
 86 |                         nn.ReLU(),
 87 |                     )
 88 |                 deblocks.append(deblock)
 89 | 
 90 |         self._num_out_filters = num_out_filters
 91 |         self.blocks = nn.ModuleList(blocks)
 92 |         self.deblocks = nn.ModuleList(deblocks)
 93 | 
 94 |     @property
 95 |     def downsample_factor(self):
 96 |         factor = np.prod(self._layer_strides)
 97 |         if len(self._upsample_strides) > 0:
 98 |             factor /= self._upsample_strides[-1]
 99 |         return factor
100 | 
101 |     def _make_layer(self, inplanes, planes, num_blocks, stride=1):
102 |         raise NotImplementedError
103 | 
104 |     def forward(self, x):
105 |         ups = []
106 |         stage_outputs = []
107 |         for i in range(len(self.blocks)):
108 |             x = self.blocks[i](x)
109 |             stage_outputs.append(x)
110 |             if i - self._upsample_start_idx >= 0:
111 |                 ups.append(self.deblocks[i - self._upsample_start_idx](x))
112 | 
113 |         if len(ups) > 0:
114 |             x = torch.cat(ups, dim=1)
115 | 
116 |         return x
117 | 
118 | class RPN(RPNBase):
119 |     def _make_layer(self, inplanes, planes, num_blocks, stride=1):
120 |         if self._use_norm:
121 |             BatchNorm2d = change_default_args(
122 |                 eps=1e-3, momentum=0.01)(nn.BatchNorm2d)
123 |             Conv2d = change_default_args(bias=False)(nn.Conv2d)
124 |             ConvTranspose2d = change_default_args(bias=False)(
125 |                 nn.ConvTranspose2d)
126 |         else:
127 |             BatchNorm2d = Empty
128 |             Conv2d = change_default_args(bias=True)(nn.Conv2d)
129 |             ConvTranspose2d = change_default_args(bias=True)(
130 |                 nn.ConvTranspose2d)
131 | 
132 |         block = Sequential(
133 |             nn.ZeroPad2d(1),
134 |             Conv2d(inplanes, planes, 3, stride=stride),
135 |             BatchNorm2d(planes),
136 |             nn.ReLU(),
137 |         )
138 |         for j in range(num_blocks):
139 |             block.add(Conv2d(planes, planes, 3, padding=1))
140 |             block.add(BatchNorm2d(planes))
141 |             block.add(nn.ReLU())
142 | 
143 |         return block, planes


--------------------------------------------------------------------------------
/mmdet/models/roi_extractors/__init__.py:
--------------------------------------------------------------------------------
1 | from .single_level import SingleRoIExtractor
2 | 
3 | __all__ = ['SingleRoIExtractor']
4 | 


--------------------------------------------------------------------------------
/mmdet/models/roi_extractors/single_level.py:
--------------------------------------------------------------------------------
 1 | from __future__ import division
 2 | import torch
 3 | import torch.nn as nn
 4 | from mmdet.core import tensor2points
 5 | from mmdet import ops
 6 | import numpy as np
 7 | 
 8 | class SingleRoIExtractor(nn.Module):
 9 |     """Extract RoI features from a single level feature map.
10 | 
11 |     If there are mulitple input feature levels, each RoI is mapped to a level
12 |     according to its scale.
13 | 
14 |     Args:
15 |         roi_layer (dict): Specify RoI layer type and arguments.
16 |         out_channels (int): Output channels of RoI layers.
17 |         featmap_strides (int): Strides of input feature maps.
18 |         finest_scale (int): Scale threshold of mapping to level 0.
19 |     """
20 | 
21 |     def __init__(self,
22 |                  roi_layer,
23 |                  out_channels,
24 |                  featmap_strides,
25 |                  finest_scale=56):
26 |         super(SingleRoIExtractor, self).__init__()
27 |         self.roi_layers = self.build_roi_layers(roi_layer, featmap_strides)
28 |         self.out_channels = out_channels
29 |         self.featmap_strides = featmap_strides
30 |         self.finest_scale = finest_scale
31 | 
32 |     @property
33 |     def num_inputs(self):
34 |         """int: Input feature map levels."""
35 |         return len(self.featmap_strides)
36 | 
37 |     def init_weights(self):
38 |         pass
39 | 
40 |     def build_roi_layers(self, layer_cfg, featmap_strides):
41 |         cfg = layer_cfg.copy()
42 |         layer_type = cfg.pop('type')
43 |         assert hasattr(ops, layer_type)
44 |         layer_cls = getattr(ops, layer_type)
45 |         roi_layers = nn.ModuleList(
46 |             [layer_cls(spatial_scale=1 / s, **cfg) for s in featmap_strides])
47 |         return roi_layers
48 | 
49 |     def map_roi_levels(self, rois, num_levels):
50 |         """Map rois to corresponding feature levels by scales.
51 | 
52 |         - scale < finest_scale: level 0
53 |         - finest_scale <= scale < finest_scale * 2: level 1
54 |         - finest_scale * 2 <= scale < finest_scale * 4: level 2
55 |         - scale >= finest_scale * 4: level 3
56 | 
57 |         Args:
58 |             rois (Tensor): Input RoIs, shape (k, 5).
59 |             num_levels (int): Total level number.
60 | 
61 |         Returns:
62 |             Tensor: Level index (0-based) of each RoI, shape (k, )
63 |         """
64 |         scale = torch.sqrt(
65 |             (rois[:, 3] - rois[:, 1] + 1) * (rois[:, 4] - rois[:, 2] + 1))
66 |         target_lvls = torch.floor(torch.log2(scale / self.finest_scale + 1e-6))
67 |         target_lvls = target_lvls.clamp(min=0, max=num_levels - 1).long()
68 |         return target_lvls
69 | 
70 |     def forward(self, feats, rois):
71 |         if len(feats) == 1:
72 |             return self.roi_layers[0](feats[0], rois)
73 | 
74 |         out_size = self.roi_layers[0].out_size
75 |         num_levels = len(feats)
76 |         target_lvls = self.map_roi_levels(rois, num_levels)
77 |         roi_feats = torch.cuda.FloatTensor(rois.size()[0], self.out_channels,
78 |                                            out_size, out_size).fill_(0)
79 |         for i in range(num_levels):
80 |             inds = target_lvls == i
81 |             if inds.any():
82 |                 rois_ = rois[inds, :]
83 |                 roi_feats_t = self.roi_layers[i](feats[i], rois_)
84 |                 roi_feats[inds] += roi_feats_t
85 |         return roi_feats
86 | 
87 | 
88 | 
89 | 


--------------------------------------------------------------------------------
/mmdet/models/rpn_heads/__init__.py:
--------------------------------------------------------------------------------
1 | from .rpn_head import RPNHead
2 | 
3 | __all__ = ['RPNHead']
4 | 


--------------------------------------------------------------------------------
/mmdet/models/single_stage_heads/__init__.py:
--------------------------------------------------------------------------------
1 | from .retina_head import RetinaHead
2 | from .ssd_rotate_head import *
3 | 
4 | __all__ = ['RetinaHead', "SSDRotateHead"]
5 | 


--------------------------------------------------------------------------------
/mmdet/models/utils/__init__.py:
--------------------------------------------------------------------------------
 1 | from .conv_module import ConvModule
 2 | from .norm import build_norm_layer
 3 | from .weight_init import (xavier_init, normal_init, uniform_init, kaiming_init,
 4 |                           bias_init_with_prob)
 5 | from .empty import Empty
 6 | from .sequential import Sequential
 7 | import inspect
 8 | import torch
 9 | 
10 | def get_paddings_indicator(actual_num, max_num, axis=0):
11 |     """Create boolean mask by actually number of a padded tensor.
12 |     Args:
13 |         actual_num ([type]): [description]
14 |         max_num ([type]): [description]
15 |     Returns:
16 |         [type]: [description]
17 |     """
18 | 
19 |     actual_num = torch.unsqueeze(actual_num, axis + 1)
20 |     # tiled_actual_num: [N, M, 1]
21 |     max_num_shape = [1] * len(actual_num.shape)
22 |     max_num_shape[axis + 1] = -1
23 |     max_num = torch.arange(
24 |         max_num, dtype=torch.int, device=actual_num.device).view(max_num_shape)
25 |     # tiled_actual_num: [[3,3,3,3,3], [4,4,4,4,4], [2,2,2,2,2]]
26 |     # tiled_max_num: [[0,1,2,3,4], [0,1,2,3,4], [0,1,2,3,4]]
27 |     paddings_indicator = actual_num.int() > max_num
28 |     # paddings_indicator shape: [batch_size, max_num]
29 |     return paddings_indicator
30 | 
31 | def get_pos_to_kw_map(func):
32 |     pos_to_kw = {}
33 |     fsig = inspect.signature(func)
34 |     pos = 0
35 |     for name, info in fsig.parameters.items():
36 |         if info.kind is info.POSITIONAL_OR_KEYWORD:
37 |             pos_to_kw[pos] = name
38 |         pos += 1
39 |     return pos_to_kw
40 | 
41 | def change_default_args(**kwargs):
42 |     def layer_wrapper(layer_class):
43 |         class DefaultArgLayer(layer_class):
44 |             def __init__(self, *args, **kw):
45 |                 pos_to_kw = get_pos_to_kw_map(layer_class.__init__)
46 |                 kw_to_pos = {kw: pos for pos, kw in pos_to_kw.items()}
47 |                 for key, val in kwargs.items():
48 |                     if key not in kw and kw_to_pos[key] > len(args):
49 |                         kw[key] = val
50 |                 super().__init__(*args, **kw)
51 | 
52 |         return DefaultArgLayer
53 | 
54 |     return layer_wrapper
55 | 
56 | def one_hot(tensor, depth, dim=-1, on_value=1.0, dtype=torch.float32):
57 |     tensor_onehot = torch.zeros(
58 |         *list(tensor.shape), depth, dtype=dtype, device=tensor.device)
59 |     tensor_onehot.scatter_(dim, tensor.unsqueeze(dim).long(), on_value)
60 |     return tensor_onehot
61 | 
62 | __all__ = [
63 |     'ConvModule', 'build_norm_layer', 'xavier_init', 'normal_init',
64 |     'uniform_init', 'kaiming_init', 'bias_init_with_prob','Empty',
65 |     'change_default_args','Sequential','one_hot', 'get_paddings_indicator'
66 | ]
67 | 


--------------------------------------------------------------------------------
/mmdet/models/utils/conv_module.py:
--------------------------------------------------------------------------------
 1 | import warnings
 2 | 
 3 | import torch.nn as nn
 4 | from mmcv.cnn import kaiming_init, constant_init
 5 | 
 6 | from .norm import build_norm_layer
 7 | 
 8 | class ConvModule(nn.Module):
 9 | 
10 |     def __init__(self,
11 |                  in_channels,
12 |                  out_channels,
13 |                  kernel_size,
14 |                  stride=1,
15 |                  padding=0,
16 |                  dilation=1,
17 |                  groups=1,
18 |                  bias=True,
19 |                  normalize=None,
20 |                  activation='relu',
21 |                  inplace=True,
22 |                  activate_last=True):
23 |         super(ConvModule, self).__init__()
24 |         self.with_norm = normalize is not None
25 |         self.with_activatation = activation is not None
26 |         self.with_bias = bias
27 |         self.activation = activation
28 |         self.activate_last = activate_last
29 | 
30 |         if self.with_norm and self.with_bias:
31 |             warnings.warn('ConvModule has norm and bias at the same time')
32 | 
33 |         self.conv = nn.Conv2d(
34 |             in_channels,
35 |             out_channels,
36 |             kernel_size,
37 |             stride,
38 |             padding,
39 |             dilation,
40 |             groups,
41 |             bias=bias)
42 | 
43 |         self.in_channels = self.conv.in_channels
44 |         self.out_channels = self.conv.out_channels
45 |         self.kernel_size = self.conv.kernel_size
46 |         self.stride = self.conv.stride
47 |         self.padding = self.conv.padding
48 |         self.dilation = self.conv.dilation
49 |         self.transposed = self.conv.transposed
50 |         self.output_padding = self.conv.output_padding
51 |         self.groups = self.conv.groups
52 | 
53 |         if self.with_norm:
54 |             norm_channels = out_channels if self.activate_last else in_channels
55 |             self.norm = build_norm_layer(normalize, norm_channels)
56 | 
57 |         if self.with_activatation:
58 |             assert activation in ['relu'], 'Only ReLU supported.'
59 |             if self.activation == 'relu':
60 |                 self.activate = nn.ReLU(inplace=inplace)
61 | 
62 |         # Default using msra init
63 |         self.init_weights()
64 | 
65 |     def init_weights(self):
66 |         nonlinearity = 'relu' if self.activation is None else self.activation
67 |         kaiming_init(self.conv, nonlinearity=nonlinearity)
68 |         if self.with_norm:
69 |             constant_init(self.norm, 1, bias=0)
70 | 
71 |     def forward(self, x, activate=True, norm=True):
72 |         if self.activate_last:
73 |             x = self.conv(x)
74 |             if norm and self.with_norm:
75 |                 x = self.norm(x)
76 |             if activate and self.with_activatation:
77 |                 x = self.activate(x)
78 |         else:
79 |             if norm and self.with_norm:
80 |                 x = self.norm(x)
81 |             if activate and self.with_activatation:
82 |                 x = self.activate(x)
83 |             x = self.conv(x)
84 |         return x
85 | 
86 | 
87 | 


--------------------------------------------------------------------------------
/mmdet/models/utils/empty.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | 
 4 | class Empty(torch.nn.Module):
 5 |     def __init__(self, *args, **kwargs):
 6 |         super(Empty, self).__init__()
 7 | 
 8 |     def forward(self, *args, **kwargs):
 9 |         if len(args) == 1:
10 |             return args[0]
11 |         elif len(args) == 0:
12 |             return None
13 |         return args


--------------------------------------------------------------------------------
/mmdet/models/utils/norm.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | 
 3 | norm_cfg = {'BN': nn.BatchNorm2d, 'SyncBN': None, 'GN': None}
 4 | 
 5 | 
 6 | def build_norm_layer(cfg, num_features):
 7 |     assert isinstance(cfg, dict) and 'type' in cfg
 8 |     cfg_ = cfg.copy()
 9 |     cfg_.setdefault('eps', 1e-5)
10 |     layer_type = cfg_.pop('type')
11 | 
12 |     if layer_type not in norm_cfg:
13 |         raise KeyError('Unrecognized norm type {}'.format(layer_type))
14 |     elif norm_cfg[layer_type] is None:
15 |         raise NotImplementedError
16 | 
17 |     return norm_cfg[layer_type](num_features, **cfg_)
18 | 


--------------------------------------------------------------------------------
/mmdet/models/utils/sequential.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from collections import OrderedDict
 3 | class Sequential(torch.nn.Module):
 4 |     r"""A sequential container.
 5 |     Modules will be added to it in the order they are passed in the constructor.
 6 |     Alternatively, an ordered dict of modules can also be passed in.
 7 | 
 8 |     To make it easier to understand, given is a small example::
 9 | 
10 |         # Example of using Sequential
11 |         model = Sequential(
12 |                   nn.Conv2d(1,20,5),
13 |                   nn.ReLU(),
14 |                   nn.Conv2d(20,64,5),
15 |                   nn.ReLU()
16 |                 )
17 | 
18 |         # Example of using Sequential with OrderedDict
19 |         model = Sequential(OrderedDict([
20 |                   ('conv1', nn.Conv2d(1,20,5)),
21 |                   ('relu1', nn.ReLU()),
22 |                   ('conv2', nn.Conv2d(20,64,5)),
23 |                   ('relu2', nn.ReLU())
24 |                 ]))
25 | 
26 |         # Example of using Sequential with kwargs(python 3.6+)
27 |         model = Sequential(
28 |                   conv1=nn.Conv2d(1,20,5),
29 |                   relu1=nn.ReLU(),
30 |                   conv2=nn.Conv2d(20,64,5),
31 |                   relu2=nn.ReLU()
32 |                 )
33 |     """
34 | 
35 |     def __init__(self, *args, **kwargs):
36 |         super(Sequential, self).__init__()
37 |         if len(args) == 1 and isinstance(args[0], OrderedDict):
38 |             for key, module in args[0].items():
39 |                 self.add_module(key, module)
40 |         else:
41 |             for idx, module in enumerate(args):
42 |                 self.add_module(str(idx), module)
43 |         for name, module in kwargs.items():
44 |             if sys.version_info < (3, 6):
45 |                 raise ValueError("kwargs only supported in py36+")
46 |             if name in self._modules:
47 |                 raise ValueError("name exists.")
48 |             self.add_module(name, module)
49 | 
50 |     def __getitem__(self, idx):
51 |         if not (-len(self) <= idx < len(self)):
52 |             raise IndexError('index {} is out of range'.format(idx))
53 |         if idx < 0:
54 |             idx += len(self)
55 |         it = iter(self._modules.values())
56 |         for i in range(idx):
57 |             next(it)
58 |         return next(it)
59 | 
60 |     def __len__(self):
61 |         return len(self._modules)
62 | 
63 |     def add(self, module, name=None):
64 |         if name is None:
65 |             name = str(len(self._modules))
66 |             if name in self._modules:
67 |                 raise KeyError("name exists")
68 |         self.add_module(name, module)
69 | 
70 |     def forward(self, input):
71 |         # i = 0
72 |         for module in self._modules.values():
73 |             # print(i)
74 |             input = module(input)
75 |             # i += 1
76 |         return input


--------------------------------------------------------------------------------
/mmdet/models/utils/weight_init.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import torch.nn as nn
 3 | 
 4 | 
 5 | def xavier_init(module, gain=1, bias=0, distribution='normal'):
 6 |     assert distribution in ['uniform', 'normal']
 7 |     if distribution == 'uniform':
 8 |         nn.init.xavier_uniform_(module.weight, gain=gain)
 9 |     else:
10 |         nn.init.xavier_normal_(module.weight, gain=gain)
11 |     if hasattr(module, 'bias'):
12 |         nn.init.constant_(module.bias, bias)
13 | 
14 | 
15 | def normal_init(module, mean=0, std=1, bias=0):
16 |     nn.init.normal_(module.weight, mean, std)
17 |     if hasattr(module, 'bias'):
18 |         nn.init.constant_(module.bias, bias)
19 | 
20 | 
21 | def uniform_init(module, a=0, b=1, bias=0):
22 |     nn.init.uniform_(module.weight, a, b)
23 |     if hasattr(module, 'bias'):
24 |         nn.init.constant_(module.bias, bias)
25 | 
26 | 
27 | def kaiming_init(module,
28 |                  mode='fan_out',
29 |                  nonlinearity='relu',
30 |                  bias=0,
31 |                  distribution='normal'):
32 |     assert distribution in ['uniform', 'normal']
33 |     if distribution == 'uniform':
34 |         nn.init.kaiming_uniform_(
35 |             module.weight, mode=mode, nonlinearity=nonlinearity)
36 |     else:
37 |         nn.init.kaiming_normal_(
38 |             module.weight, mode=mode, nonlinearity=nonlinearity)
39 |     if hasattr(module, 'bias'):
40 |         nn.init.constant_(module.bias, bias)
41 | 
42 | 
43 | def bias_init_with_prob(prior_prob):
44 |     """ initialize conv/fc bias value according to giving probablity"""
45 |     bias_init = float(-np.log((1 - prior_prob) / prior_prob))
46 |     return bias_init
47 | 


--------------------------------------------------------------------------------
/mmdet/ops/__init__.py:
--------------------------------------------------------------------------------
1 | from .points_op import pts_in_boxes3d
2 | __all__ = ['pts_in_boxes3d']
3 | 


--------------------------------------------------------------------------------
/mmdet/ops/iou3d/iou3d_utils.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import mmdet.ops.iou3d.iou3d_cuda as iou3d_cuda
  3 | import math
  4 | 
  5 | def limit_period(val, offset=0.5, period=math.pi):
  6 |     return val - torch.floor(val / period + offset) * period
  7 | 
  8 | def boxes3d_to_near_torch(boxes3d):
  9 |     rboxes = boxes3d[:, [0, 1, 3, 4, 6]]
 10 |     """convert rotated bbox to nearest 'standing' or 'lying' bbox.
 11 |     Args:
 12 |         rboxes: [N, 5(x, y, xdim, ydim, rad)] rotated bboxes
 13 |     Returns:
 14 |         boxes_near: [N, 4(xmin, ymin, xmax, ymax)] nearest boxes
 15 |     """
 16 |     rots = rboxes[..., -1]
 17 |     rots_0_pi_div_2 = torch.abs(limit_period(rots, 0.5, math.pi))
 18 |     cond = (rots_0_pi_div_2 > math.pi / 4)[..., None]
 19 |     boxes_center = torch.where(cond, rboxes[:, [0, 1, 3, 2]], rboxes[:, :4])
 20 |     boxes_near = torch.cat([boxes_center[:, :2] - boxes_center[:, 2:] / 2, \
 21 |                         boxes_center[:, :2] + boxes_center[:, 2:] / 2], dim=-1)
 22 |     return boxes_near
 23 | 
 24 | def boxes_iou(bboxes1, bboxes2, mode='iou', eps=0.0):
 25 |     assert mode in ['iou', 'iof']
 26 | 
 27 |     rows = bboxes1.size(0)
 28 |     cols = bboxes2.size(0)
 29 | 
 30 |     if rows * cols == 0:
 31 |         return bboxes1.new(rows, cols)
 32 | 
 33 |     lt = torch.max(bboxes1[:, None, :2], bboxes2[:, :2])  # [rows, cols, 2]
 34 |     rb = torch.min(bboxes1[:, None, 2:], bboxes2[:, 2:])  # [rows, cols, 2]
 35 |     wh = (rb - lt + eps).clamp(min=0)  # [rows, cols, 2]
 36 |     overlap = wh[:, :, 0] * wh[:, :, 1]
 37 |     area1 = (bboxes1[:, 2] - bboxes1[:, 0] + eps) * (
 38 |         bboxes1[:, 3] - bboxes1[:, 1] + eps)
 39 |     if mode == 'iou':
 40 |         area2 = (bboxes2[:, 2] - bboxes2[:, 0] + eps) * (
 41 |             bboxes2[:, 3] - bboxes2[:, 1] + eps)
 42 |         ious = overlap / (area1[:, None] + area2 - overlap)
 43 |     else:
 44 |         ious = overlap / (area1[:, None])
 45 |     return ious
 46 | 
 47 | def boxes3d_to_bev_torch(boxes3d):
 48 |     """
 49 |     :param boxes3d: (N, 7) [x, y, z, h, w, l, ry]
 50 |     :return:
 51 |         boxes_bev: (N, 5) [x1, y1, x2, y2, ry]
 52 |     """
 53 |     boxes_bev = boxes3d.new(torch.Size((boxes3d.shape[0], 5)))
 54 | 
 55 |     cu, cv = boxes3d[:, 0], boxes3d[:, 1]
 56 |     half_l, half_w = boxes3d[:, 3] / 2, boxes3d[:, 4] / 2
 57 |     boxes_bev[:, 0], boxes_bev[:, 1] = cu - half_l, cv - half_w
 58 |     boxes_bev[:, 2], boxes_bev[:, 3] = cu + half_l, cv + half_w
 59 |     boxes_bev[:, 4] = boxes3d[:, 6]
 60 |     return boxes_bev
 61 | 
 62 | def boxes_iou_bev(boxes_a, boxes_b):
 63 |     """
 64 |     :param boxes_a: (M, 5)
 65 |     :param boxes_b: (N, 5)
 66 |     :return:
 67 |         ans_iou: (M, N)
 68 |     """
 69 |     boxes_a_bev = boxes3d_to_bev_torch(boxes_a)
 70 |     boxes_b_bev = boxes3d_to_bev_torch(boxes_b)
 71 | 
 72 |     ans_iou = torch.cuda.FloatTensor(torch.Size((boxes_a_bev.shape[0], boxes_b_bev.shape[0]))).zero_()
 73 | 
 74 |     iou3d_cuda.boxes_iou_bev_gpu(boxes_a_bev.contiguous(), boxes_b_bev.contiguous(), ans_iou)
 75 | 
 76 |     return ans_iou
 77 | 
 78 | 
 79 | def boxes_iou3d_gpu(boxes_a, boxes_b):
 80 |     """
 81 |     :param boxes_a: (N, 7) [x, y, z, h, w, l, ry]
 82 |     :param boxes_b: (M, 7) [x, y, z, h, w, l, ry]
 83 |     :return:
 84 |         ans_iou: (M, N)
 85 |     """
 86 |     boxes_a_bev = boxes3d_to_bev_torch(boxes_a)
 87 |     boxes_b_bev = boxes3d_to_bev_torch(boxes_b)
 88 | 
 89 |     # bev overlap
 90 |     overlaps_bev = torch.cuda.FloatTensor(torch.Size((boxes_a.shape[0], boxes_b.shape[0]))).zero_()  # (N, M)
 91 |     iou3d_cuda.boxes_overlap_bev_gpu(boxes_a_bev.contiguous(), boxes_b_bev.contiguous(), overlaps_bev)
 92 | 
 93 |     # height overlap
 94 |     boxes_a_height_max = (boxes_a[:, 2] + boxes_a[:, 5]).view(-1, 1)
 95 |     boxes_a_height_min = boxes_a[:, 2].view(-1, 1)
 96 |     boxes_b_height_max = (boxes_b[:, 2] + boxes_b[:, 5]).view(1, -1)
 97 |     boxes_b_height_min = boxes_b[:, 2].view(1, -1)
 98 | 
 99 |     max_of_min = torch.max(boxes_a_height_min, boxes_b_height_min)
100 |     min_of_max = torch.min(boxes_a_height_max, boxes_b_height_max)
101 |     overlaps_h = torch.clamp(min_of_max - max_of_min, min=0)
102 | 
103 |     # 3d iou
104 |     overlaps_3d = overlaps_bev * overlaps_h
105 | 
106 |     vol_a = (boxes_a[:, 3] * boxes_a[:, 4] * boxes_a[:, 5]).view(-1, 1)
107 |     vol_b = (boxes_b[:, 3] * boxes_b[:, 4] * boxes_b[:, 5]).view(1, -1)
108 | 
109 |     iou3d = overlaps_3d / torch.clamp(vol_a + vol_b - overlaps_3d, min=1e-7)
110 | 
111 |     return iou3d
112 | 
113 | 
114 | def nms_gpu(boxes, scores, thresh):
115 |     """
116 |     :param boxes: (N, 5) [x1, y1, x2, y2, ry]
117 |     :param scores: (N)
118 |     :param thresh:
119 |     :return:
120 |     """
121 |     # areas = (x2 - x1) * (y2 - y1)
122 |     order = scores.sort(0, descending=True)[1]
123 | 
124 |     boxes = boxes[order].contiguous()
125 | 
126 |     keep = torch.LongTensor(boxes.size(0))
127 |     num_out = iou3d_cuda.nms_gpu(boxes, keep, thresh)
128 |     return order[keep[:num_out].cuda()].contiguous()
129 | 
130 | def nms_normal_gpu(boxes, scores, thresh):
131 |     """
132 |     :param boxes: (N, 5) [x1, y1, x2, y2, ry]
133 |     :param scores: (N)
134 |     :param thresh:
135 |     :return:
136 |     """
137 |     # areas = (x2 - x1) * (y2 - y1)
138 |     order = scores.sort(0, descending=True)[1]
139 | 
140 |     boxes = boxes[order].contiguous()
141 | 
142 |     keep = torch.LongTensor(boxes.size(0))
143 |     num_out = iou3d_cuda.nms_normal_gpu(boxes, keep, thresh)
144 |     return order[keep[:num_out].cuda()].contiguous()
145 | 
146 | class RotateIou2dSimilarity(object):
147 |     """Class to compute similarity based on Intersection over Union (IOU) metric.
148 | 
149 |     This class computes pairwise similarity between two BoxLists based on IOU.
150 |     """
151 |     def __call__(self, boxes1, boxes2):
152 |         return boxes_iou_bev(boxes1, boxes2)
153 | 
154 | class RotateIou3dSimilarity(object):
155 |     """Class to compute similarity based on Intersection over Union (IOU) metric.
156 | 
157 |     This class computes pairwise similarity between two BoxLists based on IOU.
158 |     """
159 |     def __call__(self, boxes1, boxes2):
160 |         return boxes_iou3d_gpu(boxes1, boxes2)
161 | 
162 | 
163 | class NearestIouSimilarity(object):
164 |     """Class to compute similarity based on the squared distance metric.
165 | 
166 |     This class computes pairwise similarity between two BoxLists based on the
167 |     negative squared distance metric.
168 |     """
169 | 
170 |     def __call__(self, boxes1, boxes2):
171 |         """Compute matrix of (negated) sq distances.
172 | 
173 |         Args:
174 |           boxlist1: BoxList holding N boxes.
175 |           boxlist2: BoxList holding M boxes.
176 | 
177 |         Returns:
178 |           A tensor with shape [N, M] representing negated pairwise squared distance.
179 |         """
180 | 
181 |         boxes1_near = boxes3d_to_near_torch(boxes1)
182 |         boxes2_near = boxes3d_to_near_torch(boxes2)
183 |         return boxes_iou(boxes1_near, boxes2_near)
184 | 
185 | if __name__ == '__main__':
186 |     pass


--------------------------------------------------------------------------------
/mmdet/ops/iou3d/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup
 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension
 3 | 
 4 | setup(
 5 |     name='iou3d',
 6 |     ext_modules=[
 7 |         CUDAExtension('iou3d_cuda', [
 8 |             'src/iou3d.cpp',
 9 |             'src/iou3d_kernel.cu',
10 |         ],
11 |         extra_compile_args={'cxx': ['-g'],
12 |                             'nvcc': ['-O2']})
13 |     ],
14 |     cmdclass={'build_ext': BuildExtension})
15 | 


--------------------------------------------------------------------------------
/mmdet/ops/pointnet2/pointnet2_utils.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch.autograd import Variable
 3 | from torch.autograd import Function
 4 | import torch.nn as nn
 5 | from typing import Tuple
 6 | 
 7 | import mmdet.ops.pointnet2.pointnet2_cuda as pointnet2
 8 | 
 9 | class ThreeNN(Function):
10 | 
11 |     @staticmethod
12 |     def forward(ctx, unknown: torch.Tensor, known: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
13 |         """
14 |         Find the three nearest neighbors of unknown in known
15 |         :param ctx:
16 |         :param unknown: (N, 3)
17 |         :param known: (M, 3)
18 |         :return:
19 |             dist: (N, 3) l2 distance to the three nearest neighbors
20 |             idx: (N, 3) index of 3 nearest neighbors
21 |         """
22 |         assert unknown.is_contiguous()
23 |         assert known.is_contiguous()
24 | 
25 |         N, _ = unknown.size()
26 |         m = known.size(0)
27 |         dist2 = torch.cuda.FloatTensor(N, 3)
28 |         idx = torch.cuda.IntTensor(N, 3)
29 | 
30 |         pointnet2.three_nn_wrapper(N, m, unknown, known, dist2, idx)
31 |         return torch.sqrt(dist2), idx
32 | 
33 |     @staticmethod
34 |     def backward(ctx, a=None, b=None):
35 |         return None, None
36 | 
37 | 
38 | three_nn = ThreeNN.apply
39 | 
40 | 
41 | class ThreeInterpolate(Function):
42 | 
43 |     @staticmethod
44 |     def forward(ctx, features: torch.Tensor, idx: torch.Tensor, weight: torch.Tensor) -> torch.Tensor:
45 |         """
46 |         Performs weight linear interpolation on 3 features
47 |         :param ctx:
48 |         :param features: (M, C) Features descriptors to be interpolated from
49 |         :param idx: (n, 3) three nearest neighbors of the target features in features
50 |         :param weight: (n, 3) weights
51 |         :return:
52 |             output: (N, C) tensor of the interpolated features
53 |         """
54 |         assert features.is_contiguous()
55 |         assert idx.is_contiguous()
56 |         assert weight.is_contiguous()
57 | 
58 |         m, c = features.size()
59 |         n = idx.size(0)
60 |         ctx.three_interpolate_for_backward = (idx, weight, m)
61 |         output = torch.cuda.FloatTensor(n, c)
62 | 
63 |         pointnet2.three_interpolate_wrapper(c, m, n, features, idx, weight, output)
64 |         return output
65 | 
66 |     @staticmethod
67 |     def backward(ctx, grad_out: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
68 |         """
69 |         :param ctx:
70 |         :param grad_out: (N, C) tensor with gradients of outputs
71 |         :return:
72 |             grad_features: (M, C) tensor with gradients of features
73 |             None:
74 |             None:
75 |         """
76 |         idx, weight, m = ctx.three_interpolate_for_backward
77 |         n, c = grad_out.size()
78 | 
79 |         grad_features = Variable(torch.cuda.FloatTensor(m, c).zero_())
80 |         grad_out_data = grad_out.data.contiguous()
81 | 
82 |         pointnet2.three_interpolate_grad_wrapper( c, n, m, grad_out_data, idx, weight, grad_features.data)
83 |         return grad_features, None, None
84 | 
85 | 
86 | three_interpolate = ThreeInterpolate.apply
87 | 
88 | 
89 | 


--------------------------------------------------------------------------------
/mmdet/ops/pointnet2/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup
 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension
 3 | 
 4 | setup(
 5 |     name='pointnet2',
 6 |     ext_modules=[
 7 |         CUDAExtension('pointnet2_cuda', [
 8 |             'src/pointnet2_api.cpp',
 9 |             'src/interpolate.cpp', 
10 |             'src/interpolate_gpu.cu',
11 |         ],
12 |         extra_compile_args={'cxx': ['-g'],
13 |                             'nvcc': ['-O2']})
14 |     ],
15 |     cmdclass={'build_ext': BuildExtension}
16 | )
17 | 


--------------------------------------------------------------------------------
/mmdet/ops/pointnet2/src/cuda_utils.h:
--------------------------------------------------------------------------------
 1 | #ifndef _CUDA_UTILS_H
 2 | #define _CUDA_UTILS_H
 3 | 
 4 | #include <cmath>
 5 | 
 6 | #define TOTAL_THREADS 1024
 7 | #define THREADS_PER_BLOCK 256
 8 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
 9 | 
10 | inline int opt_n_threads(int work_size) {
11 |     const int pow_2 = std::log(static_cast<double>(work_size)) / std::log(2.0);
12 | 
13 |     return max(min(1 << pow_2, TOTAL_THREADS), 1);
14 | }
15 | #endif
16 | 


--------------------------------------------------------------------------------
/mmdet/ops/pointnet2/src/interpolate.cpp:
--------------------------------------------------------------------------------
 1 | #include <torch/serialize/tensor.h>
 2 | #include <vector>
 3 | #include <THC/THC.h>
 4 | #include <math.h>
 5 | #include <stdio.h>
 6 | #include <stdlib.h>
 7 | #include <cuda.h>
 8 | #include <cuda_runtime_api.h>
 9 | #include "interpolate_gpu.h"
10 | 
11 | extern THCState *state;
12 | 
13 | void three_nn_wrapper_fast(int n, int m, at::Tensor unknown_tensor,
14 |     at::Tensor known_tensor, at::Tensor dist2_tensor, at::Tensor idx_tensor) {
15 |     const float *unknown = unknown_tensor.data<float>();
16 |     const float *known = known_tensor.data<float>();
17 |     float *dist2 = dist2_tensor.data<float>();
18 |     int *idx = idx_tensor.data<int>();
19 | 
20 |     cudaStream_t stream = THCState_getCurrentStream(state);
21 |     three_nn_kernel_launcher_fast(n, m, unknown, known, dist2, idx, stream);
22 | }
23 | 
24 | 
25 | void three_interpolate_wrapper_fast(int c, int m, int n,
26 |                          at::Tensor points_tensor,
27 |                          at::Tensor idx_tensor,
28 |                          at::Tensor weight_tensor,
29 |                          at::Tensor out_tensor) {
30 | 
31 |     const float *points = points_tensor.data<float>();
32 |     const float *weight = weight_tensor.data<float>();
33 |     float *out = out_tensor.data<float>();
34 |     const int *idx = idx_tensor.data<int>();
35 | 
36 |     cudaStream_t stream = THCState_getCurrentStream(state);
37 |     three_interpolate_kernel_launcher_fast(c, m, n, points, idx, weight, out, stream);
38 | }
39 | 
40 | void three_interpolate_grad_wrapper_fast(int c, int n, int m,
41 |                             at::Tensor grad_out_tensor,
42 |                             at::Tensor idx_tensor,
43 |                             at::Tensor weight_tensor,
44 |                             at::Tensor grad_points_tensor) {
45 | 
46 |     const float *grad_out = grad_out_tensor.data<float>();
47 |     const float *weight = weight_tensor.data<float>();
48 |     float *grad_points = grad_points_tensor.data<float>();
49 |     const int *idx = idx_tensor.data<int>();
50 | 
51 |     cudaStream_t stream = THCState_getCurrentStream(state);
52 |     three_interpolate_grad_kernel_launcher_fast(c, n, m, grad_out, idx, weight, grad_points, stream);
53 | }


--------------------------------------------------------------------------------
/mmdet/ops/pointnet2/src/interpolate_gpu.cu:
--------------------------------------------------------------------------------
  1 | #include <math.h>
  2 | #include <stdio.h>
  3 | #include <stdlib.h>
  4 | 
  5 | #include "cuda_utils.h"
  6 | #include "interpolate_gpu.h"
  7 | 
  8 | 
  9 | __global__ void three_nn_kernel_fast(int n, int m, const float *__restrict__ unknown,
 10 |     const float *__restrict__ known, float *__restrict__ dist2, int *__restrict__ idx) {
 11 |     // unknown: (N, 4)
 12 |     // known: (M, 4)
 13 |     // output:
 14 |     //      dist2: (N, 3)
 15 |     //      idx: (N, 3)
 16 | 
 17 | 
 18 |     int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
 19 |     if (pt_idx >= n) return;
 20 | 
 21 |     unknown += pt_idx * 4;
 22 | 
 23 |     dist2 += pt_idx * 3;
 24 |     idx += pt_idx * 3;
 25 | 
 26 |     float ub = unknown[0];
 27 |     float ux = unknown[1];
 28 |     float uy = unknown[2];
 29 |     float uz = unknown[3];
 30 | 
 31 |     double best1 = 1e40, best2 = 1e40, best3 = 1e40;
 32 |     int besti1 = 0, besti2 = 0, besti3 = 0;
 33 |     for (int k = 0; k < m; ++k) {
 34 |         float b = known[k * 4 + 0]; //batch number
 35 |         if (b!=ub)
 36 |             continue;
 37 |         float x = known[k * 4 + 1];
 38 |         float y = known[k * 4 + 2];
 39 |         float z = known[k * 4 + 3];
 40 |         float d = (ux - x) * (ux - x) + (uy - y) * (uy - y) + (uz - z) * (uz - z);
 41 |         if (d < best1) {
 42 |             best3 = best2; besti3 = besti2;
 43 |             best2 = best1; besti2 = besti1;
 44 |             best1 = d; besti1 = k;
 45 |         }
 46 |         else if (d < best2) {
 47 |             best3 = best2; besti3 = besti2;
 48 |             best2 = d; besti2 = k;
 49 |         }
 50 |         else if (d < best3) {
 51 |             best3 = d; besti3 = k;
 52 |         }
 53 |     }
 54 |     dist2[0] = best1; dist2[1] = best2; dist2[2] = best3;
 55 |     idx[0] = besti1; idx[1] = besti2; idx[2] = besti3;
 56 | }
 57 | 
 58 | void three_nn_kernel_launcher_fast(int n, int m, const float *unknown,
 59 |     const float *known, float *dist2, int *idx, cudaStream_t stream) {
 60 |     // unknown: (N, 4)
 61 |     // known: (M, 4)
 62 |     // output: 
 63 |     //      dist2: (N, 3)
 64 |     //      idx: (N, 3)
 65 | 
 66 |     cudaError_t err;
 67 |     dim3 blocks(DIVUP(n, THREADS_PER_BLOCK));  // blockIdx.x(col), blockIdx.y(row)
 68 |     dim3 threads(THREADS_PER_BLOCK);
 69 | 
 70 |     three_nn_kernel_fast<<<blocks, threads, 0, stream>>>(n, m, unknown, known, dist2, idx);
 71 | 
 72 |     err = cudaGetLastError();
 73 |     if (cudaSuccess != err) {
 74 |         fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
 75 |         exit(-1);
 76 |     }
 77 | }
 78 | 
 79 | 
 80 | __global__ void three_interpolate_kernel_fast(int c, int m, int n, const float *__restrict__ points,
 81 |     const int *__restrict__ idx, const float *__restrict__ weight, float *__restrict__ out) {
 82 |     // points: (M, C)
 83 |     // idx: (N, 3)
 84 |     // weight: (N, 3)
 85 |     // output:
 86 |     //      out: (N, C)
 87 | 
 88 | 
 89 |     int c_idx = blockIdx.y;
 90 |     int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
 91 | 
 92 |     if (c_idx >= c || pt_idx >= n) return;
 93 | 
 94 |     weight += pt_idx * 3;
 95 |     //points += c_idx * m;
 96 | 
 97 |     idx += pt_idx * 3;
 98 | 
 99 |     out += pt_idx * c;
100 | 
101 |     out[c_idx] = weight[0] * points[idx[0] * c + c_idx] + weight[1] * points[idx[1] * c + c_idx] + weight[2] * points[idx[2] * c + c_idx];
102 | }
103 | 
104 | void three_interpolate_kernel_launcher_fast(int c, int m, int n,
105 |     const float *points, const int *idx, const float *weight, float *out, cudaStream_t stream) {
106 |    // points: (M, C)
107 |     // idx: (N, 3)
108 |     // weight: (N, 3)
109 |     // output:
110 |     //      out: (N, C)
111 | 
112 |     cudaError_t err;
113 |     dim3 blocks(DIVUP(n, THREADS_PER_BLOCK), c);  // blockIdx.x(col), blockIdx.y(row)
114 |     dim3 threads(THREADS_PER_BLOCK);
115 |     three_interpolate_kernel_fast<<<blocks, threads, 0, stream>>>(c, m, n, points, idx, weight, out);
116 | 
117 |     err = cudaGetLastError();
118 |     if (cudaSuccess != err) {
119 |         fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
120 |         exit(-1);
121 |     }
122 | }
123 | 
124 | __global__ void three_interpolate_grad_kernel_fast(int c, int n, int m, const float *__restrict__ grad_out,
125 |     const int *__restrict__ idx, const float *__restrict__ weight, float *__restrict__ grad_points) {
126 |     // grad_out: (N, C)
127 |     // weight: (N, 3)
128 |     // idx: (N, 3)
129 |     // output:
130 |     //      grad_points: (M, C)
131 | 
132 | 
133 |     int c_idx = blockIdx.y;
134 |     int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
135 | 
136 |     if (c_idx >= c || pt_idx >= n) return;
137 |     
138 |     grad_out += pt_idx * c + c_idx;
139 |     weight += pt_idx * 3;
140 |     //grad_points += c_idx * m;
141 |     idx += pt_idx * 3;
142 | 
143 |     atomicAdd(grad_points + idx[0] * c + c_idx, grad_out[0] * weight[0]);
144 |     atomicAdd(grad_points + idx[1] * c + c_idx, grad_out[0] * weight[1]);
145 |     atomicAdd(grad_points + idx[2] * c + c_idx, grad_out[0] * weight[2]);
146 | }
147 | 
148 | void three_interpolate_grad_kernel_launcher_fast(int c, int n, int m, const float *grad_out,
149 |     const int *idx, const float *weight, float *grad_points, cudaStream_t stream) {
150 |     // grad_out: (N, C)
151 |     // weight: (N, 3)
152 |     // idx: (N, 3)
153 |     // output:
154 |     //      grad_points: (M, C)
155 | 
156 |     cudaError_t err;
157 |     dim3 blocks(DIVUP(n, THREADS_PER_BLOCK), c);  // blockIdx.x(col), blockIdx.y(row)
158 |     dim3 threads(THREADS_PER_BLOCK);
159 |     three_interpolate_grad_kernel_fast<<<blocks, threads, 0, stream>>>(c, n, m, grad_out, idx, weight, grad_points);
160 | 
161 |     err = cudaGetLastError();
162 |     if (cudaSuccess != err) {
163 |         fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
164 |         exit(-1);
165 |     }
166 | }


--------------------------------------------------------------------------------
/mmdet/ops/pointnet2/src/interpolate_gpu.h:
--------------------------------------------------------------------------------
 1 | #ifndef _INTERPOLATE_GPU_H
 2 | #define _INTERPOLATE_GPU_H
 3 | 
 4 | #include <torch/serialize/tensor.h>
 5 | #include<vector>
 6 | #include <cuda.h>
 7 | #include <cuda_runtime_api.h>
 8 | 
 9 | 
10 | void three_nn_wrapper_fast(int n, int m, at::Tensor unknown_tensor,
11 |   at::Tensor known_tensor, at::Tensor dist2_tensor, at::Tensor idx_tensor);
12 | 
13 | void three_nn_kernel_launcher_fast(int n, int m, const float *unknown,
14 | 	const float *known, float *dist2, int *idx, cudaStream_t stream);
15 | 
16 | 
17 | void three_interpolate_wrapper_fast(int c, int m, int n, at::Tensor points_tensor,
18 |     at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor out_tensor);
19 | 
20 | void three_interpolate_kernel_launcher_fast(int c, int m, int n,
21 |     const float *points, const int *idx, const float *weight, float *out, cudaStream_t stream);
22 | 
23 | 
24 | void three_interpolate_grad_wrapper_fast(int c, int n, int m, at::Tensor grad_out_tensor,
25 |     at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_points_tensor);
26 | 
27 | void three_interpolate_grad_kernel_launcher_fast(int c, int n, int m, const float *grad_out,
28 |     const int *idx, const float *weight, float *grad_points, cudaStream_t stream);
29 | 
30 | #endif
31 | 


--------------------------------------------------------------------------------
/mmdet/ops/pointnet2/src/pointnet2_api.cpp:
--------------------------------------------------------------------------------
 1 | #include <torch/serialize/tensor.h>
 2 | #include <torch/extension.h>
 3 | 
 4 | #include "interpolate_gpu.h"
 5 | 
 6 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
 7 |     m.def("three_nn_wrapper", &three_nn_wrapper_fast, "three_nn_wrapper_fast");
 8 |     m.def("three_interpolate_wrapper", &three_interpolate_wrapper_fast, "three_interpolate_wrapper_fast");
 9 |     m.def("three_interpolate_grad_wrapper", &three_interpolate_grad_wrapper_fast, "three_interpolate_grad_wrapper_fast");
10 | }
11 | 


--------------------------------------------------------------------------------
/mmdet/ops/points_op/__init__.py:
--------------------------------------------------------------------------------
 1 | from .points_ops import *
 2 | from mmdet.ops.points_op import points_op_cpu
 3 | import torch
 4 | 
 5 | def pts_in_boxes3d(pts, boxes3d):
 6 |     N = len(pts)
 7 |     M = len(boxes3d)
 8 |     pts_in_flag = torch.IntTensor(M, N).fill_(0)
 9 |     reg_target = torch.FloatTensor(N, 3).fill_(0)
10 |     points_op_cpu.pts_in_boxes3d(pts.contiguous(), boxes3d.contiguous(), pts_in_flag, reg_target)
11 |     return pts_in_flag, reg_target
12 | 
13 | 


--------------------------------------------------------------------------------
/mmdet/ops/points_op/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup
 2 | from torch.utils.cpp_extension import BuildExtension, CppExtension, CUDAExtension
 3 | 
 4 | class get_pybind_include(object):
 5 |     """Helper class to determine the pybind11 include path
 6 |     The purpose of this class is to postpone importing pybind11
 7 |     until it is actually installed, so that the ``get_include()``
 8 |     method can be invoked. """
 9 | 
10 |     def __init__(self, user=False):
11 |         self.user = user
12 | 
13 |     def __str__(self):
14 |         import pybind11
15 |         return pybind11.get_include(self.user)
16 | 
17 | ext_modules = [
18 |     CppExtension(
19 |         name='points_op_cpu',
20 |         sources = ['src/points_op.cpp'],
21 |         extra_compile_args=['-g'],
22 |         include_dirs=[
23 |             # Path to pybind11 headers
24 |             get_pybind_include(),
25 |             get_pybind_include(user=True)
26 |         ],
27 |     ),
28 | ]
29 | 
30 | setup(
31 |     name='cpplib',
32 |     ext_modules=ext_modules,
33 |     cmdclass={
34 |         'build_ext': BuildExtension
35 |     })
36 | 
37 | 


--------------------------------------------------------------------------------
/mmdet/version.py:
--------------------------------------------------------------------------------
1 | # GENERATED VERSION FILE
2 | # TIME: Thu Mar  7 20:30:16 2019
3 | 
4 | __version__ = '0.5.4+a6ee053'
5 | short_version = '0.5.4'
6 | 


--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
  1 | ## SA-SSD: Structure Aware Single-stage 3D Object Detection from Point Cloud (CVPR 2020) [\[paper\]](https://www4.comp.polyu.edu.hk/~cslzhang/paper/SA-SSD.pdf)
  2 | Currently 1st place in KITTI BEV and 3rd in KITTI 3D. The detector can run at 25 FPS. 
  3 | 
  4 | **Authors**: [Chenhang He](https://github.com/skyhehe123), [Zeng Hui](https://github.com/HuiZeng), Jianqiang Huang, Xiansheng Hua, [Lei Zhang](https://www4.comp.polyu.edu.hk/~cslzhang/).
  5 | 
  6 | ## Updates
  7 | 2020-04-13: Add one_cycle (with Adam) training as default scheduler.
  8 | 
  9 | 2020-08-04: Multi-class training is supported. (The multi-class traning is not well tuned and will slightly deteriote the performance of model with single class training (i.e. each class has a individual model), please find the bellow AP@(11 recall points) for your reference.)
 10 | ```
 11 | Car AP@0.70, 0.70, 0.70:
 12 | bbox AP:98.96, 90.06, 89.52
 13 | bev  AP:90.59, 88.43, 87.49
 14 | 3d   AP:89.69, 79.41, 78.33
 15 | aos  AP:98.94, 89.89, 89.19
 16 | Car AP@0.70, 0.50, 0.50:
 17 | bbox AP:98.96, 90.06, 89.52
 18 | bev  AP:98.99, 90.13, 89.68
 19 | 3d   AP:98.97, 90.10, 89.63
 20 | aos  AP:98.94, 89.89, 89.19
 21 | 
 22 | Pedestrian AP@0.50, 0.50, 0.50:
 23 | bbox AP:62.88, 60.26, 53.58
 24 | bev  AP:58.52, 50.29, 44.10
 25 | 3d   AP:55.75, 48.01, 41.94
 26 | aos  AP:58.57, 55.19, 49.07
 27 | Pedestrian AP@0.50, 0.25, 0.25:
 28 | bbox AP:62.88, 60.26, 53.58
 29 | bev  AP:71.34, 62.80, 55.64
 30 | 3d   AP:71.33, 62.76, 55.60
 31 | aos  AP:58.57, 55.19, 49.07
 32 | 
 33 | Cyclist AP@0.50, 0.50, 0.50:
 34 | bbox AP:87.25, 73.74, 67.84
 35 | bev  AP:85.40, 70.48, 64.59
 36 | 3d   AP:82.80, 63.37, 61.60
 37 | aos  AP:86.93, 73.26, 67.41
 38 | Cyclist AP@0.50, 0.25, 0.25:
 39 | bbox AP:87.25, 73.74, 67.84
 40 | bev  AP:86.78, 71.55, 65.85
 41 | 3d   AP:86.78, 71.54, 65.85
 42 | aos  AP:86.93, 73.26, 67.41
 43 | ```
 44 | 
 45 | ## Demo
 46 | [![Demo](https://github.com/skyhehe123/SA-SSD/blob/master/doc/hqdefault.jpg)](https://www.youtube.com/watch?v=jrAb3ts4tAs)
 47 | 
 48 | # Introduction
 49 | ![model](https://github.com/skyhehe123/SA-SSD/blob/master/doc/model.png)
 50 | Current single-stage detectors are efficient by progressively downscaling the 3D point clouds in a fully convolutional manner. However, the downscaled features inevitably lose spatial information and cannot make full use of the structure information of 3D point cloud, degrading their localization precision. In this work, we propose to improve the localization precision of single-stage detectors by explicitly leveraging the structure information of 3D point cloud. Specifically, we design an auxiliary network which converts the convolutional features in the backbone network back to point-level representations. The auxiliary network is jointly optimized, by two point-level supervisions, to guide the convolutional features in the backbone network to be aware of the object structure. The auxiliary network can be detached after training and therefore introduces no extra computation in the inference stage. Besides, considering that single-stage detectors suffer from the discordance between the predicted bounding boxes and corresponding classification confidences, we develop an efficient part-sensitive warping operation to align the confidences to the predicted bounding boxes.
 51 | 
 52 | # Dependencies
 53 | - `python3.5+`
 54 | - `pytorch` (tested on 1.1.0)
 55 | - `opencv`
 56 | - `shapely`
 57 | - `mayavi`
 58 | - `spconv` (v1.0)
 59 | 
 60 | # Installation
 61 | 1. Clone this repository.
 62 | 2. Compile C++/CUDA modules in mmdet/ops by running the following command at each directory, e.g.
 63 | ```bash
 64 | $ cd mmdet/ops/points_op
 65 | $ python3 setup.py build_ext --inplace
 66 | ```
 67 | 3. Setup following Environment variables, you may add them to ~/.bashrc:
 68 | ```bash
 69 | export NUMBAPRO_CUDA_DRIVER=/usr/lib/x86_64-linux-gnu/libcuda.so
 70 | export NUMBAPRO_NVVM=/usr/local/cuda/nvvm/lib64/libnvvm.so
 71 | export NUMBAPRO_LIBDEVICE=/usr/local/cuda/nvvm/libdevice
 72 | export LD_LIBRARY_PATH=/home/billyhe/anaconda3/lib/python3.7/site-packages/spconv;
 73 | ```
 74 | 
 75 | # Data Preparation
 76 | 1. Download the 3D KITTI detection dataset from [here](http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d). Data to download include:
 77 |     * Velodyne point clouds (29 GB): input data to VoxelNet
 78 |     * Training labels of object data set (5 MB): input label to VoxelNet
 79 |     * Camera calibration matrices of object data set (16 MB): for visualization of predictions
 80 |     * Left color images of object data set (12 GB): for visualization of predictions
 81 | 
 82 | 2. Create cropped point cloud and sample pool for data augmentation, please refer to [SECOND](https://github.com/traveller59/second.pytorch).
 83 | ```bash
 84 | $ python3 tools/create_data.py
 85 | ```
 86 | 
 87 | 3. Split the training set into training and validation set according to the protocol [here](https://xiaozhichen.github.io/files/mv3d/imagesets.tar.gz).
 88 | ```plain
 89 | └── DATA_DIR
 90 |        ├── training   <-- training data
 91 |        |   ├── image_2
 92 |        |   ├── label_2
 93 |        |   ├── velodyne
 94 |        |   └── velodyne_reduced
 95 |        └── testing  <--- testing data
 96 |        |   ├── image_2
 97 |        |   ├── label_2
 98 |        |   ├── velodyne
 99 |        |   └── velodyne_reduced
100 | ```
101 | 
102 | # Pretrained Model
103 | You can download the pretrained model [here](https://drive.google.com/file/d/1WJnJDMOeNKszdZH3P077wKXcoty7XOUb/view?usp=sharing), 
104 | which is trained on the train split (3712 samples) and evaluated on the val split (3769 samples) and test split (7518 samples). 
105 | The performance (using 40 recall poisitions) on validation set is as follows:
106 | ```
107 | Car  AP@0.70, 0.70, 0.70:
108 | bbox AP:99.12, 96.09, 93.61
109 | bev  AP:96.55, 92.79, 90.32
110 | 3d   AP:93.13, 84.54, 81.71
111 | ```
112 | # Train
113 | To train the SA-SSD with single GPU, run the following command:
114 | ```
115 | cd mmdet/tools
116 | python3 train.py ../configs/car_cfg.py
117 | ```
118 | To train the SA-SSD with multiple GPUs, run the following command:
119 | ```
120 | bash dist_train.sh
121 | ```
122 | # Eval
123 | To evaluate the model, run the following command:
124 | ```
125 | cd mmdet/tools
126 | python3 test.py ../configs/car_cfg.py ../saved_model_vehicle/epoch_50.pth
127 | ```
128 | ## Citation
129 | If you find this work useful in your research, please consider cite:
130 | ```
131 | @inproceedings{he2020sassd,
132 | title={Structure Aware Single-stage 3D Object Detection from Point Cloud},
133 | author={He, Chenhang and Zeng, Hui and Huang, Jianqiang and Hua, Xian-Sheng and Zhang, Lei},
134 |   booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
135 |   year={2020}
136 | }
137 | ```
138 | 
139 | ## Acknowledgement
140 | The code is devloped based on mmdetection, some part of codes are borrowed from SECOND and PointRCNN.
141 | * [mmdetection](https://github.com/open-mmlab/mmdetection) 
142 | * [mmcv](https://github.com/open-mmlab/mmcv)
143 | * [second.pytorch](https://github.com/traveller59/second.pytorch)
144 | * [PointRCNN](https://github.com/sshaoshuai/PointRCNN)
145 | 
146 | 
147 | 


--------------------------------------------------------------------------------
/tools/dist_train.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | PYTHON=${PYTHON:-"python"}
4 | 
5 | $PYTHON -m torch.distributed.launch --nproc_per_node=2 $(dirname "$0")/train.py ../configs/car_cfg.py --launcher pytorch ${@:3}
6 | 


--------------------------------------------------------------------------------
/tools/env.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import os
 3 | import random
 4 | import time
 5 | import numpy as np
 6 | import torch
 7 | import torch.distributed as dist
 8 | import torch.multiprocessing as mp
 9 | from mmcv.runner import get_dist_info
10 | 
11 | 
12 | def init_dist(launcher, backend='nccl', **kwargs):
13 |     if mp.get_start_method(allow_none=True) is None:
14 |         mp.set_start_method('spawn')
15 |     if launcher == 'pytorch':
16 |         _init_dist_pytorch(backend, **kwargs)
17 |     elif launcher == 'mpi':
18 |         _init_dist_mpi(backend, **kwargs)
19 |     elif launcher == 'slurm':
20 |         _init_dist_slurm(backend, **kwargs)
21 |     else:
22 |         raise ValueError('Invalid launcher type: {}'.format(launcher))
23 | 
24 | 
25 | def _init_dist_pytorch(backend, **kwargs):
26 |     # TODO: use local_rank instead of rank % num_gpus
27 |     rank = int(os.environ['RANK'])
28 |     num_gpus = torch.cuda.device_count()
29 |     torch.cuda.set_device(rank % num_gpus)
30 |     dist.init_process_group(backend=backend, **kwargs)
31 | 
32 | 
33 | def _init_dist_mpi(backend, **kwargs):
34 |     raise NotImplementedError
35 | 
36 | 
37 | def _init_dist_slurm(backend, **kwargs):
38 |     raise NotImplementedError
39 | 
40 | 
41 | def set_random_seed(seed):
42 |     random.seed(seed)
43 |     np.random.seed(seed)
44 |     torch.manual_seed(seed)
45 |     torch.cuda.manual_seed_all(seed)
46 | 
47 | 
48 | def get_root_logger(work_dir):
49 |     logging.basicConfig(
50 |         format='%(asctime)s - %(levelname)s - %(message)s',
51 |         level=logging.INFO)
52 | 
53 |     logger = logging.getLogger()
54 |     rank, _ = get_dist_info()
55 |     if rank != 0:
56 |         logger.setLevel('ERROR')
57 | 
58 |     filename = '{}.log'.format(time.strftime('%Y%m%d_%H%M%S', time.localtime()))
59 |     log_file = os.path.join(work_dir, filename)
60 |     file_handler = logging.FileHandler(log_file, 'w')
61 |     file_handler.setLevel(logging.INFO)
62 |     logger.addHandler(file_handler)
63 | 
64 |     return logger
65 | 


--------------------------------------------------------------------------------
/tools/test.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import sys
  3 | sys.path.append('/home/billyhe/SA-SSD')
  4 | import torch
  5 | import mmcv
  6 | from mmcv.runner import load_checkpoint, parallel_test
  7 | from mmcv.parallel import scatter, collate, MMDataParallel
  8 | from mmdet.core.evaluation.kitti_eval import get_official_eval_result
  9 | from mmdet.core import results2json, coco_eval
 10 | from mmdet.datasets import build_dataloader
 11 | from mmdet.models import build_detector, detectors
 12 | import tools.kitti_common as kitti
 13 | import numpy as np
 14 | import torch.utils.data
 15 | import os
 16 | from tools.train_utils import load_params_from_file
 17 | from mmdet.datasets import utils
 18 | 
 19 | def single_test(model, data_loader, saveto=None, class_names=['Car']):
 20 |     template = '{} ' + ' '.join(['{:.4f}' for _ in range(15)]) + '\n'
 21 |     if saveto is not None:
 22 |         mmcv.mkdir_or_exist(saveto)
 23 | 
 24 |     model.eval()
 25 |     annos = []
 26 | 
 27 |     prog_bar = mmcv.ProgressBar(len(data_loader))
 28 | 
 29 |     for i, data in enumerate(data_loader):
 30 |         with torch.no_grad():
 31 |             results = model(return_loss=False, **data)
 32 |         annos+=results
 33 |         # image_shape = (375,1242)
 34 |         # for re in results:
 35 |         #     img_idx = re['image_idx']
 36 |         #     if re['bbox'] is not None:
 37 |         #         box2d = re['bbox']
 38 |         #         box3d = re['box3d_camera']
 39 |         #         labels = re['label_preds']
 40 |         #         scores = re['scores']
 41 |         #         alphas = re['alphas']
 42 |         #         anno = kitti.get_start_result_anno()
 43 |         #         num_example = 0
 44 |         #         for bbox2d, bbox3d, label, score, alpha in zip(box2d, box3d, labels, scores, alphas):
 45 |         #             if bbox2d[0] > image_shape[1] or bbox2d[1] > image_shape[0]:
 46 |         #                 continue
 47 |         #             if bbox2d[2] < 0 or bbox2d[3] < 0:
 48 |         #                 continue
 49 |         #             bbox2d[2:] = np.minimum(bbox2d[2:], image_shape[::-1])
 50 |         #             bbox2d[:2] = np.maximum(bbox2d[:2], [0, 0])
 51 |         #             anno["name"].append(class_names[int(label)])
 52 |         #             anno["truncated"].append(0.0)
 53 |         #             anno["occluded"].append(0)
 54 |         #             # anno["alpha"].append(-10)
 55 |         #             anno["alpha"].append(alpha)
 56 |         #             anno["bbox"].append(bbox2d)
 57 |         #             # anno["dimensions"].append(np.array([-1,-1,-1]))
 58 |         #             anno["dimensions"].append(bbox3d[[3, 4, 5]])
 59 |         #             # anno["location"].append(np.array([-1000,-1000,-1000]))
 60 |         #             anno["location"].append(bbox3d[:3])
 61 |         #             # anno["rotation_y"].append(-10)
 62 |         #             anno["rotation_y"].append(bbox3d[6])
 63 |         #             anno["score"].append(score)
 64 |         #             num_example += 1
 65 |         #         if num_example != 0:
 66 |         #             if saveto is not None:
 67 |         #                 of_path = os.path.join(saveto, '%06d.txt' % img_idx)
 68 |         #                 with open(of_path, 'w+') as f:
 69 |         #                     for name, bbox, dim, loc, ry, score, alpha in zip(anno['name'], anno["bbox"], \
 70 |         #                     anno["dimensions"], anno["location"], anno["rotation_y"], anno["score"],anno["alpha"]):
 71 |         #                         line = template.format(name, 0, 0, alpha, *bbox, *dim[[1,2,0]], *loc, ry, score)
 72 |         #                         f.write(line)
 73 |         #
 74 |         #             anno = {n: np.stack(v) for n, v in anno.items()}
 75 |         #             annos.append(anno)
 76 |         #         else:
 77 |         #             if saveto is not None:
 78 |         #                 of_path = os.path.join(saveto, '%06d.txt' % img_idx)
 79 |         #                 f = open(of_path, 'w+')
 80 |         #                 f.close()
 81 |         #             annos.append(kitti.empty_result_anno())
 82 |         #     else:
 83 |         #         if saveto is not None:
 84 |         #             of_path = os.path.join(saveto, '%06d.txt' % img_idx)
 85 |         #             f = open(of_path, 'w+')
 86 |         #             f.close()
 87 |         #         annos.append(kitti.empty_result_anno())
 88 |         #
 89 |         #     num_example = annos[-1]["name"].shape[0]
 90 |         #     annos[-1]["image_idx"] = np.array(
 91 |         #         [img_idx] * num_example, dtype=np.int64)
 92 |         prog_bar.update()
 93 | 
 94 |     return annos
 95 | 
 96 | 
 97 | def _data_func(data, device_id):
 98 |     data = scatter(collate([data], samples_per_gpu=1), [device_id])[0]
 99 |     return dict(return_loss=False, rescale=True, **data)
100 | 
101 | 
102 | def parse_args():
103 |     parser = argparse.ArgumentParser(description='MMDet test detector')
104 |     parser.add_argument('config', help='test config file path')
105 |     parser.add_argument('checkpoint', help='checkpoint file')
106 |     parser.add_argument(
107 |         '--gpus', default=1, type=int, help='GPU number used for testing')
108 |     parser.add_argument(
109 |         '--proc_per_gpu',
110 |         default=1,
111 |         type=int,
112 |         help='Number of processes per GPU')
113 |     parser.add_argument('--out', help='output result file')
114 |     parser.add_argument(
115 |         '--eval',
116 |         type=str,
117 |         nargs='+',
118 |         choices=['proposal', 'proposal_fast', 'bbox', 'segm', 'keypoints'],
119 |         help='eval types')
120 |     parser.add_argument('--show', action='store_true', help='show results')
121 |     args = parser.parse_args()
122 |     return args
123 | 
124 | 
125 | def main():
126 |     args = parse_args()
127 | 
128 |     cfg = mmcv.Config.fromfile(args.config)
129 |     cfg.model.pretrained = None
130 | 
131 |     dataset = utils.get_dataset(cfg.data.val)
132 |     class_names = cfg.data.val.class_names
133 | 
134 |     if args.gpus == 1:
135 |         model = build_detector(
136 |             cfg.model, train_cfg=None, test_cfg=cfg.test_cfg)
137 | 
138 |         print("Evaluate on", cfg.data.val.class_names)
139 |         setattr(model, 'class_names', class_names)
140 | 
141 |         #load_checkpoint(model, args.checkpoint)
142 |         model = MMDataParallel(model, device_ids=[0])
143 |         load_params_from_file(model, args.checkpoint)
144 |         data_loader = build_dataloader(
145 |             dataset,
146 |             1,
147 |             cfg.data.workers_per_gpu,
148 |             num_gpus=1,
149 |             shuffle=False,
150 |             dist=False)
151 |         outputs = single_test(model, data_loader, args.out)
152 |     else:
153 |         NotImplementedError
154 | 
155 |     # kitti evaluation
156 |     gt_annos = kitti.get_label_annos(dataset.label_prefix, dataset.sample_ids)
157 |     result = get_official_eval_result(gt_annos, outputs, current_classes=class_names)
158 |     print(result)
159 | 
160 | 
161 | if __name__ == '__main__':
162 |     main()
163 | 


--------------------------------------------------------------------------------
/tools/train.py:
--------------------------------------------------------------------------------
  1 | from __future__ import division
  2 | import argparse
  3 | import sys
  4 | sys.path.append('/home/billyhe/SA-SSD')
  5 | from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
  6 | from mmdet.datasets import build_dataloader
  7 | from tools.env import get_root_logger, init_dist, set_random_seed
  8 | from tools.train_utils import train_model
  9 | import pathlib
 10 | from mmcv import Config
 11 | from mmdet.datasets import get_dataset
 12 | from mmdet.models import build_detector
 13 | from tools.train_utils.optimization import build_optimizer, build_scheduler
 14 | 
 15 | def parse_args():
 16 |     parser = argparse.ArgumentParser(description='Train a detector')
 17 |     parser.add_argument('config', help='train config file path')
 18 |     parser.add_argument('--work_dir', help='the dir to save logs and models')
 19 |     parser.add_argument(
 20 |         '--validate',
 21 |         action='store_true',
 22 |         help='whether to evaluate the checkpoint during training')
 23 |     parser.add_argument(
 24 |         '--gpus',
 25 |         type=int,
 26 |         default=1,
 27 |         help='number of gpus to use '
 28 |              '(only applicable to non-distributed training)')
 29 |     parser.add_argument('--seed', type=int, default=0, help='random seed')
 30 |     parser.add_argument(
 31 |         '--launcher',
 32 |         choices=['none', 'pytorch', 'slurm', 'mpi'],
 33 |         default='none',
 34 |         help='job launcher')
 35 |     parser.add_argument('--local_rank', type=int, default=0)
 36 |     parser.add_argument('--max_ckpt_save_num', type=int, default=10)
 37 | 
 38 |     args = parser.parse_args()
 39 | 
 40 |     return args
 41 | 
 42 | 
 43 | 
 44 | def main():
 45 | 
 46 |     args = parse_args()
 47 | 
 48 |     cfg = Config.fromfile(args.config)
 49 | 
 50 |     if args.work_dir is not None:
 51 |         cfg.work_dir = args.work_dir
 52 | 
 53 |     pathlib.Path(cfg.work_dir).mkdir(parents=True, exist_ok=True)
 54 | 
 55 |     cfg.gpus = args.gpus
 56 | 
 57 |     # init distributed env first, since logger depends on the dist info.
 58 |     if args.launcher == 'none':
 59 |         distributed = False
 60 |     else:
 61 |         distributed = True
 62 |         init_dist(args.launcher, **cfg.dist_params)
 63 | 
 64 |     # init logger before other steps
 65 |     logger = get_root_logger(cfg.work_dir)
 66 | 
 67 |     logger.info('Distributed training: {}'.format(distributed))
 68 | 
 69 |     # set random seeds
 70 |     if args.seed is not None:
 71 |         logger.info('Set random seed to {}'.format(args.seed))
 72 |         set_random_seed(args.seed)
 73 | 
 74 |     model = build_detector(
 75 |         cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
 76 | 
 77 |     if distributed:
 78 |         model = MMDistributedDataParallel(model.cuda())
 79 |     else:
 80 |         model = MMDataParallel(model, device_ids=range(cfg.gpus)).cuda()
 81 | 
 82 |     train_dataset = get_dataset(cfg.data.train)
 83 | 
 84 |     optimizer = build_optimizer(model, cfg.optimizer)
 85 | 
 86 |     train_loader = build_dataloader(
 87 |         train_dataset,
 88 |         cfg.data.imgs_per_gpu,
 89 |         cfg.data.workers_per_gpu,
 90 |         dist=distributed)
 91 | 
 92 |     start_epoch = it = 0
 93 |     last_epoch = -1
 94 | 
 95 |     lr_scheduler, lr_warmup_scheduler = build_scheduler(
 96 |         optimizer, total_iters_each_epoch=len(train_loader), total_epochs=cfg.total_epochs,
 97 |         last_epoch=last_epoch, optim_cfg=cfg.optimizer, lr_cfg=cfg.lr_config
 98 |     )
 99 |     # -----------------------start training---------------------------
100 |     logger.info('**********************Start training**********************')
101 | 
102 |     train_model(
103 |         model,
104 |         optimizer,
105 |         train_loader,
106 |         lr_scheduler=lr_scheduler,
107 |         optim_cfg=cfg.optimizer,
108 |         start_epoch=start_epoch,
109 |         total_epochs=cfg.total_epochs,
110 |         start_iter=it,
111 |         rank=args.local_rank,
112 |         logger = logger,
113 |         ckpt_save_dir=cfg.work_dir,
114 |         lr_warmup_scheduler=lr_warmup_scheduler,
115 |         ckpt_save_interval=cfg.checkpoint_config.interval,
116 |         max_ckpt_save_num=args.max_ckpt_save_num,
117 |         log_interval = cfg.log_config.interval
118 |     )
119 | 
120 |     logger.info('**********************End training**********************')
121 | 
122 | 
123 | 
124 | 
125 | 
126 | if __name__ == '__main__':
127 |     main()
128 | 


--------------------------------------------------------------------------------
/tools/train_utils/optimization/__init__.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | import torch.optim as optim
 4 | from functools import partial
 5 | from .fastai_optim import OptimWrapper
 6 | from .learning_schedules_fastai import OneCycle, CosineWarmupLR
 7 | 
 8 | 
 9 | def build_optimizer(model, optim_cfg):
10 |     if optim_cfg.type == 'adam':
11 |         optimizer = optim.Adam(model.parameters(), lr=optim_cfg.lr, weight_decay=optim_cfg.weight_decay)
12 |     elif optim_cfg.type == 'sgd':
13 |         optimizer = optim.SGD(
14 |             model.parameters(), lr=optim_cfg.lr, weight_decay=optim_cfg.weight_decay,
15 |             momentum=optim_cfg.momentum
16 |         )
17 |     elif optim_cfg.type == 'adam_onecycle':
18 |         def children(m: nn.Module):
19 |             return list(m.children())
20 | 
21 |         def num_children(m: nn.Module) -> int:
22 |             return len(children(m))
23 | 
24 |         flatten_model = lambda m: sum(map(flatten_model, m.children()), []) if num_children(m) else [m]
25 |         get_layer_groups = lambda m: [nn.Sequential(*flatten_model(m))]
26 | 
27 |         optimizer_func = partial(optim.Adam, betas=(0.9, 0.99))
28 |         optimizer = OptimWrapper.create(
29 |             optimizer_func, optim_cfg.lr, get_layer_groups(model), wd=optim_cfg.weight_decay, true_wd=True, bn_wd=True
30 |         )
31 |     else:
32 |         raise NotImplementedError
33 | 
34 |     return optimizer
35 | 
36 | 
37 | def build_scheduler(optimizer, total_iters_each_epoch, total_epochs, last_epoch, optim_cfg, lr_cfg):
38 | 
39 |     lr_warmup_scheduler = None
40 |     total_steps = total_iters_each_epoch * total_epochs
41 | 
42 |     if lr_cfg.policy == 'onecycle':
43 |         lr_scheduler = OneCycle(
44 |             optimizer, total_steps, optim_cfg.lr, list(lr_cfg.moms), lr_cfg.div_factor, lr_cfg.pct_start
45 |         )
46 | 
47 |     elif lr_cfg.policy == 'cosine':
48 |         lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, total_steps, last_epoch=last_epoch)
49 | 
50 |     elif lr_cfg.policy == 'step':
51 | 
52 |         lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, lr_cfg.step, last_epoch=last_epoch)
53 | 
54 |     else:
55 |         raise NotImplementedError
56 | 
57 |     if 'warmup' in lr_cfg:
58 |           lr_warmup_scheduler = CosineWarmupLR(
59 |               optimizer, T_max=lr_cfg.warmup_iters,
60 |               eta_min=optim_cfg.lr * lr_cfg.warmup_ratio
61 |           )
62 | 
63 |     return lr_scheduler, lr_warmup_scheduler
64 | 


--------------------------------------------------------------------------------
/tools/train_utils/optimization/learning_schedules_fastai.py:
--------------------------------------------------------------------------------
  1 | # This file is modified from https://github.com/traveller59/second.pytorch
  2 | 
  3 | import numpy as np
  4 | import math
  5 | from functools import partial
  6 | import torch.optim.lr_scheduler as lr_sched
  7 | from .fastai_optim import OptimWrapper
  8 | 
  9 | 
 10 | class LRSchedulerStep(object):
 11 |     def __init__(self, fai_optimizer: OptimWrapper, total_step, lr_phases,
 12 |                  mom_phases):
 13 |         # if not isinstance(fai_optimizer, OptimWrapper):
 14 |         #     raise TypeError('{} is not a fastai OptimWrapper'.format(
 15 |         #         type(fai_optimizer).__name__))
 16 |         self.optimizer = fai_optimizer
 17 |         self.total_step = total_step
 18 |         self.lr_phases = []
 19 | 
 20 |         for i, (start, lambda_func) in enumerate(lr_phases):
 21 |             if len(self.lr_phases) != 0:
 22 |                 assert self.lr_phases[-1][0] < start
 23 |             if isinstance(lambda_func, str):
 24 |                 lambda_func = eval(lambda_func)
 25 |             if i < len(lr_phases) - 1:
 26 |                 self.lr_phases.append((int(start * total_step), int(lr_phases[i + 1][0] * total_step), lambda_func))
 27 |             else:
 28 |                 self.lr_phases.append((int(start * total_step), total_step, lambda_func))
 29 |         assert self.lr_phases[0][0] == 0
 30 |         self.mom_phases = []
 31 |         for i, (start, lambda_func) in enumerate(mom_phases):
 32 |             if len(self.mom_phases) != 0:
 33 |                 assert self.mom_phases[-1][0] < start
 34 |             if isinstance(lambda_func, str):
 35 |                 lambda_func = eval(lambda_func)
 36 |             if i < len(mom_phases) - 1:
 37 |                 self.mom_phases.append((int(start * total_step), int(mom_phases[i + 1][0] * total_step), lambda_func))
 38 |             else:
 39 |                 self.mom_phases.append((int(start * total_step), total_step, lambda_func))
 40 |         assert self.mom_phases[0][0] == 0
 41 | 
 42 |     def step(self, step):
 43 |         for start, end, func in self.lr_phases:
 44 |             if step >= start:
 45 |                 self.optimizer.lr = func((step - start) / (end - start))
 46 |         for start, end, func in self.mom_phases:
 47 |             if step >= start:
 48 |                 self.optimizer.mom = func((step - start) / (end - start))
 49 | 
 50 | 
 51 | def annealing_cos(start, end, pct):
 52 |     # print(pct, start, end)
 53 |     "Cosine anneal from `start` to `end` as pct goes from 0.0 to 1.0."
 54 |     cos_out = np.cos(np.pi * pct) + 1
 55 |     return end + (start - end) / 2 * cos_out
 56 | 
 57 | 
 58 | class OneCycle(LRSchedulerStep):
 59 |     def __init__(self, fai_optimizer, total_step, lr_max, moms, div_factor,
 60 |                  pct_start):
 61 |         self.lr_max = lr_max
 62 |         self.moms = moms
 63 |         self.div_factor = div_factor
 64 |         self.pct_start = pct_start
 65 |         a1 = int(total_step * self.pct_start)
 66 |         a2 = total_step - a1
 67 |         low_lr = self.lr_max / self.div_factor
 68 |         lr_phases = ((0, partial(annealing_cos, low_lr, self.lr_max)),
 69 |                      (self.pct_start,
 70 |                       partial(annealing_cos, self.lr_max, low_lr / 1e4)))
 71 |         mom_phases = ((0, partial(annealing_cos, *self.moms)),
 72 |                       (self.pct_start, partial(annealing_cos,
 73 |                                                *self.moms[::-1])))
 74 |         fai_optimizer.lr, fai_optimizer.mom = low_lr, self.moms[0]
 75 |         super().__init__(fai_optimizer, total_step, lr_phases, mom_phases)
 76 | 
 77 | 
 78 | class CosineWarmupLR(lr_sched._LRScheduler):
 79 |     def __init__(self, optimizer, T_max, eta_min=0, last_epoch=-1):
 80 |         self.T_max = T_max
 81 |         self.eta_min = eta_min
 82 |         super(CosineWarmupLR, self).__init__(optimizer, last_epoch)
 83 | 
 84 |     def get_lr(self):
 85 |         return [self.eta_min + (base_lr - self.eta_min) *
 86 |                 (1 - math.cos(math.pi * self.last_epoch / self.T_max)) / 2
 87 |                 for base_lr in self.base_lrs]
 88 | 
 89 | 
 90 | class FakeOptim:
 91 |     def __init__(self):
 92 |         self.lr = 0
 93 |         self.mom = 0
 94 | 
 95 | 
 96 | if __name__ == "__main__":
 97 |     import matplotlib.pyplot as plt
 98 | 
 99 |     opt = FakeOptim()  # 3e-3, wd=0.4, div_factor=10
100 |     schd = OneCycle(opt, 100, 3e-3, (0.95, 0.85), 10.0, 0.1)
101 | 
102 |     lrs = []
103 |     moms = []
104 |     for i in range(100):
105 |         schd.step(i)
106 |         lrs.append(opt.lr)
107 |         moms.append(opt.mom)
108 |     plt.plot(lrs)
109 |     # plt.plot(moms)
110 |     plt.show()
111 |     plt.plot(moms)
112 |     plt.show()
113 | 


--------------------------------------------------------------------------------