├── tools ├── __init__.py ├── analysis_tools │ ├── __init__.py │ ├── get_params.py │ └── benchmark.py ├── data_converter │ ├── __init__.py │ ├── lyft_data_fixer.py │ └── indoor_converter.py ├── dist_train.sh ├── dist_test.sh ├── misc │ ├── print_config.py │ ├── visualize_results.py │ └── fuse_conv_bn.py └── model_converters │ ├── publish_model.py │ ├── regnet2mmdet.py │ └── convert_votenet_checkpoints.py ├── projects ├── __init__.py ├── mmdet3d_plugin │ ├── models │ │ ├── opt │ │ │ ├── __init__.py │ │ │ └── adamw.py │ │ ├── hooks │ │ │ ├── __init__.py │ │ │ └── hooks.py │ │ ├── backbones │ │ │ └── __init__.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── bricks.py │ │ │ ├── visual.py │ │ │ ├── position_embedding.py │ │ │ └── grid_mask.py │ ├── bevformer │ │ ├── detectors │ │ │ └── __init__.py │ │ ├── dense_heads │ │ │ └── __init__.py │ │ ├── __init__.py │ │ ├── apis │ │ │ ├── __init__.py │ │ │ └── train.py │ │ └── modules │ │ │ └── __init__.py │ ├── core │ │ ├── evaluation │ │ │ ├── __init__.py │ │ │ └── eval_hooks.py │ │ └── bbox │ │ │ ├── coders │ │ │ ├── __init__.py │ │ │ └── nms_free_coder.py │ │ │ ├── assigners │ │ │ └── __init__.py │ │ │ ├── match_costs │ │ │ ├── __init__.py │ │ │ └── match_cost.py │ │ │ └── util.py │ ├── datasets │ │ ├── __init__.py │ │ ├── samplers │ │ │ ├── __init__.py │ │ │ ├── sampler.py │ │ │ ├── distributed_sampler.py │ │ │ └── group_sampler.py │ │ └── pipelines │ │ │ ├── __init__.py │ │ │ └── formating.py │ └── __init__.py └── configs │ ├── _base_ │ ├── models │ │ ├── paconv_cuda_ssg.py │ │ ├── hv_pointpillars_fpn_lyft.py │ │ ├── hv_pointpillars_fpn_range100_lyft.py │ │ ├── pointnet2_msg.py │ │ ├── pointnet2_ssg.py │ │ ├── paconv_ssg.py │ │ ├── fcos3d.py │ │ ├── votenet.py │ │ ├── groupfree3d.py │ │ ├── hv_second_secfpn_kitti.py │ │ ├── 3dssd.py │ │ ├── hv_pointpillars_secfpn_kitti.py │ │ ├── centerpoint_02pillar_second_secfpn_nus.py │ │ ├── centerpoint_01voxel_second_secfpn_nus.py │ │ ├── hv_pointpillars_fpn_nus.py │ │ ├── hv_second_secfpn_waymo.py │ │ ├── imvotenet_image.py │ │ ├── hv_pointpillars_secfpn_waymo.py │ │ └── mask_rcnn_r50_fpn.py │ ├── schedules │ │ ├── mmdet_schedule_1x.py │ │ ├── seg_cosine_200e.py │ │ ├── seg_cosine_50e.py │ │ ├── seg_cosine_150e.py │ │ ├── schedule_3x.py │ │ ├── schedule_2x.py │ │ ├── cosine.py │ │ ├── cyclic_20e.py │ │ └── cyclic_40e.py │ ├── default_runtime.py │ └── datasets │ │ ├── coco_instance.py │ │ ├── nuim_instance.py │ │ ├── nus-mono3d.py │ │ ├── sunrgbd-3d-10class.py │ │ ├── s3dis-3d-5class.py │ │ ├── scannet-3d-18class.py │ │ ├── scannet_seg-3d-20class.py │ │ ├── s3dis_seg-3d-13class.py │ │ ├── kitti-3d-car.py │ │ ├── lyft-3d.py │ │ ├── range100_lyft-3d.py │ │ ├── kitti-3d-3class.py │ │ ├── waymoD5-3d-car.py │ │ ├── waymoD5-3d-3class.py │ │ └── nus-3d.py │ └── datasets │ ├── custom_waymo-3d.py │ ├── custom_lyft-3d.py │ └── custom_nus-3d.py ├── figs ├── arch.png └── sota_results.png ├── docs ├── getting_started.md ├── prepare_dataset.md └── install.md ├── README.md └── hf_guide.md /tools/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /projects/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tools/analysis_tools/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/opt/__init__.py: -------------------------------------------------------------------------------- 1 | from .adamw import AdamW2 -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/hooks/__init__.py: -------------------------------------------------------------------------------- 1 | from .hooks import GradChecker -------------------------------------------------------------------------------- /figs/arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HFAiLab/BEVFormer/HEAD/figs/arch.png -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | from .bevformer import BEVFormer -------------------------------------------------------------------------------- /tools/data_converter/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .eval_hooks import CustomDistEvalHook -------------------------------------------------------------------------------- /figs/sota_results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HFAiLab/BEVFormer/HEAD/figs/sota_results.png -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/dense_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .bevformer_head import BEVFormerHead -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .vovnet import VoVNet 2 | 3 | __all__ = ['VoVNet'] -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/coders/__init__.py: -------------------------------------------------------------------------------- 1 | from .nms_free_coder import NMSFreeCoder 2 | 3 | __all__ = ['NMSFreeCoder'] 4 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .dense_heads import * 3 | from .detectors import * 4 | from .modules import * 5 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/assigners/__init__.py: -------------------------------------------------------------------------------- 1 | from .hungarian_assigner_3d import HungarianAssigner3D 2 | 3 | __all__ = ['HungarianAssigner3D'] 4 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/apis/__init__.py: -------------------------------------------------------------------------------- 1 | from .train import custom_train_model 2 | from .mmdet_train import 
custom_train_detector 3 | # from .test import custom_multi_gpu_test -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .nuscenes_dataset import CustomNuScenesDataset 2 | from .builder import custom_build_dataset 3 | 4 | __all__ = [ 5 | 'CustomNuScenesDataset' 6 | ] 7 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/match_costs/__init__.py: -------------------------------------------------------------------------------- 1 | from mmdet.core.bbox.match_costs import build_match_cost 2 | from .match_cost import BBox3DL1Cost 3 | 4 | __all__ = ['build_match_cost', 'BBox3DL1Cost'] -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .bricks import run_time 3 | from .grid_mask import GridMask 4 | from .position_embedding import RelPositionEmbedding 5 | from .visual import save_tensor -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from .group_sampler import DistributedGroupSampler 2 | from .distributed_sampler import DistributedSampler 3 | from .sampler import SAMPLER, build_sampler 4 | 5 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/paconv_cuda_ssg.py: -------------------------------------------------------------------------------- 1 | _base_ = './paconv_ssg.py' 2 | 3 | model = dict( 4 | backbone=dict( 5 | sa_cfg=dict( 6 | type='PAConvCUDASAModule', 7 | scorenet_cfg=dict(mlp_channels=[8, 16, 16])))) 8 | -------------------------------------------------------------------------------- /tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | PORT=${PORT:-28509} 6 | 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 8 | python \ 9 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} --deterministic 10 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/samplers/sampler.py: -------------------------------------------------------------------------------- 1 | from mmcv.utils.registry import Registry, build_from_cfg 2 | 3 | SAMPLER = Registry('sampler') 4 | 5 | 6 | def build_sampler(cfg, default_args): 7 | return build_from_cfg(cfg, SAMPLER, default_args) 8 | -------------------------------------------------------------------------------- /tools/analysis_tools/get_params.py: -------------------------------------------------------------------------------- 1 | import torch 2 | file_path = './ckpts/bevformer_v4.pth' 3 | model = torch.load(file_path, map_location='cpu') 4 | all = 0 5 | for key in list(model['state_dict'].keys()): 6 | all += model['state_dict'][key].nelement() 7 | print(all) 8 | 9 | # smaller 63374123 10 | # v4 69140395 11 | -------------------------------------------------------------------------------- /tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | 
PORT=${PORT:-29503} 7 | 8 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 9 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 10 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} --eval bbox 11 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .transformer import PerceptionTransformer 2 | from .spatial_cross_attention import SpatialCrossAttention, MSDeformableAttention3D 3 | from .temporal_self_attention import TemporalSelfAttention 4 | from .encoder import BEVFormerEncoder, BEVFormerLayer 5 | from .decoder import DetectionTransformerDecoder 6 | 7 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/mmdet_schedule_1x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[8, 11]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=12) 12 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/seg_cosine_200e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used on ScanNet dataset in segmentation task 3 | optimizer = dict(type='Adam', lr=0.001, weight_decay=0.01) 4 | optimizer_config = dict(grad_clip=None) 5 | lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5) 6 | momentum_config = None 7 | 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=200) 10 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/seg_cosine_50e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used on S3DIS dataset in segmentation task 3 | optimizer = dict(type='Adam', lr=0.001, weight_decay=0.001) 4 | optimizer_config = dict(grad_clip=None) 5 | lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5) 6 | momentum_config = None 7 | 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=50) 10 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/seg_cosine_150e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used on S3DIS dataset in segmentation task 3 | optimizer = dict(type='SGD', lr=0.2, weight_decay=0.0001, momentum=0.9) 4 | optimizer_config = dict(grad_clip=None) 5 | lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=0.002) 6 | momentum_config = None 7 | 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=150) 10 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/schedule_3x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used by models on indoor dataset, 3 | # e.g., VoteNet on SUNRGBD and ScanNet 4 | lr = 0.008 # max learning rate 
5 | optimizer = dict(type='AdamW', lr=lr, weight_decay=0.01) 6 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 7 | lr_config = dict(policy='step', warmup=None, step=[24, 32]) 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=36) 10 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/hooks/hooks.py: -------------------------------------------------------------------------------- 1 | from mmcv.runner.hooks.hook import HOOKS, Hook 2 | from projects.mmdet3d_plugin.models.utils import run_time 3 | 4 | 5 | @HOOKS.register_module() 6 | class GradChecker(Hook): 7 | 8 | def after_train_iter(self, runner): 9 | for key, val in runner.model.named_parameters(): 10 | if val.grad is None and val.requires_grad: 11 | print('WARNING: {key}\'s parameters are not being used!'.format(key=key)) 12 | 13 | 14 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/schedule_2x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used by models on nuScenes dataset 3 | optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.01) 4 | # max_norm=10 is better for SECOND 5 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 6 | lr_config = dict( 7 | policy='step', 8 | warmup='linear', 9 | warmup_iters=1000, 10 | warmup_ratio=1.0 / 1000, 11 | step=[20, 23]) 12 | momentum_config = None 13 | # runtime settings 14 | runner = dict(type='EpochBasedRunner', max_epochs=24) 15 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/__init__.py: -------------------------------------------------------------------------------- 1 | from .core.bbox.assigners.hungarian_assigner_3d import HungarianAssigner3D 2 | from .core.bbox.coders.nms_free_coder import NMSFreeCoder 3 | from .core.bbox.match_costs import BBox3DL1Cost 4 | from .core.evaluation.eval_hooks import CustomDistEvalHook 5 | from .datasets.pipelines import ( 6 | PhotoMetricDistortionMultiViewImage, PadMultiViewImage, 7 | NormalizeMultiviewImage, CustomCollect3D) 8 | from .models.backbones.vovnet import VoVNet 9 | from .models.utils import * 10 | from .models.opt.adamw import AdamW2 11 | from .bevformer import * 12 | -------------------------------------------------------------------------------- /projects/configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | checkpoint_config = dict(interval=1) 2 | # yapf:disable push 3 | # By default we use textlogger hook and tensorboard 4 | # For more loggers see 5 | # https://mmcv.readthedocs.io/en/latest/api.html#mmcv.runner.LoggerHook 6 | log_config = dict( 7 | interval=50, 8 | hooks=[ 9 | dict(type='TextLoggerHook'), 10 | dict(type='TensorboardLoggerHook') 11 | ]) 12 | # yapf:enable 13 | dist_params = dict(backend='nccl') 14 | log_level = 'INFO' 15 | work_dir = None 16 | load_from = None 17 | resume_from = None 18 | workflow = [('train', 1)] 19 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | from .transform_3d import ( 2 | PadMultiViewImage, NormalizeMultiviewImage, 3 | PhotoMetricDistortionMultiViewImage, CustomCollect3D, RandomScaleImageMultiViewImage) 4 | from .formating import
CustomDefaultFormatBundle3D 5 | from .loading import FFrecordClient, LoadMultiViewImageFromFilesHF 6 | __all__ = [ 7 | 'PadMultiViewImage', 'NormalizeMultiviewImage', 8 | 'PhotoMetricDistortionMultiViewImage', 'CustomDefaultFormatBundle3D', 'CustomCollect3D', 9 | 'RandomScaleImageMultiViewImage', 'FFrecordClient', 'LoadMultiViewImageFromFilesHF' 10 | ] -------------------------------------------------------------------------------- /docs/getting_started.md: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | 3 | **Please ensure you have prepared the environment and the nuScenes dataset.** 4 | 5 | # Train and Test 6 | 7 | Train BEVFormer with 8 GPUs 8 | ``` 9 | ./tools/dist_train.sh ./projects/configs/bevformer/bevformer_base.py 8 10 | ``` 11 | 12 | Eval BEVFormer with 8 GPUs 13 | ``` 14 | ./tools/dist_test.sh ./projects/configs/bevformer/bevformer_base.py ./path/to/ckpts.pth 8 15 | ``` 16 | Note: using 1 GPU to eval can obtain slightly higher performance because continuous video may be truncated with multiple GPUs. By default we report the score evaled with 8 GPUs. 17 | 18 | 19 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/cosine.py: -------------------------------------------------------------------------------- 1 | # This schedule is mainly used by models with dynamic voxelization 2 | # optimizer 3 | lr = 0.003 # max learning rate 4 | optimizer = dict( 5 | type='AdamW', 6 | lr=lr, 7 | betas=(0.95, 0.99), # the momentum is change during training 8 | weight_decay=0.001) 9 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 10 | 11 | lr_config = dict( 12 | policy='CosineAnnealing', 13 | warmup='linear', 14 | warmup_iters=1000, 15 | warmup_ratio=1.0 / 10, 16 | min_lr_ratio=1e-5) 17 | 18 | momentum_config = None 19 | 20 | runner = dict(type='EpochBasedRunner', max_epochs=40) 21 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/bricks.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import time 3 | from collections import defaultdict 4 | time_maps = defaultdict(lambda :0.) 5 | count_maps = defaultdict(lambda :0.) 6 | def run_time(name): 7 | def middle(fn): 8 | def wrapper(*args, **kwargs): 9 | start = time.time() 10 | res = fn(*args, **kwargs) 11 | time_maps['%s : %s'%(name, fn.__name__) ] += time.time()-start 12 | count_maps['%s : %s'%(name, fn.__name__) ] +=1 13 | print("%s : %s takes up %f "% (name, fn.__name__,time_maps['%s : %s'%(name, fn.__name__) ] /count_maps['%s : %s'%(name, fn.__name__) ] )) 14 | return res 15 | return wrapper 16 | return middle 17 | -------------------------------------------------------------------------------- /tools/misc/print_config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | import argparse 3 | from mmcv import Config, DictAction 4 | 5 | 6 | def parse_args(): 7 | parser = argparse.ArgumentParser(description='Print the whole config') 8 | parser.add_argument('config', help='config file path') 9 | parser.add_argument( 10 | '--options', nargs='+', action=DictAction, help='arguments in dict') 11 | args = parser.parse_args() 12 | 13 | return args 14 | 15 | 16 | def main(): 17 | args = parse_args() 18 | 19 | cfg = Config.fromfile(args.config) 20 | if args.options is not None: 21 | cfg.merge_from_dict(args.options) 22 | print(f'Config:\n{cfg.pretty_text}') 23 | 24 | 25 | if __name__ == '__main__': 26 | main() 27 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/visual.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torchvision.utils import make_grid 3 | import torchvision 4 | import matplotlib.pyplot as plt 5 | import cv2 6 | 7 | 8 | def convert_color(img_path): 9 | plt.figure() 10 | img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE) 11 | plt.imsave(img_path, img, cmap=plt.get_cmap('viridis')) 12 | plt.close() 13 | 14 | 15 | def save_tensor(tensor, path, pad_value=254.0,): 16 | print('save_tensor', path) 17 | tensor = tensor.to(torch.float).detach().cpu() 18 | if tensor.type() == 'torch.BoolTensor': 19 | tensor = tensor*255 20 | if len(tensor.shape) == 3: 21 | tensor = tensor.unsqueeze(1) 22 | tensor = make_grid(tensor, pad_value=pad_value, normalize=False).permute(1, 2, 0).numpy().copy() 23 | torchvision.utils.save_image(torch.tensor(tensor).permute(2, 0, 1), path) 24 | convert_color(path) 25 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/cyclic_20e.py: -------------------------------------------------------------------------------- 1 | # For nuScenes dataset, we usually evaluate the model at the end of training. 2 | # Since the models are trained by 24 epochs by default, we set evaluation 3 | # interval to be 20. Please change the interval accordingly if you do not 4 | # use a default schedule. 5 | # optimizer 6 | # This schedule is mainly used by models on nuScenes dataset 7 | optimizer = dict(type='AdamW', lr=1e-4, weight_decay=0.01) 8 | # max_norm=10 is better for SECOND 9 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 10 | lr_config = dict( 11 | policy='cyclic', 12 | target_ratio=(10, 1e-4), 13 | cyclic_times=1, 14 | step_ratio_up=0.4, 15 | ) 16 | momentum_config = dict( 17 | policy='cyclic', 18 | target_ratio=(0.85 / 0.95, 1), 19 | cyclic_times=1, 20 | step_ratio_up=0.4, 21 | ) 22 | 23 | # runtime settings 24 | runner = dict(type='EpochBasedRunner', max_epochs=20) 25 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/match_costs/match_cost.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from mmdet.core.bbox.match_costs.builder import MATCH_COST 3 | 4 | 5 | @MATCH_COST.register_module() 6 | class BBox3DL1Cost(object): 7 | """BBox3DL1Cost. 8 | Args: 9 | weight (int | float, optional): loss_weight 10 | """ 11 | 12 | def __init__(self, weight=1.): 13 | self.weight = weight 14 | 15 | def __call__(self, bbox_pred, gt_bboxes): 16 | """ 17 | Args: 18 | bbox_pred (Tensor): Predicted boxes with normalized coordinates 19 | (cx, cy, w, h), which are all in range [0, 1]. Shape 20 | [num_query, 4]. 
21 | gt_bboxes (Tensor): Ground truth boxes with normalized 22 | coordinates (x1, y1, x2, y2). Shape [num_gt, 4]. 23 | Returns: 24 | torch.Tensor: bbox_cost value with weight 25 | """ 26 | bbox_cost = torch.cdist(bbox_pred, gt_bboxes, p=1) 27 | return bbox_cost * self.weight -------------------------------------------------------------------------------- /projects/configs/_base_/models/hv_pointpillars_fpn_lyft.py: -------------------------------------------------------------------------------- 1 | _base_ = './hv_pointpillars_fpn_nus.py' 2 | 3 | # model settings (based on nuScenes model settings) 4 | # Voxel size for voxel encoder 5 | # Usually voxel size is changed consistently with the point cloud range 6 | # If point cloud range is modified, do remember to change all related 7 | # keys in the config. 8 | model = dict( 9 | pts_voxel_layer=dict( 10 | max_num_points=20, 11 | point_cloud_range=[-80, -80, -5, 80, 80, 3], 12 | max_voxels=(60000, 60000)), 13 | pts_voxel_encoder=dict( 14 | feat_channels=[64], point_cloud_range=[-80, -80, -5, 80, 80, 3]), 15 | pts_middle_encoder=dict(output_shape=[640, 640]), 16 | pts_bbox_head=dict( 17 | num_classes=9, 18 | anchor_generator=dict( 19 | ranges=[[-80, -80, -1.8, 80, 80, -1.8]], custom_values=[]), 20 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7)), 21 | # model training settings (based on nuScenes model settings) 22 | train_cfg=dict(pts=dict(code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]))) 23 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/hv_pointpillars_fpn_range100_lyft.py: -------------------------------------------------------------------------------- 1 | _base_ = './hv_pointpillars_fpn_nus.py' 2 | 3 | # model settings (based on nuScenes model settings) 4 | # Voxel size for voxel encoder 5 | # Usually voxel size is changed consistently with the point cloud range 6 | # If point cloud range is modified, do remember to change all related 7 | # keys in the config. 8 | model = dict( 9 | pts_voxel_layer=dict( 10 | max_num_points=20, 11 | point_cloud_range=[-100, -100, -5, 100, 100, 3], 12 | max_voxels=(60000, 60000)), 13 | pts_voxel_encoder=dict( 14 | feat_channels=[64], point_cloud_range=[-100, -100, -5, 100, 100, 3]), 15 | pts_middle_encoder=dict(output_shape=[800, 800]), 16 | pts_bbox_head=dict( 17 | num_classes=9, 18 | anchor_generator=dict( 19 | ranges=[[-100, -100, -1.8, 100, 100, -1.8]], custom_values=[]), 20 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7)), 21 | # model training settings (based on nuScenes model settings) 22 | train_cfg=dict(pts=dict(code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]))) 23 | -------------------------------------------------------------------------------- /docs/prepare_dataset.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## NuScenes 4 | Download nuScenes V1.0 full dataset data and CAN bus expansion data [HERE](https://www.nuscenes.org/download). 
Prepare nuScenes data with the steps below. 5 | 6 | 7 | **Download CAN bus expansion** 8 | ``` 9 | # download 'can_bus.zip' 10 | unzip can_bus.zip 11 | # move can_bus to data dir 12 | ``` 13 | 14 | **Prepare nuScenes data** 15 | 16 | *We generate custom annotation files, which are different from mmdet3d's* 17 | ``` 18 | python tools/create_data.py nuscenes --root-path ./data/nuscenes --out-dir ./data/nuscenes --extra-tag nuscenes --version v1.0 --canbus ./data 19 | ``` 20 | 21 | Running the above command will generate `nuscenes_infos_temporal_{train,val}.pkl`. 22 | 23 | **Folder structure** 24 | ``` 25 | bevformer 26 | ├── projects/ 27 | ├── tools/ 28 | ├── configs/ 29 | ├── ckpts/ 30 | │ ├── r101_dcn_fcos3d_pretrain.pth 31 | ├── data/ 32 | │ ├── can_bus/ 33 | │ ├── nuscenes/ 34 | │ │ ├── maps/ 35 | │ │ ├── samples/ 36 | │ │ ├── sweeps/ 37 | │ │ ├── v1.0-test/ 38 | │ │ ├── v1.0-trainval/ 39 | │ │ ├── nuscenes_infos_temporal_train.pkl 40 | │ │ ├── nuscenes_infos_temporal_val.pkl 41 | ``` 42 | -------------------------------------------------------------------------------- /tools/model_converters/publish_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import subprocess 4 | import torch 5 | 6 | 7 | def parse_args(): 8 | parser = argparse.ArgumentParser( 9 | description='Process a checkpoint to be published') 10 | parser.add_argument('in_file', help='input checkpoint filename') 11 | parser.add_argument('out_file', help='output checkpoint filename') 12 | args = parser.parse_args() 13 | return args 14 | 15 | 16 | def process_checkpoint(in_file, out_file): 17 | checkpoint = torch.load(in_file, map_location='cpu') 18 | # remove optimizer for smaller file size 19 | if 'optimizer' in checkpoint: 20 | del checkpoint['optimizer'] 21 | # if it is necessary to remove some sensitive data in checkpoint['meta'], 22 | # add the code here.
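# For example, a minimal commented-out sketch (the keys below are hypothetical;
# drop whatever sensitive entries your own checkpoints actually store):
# for _key in ('env_info', 'hostname'):
#     checkpoint.get('meta', {}).pop(_key, None)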
23 | torch.save(checkpoint, out_file) 24 | sha = subprocess.check_output(['sha256sum', out_file]).decode() 25 | final_file = out_file.rstrip('.pth') + '-{}.pth'.format(sha[:8]) 26 | subprocess.Popen(['mv', out_file, final_file]) 27 | 28 | 29 | def main(): 30 | args = parse_args() 31 | process_checkpoint(args.in_file, args.out_file) 32 | 33 | 34 | if __name__ == '__main__': 35 | main() 36 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/pointnet2_msg.py: -------------------------------------------------------------------------------- 1 | _base_ = './pointnet2_ssg.py' 2 | 3 | # model settings 4 | model = dict( 5 | backbone=dict( 6 | _delete_=True, 7 | type='PointNet2SAMSG', 8 | in_channels=6, # [xyz, rgb], should be modified with dataset 9 | num_points=(1024, 256, 64, 16), 10 | radii=((0.05, 0.1), (0.1, 0.2), (0.2, 0.4), (0.4, 0.8)), 11 | num_samples=((16, 32), (16, 32), (16, 32), (16, 32)), 12 | sa_channels=(((16, 16, 32), (32, 32, 64)), ((64, 64, 128), (64, 96, 13 | 128)), 14 | ((128, 196, 256), (128, 196, 256)), ((256, 256, 512), 15 | (256, 384, 512))), 16 | aggregation_channels=(None, None, None, None), 17 | fps_mods=(('D-FPS'), ('D-FPS'), ('D-FPS'), ('D-FPS')), 18 | fps_sample_range_lists=((-1), (-1), (-1), (-1)), 19 | dilated_group=(False, False, False, False), 20 | out_indices=(0, 1, 2, 3), 21 | sa_cfg=dict( 22 | type='PointSAModuleMSG', 23 | pool_mod='max', 24 | use_xyz=True, 25 | normalize_xyz=False)), 26 | decode_head=dict( 27 | fp_channels=((1536, 256, 256), (512, 256, 256), (352, 256, 128), 28 | (128, 128, 128, 128)))) 29 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/pointnet2_ssg.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='EncoderDecoder3D', 4 | backbone=dict( 5 | type='PointNet2SASSG', 6 | in_channels=6, # [xyz, rgb], should be modified with dataset 7 | num_points=(1024, 256, 64, 16), 8 | radius=(0.1, 0.2, 0.4, 0.8), 9 | num_samples=(32, 32, 32, 32), 10 | sa_channels=((32, 32, 64), (64, 64, 128), (128, 128, 256), (256, 256, 11 | 512)), 12 | fp_channels=(), 13 | norm_cfg=dict(type='BN2d'), 14 | sa_cfg=dict( 15 | type='PointSAModule', 16 | pool_mod='max', 17 | use_xyz=True, 18 | normalize_xyz=False)), 19 | decode_head=dict( 20 | type='PointNet2Head', 21 | fp_channels=((768, 256, 256), (384, 256, 256), (320, 256, 128), 22 | (128, 128, 128, 128)), 23 | channels=128, 24 | dropout_ratio=0.5, 25 | conv_cfg=dict(type='Conv1d'), 26 | norm_cfg=dict(type='BN1d'), 27 | act_cfg=dict(type='ReLU'), 28 | loss_decode=dict( 29 | type='CrossEntropyLoss', 30 | use_sigmoid=False, 31 | class_weight=None, # should be modified with dataset 32 | loss_weight=1.0)), 33 | # model training and testing settings 34 | train_cfg=dict(), 35 | test_cfg=dict(mode='slide')) 36 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/samplers/distributed_sampler.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | from torch.utils.data import DistributedSampler as _DistributedSampler 5 | from .sampler import SAMPLER 6 | 7 | 8 | @SAMPLER.register_module() 9 | class DistributedSampler(_DistributedSampler): 10 | 11 | def __init__(self, 12 | dataset=None, 13 | num_replicas=None, 14 | rank=None, 15 | shuffle=True, 16 | seed=0): 17 | super().__init__( 18 | dataset, 
num_replicas=num_replicas, rank=rank, shuffle=shuffle) 19 | # for the compatibility from PyTorch 1.3+ 20 | self.seed = seed if seed is not None else 0 21 | 22 | def __iter__(self): 23 | # deterministically shuffle based on epoch 24 | if self.shuffle: 25 | assert False 26 | else: 27 | indices = torch.arange(len(self.dataset)).tolist() 28 | 29 | # add extra samples to make it evenly divisible 30 | # in case that indices is shorter than half of total_size 31 | indices = (indices * 32 | math.ceil(self.total_size / len(indices)))[:self.total_size] 33 | assert len(indices) == self.total_size 34 | 35 | # subsample 36 | per_replicas = self.total_size//self.num_replicas 37 | # indices = indices[self.rank:self.total_size:self.num_replicas] 38 | indices = indices[self.rank*per_replicas:(self.rank+1)*per_replicas] 39 | assert len(indices) == self.num_samples 40 | 41 | return iter(indices) 42 | -------------------------------------------------------------------------------- /tools/data_converter/lyft_data_fixer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import numpy as np 4 | import os 5 | 6 | 7 | def fix_lyft(root_folder='./data/lyft', version='v1.01'): 8 | # refer to https://www.kaggle.com/c/3d-object-detection-for-autonomous-vehicles/discussion/110000 # noqa 9 | lidar_path = 'lidar/host-a011_lidar1_1233090652702363606.bin' 10 | root_folder = os.path.join(root_folder, f'{version}-train') 11 | lidar_path = os.path.join(root_folder, lidar_path) 12 | assert os.path.isfile(lidar_path), f'Please download the complete Lyft ' \ 13 | f'dataset and make sure {lidar_path} is present.' 14 | points = np.fromfile(lidar_path, dtype=np.float32, count=-1) 15 | try: 16 | points.reshape([-1, 5]) 17 | print(f'This fix is not required for version {version}.') 18 | except ValueError: 19 | new_points = np.array(list(points) + [100.0, 1.0], dtype='float32') 20 | new_points.tofile(lidar_path) 21 | print(f'Appended 100.0 and 1.0 to the end of {lidar_path}.') 22 | 23 | 24 | parser = argparse.ArgumentParser(description='Lyft dataset fixer arg parser') 25 | parser.add_argument( 26 | '--root-folder', 27 | type=str, 28 | default='./data/lyft', 29 | help='specify the root path of Lyft dataset') 30 | parser.add_argument( 31 | '--version', 32 | type=str, 33 | default='v1.01', 34 | help='specify Lyft dataset version') 35 | args = parser.parse_args() 36 | 37 | if __name__ == '__main__': 38 | fix_lyft(root_folder=args.root_folder, version=args.version) 39 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/position_embedding.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import math 4 | 5 | class RelPositionEmbedding(nn.Module): 6 | def __init__(self, num_pos_feats=64, pos_norm=True): 7 | super().__init__() 8 | self.num_pos_feats = num_pos_feats 9 | self.fc = nn.Linear(4, self.num_pos_feats,bias=False) 10 | #nn.init.orthogonal_(self.fc.weight) 11 | #self.fc.weight.requires_grad = False 12 | self.pos_norm = pos_norm 13 | if self.pos_norm: 14 | self.norm = nn.LayerNorm(self.num_pos_feats) 15 | def forward(self, tensor): 16 | #mask = nesttensor.mask 17 | B,C,H,W = tensor.shape 18 | #print('tensor.shape', tensor.shape) 19 | y_range = (torch.arange(H) / float(H - 1)).to(tensor.device) 20 | #y_axis = torch.stack((y_range, 1-y_range),dim=1) 21 | y_axis = 
torch.stack((torch.cos(y_range * math.pi), torch.sin(y_range * math.pi)), dim=1) 22 | y_axis = y_axis.reshape(H, 1, 2).repeat(1, W, 1).reshape(H * W, 2) 23 | 24 | x_range = (torch.arange(W) / float(W - 1)).to(tensor.device) 25 | #x_axis =torch.stack((x_range,1-x_range),dim=1) 26 | x_axis = torch.stack((torch.cos(x_range * math.pi), torch.sin(x_range * math.pi)), dim=1) 27 | x_axis = x_axis.reshape(1, W, 2).repeat(H, 1, 1).reshape(H * W, 2) 28 | x_pos = torch.cat((y_axis, x_axis), dim=1) 29 | x_pos = self.fc(x_pos) 30 | 31 | if self.pos_norm: 32 | x_pos = self.norm(x_pos) 33 | #print('xpos,', x_pos.max(),x_pos.min()) 34 | return x_pos -------------------------------------------------------------------------------- /tools/misc/visualize_results.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import mmcv 4 | from mmcv import Config 5 | 6 | from mmdet3d.datasets import build_dataset 7 | 8 | 9 | def parse_args(): 10 | parser = argparse.ArgumentParser( 11 | description='MMDet3D visualize the results') 12 | parser.add_argument('config', help='test config file path') 13 | parser.add_argument('--result', help='results file in pickle format') 14 | parser.add_argument( 15 | '--show-dir', help='directory where visualize results will be saved') 16 | args = parser.parse_args() 17 | 18 | return args 19 | 20 | 21 | def main(): 22 | args = parse_args() 23 | 24 | if args.result is not None and \ 25 | not args.result.endswith(('.pkl', '.pickle')): 26 | raise ValueError('The results file must be a pkl file.') 27 | 28 | cfg = Config.fromfile(args.config) 29 | cfg.data.test.test_mode = True 30 | 31 | # build the dataset 32 | dataset = build_dataset(cfg.data.test) 33 | results = mmcv.load(args.result) 34 | 35 | if getattr(dataset, 'show', None) is not None: 36 | # data loading pipeline for showing 37 | eval_pipeline = cfg.get('eval_pipeline', {}) 38 | if eval_pipeline: 39 | dataset.show(results, args.show_dir, pipeline=eval_pipeline) 40 | else: 41 | dataset.show(results, args.show_dir) # use default pipeline 42 | else: 43 | raise NotImplementedError( 44 | 'Show is not implemented for dataset {}!'.format( 45 | type(dataset).__name__)) 46 | 47 | 48 | if __name__ == '__main__': 49 | main() 50 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/util.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def normalize_bbox(bboxes, pc_range): 5 | 6 | cx = bboxes[..., 0:1] 7 | cy = bboxes[..., 1:2] 8 | cz = bboxes[..., 2:3] 9 | w = bboxes[..., 3:4].log() 10 | l = bboxes[..., 4:5].log() 11 | h = bboxes[..., 5:6].log() 12 | 13 | rot = bboxes[..., 6:7] 14 | if bboxes.size(-1) > 7: 15 | vx = bboxes[..., 7:8] 16 | vy = bboxes[..., 8:9] 17 | normalized_bboxes = torch.cat( 18 | (cx, cy, w, l, cz, h, rot.sin(), rot.cos(), vx, vy), dim=-1 19 | ) 20 | else: 21 | normalized_bboxes = torch.cat( 22 | (cx, cy, w, l, cz, h, rot.sin(), rot.cos()), dim=-1 23 | ) 24 | return normalized_bboxes 25 | 26 | def denormalize_bbox(normalized_bboxes, pc_range): 27 | # rotation 28 | rot_sine = normalized_bboxes[..., 6:7] 29 | 30 | rot_cosine = normalized_bboxes[..., 7:8] 31 | rot = torch.atan2(rot_sine, rot_cosine) 32 | 33 | # center in the bev 34 | cx = normalized_bboxes[..., 0:1] 35 | cy = normalized_bboxes[..., 1:2] 36 | cz = normalized_bboxes[..., 4:5] 37 | 38 | # size 39 | w = normalized_bboxes[..., 2:3] 40 
| l = normalized_bboxes[..., 3:4] 41 | h = normalized_bboxes[..., 5:6] 42 | 43 | w = w.exp() 44 | l = l.exp() 45 | h = h.exp() 46 | if normalized_bboxes.size(-1) > 8: 47 | # velocity 48 | vx = normalized_bboxes[:, 8:9] 49 | vy = normalized_bboxes[:, 9:10] 50 | denormalized_bboxes = torch.cat([cx, cy, cz, w, l, h, rot, vx, vy], dim=-1) 51 | else: 52 | denormalized_bboxes = torch.cat([cx, cy, cz, w, l, h, rot], dim=-1) 53 | return denormalized_bboxes -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/cyclic_40e.py: -------------------------------------------------------------------------------- 1 | # The schedule is usually used by models trained on KITTI dataset 2 | 3 | # The learning rate set in the cyclic schedule is the initial learning rate 4 | # rather than the max learning rate. Since the target_ratio is (10, 1e-4), 5 | # the learning rate will change from 0.0018 to 0.018, than go to 0.0018*1e-4 6 | lr = 0.0018 7 | # The optimizer follows the setting in SECOND.Pytorch, but here we use 8 | # the offcial AdamW optimizer implemented by PyTorch. 9 | optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01) 10 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 11 | # We use cyclic learning rate and momentum schedule following SECOND.Pytorch 12 | # https://github.com/traveller59/second.pytorch/blob/3aba19c9688274f75ebb5e576f65cfe54773c021/torchplus/train/learning_schedules_fastai.py#L69 # noqa 13 | # We implement them in mmcv, for more details, please refer to 14 | # https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/lr_updater.py#L327 # noqa 15 | # https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/momentum_updater.py#L130 # noqa 16 | lr_config = dict( 17 | policy='cyclic', 18 | target_ratio=(10, 1e-4), 19 | cyclic_times=1, 20 | step_ratio_up=0.4, 21 | ) 22 | momentum_config = dict( 23 | policy='cyclic', 24 | target_ratio=(0.85 / 0.95, 1), 25 | cyclic_times=1, 26 | step_ratio_up=0.4, 27 | ) 28 | # Although the max_epochs is 40, this schedule is usually used we 29 | # RepeatDataset with repeat ratio N, thus the actual max epoch 30 | # number could be Nx40 31 | runner = dict(type='EpochBasedRunner', max_epochs=40) 32 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/pipelines/formating.py: -------------------------------------------------------------------------------- 1 | 2 | # Copyright (c) OpenMMLab. All rights reserved. 3 | import numpy as np 4 | from mmcv.parallel import DataContainer as DC 5 | 6 | from mmdet3d.core.bbox import BaseInstance3DBoxes 7 | from mmdet3d.core.points import BasePoints 8 | from mmdet.datasets.builder import PIPELINES 9 | from mmdet.datasets.pipelines import to_tensor 10 | from mmdet3d.datasets.pipelines import DefaultFormatBundle3D 11 | 12 | @PIPELINES.register_module() 13 | class CustomDefaultFormatBundle3D(DefaultFormatBundle3D): 14 | """Default formatting bundle. 15 | It simplifies the pipeline of formatting common fields for voxels, 16 | including "proposals", "gt_bboxes", "gt_labels", "gt_masks" and 17 | "gt_semantic_seg". 18 | These fields are formatted as follows. 
19 | - img: (1)transpose, (2)to tensor, (3)to DataContainer (stack=True) 20 | - proposals: (1)to tensor, (2)to DataContainer 21 | - gt_bboxes: (1)to tensor, (2)to DataContainer 22 | - gt_bboxes_ignore: (1)to tensor, (2)to DataContainer 23 | - gt_labels: (1)to tensor, (2)to DataContainer 24 | """ 25 | 26 | def __call__(self, results): 27 | """Call function to transform and format common fields in results. 28 | Args: 29 | results (dict): Result dict contains the data to convert. 30 | Returns: 31 | dict: The result dict contains the data that is formatted with 32 | default bundle. 33 | """ 34 | # Format 3D data 35 | results = super(CustomDefaultFormatBundle3D, self).__call__(results) 36 | results['gt_map_masks'] = DC( 37 | to_tensor(results['gt_map_masks']), stack=True) 38 | 39 | return results -------------------------------------------------------------------------------- /docs/install.md: -------------------------------------------------------------------------------- 1 | # Step-by-step installation instructions 2 | 3 | Following https://mmdetection3d.readthedocs.io/en/latest/getting_started.html#installation 4 | 5 | 6 | 7 | **a. Create a conda virtual environment and activate it.** 8 | ```shell 9 | conda create -n open-mmlab python=3.8 -y 10 | conda activate open-mmlab 11 | ``` 12 | 13 | **b. Install PyTorch and torchvision following the [official instructions](https://pytorch.org/).** 14 | ```shell 15 | pip install torch==1.9.1+cu111 torchvision==0.10.1+cu111 torchaudio==0.9.1 -f https://download.pytorch.org/whl/torch_stable.html 16 | # Recommended torch>=1.9 17 | 18 | ``` 19 | 20 | **c. Install gcc>=5 in conda env (optional).** 21 | ```shell 22 | conda install -c omgarcia gcc-6 # gcc-6.2 23 | ``` 24 | 25 | **d. Install mmcv-full.** 26 | ```shell 27 | pip install mmcv-full==1.4.0 28 | # pip install mmcv-full==1.4.0 -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html 29 | ``` 30 | 31 | **e. Install mmdet and mmseg.** 32 | ```shell 33 | pip install mmdet==2.14.0 34 | pip install mmsegmentation==0.14.1 35 | ``` 36 | 37 | **f. Install mmdet3d from source code.** 38 | ```shell 39 | git clone https://github.com/open-mmlab/mmdetection3d.git 40 | cd mmdetection3d 41 | git checkout v0.17.1 # Other versions may not be compatible. 42 | python setup.py install 43 | ``` 44 | 45 | **g. Install timm.** 46 | ```shell 47 | pip install timm 48 | ``` 49 | 50 | 51 | **h. Clone BEVFormer.** 52 | ``` 53 | git clone https://github.com/zhiqi-li/BEVFormer.git 54 | ``` 55 | 56 | **i.
Prepare pretrained models.** 57 | ```shell 58 | cd bevformer 59 | mkdir ckpts 60 | 61 | cd ckpts & wget https://github.com/zhiqi-li/storage/releases/download/v1.0/r101_dcn_fcos3d_pretrain.pth 62 | ``` 63 | 64 | note: this pretrained model is the same model used in [detr3d](https://github.com/WangYueFt/detr3d) 65 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/coco_instance.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CocoDataset' 2 | data_root = 'data/coco/' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 8 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 9 | dict(type='RandomFlip', flip_ratio=0.5), 10 | dict(type='Normalize', **img_norm_cfg), 11 | dict(type='Pad', size_divisor=32), 12 | dict(type='DefaultFormatBundle'), 13 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 14 | ] 15 | test_pipeline = [ 16 | dict(type='LoadImageFromFile'), 17 | dict( 18 | type='MultiScaleFlipAug', 19 | img_scale=(1333, 800), 20 | flip=False, 21 | transforms=[ 22 | dict(type='Resize', keep_ratio=True), 23 | dict(type='RandomFlip'), 24 | dict(type='Normalize', **img_norm_cfg), 25 | dict(type='Pad', size_divisor=32), 26 | dict(type='ImageToTensor', keys=['img']), 27 | dict(type='Collect', keys=['img']), 28 | ]) 29 | ] 30 | data = dict( 31 | samples_per_gpu=2, 32 | workers_per_gpu=2, 33 | train=dict( 34 | type=dataset_type, 35 | ann_file=data_root + 'annotations/instances_train2017.json', 36 | img_prefix=data_root + 'train2017/', 37 | pipeline=train_pipeline), 38 | val=dict( 39 | type=dataset_type, 40 | ann_file=data_root + 'annotations/instances_val2017.json', 41 | img_prefix=data_root + 'val2017/', 42 | pipeline=test_pipeline), 43 | test=dict( 44 | type=dataset_type, 45 | ann_file=data_root + 'annotations/instances_val2017.json', 46 | img_prefix=data_root + 'val2017/', 47 | pipeline=test_pipeline)) 48 | evaluation = dict(metric=['bbox', 'segm']) 49 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/apis/train.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------------- 2 | # Copyright (c) OpenMMLab. All rights reserved. 3 | # --------------------------------------------- 4 | # Modified by Zhiqi Li 5 | # --------------------------------------------- 6 | 7 | from .mmdet_train import custom_train_detector 8 | from mmseg.apis import train_segmentor 9 | from mmdet.apis import train_detector 10 | 11 | def custom_train_model(model, 12 | dataset, 13 | cfg, 14 | distributed=False, 15 | validate=False, 16 | timestamp=None, 17 | meta=None): 18 | """A function wrapper for launching model training according to cfg. 19 | 20 | Because we need different eval_hook in runner. Should be deprecated in the 21 | future. 
22 | """ 23 | if cfg.model.type in ['EncoderDecoder3D']: 24 | assert False 25 | else: 26 | custom_train_detector( 27 | model, 28 | dataset, 29 | cfg, 30 | distributed=distributed, 31 | validate=validate, 32 | timestamp=timestamp, 33 | meta=meta) 34 | 35 | 36 | def train_model(model, 37 | dataset, 38 | cfg, 39 | distributed=False, 40 | validate=False, 41 | timestamp=None, 42 | meta=None): 43 | """A function wrapper for launching model training according to cfg. 44 | 45 | Because we need different eval_hook in runner. Should be deprecated in the 46 | future. 47 | """ 48 | if cfg.model.type in ['EncoderDecoder3D']: 49 | train_segmentor( 50 | model, 51 | dataset, 52 | cfg, 53 | distributed=distributed, 54 | validate=validate, 55 | timestamp=timestamp, 56 | meta=meta) 57 | else: 58 | train_detector( 59 | model, 60 | dataset, 61 | cfg, 62 | distributed=distributed, 63 | validate=validate, 64 | timestamp=timestamp, 65 | meta=meta) 66 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/paconv_ssg.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='EncoderDecoder3D', 4 | backbone=dict( 5 | type='PointNet2SASSG', 6 | in_channels=9, # [xyz, rgb, normalized_xyz] 7 | num_points=(1024, 256, 64, 16), 8 | radius=(None, None, None, None), # use kNN instead of ball query 9 | num_samples=(32, 32, 32, 32), 10 | sa_channels=((32, 32, 64), (64, 64, 128), (128, 128, 256), (256, 256, 11 | 512)), 12 | fp_channels=(), 13 | norm_cfg=dict(type='BN2d', momentum=0.1), 14 | sa_cfg=dict( 15 | type='PAConvSAModule', 16 | pool_mod='max', 17 | use_xyz=True, 18 | normalize_xyz=False, 19 | paconv_num_kernels=[16, 16, 16], 20 | paconv_kernel_input='w_neighbor', 21 | scorenet_input='w_neighbor_dist', 22 | scorenet_cfg=dict( 23 | mlp_channels=[16, 16, 16], 24 | score_norm='softmax', 25 | temp_factor=1.0, 26 | last_bn=False))), 27 | decode_head=dict( 28 | type='PAConvHead', 29 | # PAConv model's decoder takes skip connections from beckbone 30 | # different from PointNet++, it also concats input features in the last 31 | # level of decoder, leading to `128 + 6` as the channel number 32 | fp_channels=((768, 256, 256), (384, 256, 256), (320, 256, 128), 33 | (128 + 6, 128, 128, 128)), 34 | channels=128, 35 | dropout_ratio=0.5, 36 | conv_cfg=dict(type='Conv1d'), 37 | norm_cfg=dict(type='BN1d'), 38 | act_cfg=dict(type='ReLU'), 39 | loss_decode=dict( 40 | type='CrossEntropyLoss', 41 | use_sigmoid=False, 42 | class_weight=None, # should be modified with dataset 43 | loss_weight=1.0)), 44 | # correlation loss to regularize PAConv's kernel weights 45 | loss_regularization=dict( 46 | type='PAConvRegularizationLoss', reduction='sum', loss_weight=10.0), 47 | # model training and testing settings 48 | train_cfg=dict(), 49 | test_cfg=dict(mode='slide')) 50 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/nuim_instance.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CocoDataset' 2 | data_root = 'data/nuimages/' 3 | class_names = [ 4 | 'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle', 5 | 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier' 6 | ] 7 | img_norm_cfg = dict( 8 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 9 | train_pipeline = [ 10 | dict(type='LoadImageFromFile'), 11 | dict(type='LoadAnnotations', with_bbox=True, 
with_mask=True), 12 | dict( 13 | type='Resize', 14 | img_scale=[(1280, 720), (1920, 1080)], 15 | multiscale_mode='range', 16 | keep_ratio=True), 17 | dict(type='RandomFlip', flip_ratio=0.5), 18 | dict(type='Normalize', **img_norm_cfg), 19 | dict(type='Pad', size_divisor=32), 20 | dict(type='DefaultFormatBundle'), 21 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 22 | ] 23 | test_pipeline = [ 24 | dict(type='LoadImageFromFile'), 25 | dict( 26 | type='MultiScaleFlipAug', 27 | img_scale=(1600, 900), 28 | flip=False, 29 | transforms=[ 30 | dict(type='Resize', keep_ratio=True), 31 | dict(type='RandomFlip'), 32 | dict(type='Normalize', **img_norm_cfg), 33 | dict(type='Pad', size_divisor=32), 34 | dict(type='ImageToTensor', keys=['img']), 35 | dict(type='Collect', keys=['img']), 36 | ]) 37 | ] 38 | data = dict( 39 | samples_per_gpu=2, 40 | workers_per_gpu=2, 41 | train=dict( 42 | type=dataset_type, 43 | ann_file=data_root + 'annotations/nuimages_v1.0-train.json', 44 | img_prefix=data_root, 45 | classes=class_names, 46 | pipeline=train_pipeline), 47 | val=dict( 48 | type=dataset_type, 49 | ann_file=data_root + 'annotations/nuimages_v1.0-val.json', 50 | img_prefix=data_root, 51 | classes=class_names, 52 | pipeline=test_pipeline), 53 | test=dict( 54 | type=dataset_type, 55 | ann_file=data_root + 'annotations/nuimages_v1.0-val.json', 56 | img_prefix=data_root, 57 | classes=class_names, 58 | pipeline=test_pipeline)) 59 | evaluation = dict(metric=['bbox', 'segm']) 60 | -------------------------------------------------------------------------------- /tools/misc/fuse_conv_bn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import torch 4 | from mmcv.runner import save_checkpoint 5 | from torch import nn as nn 6 | 7 | from mmdet.apis import init_model 8 | 9 | 10 | def fuse_conv_bn(conv, bn): 11 | """During inference, the functionary of batch norm layers is turned off but 12 | only the mean and var alone channels are used, which exposes the chance to 13 | fuse it with the preceding conv layers to save computations and simplify 14 | network structures.""" 15 | conv_w = conv.weight 16 | conv_b = conv.bias if conv.bias is not None else torch.zeros_like( 17 | bn.running_mean) 18 | 19 | factor = bn.weight / torch.sqrt(bn.running_var + bn.eps) 20 | conv.weight = nn.Parameter(conv_w * 21 | factor.reshape([conv.out_channels, 1, 1, 1])) 22 | conv.bias = nn.Parameter((conv_b - bn.running_mean) * factor + bn.bias) 23 | return conv 24 | 25 | 26 | def fuse_module(m): 27 | last_conv = None 28 | last_conv_name = None 29 | 30 | for name, child in m.named_children(): 31 | if isinstance(child, (nn.BatchNorm2d, nn.SyncBatchNorm)): 32 | if last_conv is None: # only fuse BN that is after Conv 33 | continue 34 | fused_conv = fuse_conv_bn(last_conv, child) 35 | m._modules[last_conv_name] = fused_conv 36 | # To reduce changes, set BN as Identity instead of deleting it. 
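# (nn.Identity is a pass-through module: keeping it in place of the fused BatchNorm preserves the parent's child-module names, so no other references need to change.)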
37 | m._modules[name] = nn.Identity() 38 | last_conv = None 39 | elif isinstance(child, nn.Conv2d): 40 | last_conv = child 41 | last_conv_name = name 42 | else: 43 | fuse_module(child) 44 | return m 45 | 46 | 47 | def parse_args(): 48 | parser = argparse.ArgumentParser( 49 | description='fuse Conv and BN layers in a model') 50 | parser.add_argument('config', help='config file path') 51 | parser.add_argument('checkpoint', help='checkpoint file path') 52 | parser.add_argument('out', help='output path of the converted model') 53 | args = parser.parse_args() 54 | return args 55 | 56 | 57 | def main(): 58 | args = parse_args() 59 | # build the model from a config file and a checkpoint file 60 | model = init_model(args.config, args.checkpoint) 61 | # fuse conv and bn layers of the model 62 | fused_model = fuse_module(model) 63 | save_checkpoint(fused_model, args.out) 64 | 65 | 66 | if __name__ == '__main__': 67 | main() 68 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/fcos3d.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='FCOSMono3D', 3 | pretrained='open-mmlab://detectron2/resnet101_caffe', 4 | backbone=dict( 5 | type='ResNet', 6 | depth=101, 7 | num_stages=4, 8 | out_indices=(0, 1, 2, 3), 9 | frozen_stages=1, 10 | norm_cfg=dict(type='BN', requires_grad=False), 11 | norm_eval=True, 12 | style='caffe'), 13 | neck=dict( 14 | type='FPN', 15 | in_channels=[256, 512, 1024, 2048], 16 | out_channels=256, 17 | start_level=1, 18 | add_extra_convs='on_output', 19 | num_outs=5, 20 | relu_before_extra_convs=True), 21 | bbox_head=dict( 22 | type='FCOSMono3DHead', 23 | num_classes=10, 24 | in_channels=256, 25 | stacked_convs=2, 26 | feat_channels=256, 27 | use_direction_classifier=True, 28 | diff_rad_by_sin=True, 29 | pred_attrs=True, 30 | pred_velo=True, 31 | dir_offset=0.7854, # pi/4 32 | strides=[8, 16, 32, 64, 128], 33 | group_reg_dims=(2, 1, 3, 1, 2), # offset, depth, size, rot, velo 34 | cls_branch=(256, ), 35 | reg_branch=( 36 | (256, ), # offset 37 | (256, ), # depth 38 | (256, ), # size 39 | (256, ), # rot 40 | () # velo 41 | ), 42 | dir_branch=(256, ), 43 | attr_branch=(256, ), 44 | loss_cls=dict( 45 | type='FocalLoss', 46 | use_sigmoid=True, 47 | gamma=2.0, 48 | alpha=0.25, 49 | loss_weight=1.0), 50 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), 51 | loss_dir=dict( 52 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 53 | loss_attr=dict( 54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 55 | loss_centerness=dict( 56 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 57 | norm_on_bbox=True, 58 | centerness_on_reg=True, 59 | center_sampling=True, 60 | conv_bias=True, 61 | dcn_on_last_conv=True), 62 | train_cfg=dict( 63 | allowed_border=0, 64 | code_weight=[1.0, 1.0, 0.2, 1.0, 1.0, 1.0, 1.0, 0.05, 0.05], 65 | pos_weight=-1, 66 | debug=False), 67 | test_cfg=dict( 68 | use_rotate_nms=True, 69 | nms_across_levels=False, 70 | nms_pre=1000, 71 | nms_thr=0.8, 72 | score_thr=0.05, 73 | min_bbox_size=0, 74 | max_per_img=200)) 75 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | # BEVFormer: a Cutting-edge Baseline for Camera-based Detection 4 |
5 | 
6 | 
7 | > **BEVFormer: Learning Bird's-Eye-View Representation from Multi-Camera Images via Spatiotemporal Transformers**
8 | > - [Paper](http://arxiv.org/abs/2203.17270) | [Blog](https://www.zhihu.com/question/521842610/answer/2431585901) (in Chinese) | Presentation Slides at CVPR 2022 Workshop (soon) | Live-streaming video on BEV Perception (soon)
9 | 
10 | 
11 | 
12 | # Abstract
13 | In this work, the authors present a new framework termed BEVFormer, which learns unified BEV representations with spatiotemporal transformers to support multiple autonomous driving perception tasks. In a nutshell, BEVFormer exploits both spatial and temporal information by interacting with spatial and temporal space through predefined grid-shaped BEV queries. To aggregate spatial information, the authors design a spatial cross-attention in which each BEV query extracts spatial features from regions of interest across camera views. For temporal information, the authors propose a temporal self-attention that recurrently fuses historical BEV information.
14 | The proposed approach achieves a new state-of-the-art **56.9%** NDS on the nuScenes test set, which is **9.0** points higher than the previous best method and on par with the performance of LiDAR-based baselines.
15 | 
16 | 
17 | # Methods
18 | ![method](figs/arch.png "model arch")
19 | 
20 | 
21 | # Getting Started
22 | - [Installation](docs/install.md)
23 | - [Prepare Dataset](docs/prepare_dataset.md)
24 | - [Run and Eval](docs/getting_started.md)
25 | 
26 | 
27 | # HFai Adaptation
28 | 
29 | Follow the [hf_guide](./hf_guide.md) to adapt the code to Fire-Flyer II.
30 | 
31 | Train BEVFormer with 10 Nodes
32 | ```
33 | hfai python tools/train.py projects/configs/bevformer/bevformer_base.py --work-dir out/node10_train --cfg-options optimizer.lr=0.0008 -- --nodes 10 --priority 40 --name node10_train
34 | ```
35 | 
36 | Eval BEVFormer with 10 Nodes
37 | ```
38 | hfai python tools/test.py projects/configs/bevformer/bevformer_base.py out/node10_train/epoch_24.pth --launcher pytorch --eval bbox -- --nodes 10 --priority 40 --name node10_test
39 | ```
40 | 
41 | # Bibtex
42 | If this work is helpful for your research, please consider citing the following BibTeX entry.
43 | 44 | ``` 45 | @article{li2022bevformer, 46 | title={BEVFormer: Learning Bird’s-Eye-View Representation from Multi-Camera Images via Spatiotemporal Transformers}, 47 | author={Li, Zhiqi and Wang, Wenhai and Li, Hongyang and Xie, Enze and Sima, Chonghao and Lu, Tong and Qiao, Yu and Dai, Jifeng} 48 | journal={arXiv preprint arXiv:2203.17270}, 49 | year={2022} 50 | } 51 | ``` 52 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/votenet.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='VoteNet', 3 | backbone=dict( 4 | type='PointNet2SASSG', 5 | in_channels=4, 6 | num_points=(2048, 1024, 512, 256), 7 | radius=(0.2, 0.4, 0.8, 1.2), 8 | num_samples=(64, 32, 16, 16), 9 | sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256), 10 | (128, 128, 256)), 11 | fp_channels=((256, 256), (256, 256)), 12 | norm_cfg=dict(type='BN2d'), 13 | sa_cfg=dict( 14 | type='PointSAModule', 15 | pool_mod='max', 16 | use_xyz=True, 17 | normalize_xyz=True)), 18 | bbox_head=dict( 19 | type='VoteHead', 20 | vote_module_cfg=dict( 21 | in_channels=256, 22 | vote_per_seed=1, 23 | gt_per_seed=3, 24 | conv_channels=(256, 256), 25 | conv_cfg=dict(type='Conv1d'), 26 | norm_cfg=dict(type='BN1d'), 27 | norm_feats=True, 28 | vote_loss=dict( 29 | type='ChamferDistance', 30 | mode='l1', 31 | reduction='none', 32 | loss_dst_weight=10.0)), 33 | vote_aggregation_cfg=dict( 34 | type='PointSAModule', 35 | num_point=256, 36 | radius=0.3, 37 | num_sample=16, 38 | mlp_channels=[256, 128, 128, 128], 39 | use_xyz=True, 40 | normalize_xyz=True), 41 | pred_layer_cfg=dict( 42 | in_channels=128, shared_conv_channels=(128, 128), bias=True), 43 | conv_cfg=dict(type='Conv1d'), 44 | norm_cfg=dict(type='BN1d'), 45 | objectness_loss=dict( 46 | type='CrossEntropyLoss', 47 | class_weight=[0.2, 0.8], 48 | reduction='sum', 49 | loss_weight=5.0), 50 | center_loss=dict( 51 | type='ChamferDistance', 52 | mode='l2', 53 | reduction='sum', 54 | loss_src_weight=10.0, 55 | loss_dst_weight=10.0), 56 | dir_class_loss=dict( 57 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 58 | dir_res_loss=dict( 59 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0), 60 | size_class_loss=dict( 61 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 62 | size_res_loss=dict( 63 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0 / 3.0), 64 | semantic_loss=dict( 65 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)), 66 | # model training and testing settings 67 | train_cfg=dict( 68 | pos_distance_thr=0.3, neg_distance_thr=0.6, sample_mod='vote'), 69 | test_cfg=dict( 70 | sample_mod='seed', 71 | nms_thr=0.25, 72 | score_thr=0.05, 73 | per_class_proposal=True)) 74 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/groupfree3d.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='GroupFree3DNet', 3 | backbone=dict( 4 | type='PointNet2SASSG', 5 | in_channels=3, 6 | num_points=(2048, 1024, 512, 256), 7 | radius=(0.2, 0.4, 0.8, 1.2), 8 | num_samples=(64, 32, 16, 16), 9 | sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256), 10 | (128, 128, 256)), 11 | fp_channels=((256, 256), (256, 288)), 12 | norm_cfg=dict(type='BN2d'), 13 | sa_cfg=dict( 14 | type='PointSAModule', 15 | pool_mod='max', 16 | use_xyz=True, 17 | normalize_xyz=True)), 18 | bbox_head=dict( 19 | type='GroupFree3DHead', 20 | 
in_channels=288, 21 | num_decoder_layers=6, 22 | num_proposal=256, 23 | transformerlayers=dict( 24 | type='BaseTransformerLayer', 25 | attn_cfgs=dict( 26 | type='GroupFree3DMHA', 27 | embed_dims=288, 28 | num_heads=8, 29 | attn_drop=0.1, 30 | dropout_layer=dict(type='Dropout', drop_prob=0.1)), 31 | ffn_cfgs=dict( 32 | embed_dims=288, 33 | feedforward_channels=2048, 34 | ffn_drop=0.1, 35 | act_cfg=dict(type='ReLU', inplace=True)), 36 | operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 'ffn', 37 | 'norm')), 38 | pred_layer_cfg=dict( 39 | in_channels=288, shared_conv_channels=(288, 288), bias=True), 40 | sampling_objectness_loss=dict( 41 | type='FocalLoss', 42 | use_sigmoid=True, 43 | gamma=2.0, 44 | alpha=0.25, 45 | loss_weight=8.0), 46 | objectness_loss=dict( 47 | type='FocalLoss', 48 | use_sigmoid=True, 49 | gamma=2.0, 50 | alpha=0.25, 51 | loss_weight=1.0), 52 | center_loss=dict( 53 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0), 54 | dir_class_loss=dict( 55 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 56 | dir_res_loss=dict( 57 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0), 58 | size_class_loss=dict( 59 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 60 | size_res_loss=dict( 61 | type='SmoothL1Loss', beta=1.0, reduction='sum', loss_weight=10.0), 62 | semantic_loss=dict( 63 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)), 64 | # model training and testing settings 65 | train_cfg=dict(sample_mod='kps'), 66 | test_cfg=dict( 67 | sample_mod='kps', 68 | nms_thr=0.25, 69 | score_thr=0.0, 70 | per_class_proposal=True, 71 | prediction_stages='last')) 72 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/hv_second_secfpn_kitti.py: -------------------------------------------------------------------------------- 1 | voxel_size = [0.05, 0.05, 0.1] 2 | 3 | model = dict( 4 | type='VoxelNet', 5 | voxel_layer=dict( 6 | max_num_points=5, 7 | point_cloud_range=[0, -40, -3, 70.4, 40, 1], 8 | voxel_size=voxel_size, 9 | max_voxels=(16000, 40000)), 10 | voxel_encoder=dict(type='HardSimpleVFE'), 11 | middle_encoder=dict( 12 | type='SparseEncoder', 13 | in_channels=4, 14 | sparse_shape=[41, 1600, 1408], 15 | order=('conv', 'norm', 'act')), 16 | backbone=dict( 17 | type='SECOND', 18 | in_channels=256, 19 | layer_nums=[5, 5], 20 | layer_strides=[1, 2], 21 | out_channels=[128, 256]), 22 | neck=dict( 23 | type='SECONDFPN', 24 | in_channels=[128, 256], 25 | upsample_strides=[1, 2], 26 | out_channels=[256, 256]), 27 | bbox_head=dict( 28 | type='Anchor3DHead', 29 | num_classes=3, 30 | in_channels=512, 31 | feat_channels=512, 32 | use_direction_classifier=True, 33 | anchor_generator=dict( 34 | type='Anchor3DRangeGenerator', 35 | ranges=[ 36 | [0, -40.0, -0.6, 70.4, 40.0, -0.6], 37 | [0, -40.0, -0.6, 70.4, 40.0, -0.6], 38 | [0, -40.0, -1.78, 70.4, 40.0, -1.78], 39 | ], 40 | sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]], 41 | rotations=[0, 1.57], 42 | reshape_out=False), 43 | diff_rad_by_sin=True, 44 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'), 45 | loss_cls=dict( 46 | type='FocalLoss', 47 | use_sigmoid=True, 48 | gamma=2.0, 49 | alpha=0.25, 50 | loss_weight=1.0), 51 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0), 52 | loss_dir=dict( 53 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 54 | # model training and testing settings 55 | train_cfg=dict( 56 | assigner=[ 57 | dict( # for Pedestrian 58 | type='MaxIoUAssigner', 59 | 
iou_calculator=dict(type='BboxOverlapsNearest3D'), 60 | pos_iou_thr=0.35, 61 | neg_iou_thr=0.2, 62 | min_pos_iou=0.2, 63 | ignore_iof_thr=-1), 64 | dict( # for Cyclist 65 | type='MaxIoUAssigner', 66 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 67 | pos_iou_thr=0.35, 68 | neg_iou_thr=0.2, 69 | min_pos_iou=0.2, 70 | ignore_iof_thr=-1), 71 | dict( # for Car 72 | type='MaxIoUAssigner', 73 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 74 | pos_iou_thr=0.6, 75 | neg_iou_thr=0.45, 76 | min_pos_iou=0.45, 77 | ignore_iof_thr=-1), 78 | ], 79 | allowed_border=0, 80 | pos_weight=-1, 81 | debug=False), 82 | test_cfg=dict( 83 | use_rotate_nms=True, 84 | nms_across_levels=False, 85 | nms_thr=0.01, 86 | score_thr=0.1, 87 | min_bbox_size=0, 88 | nms_pre=100, 89 | max_num=50)) 90 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/3dssd.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='SSD3DNet', 3 | backbone=dict( 4 | type='PointNet2SAMSG', 5 | in_channels=4, 6 | num_points=(4096, 512, (256, 256)), 7 | radii=((0.2, 0.4, 0.8), (0.4, 0.8, 1.6), (1.6, 3.2, 4.8)), 8 | num_samples=((32, 32, 64), (32, 32, 64), (32, 32, 32)), 9 | sa_channels=(((16, 16, 32), (16, 16, 32), (32, 32, 64)), 10 | ((64, 64, 128), (64, 64, 128), (64, 96, 128)), 11 | ((128, 128, 256), (128, 192, 256), (128, 256, 256))), 12 | aggregation_channels=(64, 128, 256), 13 | fps_mods=(('D-FPS'), ('FS'), ('F-FPS', 'D-FPS')), 14 | fps_sample_range_lists=((-1), (-1), (512, -1)), 15 | norm_cfg=dict(type='BN2d', eps=1e-3, momentum=0.1), 16 | sa_cfg=dict( 17 | type='PointSAModuleMSG', 18 | pool_mod='max', 19 | use_xyz=True, 20 | normalize_xyz=False)), 21 | bbox_head=dict( 22 | type='SSD3DHead', 23 | in_channels=256, 24 | vote_module_cfg=dict( 25 | in_channels=256, 26 | num_points=256, 27 | gt_per_seed=1, 28 | conv_channels=(128, ), 29 | conv_cfg=dict(type='Conv1d'), 30 | norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1), 31 | with_res_feat=False, 32 | vote_xyz_range=(3.0, 3.0, 2.0)), 33 | vote_aggregation_cfg=dict( 34 | type='PointSAModuleMSG', 35 | num_point=256, 36 | radii=(4.8, 6.4), 37 | sample_nums=(16, 32), 38 | mlp_channels=((256, 256, 256, 512), (256, 256, 512, 1024)), 39 | norm_cfg=dict(type='BN2d', eps=1e-3, momentum=0.1), 40 | use_xyz=True, 41 | normalize_xyz=False, 42 | bias=True), 43 | pred_layer_cfg=dict( 44 | in_channels=1536, 45 | shared_conv_channels=(512, 128), 46 | cls_conv_channels=(128, ), 47 | reg_conv_channels=(128, ), 48 | conv_cfg=dict(type='Conv1d'), 49 | norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1), 50 | bias=True), 51 | conv_cfg=dict(type='Conv1d'), 52 | norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1), 53 | objectness_loss=dict( 54 | type='CrossEntropyLoss', 55 | use_sigmoid=True, 56 | reduction='sum', 57 | loss_weight=1.0), 58 | center_loss=dict( 59 | type='SmoothL1Loss', reduction='sum', loss_weight=1.0), 60 | dir_class_loss=dict( 61 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 62 | dir_res_loss=dict( 63 | type='SmoothL1Loss', reduction='sum', loss_weight=1.0), 64 | size_res_loss=dict( 65 | type='SmoothL1Loss', reduction='sum', loss_weight=1.0), 66 | corner_loss=dict( 67 | type='SmoothL1Loss', reduction='sum', loss_weight=1.0), 68 | vote_loss=dict(type='SmoothL1Loss', reduction='sum', loss_weight=1.0)), 69 | # model training and testing settings 70 | train_cfg=dict( 71 | sample_mod='spec', pos_distance_thr=10.0, expand_dims_length=0.05), 72 | test_cfg=dict( 
73 | nms_cfg=dict(type='nms', iou_thr=0.1), 74 | sample_mod='spec', 75 | score_thr=0.0, 76 | per_class_proposal=True, 77 | max_output_num=100)) 78 | -------------------------------------------------------------------------------- /tools/model_converters/regnet2mmdet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import torch 4 | from collections import OrderedDict 5 | 6 | 7 | def convert_stem(model_key, model_weight, state_dict, converted_names): 8 | new_key = model_key.replace('stem.conv', 'conv1') 9 | new_key = new_key.replace('stem.bn', 'bn1') 10 | state_dict[new_key] = model_weight 11 | converted_names.add(model_key) 12 | print(f'Convert {model_key} to {new_key}') 13 | 14 | 15 | def convert_head(model_key, model_weight, state_dict, converted_names): 16 | new_key = model_key.replace('head.fc', 'fc') 17 | state_dict[new_key] = model_weight 18 | converted_names.add(model_key) 19 | print(f'Convert {model_key} to {new_key}') 20 | 21 | 22 | def convert_reslayer(model_key, model_weight, state_dict, converted_names): 23 | split_keys = model_key.split('.') 24 | layer, block, module = split_keys[:3] 25 | block_id = int(block[1:]) 26 | layer_name = f'layer{int(layer[1:])}' 27 | block_name = f'{block_id - 1}' 28 | 29 | if block_id == 1 and module == 'bn': 30 | new_key = f'{layer_name}.{block_name}.downsample.1.{split_keys[-1]}' 31 | elif block_id == 1 and module == 'proj': 32 | new_key = f'{layer_name}.{block_name}.downsample.0.{split_keys[-1]}' 33 | elif module == 'f': 34 | if split_keys[3] == 'a_bn': 35 | module_name = 'bn1' 36 | elif split_keys[3] == 'b_bn': 37 | module_name = 'bn2' 38 | elif split_keys[3] == 'c_bn': 39 | module_name = 'bn3' 40 | elif split_keys[3] == 'a': 41 | module_name = 'conv1' 42 | elif split_keys[3] == 'b': 43 | module_name = 'conv2' 44 | elif split_keys[3] == 'c': 45 | module_name = 'conv3' 46 | new_key = f'{layer_name}.{block_name}.{module_name}.{split_keys[-1]}' 47 | else: 48 | raise ValueError(f'Unsupported conversion of key {model_key}') 49 | print(f'Convert {model_key} to {new_key}') 50 | state_dict[new_key] = model_weight 51 | converted_names.add(model_key) 52 | 53 | 54 | def convert(src, dst): 55 | """Convert keys in pycls pretrained RegNet models to mmdet style.""" 56 | # load caffe model 57 | regnet_model = torch.load(src) 58 | blobs = regnet_model['model_state'] 59 | # convert to pytorch style 60 | state_dict = OrderedDict() 61 | converted_names = set() 62 | for key, weight in blobs.items(): 63 | if 'stem' in key: 64 | convert_stem(key, weight, state_dict, converted_names) 65 | elif 'head' in key: 66 | convert_head(key, weight, state_dict, converted_names) 67 | elif key.startswith('s'): 68 | convert_reslayer(key, weight, state_dict, converted_names) 69 | 70 | # check if all layers are converted 71 | for key in blobs: 72 | if key not in converted_names: 73 | print(f'not converted: {key}') 74 | # save checkpoint 75 | checkpoint = dict() 76 | checkpoint['state_dict'] = state_dict 77 | torch.save(checkpoint, dst) 78 | 79 | 80 | def main(): 81 | parser = argparse.ArgumentParser(description='Convert model keys') 82 | parser.add_argument('src', help='src detectron model path') 83 | parser.add_argument('dst', help='save path') 84 | args = parser.parse_args() 85 | convert(args.src, args.dst) 86 | 87 | 88 | if __name__ == '__main__': 89 | main() 90 | -------------------------------------------------------------------------------- 
/projects/configs/_base_/models/hv_pointpillars_secfpn_kitti.py: -------------------------------------------------------------------------------- 1 | voxel_size = [0.16, 0.16, 4] 2 | 3 | model = dict( 4 | type='VoxelNet', 5 | voxel_layer=dict( 6 | max_num_points=32, # max_points_per_voxel 7 | point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1], 8 | voxel_size=voxel_size, 9 | max_voxels=(16000, 40000) # (training, testing) max_voxels 10 | ), 11 | voxel_encoder=dict( 12 | type='PillarFeatureNet', 13 | in_channels=4, 14 | feat_channels=[64], 15 | with_distance=False, 16 | voxel_size=voxel_size, 17 | point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1]), 18 | middle_encoder=dict( 19 | type='PointPillarsScatter', in_channels=64, output_shape=[496, 432]), 20 | backbone=dict( 21 | type='SECOND', 22 | in_channels=64, 23 | layer_nums=[3, 5, 5], 24 | layer_strides=[2, 2, 2], 25 | out_channels=[64, 128, 256]), 26 | neck=dict( 27 | type='SECONDFPN', 28 | in_channels=[64, 128, 256], 29 | upsample_strides=[1, 2, 4], 30 | out_channels=[128, 128, 128]), 31 | bbox_head=dict( 32 | type='Anchor3DHead', 33 | num_classes=3, 34 | in_channels=384, 35 | feat_channels=384, 36 | use_direction_classifier=True, 37 | anchor_generator=dict( 38 | type='Anchor3DRangeGenerator', 39 | ranges=[ 40 | [0, -39.68, -0.6, 70.4, 39.68, -0.6], 41 | [0, -39.68, -0.6, 70.4, 39.68, -0.6], 42 | [0, -39.68, -1.78, 70.4, 39.68, -1.78], 43 | ], 44 | sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]], 45 | rotations=[0, 1.57], 46 | reshape_out=False), 47 | diff_rad_by_sin=True, 48 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'), 49 | loss_cls=dict( 50 | type='FocalLoss', 51 | use_sigmoid=True, 52 | gamma=2.0, 53 | alpha=0.25, 54 | loss_weight=1.0), 55 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0), 56 | loss_dir=dict( 57 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 58 | # model training and testing settings 59 | train_cfg=dict( 60 | assigner=[ 61 | dict( # for Pedestrian 62 | type='MaxIoUAssigner', 63 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 64 | pos_iou_thr=0.5, 65 | neg_iou_thr=0.35, 66 | min_pos_iou=0.35, 67 | ignore_iof_thr=-1), 68 | dict( # for Cyclist 69 | type='MaxIoUAssigner', 70 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 71 | pos_iou_thr=0.5, 72 | neg_iou_thr=0.35, 73 | min_pos_iou=0.35, 74 | ignore_iof_thr=-1), 75 | dict( # for Car 76 | type='MaxIoUAssigner', 77 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 78 | pos_iou_thr=0.6, 79 | neg_iou_thr=0.45, 80 | min_pos_iou=0.45, 81 | ignore_iof_thr=-1), 82 | ], 83 | allowed_border=0, 84 | pos_weight=-1, 85 | debug=False), 86 | test_cfg=dict( 87 | use_rotate_nms=True, 88 | nms_across_levels=False, 89 | nms_thr=0.01, 90 | score_thr=0.1, 91 | min_bbox_size=0, 92 | nms_pre=100, 93 | max_num=50)) 94 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/centerpoint_02pillar_second_secfpn_nus.py: -------------------------------------------------------------------------------- 1 | voxel_size = [0.2, 0.2, 8] 2 | model = dict( 3 | type='CenterPoint', 4 | pts_voxel_layer=dict( 5 | max_num_points=20, voxel_size=voxel_size, max_voxels=(30000, 40000)), 6 | pts_voxel_encoder=dict( 7 | type='PillarFeatureNet', 8 | in_channels=5, 9 | feat_channels=[64], 10 | with_distance=False, 11 | voxel_size=(0.2, 0.2, 8), 12 | norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01), 13 | legacy=False), 14 | pts_middle_encoder=dict( 15 | type='PointPillarsScatter', 
in_channels=64, output_shape=(512, 512)), 16 | pts_backbone=dict( 17 | type='SECOND', 18 | in_channels=64, 19 | out_channels=[64, 128, 256], 20 | layer_nums=[3, 5, 5], 21 | layer_strides=[2, 2, 2], 22 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), 23 | conv_cfg=dict(type='Conv2d', bias=False)), 24 | pts_neck=dict( 25 | type='SECONDFPN', 26 | in_channels=[64, 128, 256], 27 | out_channels=[128, 128, 128], 28 | upsample_strides=[0.5, 1, 2], 29 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), 30 | upsample_cfg=dict(type='deconv', bias=False), 31 | use_conv_for_no_stride=True), 32 | pts_bbox_head=dict( 33 | type='CenterHead', 34 | in_channels=sum([128, 128, 128]), 35 | tasks=[ 36 | dict(num_class=1, class_names=['car']), 37 | dict(num_class=2, class_names=['truck', 'construction_vehicle']), 38 | dict(num_class=2, class_names=['bus', 'trailer']), 39 | dict(num_class=1, class_names=['barrier']), 40 | dict(num_class=2, class_names=['motorcycle', 'bicycle']), 41 | dict(num_class=2, class_names=['pedestrian', 'traffic_cone']), 42 | ], 43 | common_heads=dict( 44 | reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)), 45 | share_conv_channel=64, 46 | bbox_coder=dict( 47 | type='CenterPointBBoxCoder', 48 | post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], 49 | max_num=500, 50 | score_threshold=0.1, 51 | out_size_factor=4, 52 | voxel_size=voxel_size[:2], 53 | code_size=9), 54 | separate_head=dict( 55 | type='SeparateHead', init_bias=-2.19, final_kernel=3), 56 | loss_cls=dict(type='GaussianFocalLoss', reduction='mean'), 57 | loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25), 58 | norm_bbox=True), 59 | # model training and testing settings 60 | train_cfg=dict( 61 | pts=dict( 62 | grid_size=[512, 512, 1], 63 | voxel_size=voxel_size, 64 | out_size_factor=4, 65 | dense_reg=1, 66 | gaussian_overlap=0.1, 67 | max_objs=500, 68 | min_radius=2, 69 | code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2])), 70 | test_cfg=dict( 71 | pts=dict( 72 | post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], 73 | max_per_img=500, 74 | max_pool_nms=False, 75 | min_radius=[4, 12, 10, 1, 0.85, 0.175], 76 | score_threshold=0.1, 77 | pc_range=[-51.2, -51.2], 78 | out_size_factor=4, 79 | voxel_size=voxel_size[:2], 80 | nms_type='rotate', 81 | pre_max_size=1000, 82 | post_max_size=83, 83 | nms_thr=0.2))) 84 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/centerpoint_01voxel_second_secfpn_nus.py: -------------------------------------------------------------------------------- 1 | voxel_size = [0.1, 0.1, 0.2] 2 | model = dict( 3 | type='CenterPoint', 4 | pts_voxel_layer=dict( 5 | max_num_points=10, voxel_size=voxel_size, max_voxels=(90000, 120000)), 6 | pts_voxel_encoder=dict(type='HardSimpleVFE', num_features=5), 7 | pts_middle_encoder=dict( 8 | type='SparseEncoder', 9 | in_channels=5, 10 | sparse_shape=[41, 1024, 1024], 11 | output_channels=128, 12 | order=('conv', 'norm', 'act'), 13 | encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128, 14 | 128)), 15 | encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, [0, 1, 1]), (0, 0)), 16 | block_type='basicblock'), 17 | pts_backbone=dict( 18 | type='SECOND', 19 | in_channels=256, 20 | out_channels=[128, 256], 21 | layer_nums=[5, 5], 22 | layer_strides=[1, 2], 23 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), 24 | conv_cfg=dict(type='Conv2d', bias=False)), 25 | pts_neck=dict( 26 | type='SECONDFPN', 27 | in_channels=[128, 256], 28 | 
out_channels=[256, 256], 29 | upsample_strides=[1, 2], 30 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), 31 | upsample_cfg=dict(type='deconv', bias=False), 32 | use_conv_for_no_stride=True), 33 | pts_bbox_head=dict( 34 | type='CenterHead', 35 | in_channels=sum([256, 256]), 36 | tasks=[ 37 | dict(num_class=1, class_names=['car']), 38 | dict(num_class=2, class_names=['truck', 'construction_vehicle']), 39 | dict(num_class=2, class_names=['bus', 'trailer']), 40 | dict(num_class=1, class_names=['barrier']), 41 | dict(num_class=2, class_names=['motorcycle', 'bicycle']), 42 | dict(num_class=2, class_names=['pedestrian', 'traffic_cone']), 43 | ], 44 | common_heads=dict( 45 | reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)), 46 | share_conv_channel=64, 47 | bbox_coder=dict( 48 | type='CenterPointBBoxCoder', 49 | post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], 50 | max_num=500, 51 | score_threshold=0.1, 52 | out_size_factor=8, 53 | voxel_size=voxel_size[:2], 54 | code_size=9), 55 | separate_head=dict( 56 | type='SeparateHead', init_bias=-2.19, final_kernel=3), 57 | loss_cls=dict(type='GaussianFocalLoss', reduction='mean'), 58 | loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25), 59 | norm_bbox=True), 60 | # model training and testing settings 61 | train_cfg=dict( 62 | pts=dict( 63 | grid_size=[1024, 1024, 40], 64 | voxel_size=voxel_size, 65 | out_size_factor=8, 66 | dense_reg=1, 67 | gaussian_overlap=0.1, 68 | max_objs=500, 69 | min_radius=2, 70 | code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2])), 71 | test_cfg=dict( 72 | pts=dict( 73 | post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], 74 | max_per_img=500, 75 | max_pool_nms=False, 76 | min_radius=[4, 12, 10, 1, 0.85, 0.175], 77 | score_threshold=0.1, 78 | out_size_factor=8, 79 | voxel_size=voxel_size[:2], 80 | nms_type='rotate', 81 | pre_max_size=1000, 82 | post_max_size=83, 83 | nms_thr=0.2))) 84 | -------------------------------------------------------------------------------- /tools/analysis_tools/benchmark.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
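# Example invocation (the config and checkpoint paths below are illustrative;
# substitute your own):
#     python tools/analysis_tools/benchmark.py \
#         projects/configs/bevformer/bevformer_base.py \
#         --checkpoint out/node10_train/epoch_24.pth
# If --checkpoint is omitted, the model is benchmarked with its freshly
# initialized weights, which still gives a valid speed measurement.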
2 | import argparse 3 | import time 4 | import torch 5 | from mmcv import Config 6 | from mmcv.parallel import MMDataParallel 7 | from mmcv.runner import load_checkpoint, wrap_fp16_model 8 | import sys 9 | sys.path.append('.') 10 | from projects.mmdet3d_plugin.datasets.builder import build_dataloader 11 | from projects.mmdet3d_plugin.datasets import custom_build_dataset 12 | # from mmdet3d.datasets import build_dataloader, build_dataset 13 | from mmdet3d.models import build_detector 14 | #from tools.misc.fuse_conv_bn import fuse_module 15 | 16 | 17 | def parse_args(): 18 | parser = argparse.ArgumentParser(description='MMDet benchmark a model') 19 | parser.add_argument('config', help='test config file path') 20 | parser.add_argument('--checkpoint', default=None, help='checkpoint file') 21 | parser.add_argument('--samples', default=2000, help='samples to benchmark') 22 | parser.add_argument( 23 | '--log-interval', default=50, help='interval of logging') 24 | parser.add_argument( 25 | '--fuse-conv-bn', 26 | action='store_true', 27 | help='Whether to fuse conv and bn, this will slightly increase' 28 | 'the inference speed') 29 | args = parser.parse_args() 30 | return args 31 | 32 | 33 | def main(): 34 | args = parse_args() 35 | 36 | cfg = Config.fromfile(args.config) 37 | # set cudnn_benchmark 38 | if cfg.get('cudnn_benchmark', False): 39 | torch.backends.cudnn.benchmark = True 40 | cfg.model.pretrained = None 41 | cfg.data.test.test_mode = True 42 | 43 | # build the dataloader 44 | # TODO: support multiple images per gpu (only minor changes are needed) 45 | print(cfg.data.test) 46 | dataset = custom_build_dataset(cfg.data.test) 47 | data_loader = build_dataloader( 48 | dataset, 49 | samples_per_gpu=1, 50 | workers_per_gpu=cfg.data.workers_per_gpu, 51 | dist=False, 52 | shuffle=False) 53 | 54 | # build the model and load checkpoint 55 | cfg.model.train_cfg = None 56 | model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg')) 57 | fp16_cfg = cfg.get('fp16', None) 58 | if fp16_cfg is not None: 59 | wrap_fp16_model(model) 60 | if args.checkpoint is not None: 61 | load_checkpoint(model, args.checkpoint, map_location='cpu') 62 | #if args.fuse_conv_bn: 63 | # model = fuse_module(model) 64 | 65 | model = MMDataParallel(model, device_ids=[0]) 66 | 67 | model.eval() 68 | 69 | # the first several iterations may be very slow so skip them 70 | num_warmup = 5 71 | pure_inf_time = 0 72 | 73 | # benchmark with several samples and take the average 74 | for i, data in enumerate(data_loader): 75 | torch.cuda.synchronize() 76 | start_time = time.perf_counter() 77 | with torch.no_grad(): 78 | model(return_loss=False, rescale=True, **data) 79 | 80 | torch.cuda.synchronize() 81 | elapsed = time.perf_counter() - start_time 82 | 83 | if i >= num_warmup: 84 | pure_inf_time += elapsed 85 | if (i + 1) % args.log_interval == 0: 86 | fps = (i + 1 - num_warmup) / pure_inf_time 87 | print(f'Done image [{i + 1:<3}/ {args.samples}], ' 88 | f'fps: {fps:.1f} img / s') 89 | 90 | if (i + 1) == args.samples: 91 | pure_inf_time += elapsed 92 | fps = (i + 1 - num_warmup) / pure_inf_time 93 | print(f'Overall fps: {fps:.1f} img / s') 94 | break 95 | 96 | 97 | if __name__ == '__main__': 98 | main() 99 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/hv_pointpillars_fpn_nus.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | # Voxel size for voxel encoder 3 | # Usually voxel size is changed consistently with 
the point cloud range 4 | # If point cloud range is modified, do remember to change all related 5 | # keys in the config. 6 | voxel_size = [0.25, 0.25, 8] 7 | model = dict( 8 | type='MVXFasterRCNN', 9 | pts_voxel_layer=dict( 10 | max_num_points=64, 11 | point_cloud_range=[-50, -50, -5, 50, 50, 3], 12 | voxel_size=voxel_size, 13 | max_voxels=(30000, 40000)), 14 | pts_voxel_encoder=dict( 15 | type='HardVFE', 16 | in_channels=4, 17 | feat_channels=[64, 64], 18 | with_distance=False, 19 | voxel_size=voxel_size, 20 | with_cluster_center=True, 21 | with_voxel_center=True, 22 | point_cloud_range=[-50, -50, -5, 50, 50, 3], 23 | norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)), 24 | pts_middle_encoder=dict( 25 | type='PointPillarsScatter', in_channels=64, output_shape=[400, 400]), 26 | pts_backbone=dict( 27 | type='SECOND', 28 | in_channels=64, 29 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 30 | layer_nums=[3, 5, 5], 31 | layer_strides=[2, 2, 2], 32 | out_channels=[64, 128, 256]), 33 | pts_neck=dict( 34 | type='FPN', 35 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 36 | act_cfg=dict(type='ReLU'), 37 | in_channels=[64, 128, 256], 38 | out_channels=256, 39 | start_level=0, 40 | num_outs=3), 41 | pts_bbox_head=dict( 42 | type='Anchor3DHead', 43 | num_classes=10, 44 | in_channels=256, 45 | feat_channels=256, 46 | use_direction_classifier=True, 47 | anchor_generator=dict( 48 | type='AlignedAnchor3DRangeGenerator', 49 | ranges=[[-50, -50, -1.8, 50, 50, -1.8]], 50 | scales=[1, 2, 4], 51 | sizes=[ 52 | [0.8660, 2.5981, 1.], # 1.5/sqrt(3) 53 | [0.5774, 1.7321, 1.], # 1/sqrt(3) 54 | [1., 1., 1.], 55 | [0.4, 0.4, 1], 56 | ], 57 | custom_values=[0, 0], 58 | rotations=[0, 1.57], 59 | reshape_out=True), 60 | assigner_per_size=False, 61 | diff_rad_by_sin=True, 62 | dir_offset=0.7854, # pi/4 63 | dir_limit_offset=0, 64 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9), 65 | loss_cls=dict( 66 | type='FocalLoss', 67 | use_sigmoid=True, 68 | gamma=2.0, 69 | alpha=0.25, 70 | loss_weight=1.0), 71 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), 72 | loss_dir=dict( 73 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 74 | # model training and testing settings 75 | train_cfg=dict( 76 | pts=dict( 77 | assigner=dict( 78 | type='MaxIoUAssigner', 79 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 80 | pos_iou_thr=0.6, 81 | neg_iou_thr=0.3, 82 | min_pos_iou=0.3, 83 | ignore_iof_thr=-1), 84 | allowed_border=0, 85 | code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2], 86 | pos_weight=-1, 87 | debug=False)), 88 | test_cfg=dict( 89 | pts=dict( 90 | use_rotate_nms=True, 91 | nms_across_levels=False, 92 | nms_pre=1000, 93 | nms_thr=0.2, 94 | score_thr=0.05, 95 | min_bbox_size=0, 96 | max_num=500))) 97 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/nus-mono3d.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CustomNuScenesMonoDataset' 2 | data_root = 'data/nuscenes/' 3 | class_names = [ 4 | 'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle', 5 | 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier' 6 | ] 7 | # Input modality for nuScenes dataset, this is consistent with the submission 8 | # format which requires the information in input_modality. 
9 | input_modality = dict( 10 | use_lidar=False, 11 | use_camera=True, 12 | use_radar=False, 13 | use_map=False, 14 | use_external=False) 15 | img_norm_cfg = dict( 16 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 17 | train_pipeline = [ 18 | dict(type='LoadImageFromFileMono3D'), 19 | dict( 20 | type='LoadAnnotations3D', 21 | with_bbox=True, 22 | with_label=True, 23 | with_attr_label=True, 24 | with_bbox_3d=True, 25 | with_label_3d=True, 26 | with_bbox_depth=True), 27 | dict(type='Resize', img_scale=(1600, 900), keep_ratio=True), 28 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='Pad', size_divisor=32), 31 | dict(type='DefaultFormatBundle3D', class_names=class_names), 32 | dict( 33 | type='Collect3D', 34 | keys=[ 35 | 'img', 'gt_bboxes', 'gt_labels', 'attr_labels', 'gt_bboxes_3d', 36 | 'gt_labels_3d', 'centers2d', 'depths' 37 | ]), 38 | ] 39 | test_pipeline = [ 40 | dict(type='LoadImageFromFileMono3D'), 41 | dict( 42 | type='MultiScaleFlipAug', 43 | scale_factor=1.0, 44 | flip=False, 45 | transforms=[ 46 | dict(type='RandomFlip3D'), 47 | dict(type='Normalize', **img_norm_cfg), 48 | dict(type='Pad', size_divisor=32), 49 | dict( 50 | type='DefaultFormatBundle3D', 51 | class_names=class_names, 52 | with_label=False), 53 | dict(type='Collect3D', keys=['img']), 54 | ]) 55 | ] 56 | # construct a pipeline for data and gt loading in show function 57 | # please keep its loading function consistent with test_pipeline (e.g. client) 58 | eval_pipeline = [ 59 | dict(type='LoadImageFromFileMono3D'), 60 | dict( 61 | type='DefaultFormatBundle3D', 62 | class_names=class_names, 63 | with_label=False), 64 | dict(type='Collect3D', keys=['img']) 65 | ] 66 | 67 | data = dict( 68 | samples_per_gpu=2, 69 | workers_per_gpu=2, 70 | train=dict( 71 | type=dataset_type, 72 | data_root=data_root, 73 | ann_file=data_root + 'nuscenes_infos_train_mono3d.coco.json', 74 | img_prefix=data_root, 75 | classes=class_names, 76 | pipeline=train_pipeline, 77 | modality=input_modality, 78 | test_mode=False, 79 | box_type_3d='Camera'), 80 | val=dict( 81 | type=dataset_type, 82 | data_root=data_root, 83 | ann_file=data_root + 'nuscenes_infos_val_mono3d.coco.json', 84 | img_prefix=data_root, 85 | classes=class_names, 86 | pipeline=test_pipeline, 87 | modality=input_modality, 88 | test_mode=True, 89 | box_type_3d='Camera'), 90 | test=dict( 91 | type=dataset_type, 92 | data_root=data_root, 93 | ann_file=data_root + 'nuscenes_infos_val_mono3d.coco.json', 94 | img_prefix=data_root, 95 | classes=class_names, 96 | pipeline=test_pipeline, 97 | modality=input_modality, 98 | test_mode=True, 99 | box_type_3d='Camera')) 100 | evaluation = dict(interval=2) 101 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/sunrgbd-3d-10class.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'SUNRGBDDataset' 2 | data_root = 'data/sunrgbd/' 3 | class_names = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser', 4 | 'night_stand', 'bookshelf', 'bathtub') 5 | train_pipeline = [ 6 | dict( 7 | type='LoadPointsFromFile', 8 | coord_type='DEPTH', 9 | shift_height=True, 10 | load_dim=6, 11 | use_dim=[0, 1, 2]), 12 | dict(type='LoadAnnotations3D'), 13 | dict( 14 | type='RandomFlip3D', 15 | sync_2d=False, 16 | flip_ratio_bev_horizontal=0.5, 17 | ), 18 | dict( 19 | type='GlobalRotScaleTrans', 20 | rot_range=[-0.523599, 0.523599], 21 | 
scale_ratio_range=[0.85, 1.15], 22 | shift_height=True), 23 | dict(type='PointSample', num_points=20000), 24 | dict(type='DefaultFormatBundle3D', class_names=class_names), 25 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 26 | ] 27 | test_pipeline = [ 28 | dict( 29 | type='LoadPointsFromFile', 30 | coord_type='DEPTH', 31 | shift_height=True, 32 | load_dim=6, 33 | use_dim=[0, 1, 2]), 34 | dict( 35 | type='MultiScaleFlipAug3D', 36 | img_scale=(1333, 800), 37 | pts_scale_ratio=1, 38 | flip=False, 39 | transforms=[ 40 | dict( 41 | type='GlobalRotScaleTrans', 42 | rot_range=[0, 0], 43 | scale_ratio_range=[1., 1.], 44 | translation_std=[0, 0, 0]), 45 | dict( 46 | type='RandomFlip3D', 47 | sync_2d=False, 48 | flip_ratio_bev_horizontal=0.5, 49 | ), 50 | dict(type='PointSample', num_points=20000), 51 | dict( 52 | type='DefaultFormatBundle3D', 53 | class_names=class_names, 54 | with_label=False), 55 | dict(type='Collect3D', keys=['points']) 56 | ]) 57 | ] 58 | # construct a pipeline for data and gt loading in show function 59 | # please keep its loading function consistent with test_pipeline (e.g. client) 60 | eval_pipeline = [ 61 | dict( 62 | type='LoadPointsFromFile', 63 | coord_type='DEPTH', 64 | shift_height=False, 65 | load_dim=6, 66 | use_dim=[0, 1, 2]), 67 | dict( 68 | type='DefaultFormatBundle3D', 69 | class_names=class_names, 70 | with_label=False), 71 | dict(type='Collect3D', keys=['points']) 72 | ] 73 | 74 | data = dict( 75 | samples_per_gpu=16, 76 | workers_per_gpu=4, 77 | train=dict( 78 | type='RepeatDataset', 79 | times=5, 80 | dataset=dict( 81 | type=dataset_type, 82 | data_root=data_root, 83 | ann_file=data_root + 'sunrgbd_infos_train.pkl', 84 | pipeline=train_pipeline, 85 | classes=class_names, 86 | filter_empty_gt=False, 87 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset 88 | # and box_type_3d='Depth' in sunrgbd and scannet dataset. 89 | box_type_3d='Depth')), 90 | val=dict( 91 | type=dataset_type, 92 | data_root=data_root, 93 | ann_file=data_root + 'sunrgbd_infos_val.pkl', 94 | pipeline=test_pipeline, 95 | classes=class_names, 96 | test_mode=True, 97 | box_type_3d='Depth'), 98 | test=dict( 99 | type=dataset_type, 100 | data_root=data_root, 101 | ann_file=data_root + 'sunrgbd_infos_val.pkl', 102 | pipeline=test_pipeline, 103 | classes=class_names, 104 | test_mode=True, 105 | box_type_3d='Depth')) 106 | 107 | evaluation = dict(pipeline=eval_pipeline) 108 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/evaluation/eval_hooks.py: -------------------------------------------------------------------------------- 1 | 2 | # Note: Considering that MMCV's EvalHook updated its interface in V1.3.16, 3 | # in order to avoid strong version dependency, we did not directly 4 | # inherit EvalHook but BaseDistEvalHook. 
5 | 6 | import bisect 7 | import os.path as osp 8 | 9 | import mmcv 10 | import torch.distributed as dist 11 | from mmcv.runner import DistEvalHook as BaseDistEvalHook 12 | from mmcv.runner import EvalHook as BaseEvalHook 13 | from torch.nn.modules.batchnorm import _BatchNorm 14 | from mmdet.core.evaluation.eval_hooks import DistEvalHook 15 | 16 | 17 | def _calc_dynamic_intervals(start_interval, dynamic_interval_list): 18 | assert mmcv.is_list_of(dynamic_interval_list, tuple) 19 | 20 | dynamic_milestones = [0] 21 | dynamic_milestones.extend( 22 | [dynamic_interval[0] for dynamic_interval in dynamic_interval_list]) 23 | dynamic_intervals = [start_interval] 24 | dynamic_intervals.extend( 25 | [dynamic_interval[1] for dynamic_interval in dynamic_interval_list]) 26 | return dynamic_milestones, dynamic_intervals 27 | 28 | 29 | class CustomDistEvalHook(BaseDistEvalHook): 30 | 31 | def __init__(self, *args, dynamic_intervals=None, **kwargs): 32 | super(CustomDistEvalHook, self).__init__(*args, **kwargs) 33 | self.use_dynamic_intervals = dynamic_intervals is not None 34 | if self.use_dynamic_intervals: 35 | self.dynamic_milestones, self.dynamic_intervals = \ 36 | _calc_dynamic_intervals(self.interval, dynamic_intervals) 37 | 38 | def _decide_interval(self, runner): 39 | if self.use_dynamic_intervals: 40 | progress = runner.epoch if self.by_epoch else runner.iter 41 | step = bisect.bisect(self.dynamic_milestones, (progress + 1)) 42 | # Dynamically modify the evaluation interval 43 | self.interval = self.dynamic_intervals[step - 1] 44 | 45 | def before_train_epoch(self, runner): 46 | """Evaluate the model only at the start of training by epoch.""" 47 | self._decide_interval(runner) 48 | super().before_train_epoch(runner) 49 | 50 | def before_train_iter(self, runner): 51 | self._decide_interval(runner) 52 | super().before_train_iter(runner) 53 | 54 | def _do_evaluate(self, runner): 55 | """perform evaluation and save ckpt.""" 56 | # Synchronization of BatchNorm's buffer (running_mean 57 | # and running_var) is not supported in the DDP of pytorch, 58 | # which may cause the inconsistent performance of models in 59 | # different ranks, so we broadcast BatchNorm's buffers 60 | # of rank 0 to other ranks to avoid this. 
61 | if self.broadcast_bn_buffer: 62 | model = runner.model 63 | for name, module in model.named_modules(): 64 | if isinstance(module, 65 | _BatchNorm) and module.track_running_stats: 66 | dist.broadcast(module.running_var, 0) 67 | dist.broadcast(module.running_mean, 0) 68 | 69 | if not self._should_evaluate(runner): 70 | return 71 | 72 | tmpdir = self.tmpdir 73 | if tmpdir is None: 74 | tmpdir = osp.join(runner.work_dir, '.eval_hook') 75 | 76 | from projects.mmdet3d_plugin.bevformer.apis.test import custom_multi_gpu_test # to solve circlur import 77 | 78 | results = custom_multi_gpu_test( 79 | runner.model, 80 | self.dataloader, 81 | tmpdir=tmpdir, 82 | gpu_collect=self.gpu_collect) 83 | if runner.rank == 0: 84 | print('\n') 85 | runner.log_buffer.output['eval_iter_num'] = len(self.dataloader) 86 | 87 | key_score = self.evaluate(runner, results) 88 | 89 | if self.save_best: 90 | self._save_ckpt(runner, key_score) 91 | 92 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/hv_second_secfpn_waymo.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | # Voxel size for voxel encoder 3 | # Usually voxel size is changed consistently with the point cloud range 4 | # If point cloud range is modified, do remember to change all related 5 | # keys in the config. 6 | voxel_size = [0.08, 0.08, 0.1] 7 | model = dict( 8 | type='VoxelNet', 9 | voxel_layer=dict( 10 | max_num_points=10, 11 | point_cloud_range=[-76.8, -51.2, -2, 76.8, 51.2, 4], 12 | voxel_size=voxel_size, 13 | max_voxels=(80000, 90000)), 14 | voxel_encoder=dict(type='HardSimpleVFE', num_features=5), 15 | middle_encoder=dict( 16 | type='SparseEncoder', 17 | in_channels=5, 18 | sparse_shape=[61, 1280, 1920], 19 | order=('conv', 'norm', 'act')), 20 | backbone=dict( 21 | type='SECOND', 22 | in_channels=384, 23 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 24 | layer_nums=[5, 5], 25 | layer_strides=[1, 2], 26 | out_channels=[128, 256]), 27 | neck=dict( 28 | type='SECONDFPN', 29 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 30 | in_channels=[128, 256], 31 | upsample_strides=[1, 2], 32 | out_channels=[256, 256]), 33 | bbox_head=dict( 34 | type='Anchor3DHead', 35 | num_classes=3, 36 | in_channels=512, 37 | feat_channels=512, 38 | use_direction_classifier=True, 39 | anchor_generator=dict( 40 | type='AlignedAnchor3DRangeGenerator', 41 | ranges=[[-76.8, -51.2, -0.0345, 76.8, 51.2, -0.0345], 42 | [-76.8, -51.2, 0, 76.8, 51.2, 0], 43 | [-76.8, -51.2, -0.1188, 76.8, 51.2, -0.1188]], 44 | sizes=[ 45 | [2.08, 4.73, 1.77], # car 46 | [0.84, 0.91, 1.74], # pedestrian 47 | [0.84, 1.81, 1.77] # cyclist 48 | ], 49 | rotations=[0, 1.57], 50 | reshape_out=False), 51 | diff_rad_by_sin=True, 52 | dir_offset=0.7854, # pi/4 53 | dir_limit_offset=0, 54 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7), 55 | loss_cls=dict( 56 | type='FocalLoss', 57 | use_sigmoid=True, 58 | gamma=2.0, 59 | alpha=0.25, 60 | loss_weight=1.0), 61 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), 62 | loss_dir=dict( 63 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 64 | # model training and testing settings 65 | train_cfg=dict( 66 | assigner=[ 67 | dict( # car 68 | type='MaxIoUAssigner', 69 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 70 | pos_iou_thr=0.55, 71 | neg_iou_thr=0.4, 72 | min_pos_iou=0.4, 73 | ignore_iof_thr=-1), 74 | dict( # pedestrian 75 | type='MaxIoUAssigner', 76 | 
iou_calculator=dict(type='BboxOverlapsNearest3D'), 77 | pos_iou_thr=0.5, 78 | neg_iou_thr=0.3, 79 | min_pos_iou=0.3, 80 | ignore_iof_thr=-1), 81 | dict( # cyclist 82 | type='MaxIoUAssigner', 83 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 84 | pos_iou_thr=0.5, 85 | neg_iou_thr=0.3, 86 | min_pos_iou=0.3, 87 | ignore_iof_thr=-1) 88 | ], 89 | allowed_border=0, 90 | code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 91 | pos_weight=-1, 92 | debug=False), 93 | test_cfg=dict( 94 | use_rotate_nms=True, 95 | nms_across_levels=False, 96 | nms_pre=4096, 97 | nms_thr=0.25, 98 | score_thr=0.1, 99 | min_bbox_size=0, 100 | max_num=500)) 101 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/imvotenet_image.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='ImVoteNet', 3 | img_backbone=dict( 4 | type='ResNet', 5 | depth=50, 6 | num_stages=4, 7 | out_indices=(0, 1, 2, 3), 8 | frozen_stages=1, 9 | norm_cfg=dict(type='BN', requires_grad=False), 10 | norm_eval=True, 11 | style='caffe'), 12 | img_neck=dict( 13 | type='FPN', 14 | in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | num_outs=5), 17 | img_rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=256, 20 | feat_channels=256, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | scales=[8], 24 | ratios=[0.5, 1.0, 2.0], 25 | strides=[4, 8, 16, 32, 64]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | img_roi_head=dict( 34 | type='StandardRoIHead', 35 | bbox_roi_extractor=dict( 36 | type='SingleRoIExtractor', 37 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 38 | out_channels=256, 39 | featmap_strides=[4, 8, 16, 32]), 40 | bbox_head=dict( 41 | type='Shared2FCBBoxHead', 42 | in_channels=256, 43 | fc_out_channels=1024, 44 | roi_feat_size=7, 45 | num_classes=10, 46 | bbox_coder=dict( 47 | type='DeltaXYWHBBoxCoder', 48 | target_means=[0., 0., 0., 0.], 49 | target_stds=[0.1, 0.1, 0.2, 0.2]), 50 | reg_class_agnostic=False, 51 | loss_cls=dict( 52 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 53 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 54 | 55 | # model training and testing settings 56 | train_cfg=dict( 57 | img_rpn=dict( 58 | assigner=dict( 59 | type='MaxIoUAssigner', 60 | pos_iou_thr=0.7, 61 | neg_iou_thr=0.3, 62 | min_pos_iou=0.3, 63 | match_low_quality=True, 64 | ignore_iof_thr=-1), 65 | sampler=dict( 66 | type='RandomSampler', 67 | num=256, 68 | pos_fraction=0.5, 69 | neg_pos_ub=-1, 70 | add_gt_as_proposals=False), 71 | allowed_border=-1, 72 | pos_weight=-1, 73 | debug=False), 74 | img_rpn_proposal=dict( 75 | nms_across_levels=False, 76 | nms_pre=2000, 77 | nms_post=1000, 78 | max_per_img=1000, 79 | nms=dict(type='nms', iou_threshold=0.7), 80 | min_bbox_size=0), 81 | img_rcnn=dict( 82 | assigner=dict( 83 | type='MaxIoUAssigner', 84 | pos_iou_thr=0.5, 85 | neg_iou_thr=0.5, 86 | min_pos_iou=0.5, 87 | match_low_quality=False, 88 | ignore_iof_thr=-1), 89 | sampler=dict( 90 | type='RandomSampler', 91 | num=512, 92 | pos_fraction=0.25, 93 | neg_pos_ub=-1, 94 | add_gt_as_proposals=True), 95 | pos_weight=-1, 96 | debug=False)), 97 | test_cfg=dict( 98 | img_rpn=dict( 99 | nms_across_levels=False, 100 | nms_pre=1000, 101 | nms_post=1000, 102 | 
max_per_img=1000,
103 | nms=dict(type='nms', iou_threshold=0.7),
104 | min_bbox_size=0),
105 | img_rcnn=dict(
106 | score_thr=0.05,
107 | nms=dict(type='nms', iou_threshold=0.5),
108 | max_per_img=100)))
109 | 
-------------------------------------------------------------------------------- /hf_guide.md: --------------------------------------------------------------------------------
1 | # Guide to Adapting mmlab Code to Fire-Flyer
2 | 
3 | Using BEVFormer as an example, this guide describes the key steps for adapting mmlab code to the Fire-Flyer cluster.
4 | 
5 | ## Adaptation 1: Launch method
6 | 
7 | The Fire-Flyer cluster requires `bind_numa` when launching distributed training, so it is recommended to launch with `torch.multiprocessing.spawn` rather than `torch.distributed.launch`.
8 | 
9 | Before
10 | 
11 | ```
12 | def main():
13 | args = parse_args()
14 | ...
15 | 
16 | if __name__ == '__main__':
17 | main()
18 | ```
19 | 
20 | After
21 | 
22 | ```
23 | import hfai
24 | 
25 | def main(local_rank, args):
26 | ...
27 | 
28 | if __name__ == '__main__':
29 | args = parse_args()
30 | ngpus = torch.cuda.device_count()
31 | hfai.multiprocessing.spawn(main, args=(args,), nprocs=ngpus, bind_numa=True)
32 | ```
33 | 
34 | ## Adaptation 2: Initializing distributed parameters
35 | When launching a Fire-Flyer job, the number of nodes is specified with --num-nodes, and each node has 8 GPUs.
36 | 
37 | The environment variables on the Fire-Flyer cluster mean the following:
38 | - `world_size` is the number of nodes
39 | - `rank` is the id of the current node
40 | 
41 | The distributed initialization code therefore needs to be modified.
42 | 
43 | Before
44 | 
45 | ```
46 | def main(local_rank, args):
47 | ...
48 | if args.launcher == 'none':
49 | distributed = False
50 | else:
51 | distributed = True
52 | init_dist(args.launcher, **cfg.dist_params)
53 | # re-set gpu_ids with distributed training mode
54 | _, world_size = get_dist_info()
55 | cfg.gpu_ids = range(world_size)
56 | ```
57 | 
58 | After
59 | 
60 | ```
61 | def main(local_rank, args):
62 | ...
63 | if args.launcher == 'none':
64 | distributed = False
65 | rank = 0
66 | else:
67 | distributed = True
68 | # init distributed env first, since logger depends on the dist info.
69 | ip = os.environ.get("MASTER_ADDR", "127.0.0.1")
70 | port = os.environ.get("MASTER_PORT", "2223")
71 | hosts = int(os.environ.get("WORLD_SIZE", 1)) # number of nodes
72 | rank = int(os.environ.get("RANK", 0)) # node id
73 | gpus = torch.cuda.device_count() # gpus per node
74 | dist.init_process_group(
75 | backend="nccl", init_method=f"tcp://{ip}:{port}", world_size=hosts * gpus, rank=rank * gpus + local_rank
76 | )
77 | torch.cuda.set_device(local_rank)
78 | # re-set gpu_ids with distributed training mode
79 | _, world_size = get_dist_info()
80 | cfg.gpu_ids = range(world_size)
81 | ```
82 | 
83 | ## Adaptation 3: Resuming after interruption
84 | 
85 | Jobs on the Fire-Flyer cluster take part in time-sharing scheduling, so a job must be able to resume after being interrupted. `mmcv.runner` automatically saves a checkpoint at the end of every epoch, so the following two steps are enough to guarantee that training can continue after an interruption.
86 | 
87 | 1. Specify the output directory with --work-dir when training
88 | 2. Add the --auto-resume flag when launching
89 | 
90 | 
91 | ## Adaptation 4: Saving the config
92 | 
93 | To make sure the file write happens only once across the cluster's multiple processes, check the `local_rank` and `rank` variables before writing.
94 | 
95 | Before
96 | 
97 | ```
98 | def main(local_rank, args):
99 | ...
100 | # dump config
101 | cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))
102 | ```
103 | 
104 | After
105 | 
106 | ```
107 | def main(local_rank, args):
108 | ...
109 | # dump config
110 | if local_rank == 0 and rank == 0:
111 | cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))
112 | 
113 | ```
114 | 
115 | ## Adaptation 5: Converting data to FFRecord
116 | 
117 | For the conversion steps, see [ffrecord_converter](https://github.com/HFAiLab/ffrecord_converters)
118 | 
119 | ## Adaptation 6: Using hfai operators
120 | 
121 | High-Flyer AI has re-engineered a number of commonly used AI operators to improve overall training efficiency. Adding the following code automatically replaces the corresponding operators.
122 | 
123 | ```
124 | from hfai.nn import to_hfai
125 | def main(local_rank, args):
126 | ...
127 | model = to_hfai(model, contiguous_param=False, verbose=True, inplace=True)
128 | ```
129 | Note: the speedup is most noticeable when `batch_size` is large.
130 | 
131 | 
132 | 
133 | ## Frequently asked questions
134 | Some common issues are collected below for reference.
135 | 
136 | ### Issue 1: cannot pickle 'dict_values' object.
137 | 
138 | This issue arises because nuscenes-devkit uses the `dict_values` data type, so when the `dataloader` is configured with `num_workers` greater than `0`, the worker processes cannot `pickle dump` the dataset.
139 | 
140 | The fix is as follows:
141 | 
142 | 1 - Locate the nuscenes-devkit installation directory `$nuscenes_devkit_path`:
143 | ```
144 | python -c "import nuscenes; print(nuscenes.__file__)"
145 | ```
146 | 
147 | 2 - Change the `dict_values` usage in `$nuscenes_devkit_path/eval/detection/data_classes.py`.
148 | 
149 | Before
150 | ```
151 | self.class_names = self.class_range.keys()
152 | ```
153 | After
154 | ```
155 | self.class_names = list(self.class_range.keys())
156 | ```
157 | 
158 | 
-------------------------------------------------------------------------------- /projects/configs/_base_/datasets/s3dis-3d-5class.py: --------------------------------------------------------------------------------
1 | # dataset settings
2 | dataset_type = 'S3DISDataset'
3 | data_root = './data/s3dis/'
4 | class_names = ('table', 'chair', 'sofa', 'bookcase', 'board')
5 | train_area = [1, 2, 3, 4, 6]
6 | test_area = 5
7 | 
8 | train_pipeline = [
9 | dict(
10 | type='LoadPointsFromFile',
11 | coord_type='DEPTH',
12 | shift_height=True,
13 | load_dim=6,
14 | use_dim=[0, 1, 2, 3, 4, 5]),
15 | dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
16 | dict(type='PointSample', num_points=40000),
17 | dict(
18 | type='RandomFlip3D',
19 | sync_2d=False,
20 | flip_ratio_bev_horizontal=0.5,
21 | flip_ratio_bev_vertical=0.5),
22 | dict(
23 | type='GlobalRotScaleTrans',
24 | # following ScanNet dataset the rotation range is 5 degrees
25 | rot_range=[-0.087266, 0.087266],
26 | scale_ratio_range=[1.0, 1.0],
27 | shift_height=True),
28 | dict(type='DefaultFormatBundle3D', class_names=class_names),
29 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
30 | ]
31 | test_pipeline = [
32 | dict(
33 | type='LoadPointsFromFile',
34 | coord_type='DEPTH',
35 | shift_height=True,
36 | load_dim=6,
37 | use_dim=[0, 1, 2, 3, 4, 5]),
38 | dict(
39 | type='MultiScaleFlipAug3D',
40 | img_scale=(1333, 800),
41 | pts_scale_ratio=1,
42 | flip=False,
43 | transforms=[
44 | dict(
45 | type='GlobalRotScaleTrans',
46 | rot_range=[0, 0],
47 | scale_ratio_range=[1., 1.],
48 | translation_std=[0, 0, 0]),
49 | dict(
50 | type='RandomFlip3D',
51 | sync_2d=False,
52 | flip_ratio_bev_horizontal=0.5,
53 | flip_ratio_bev_vertical=0.5),
54 | dict(type='PointSample', num_points=40000),
55 | dict(
56 | type='DefaultFormatBundle3D',
57 | class_names=class_names,
58 | with_label=False),
59 | dict(type='Collect3D', keys=['points'])
60 | ])
61 | ]
62 | # construct a pipeline for data and gt loading in show function
63 | # please keep its loading function consistent with test_pipeline (e.g.
client) 64 | eval_pipeline = [ 65 | dict( 66 | type='LoadPointsFromFile', 67 | coord_type='DEPTH', 68 | shift_height=False, 69 | load_dim=6, 70 | use_dim=[0, 1, 2, 3, 4, 5]), 71 | dict( 72 | type='DefaultFormatBundle3D', 73 | class_names=class_names, 74 | with_label=False), 75 | dict(type='Collect3D', keys=['points']) 76 | ] 77 | 78 | data = dict( 79 | samples_per_gpu=8, 80 | workers_per_gpu=4, 81 | train=dict( 82 | type='RepeatDataset', 83 | times=5, 84 | dataset=dict( 85 | type='ConcatDataset', 86 | datasets=[ 87 | dict( 88 | type=dataset_type, 89 | data_root=data_root, 90 | ann_file=data_root + f's3dis_infos_Area_{i}.pkl', 91 | pipeline=train_pipeline, 92 | filter_empty_gt=False, 93 | classes=class_names, 94 | box_type_3d='Depth') for i in train_area 95 | ], 96 | separate_eval=False)), 97 | val=dict( 98 | type=dataset_type, 99 | data_root=data_root, 100 | ann_file=data_root + f's3dis_infos_Area_{test_area}.pkl', 101 | pipeline=test_pipeline, 102 | classes=class_names, 103 | test_mode=True, 104 | box_type_3d='Depth'), 105 | test=dict( 106 | type=dataset_type, 107 | data_root=data_root, 108 | ann_file=data_root + f's3dis_infos_Area_{test_area}.pkl', 109 | pipeline=test_pipeline, 110 | classes=class_names, 111 | test_mode=True, 112 | box_type_3d='Depth')) 113 | 114 | evaluation = dict(pipeline=eval_pipeline) 115 | -------------------------------------------------------------------------------- /projects/configs/datasets/custom_waymo-3d.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | # D5 in the config name means the whole dataset is divided into 5 folds 3 | # We only use one fold for efficient experiments 4 | dataset_type = 'CustomWaymoDataset' 5 | data_root = 'data/waymo/kitti_format/' 6 | file_client_args = dict(backend='disk') 7 | # Uncomment the following if use ceph or other file clients. 8 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient 9 | # for more details. 
10 | # file_client_args = dict( 11 | # backend='petrel', path_mapping=dict(data='s3://waymo_data/')) 12 | 13 | img_norm_cfg = dict( 14 | mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) 15 | class_names = ['Car', 'Pedestrian', 'Cyclist'] 16 | point_cloud_range = [-74.88, -74.88, -2, 74.88, 74.88, 4] 17 | input_modality = dict(use_lidar=False, use_camera=True) 18 | db_sampler = dict( 19 | data_root=data_root, 20 | info_path=data_root + 'waymo_dbinfos_train.pkl', 21 | rate=1.0, 22 | prepare=dict( 23 | filter_by_difficulty=[-1], 24 | filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)), 25 | classes=class_names, 26 | sample_groups=dict(Car=15, Pedestrian=10, Cyclist=10), 27 | points_loader=dict( 28 | type='LoadPointsFromFile', 29 | coord_type='LIDAR', 30 | load_dim=5, 31 | use_dim=[0, 1, 2, 3, 4], 32 | file_client_args=file_client_args)) 33 | 34 | 35 | 36 | train_pipeline = [ 37 | dict(type='LoadMultiViewImageFromFiles', to_float32=True), 38 | dict(type='PhotoMetricDistortionMultiViewImage'), 39 | dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True, with_attr_label=False), 40 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), 41 | dict(type='ObjectNameFilter', classes=class_names), 42 | dict(type='NormalizeMultiviewImage', **img_norm_cfg), 43 | dict(type='PadMultiViewImage', size_divisor=32), 44 | dict(type='DefaultFormatBundle3D', class_names=class_names), 45 | dict(type='CustomCollect3D', keys=['gt_bboxes_3d', 'gt_labels_3d', 'img']) 46 | ] 47 | 48 | 49 | test_pipeline = [ 50 | dict(type='LoadMultiViewImageFromFiles', to_float32=True), 51 | dict(type='NormalizeMultiviewImage', **img_norm_cfg), 52 | dict(type='PadMultiViewImage', size_divisor=32), 53 | dict( 54 | type='MultiScaleFlipAug3D', 55 | img_scale=(1920, 1280), 56 | pts_scale_ratio=1, 57 | flip=False, 58 | transforms=[ 59 | dict( 60 | type='DefaultFormatBundle3D', 61 | class_names=class_names, 62 | with_label=False), 63 | dict(type='CustomCollect3D', keys=['img']) 64 | ]) 65 | ] 66 | 67 | 68 | # construct a pipeline for data and gt loading in show function 69 | # please keep its loading function consistent with test_pipeline (e.g. client) 70 | 71 | data = dict( 72 | samples_per_gpu=2, 73 | workers_per_gpu=4, 74 | train=dict( 75 | type='RepeatDataset', 76 | times=2, 77 | dataset=dict( 78 | type=dataset_type, 79 | data_root=data_root, 80 | ann_file=data_root + 'waymo_infos_train.pkl', 81 | split='training', 82 | pipeline=train_pipeline, 83 | modality=input_modality, 84 | classes=class_names, 85 | test_mode=False, 86 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset 87 | # and box_type_3d='Depth' in sunrgbd and scannet dataset. 
88 | box_type_3d='LiDAR', 89 | # load one frame every five frames 90 | load_interval=5)), 91 | val=dict( 92 | type=dataset_type, 93 | data_root=data_root, 94 | ann_file=data_root + 'waymo_infos_val.pkl', 95 | split='training', 96 | pipeline=test_pipeline, 97 | modality=input_modality, 98 | classes=class_names, 99 | test_mode=True, 100 | box_type_3d='LiDAR'), 101 | test=dict( 102 | type=dataset_type, 103 | data_root=data_root, 104 | ann_file=data_root + 'waymo_infos_val.pkl', 105 | split='training', 106 | pipeline=test_pipeline, 107 | modality=input_modality, 108 | classes=class_names, 109 | test_mode=True, 110 | box_type_3d='LiDAR')) 111 | 112 | evaluation = dict(interval=24, pipeline=test_pipeline) -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/samplers/group_sampler.py: -------------------------------------------------------------------------------- 1 | 2 | # Copyright (c) OpenMMLab. All rights reserved. 3 | import math 4 | 5 | import numpy as np 6 | import torch 7 | from mmcv.runner import get_dist_info 8 | from torch.utils.data import Sampler 9 | from .sampler import SAMPLER 10 | import random 11 | from IPython import embed 12 | 13 | 14 | @SAMPLER.register_module() 15 | class DistributedGroupSampler(Sampler): 16 | """Sampler that restricts data loading to a subset of the dataset. 17 | It is especially useful in conjunction with 18 | :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each 19 | process can pass a DistributedSampler instance as a DataLoader sampler, 20 | and load a subset of the original dataset that is exclusive to it. 21 | .. note:: 22 | Dataset is assumed to be of constant size. 23 | Arguments: 24 | dataset: Dataset used for sampling. 25 | num_replicas (optional): Number of processes participating in 26 | distributed training. 27 | rank (optional): Rank of the current process within num_replicas. 28 | seed (int, optional): random seed used to shuffle the sampler if 29 | ``shuffle=True``. This number should be identical across all 30 | processes in the distributed group. Default: 0. 31 | """ 32 | 33 | def __init__(self, 34 | dataset, 35 | samples_per_gpu=1, 36 | num_replicas=None, 37 | rank=None, 38 | seed=0): 39 | _rank, _num_replicas = get_dist_info() 40 | if num_replicas is None: 41 | num_replicas = _num_replicas 42 | if rank is None: 43 | rank = _rank 44 | self.dataset = dataset 45 | self.samples_per_gpu = samples_per_gpu 46 | self.num_replicas = num_replicas 47 | self.rank = rank 48 | self.epoch = 0 49 | self.seed = seed if seed is not None else 0 50 | 51 | assert hasattr(self.dataset, 'flag') 52 | self.flag = self.dataset.flag 53 | self.group_sizes = np.bincount(self.flag) 54 | 55 | self.num_samples = 0 56 | for i, j in enumerate(self.group_sizes): 57 | self.num_samples += int( 58 | math.ceil(self.group_sizes[i] * 1.0 / self.samples_per_gpu / 59 | self.num_replicas)) * self.samples_per_gpu 60 | self.total_size = self.num_samples * self.num_replicas 61 | 62 | def __iter__(self): 63 | # deterministically shuffle based on epoch 64 | g = torch.Generator() 65 | g.manual_seed(self.epoch + self.seed) 66 | 67 | indices = [] 68 | for i, size in enumerate(self.group_sizes): 69 | if size > 0: 70 | indice = np.where(self.flag == i)[0] 71 | assert len(indice) == size 72 | # add .numpy() to avoid bug when selecting indice in parrots. 73 | # TODO: check whether torch.randperm() can be replaced by 74 | # numpy.random.permutation(). 
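# What follows: (1) shuffle this group's indices with the epoch-seeded
# generator, then (2) pad the shuffled list by repeating it until its length
# is a multiple of samples_per_gpu * num_replicas, so that every rank can be
# handed whole per-GPU batches drawn from a single group (samples sharing the
# same `flag` value).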
75 | indice = indice[list( 76 | torch.randperm(int(size), generator=g).numpy())].tolist() 77 | extra = int( 78 | math.ceil( 79 | size * 1.0 / self.samples_per_gpu / self.num_replicas) 80 | ) * self.samples_per_gpu * self.num_replicas - len(indice) 81 | # pad indice 82 | tmp = indice.copy() 83 | for _ in range(extra // size): 84 | indice.extend(tmp) 85 | indice.extend(tmp[:extra % size]) 86 | indices.extend(indice) 87 | 88 | assert len(indices) == self.total_size 89 | 90 | indices = [ 91 | indices[j] for i in list( 92 | torch.randperm( 93 | len(indices) // self.samples_per_gpu, generator=g)) 94 | for j in range(i * self.samples_per_gpu, (i + 1) * 95 | self.samples_per_gpu) 96 | ] 97 | 98 | # subsample 99 | offset = self.num_samples * self.rank 100 | indices = indices[offset:offset + self.num_samples] 101 | assert len(indices) == self.num_samples 102 | 103 | return iter(indices) 104 | 105 | def __len__(self): 106 | return self.num_samples 107 | 108 | def set_epoch(self, epoch): 109 | self.epoch = epoch 110 | 111 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/hv_pointpillars_secfpn_waymo.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | # Voxel size for voxel encoder 3 | # Usually voxel size is changed consistently with the point cloud range 4 | # If point cloud range is modified, do remember to change all related 5 | # keys in the config. 6 | voxel_size = [0.32, 0.32, 6] 7 | model = dict( 8 | type='MVXFasterRCNN', 9 | pts_voxel_layer=dict( 10 | max_num_points=20, 11 | point_cloud_range=[-74.88, -74.88, -2, 74.88, 74.88, 4], 12 | voxel_size=voxel_size, 13 | max_voxels=(32000, 32000)), 14 | pts_voxel_encoder=dict( 15 | type='HardVFE', 16 | in_channels=5, 17 | feat_channels=[64], 18 | with_distance=False, 19 | voxel_size=voxel_size, 20 | with_cluster_center=True, 21 | with_voxel_center=True, 22 | point_cloud_range=[-74.88, -74.88, -2, 74.88, 74.88, 4], 23 | norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)), 24 | pts_middle_encoder=dict( 25 | type='PointPillarsScatter', in_channels=64, output_shape=[468, 468]), 26 | pts_backbone=dict( 27 | type='SECOND', 28 | in_channels=64, 29 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 30 | layer_nums=[3, 5, 5], 31 | layer_strides=[1, 2, 2], 32 | out_channels=[64, 128, 256]), 33 | pts_neck=dict( 34 | type='SECONDFPN', 35 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 36 | in_channels=[64, 128, 256], 37 | upsample_strides=[1, 2, 4], 38 | out_channels=[128, 128, 128]), 39 | pts_bbox_head=dict( 40 | type='Anchor3DHead', 41 | num_classes=3, 42 | in_channels=384, 43 | feat_channels=384, 44 | use_direction_classifier=True, 45 | anchor_generator=dict( 46 | type='AlignedAnchor3DRangeGenerator', 47 | ranges=[[-74.88, -74.88, -0.0345, 74.88, 74.88, -0.0345], 48 | [-74.88, -74.88, -0.1188, 74.88, 74.88, -0.1188], 49 | [-74.88, -74.88, 0, 74.88, 74.88, 0]], 50 | sizes=[ 51 | [2.08, 4.73, 1.77], # car 52 | [0.84, 1.81, 1.77], # cyclist 53 | [0.84, 0.91, 1.74] # pedestrian 54 | ], 55 | rotations=[0, 1.57], 56 | reshape_out=False), 57 | diff_rad_by_sin=True, 58 | dir_offset=0.7854, # pi/4 59 | dir_limit_offset=0, 60 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7), 61 | loss_cls=dict( 62 | type='FocalLoss', 63 | use_sigmoid=True, 64 | gamma=2.0, 65 | alpha=0.25, 66 | loss_weight=1.0), 67 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), 68 | loss_dir=dict( 69 | 
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 70 | # model training and testing settings 71 | train_cfg=dict( 72 | pts=dict( 73 | assigner=[ 74 | dict( # car 75 | type='MaxIoUAssigner', 76 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 77 | pos_iou_thr=0.55, 78 | neg_iou_thr=0.4, 79 | min_pos_iou=0.4, 80 | ignore_iof_thr=-1), 81 | dict( # cyclist 82 | type='MaxIoUAssigner', 83 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 84 | pos_iou_thr=0.5, 85 | neg_iou_thr=0.3, 86 | min_pos_iou=0.3, 87 | ignore_iof_thr=-1), 88 | dict( # pedestrian 89 | type='MaxIoUAssigner', 90 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 91 | pos_iou_thr=0.5, 92 | neg_iou_thr=0.3, 93 | min_pos_iou=0.3, 94 | ignore_iof_thr=-1), 95 | ], 96 | allowed_border=0, 97 | code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 98 | pos_weight=-1, 99 | debug=False)), 100 | test_cfg=dict( 101 | pts=dict( 102 | use_rotate_nms=True, 103 | nms_across_levels=False, 104 | nms_pre=4096, 105 | nms_thr=0.25, 106 | score_thr=0.1, 107 | min_bbox_size=0, 108 | max_num=500))) 109 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/grid_mask.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | from PIL import Image 5 | from mmcv.runner import force_fp32, auto_fp16 6 | 7 | class Grid(object): 8 | def __init__(self, use_h, use_w, rotate = 1, offset=False, ratio = 0.5, mode=0, prob = 1.): 9 | self.use_h = use_h 10 | self.use_w = use_w 11 | self.rotate = rotate 12 | self.offset = offset 13 | self.ratio = ratio 14 | self.mode=mode 15 | self.st_prob = prob 16 | self.prob = prob 17 | 18 | def set_prob(self, epoch, max_epoch): 19 | self.prob = self.st_prob * epoch / max_epoch 20 | 21 | def __call__(self, img, label): 22 | if np.random.rand() > self.prob: 23 | return img, label 24 | h = img.size(1) 25 | w = img.size(2) 26 | self.d1 = 2 27 | self.d2 = min(h, w) 28 | hh = int(1.5*h) 29 | ww = int(1.5*w) 30 | d = np.random.randint(self.d1, self.d2) 31 | if self.ratio == 1: 32 | self.l = np.random.randint(1, d) 33 | else: 34 | self.l = min(max(int(d*self.ratio+0.5),1),d-1) 35 | mask = np.ones((hh, ww), np.float32) 36 | st_h = np.random.randint(d) 37 | st_w = np.random.randint(d) 38 | if self.use_h: 39 | for i in range(hh//d): 40 | s = d*i + st_h 41 | t = min(s+self.l, hh) 42 | mask[s:t,:] *= 0 43 | if self.use_w: 44 | for i in range(ww//d): 45 | s = d*i + st_w 46 | t = min(s+self.l, ww) 47 | mask[:,s:t] *= 0 48 | 49 | r = np.random.randint(self.rotate) 50 | mask = Image.fromarray(np.uint8(mask)) 51 | mask = mask.rotate(r) 52 | mask = np.asarray(mask) 53 | mask = mask[(hh-h)//2:(hh-h)//2+h, (ww-w)//2:(ww-w)//2+w] 54 | 55 | mask = torch.from_numpy(mask).float() 56 | if self.mode == 1: 57 | mask = 1-mask 58 | 59 | mask = mask.expand_as(img) 60 | if self.offset: 61 | offset = torch.from_numpy(2 * (np.random.rand(h,w) - 0.5)).float() 62 | offset = (1 - mask) * offset 63 | img = img * mask + offset 64 | else: 65 | img = img * mask 66 | 67 | return img, label 68 | 69 | 70 | class GridMask(nn.Module): 71 | def __init__(self, use_h, use_w, rotate = 1, offset=False, ratio = 0.5, mode=0, prob = 1.): 72 | super(GridMask, self).__init__() 73 | self.use_h = use_h 74 | self.use_w = use_w 75 | self.rotate = rotate 76 | self.offset = offset 77 | self.ratio = ratio 78 | self.mode = mode 79 | self.st_prob = prob 80 | self.prob = prob 81 | self.fp16_enable = False 82 | def 
set_prob(self, epoch, max_epoch): 83 | self.prob = self.st_prob * epoch / max_epoch #+ 1.#0.5 84 | @auto_fp16() 85 | def forward(self, x): 86 | if np.random.rand() > self.prob or not self.training: 87 | return x 88 | n,c,h,w = x.size() 89 | x = x.view(-1,h,w) 90 | hh = int(1.5*h) 91 | ww = int(1.5*w) 92 | d = np.random.randint(2, h) 93 | self.l = min(max(int(d*self.ratio+0.5),1),d-1) 94 | mask = np.ones((hh, ww), np.float32) 95 | st_h = np.random.randint(d) 96 | st_w = np.random.randint(d) 97 | if self.use_h: 98 | for i in range(hh//d): 99 | s = d*i + st_h 100 | t = min(s+self.l, hh) 101 | mask[s:t,:] *= 0 102 | if self.use_w: 103 | for i in range(ww//d): 104 | s = d*i + st_w 105 | t = min(s+self.l, ww) 106 | mask[:,s:t] *= 0 107 | 108 | r = np.random.randint(self.rotate) 109 | mask = Image.fromarray(np.uint8(mask)) 110 | mask = mask.rotate(r) 111 | mask = np.asarray(mask) 112 | mask = mask[(hh-h)//2:(hh-h)//2+h, (ww-w)//2:(ww-w)//2+w] 113 | 114 | mask = torch.from_numpy(mask).to(x.dtype).cuda() 115 | if self.mode == 1: 116 | mask = 1-mask 117 | mask = mask.expand_as(x) 118 | if self.offset: 119 | offset = torch.from_numpy(2 * (np.random.rand(h,w) - 0.5)).to(x.dtype).cuda() 120 | x = x * mask + offset * (1 - mask) 121 | else: 122 | x = x * mask 123 | 124 | return x.view(n,c,h,w) -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/scannet-3d-18class.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'ScanNetDataset' 3 | data_root = './data/scannet/' 4 | class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window', 5 | 'bookshelf', 'picture', 'counter', 'desk', 'curtain', 6 | 'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub', 7 | 'garbagebin') 8 | train_pipeline = [ 9 | dict( 10 | type='LoadPointsFromFile', 11 | coord_type='DEPTH', 12 | shift_height=True, 13 | load_dim=6, 14 | use_dim=[0, 1, 2]), 15 | dict( 16 | type='LoadAnnotations3D', 17 | with_bbox_3d=True, 18 | with_label_3d=True, 19 | with_mask_3d=True, 20 | with_seg_3d=True), 21 | dict(type='GlobalAlignment', rotation_axis=2), 22 | dict( 23 | type='PointSegClassMapping', 24 | valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 25 | 36, 39), 26 | max_cat_id=40), 27 | dict(type='PointSample', num_points=40000), 28 | dict( 29 | type='RandomFlip3D', 30 | sync_2d=False, 31 | flip_ratio_bev_horizontal=0.5, 32 | flip_ratio_bev_vertical=0.5), 33 | dict( 34 | type='GlobalRotScaleTrans', 35 | rot_range=[-0.087266, 0.087266], 36 | scale_ratio_range=[1.0, 1.0], 37 | shift_height=True), 38 | dict(type='DefaultFormatBundle3D', class_names=class_names), 39 | dict( 40 | type='Collect3D', 41 | keys=[ 42 | 'points', 'gt_bboxes_3d', 'gt_labels_3d', 'pts_semantic_mask', 43 | 'pts_instance_mask' 44 | ]) 45 | ] 46 | test_pipeline = [ 47 | dict( 48 | type='LoadPointsFromFile', 49 | coord_type='DEPTH', 50 | shift_height=True, 51 | load_dim=6, 52 | use_dim=[0, 1, 2]), 53 | dict(type='GlobalAlignment', rotation_axis=2), 54 | dict( 55 | type='MultiScaleFlipAug3D', 56 | img_scale=(1333, 800), 57 | pts_scale_ratio=1, 58 | flip=False, 59 | transforms=[ 60 | dict( 61 | type='GlobalRotScaleTrans', 62 | rot_range=[0, 0], 63 | scale_ratio_range=[1., 1.], 64 | translation_std=[0, 0, 0]), 65 | dict( 66 | type='RandomFlip3D', 67 | sync_2d=False, 68 | flip_ratio_bev_horizontal=0.5, 69 | flip_ratio_bev_vertical=0.5), 70 | dict(type='PointSample', num_points=40000), 71 | dict( 72 | 
type='DefaultFormatBundle3D', 73 | class_names=class_names, 74 | with_label=False), 75 | dict(type='Collect3D', keys=['points']) 76 | ]) 77 | ] 78 | # construct a pipeline for data and gt loading in show function 79 | # please keep its loading function consistent with test_pipeline (e.g. client) 80 | eval_pipeline = [ 81 | dict( 82 | type='LoadPointsFromFile', 83 | coord_type='DEPTH', 84 | shift_height=False, 85 | load_dim=6, 86 | use_dim=[0, 1, 2]), 87 | dict(type='GlobalAlignment', rotation_axis=2), 88 | dict( 89 | type='DefaultFormatBundle3D', 90 | class_names=class_names, 91 | with_label=False), 92 | dict(type='Collect3D', keys=['points']) 93 | ] 94 | 95 | data = dict( 96 | samples_per_gpu=8, 97 | workers_per_gpu=4, 98 | train=dict( 99 | type='RepeatDataset', 100 | times=5, 101 | dataset=dict( 102 | type=dataset_type, 103 | data_root=data_root, 104 | ann_file=data_root + 'scannet_infos_train.pkl', 105 | pipeline=train_pipeline, 106 | filter_empty_gt=False, 107 | classes=class_names, 108 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset 109 | # and box_type_3d='Depth' in sunrgbd and scannet dataset. 110 | box_type_3d='Depth')), 111 | val=dict( 112 | type=dataset_type, 113 | data_root=data_root, 114 | ann_file=data_root + 'scannet_infos_val.pkl', 115 | pipeline=test_pipeline, 116 | classes=class_names, 117 | test_mode=True, 118 | box_type_3d='Depth'), 119 | test=dict( 120 | type=dataset_type, 121 | data_root=data_root, 122 | ann_file=data_root + 'scannet_infos_val.pkl', 123 | pipeline=test_pipeline, 124 | classes=class_names, 125 | test_mode=True, 126 | box_type_3d='Depth')) 127 | 128 | evaluation = dict(pipeline=eval_pipeline) 129 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/mask_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='MaskRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 35 | roi_head=dict( 36 | type='StandardRoIHead', 37 | bbox_roi_extractor=dict( 38 | type='SingleRoIExtractor', 39 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 40 | out_channels=256, 41 | featmap_strides=[4, 8, 16, 32]), 42 | bbox_head=dict( 43 | type='Shared2FCBBoxHead', 44 | in_channels=256, 45 | fc_out_channels=1024, 46 | roi_feat_size=7, 47 | num_classes=80, 48 | bbox_coder=dict( 49 | type='DeltaXYWHBBoxCoder', 50 | target_means=[0., 0., 0., 0.], 51 | target_stds=[0.1, 0.1, 0.2, 0.2]), 52 | reg_class_agnostic=False, 53 | loss_cls=dict( 54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 55 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 56 | mask_roi_extractor=dict( 57 | 
type='SingleRoIExtractor', 58 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 59 | out_channels=256, 60 | featmap_strides=[4, 8, 16, 32]), 61 | mask_head=dict( 62 | type='FCNMaskHead', 63 | num_convs=4, 64 | in_channels=256, 65 | conv_out_channels=256, 66 | num_classes=80, 67 | loss_mask=dict( 68 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 69 | # model training and testing settings 70 | train_cfg=dict( 71 | rpn=dict( 72 | assigner=dict( 73 | type='MaxIoUAssigner', 74 | pos_iou_thr=0.7, 75 | neg_iou_thr=0.3, 76 | min_pos_iou=0.3, 77 | match_low_quality=True, 78 | ignore_iof_thr=-1), 79 | sampler=dict( 80 | type='RandomSampler', 81 | num=256, 82 | pos_fraction=0.5, 83 | neg_pos_ub=-1, 84 | add_gt_as_proposals=False), 85 | allowed_border=-1, 86 | pos_weight=-1, 87 | debug=False), 88 | rpn_proposal=dict( 89 | nms_across_levels=False, 90 | nms_pre=2000, 91 | nms_post=1000, 92 | max_num=1000, 93 | nms_thr=0.7, 94 | min_bbox_size=0), 95 | rcnn=dict( 96 | assigner=dict( 97 | type='MaxIoUAssigner', 98 | pos_iou_thr=0.5, 99 | neg_iou_thr=0.5, 100 | min_pos_iou=0.5, 101 | match_low_quality=True, 102 | ignore_iof_thr=-1), 103 | sampler=dict( 104 | type='RandomSampler', 105 | num=512, 106 | pos_fraction=0.25, 107 | neg_pos_ub=-1, 108 | add_gt_as_proposals=True), 109 | mask_size=28, 110 | pos_weight=-1, 111 | debug=False)), 112 | test_cfg=dict( 113 | rpn=dict( 114 | nms_across_levels=False, 115 | nms_pre=1000, 116 | nms_post=1000, 117 | max_num=1000, 118 | nms_thr=0.7, 119 | min_bbox_size=0), 120 | rcnn=dict( 121 | score_thr=0.05, 122 | nms=dict(type='nms', iou_threshold=0.5), 123 | max_per_img=100, 124 | mask_thr_binary=0.5))) 125 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/scannet_seg-3d-20class.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'ScanNetSegDataset' 3 | data_root = './data/scannet/' 4 | class_names = ('wall', 'floor', 'cabinet', 'bed', 'chair', 'sofa', 'table', 5 | 'door', 'window', 'bookshelf', 'picture', 'counter', 'desk', 6 | 'curtain', 'refrigerator', 'showercurtrain', 'toilet', 'sink', 7 | 'bathtub', 'otherfurniture') 8 | num_points = 8192 9 | train_pipeline = [ 10 | dict( 11 | type='LoadPointsFromFile', 12 | coord_type='DEPTH', 13 | shift_height=False, 14 | use_color=True, 15 | load_dim=6, 16 | use_dim=[0, 1, 2, 3, 4, 5]), 17 | dict( 18 | type='LoadAnnotations3D', 19 | with_bbox_3d=False, 20 | with_label_3d=False, 21 | with_mask_3d=False, 22 | with_seg_3d=True), 23 | dict( 24 | type='PointSegClassMapping', 25 | valid_cat_ids=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 26 | 33, 34, 36, 39), 27 | max_cat_id=40), 28 | dict( 29 | type='IndoorPatchPointSample', 30 | num_points=num_points, 31 | block_size=1.5, 32 | ignore_index=len(class_names), 33 | use_normalized_coord=False, 34 | enlarge_size=0.2, 35 | min_unique_num=None), 36 | dict(type='NormalizePointsColor', color_mean=None), 37 | dict(type='DefaultFormatBundle3D', class_names=class_names), 38 | dict(type='Collect3D', keys=['points', 'pts_semantic_mask']) 39 | ] 40 | test_pipeline = [ 41 | dict( 42 | type='LoadPointsFromFile', 43 | coord_type='DEPTH', 44 | shift_height=False, 45 | use_color=True, 46 | load_dim=6, 47 | use_dim=[0, 1, 2, 3, 4, 5]), 48 | dict(type='NormalizePointsColor', color_mean=None), 49 | dict( 50 | # a wrapper in order to successfully call test function 51 | # actually we don't perform test-time-aug 52 | 
type='MultiScaleFlipAug3D', 53 | img_scale=(1333, 800), 54 | pts_scale_ratio=1, 55 | flip=False, 56 | transforms=[ 57 | dict( 58 | type='GlobalRotScaleTrans', 59 | rot_range=[0, 0], 60 | scale_ratio_range=[1., 1.], 61 | translation_std=[0, 0, 0]), 62 | dict( 63 | type='RandomFlip3D', 64 | sync_2d=False, 65 | flip_ratio_bev_horizontal=0.0, 66 | flip_ratio_bev_vertical=0.0), 67 | dict( 68 | type='DefaultFormatBundle3D', 69 | class_names=class_names, 70 | with_label=False), 71 | dict(type='Collect3D', keys=['points']) 72 | ]) 73 | ] 74 | # construct a pipeline for data and gt loading in show function 75 | # please keep its loading function consistent with test_pipeline (e.g. client) 76 | # we need to load gt seg_mask! 77 | eval_pipeline = [ 78 | dict( 79 | type='LoadPointsFromFile', 80 | coord_type='DEPTH', 81 | shift_height=False, 82 | use_color=True, 83 | load_dim=6, 84 | use_dim=[0, 1, 2, 3, 4, 5]), 85 | dict( 86 | type='LoadAnnotations3D', 87 | with_bbox_3d=False, 88 | with_label_3d=False, 89 | with_mask_3d=False, 90 | with_seg_3d=True), 91 | dict( 92 | type='PointSegClassMapping', 93 | valid_cat_ids=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 94 | 33, 34, 36, 39), 95 | max_cat_id=40), 96 | dict( 97 | type='DefaultFormatBundle3D', 98 | with_label=False, 99 | class_names=class_names), 100 | dict(type='Collect3D', keys=['points', 'pts_semantic_mask']) 101 | ] 102 | 103 | data = dict( 104 | samples_per_gpu=8, 105 | workers_per_gpu=4, 106 | train=dict( 107 | type=dataset_type, 108 | data_root=data_root, 109 | ann_file=data_root + 'scannet_infos_train.pkl', 110 | pipeline=train_pipeline, 111 | classes=class_names, 112 | test_mode=False, 113 | ignore_index=len(class_names), 114 | scene_idxs=data_root + 'seg_info/train_resampled_scene_idxs.npy'), 115 | val=dict( 116 | type=dataset_type, 117 | data_root=data_root, 118 | ann_file=data_root + 'scannet_infos_val.pkl', 119 | pipeline=test_pipeline, 120 | classes=class_names, 121 | test_mode=True, 122 | ignore_index=len(class_names)), 123 | test=dict( 124 | type=dataset_type, 125 | data_root=data_root, 126 | ann_file=data_root + 'scannet_infos_val.pkl', 127 | pipeline=test_pipeline, 128 | classes=class_names, 129 | test_mode=True, 130 | ignore_index=len(class_names))) 131 | 132 | evaluation = dict(pipeline=eval_pipeline) 133 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/s3dis_seg-3d-13class.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'S3DISSegDataset' 3 | data_root = './data/s3dis/' 4 | class_names = ('ceiling', 'floor', 'wall', 'beam', 'column', 'window', 'door', 5 | 'table', 'chair', 'sofa', 'bookcase', 'board', 'clutter') 6 | num_points = 4096 7 | train_area = [1, 2, 3, 4, 6] 8 | test_area = 5 9 | train_pipeline = [ 10 | dict( 11 | type='LoadPointsFromFile', 12 | coord_type='DEPTH', 13 | shift_height=False, 14 | use_color=True, 15 | load_dim=6, 16 | use_dim=[0, 1, 2, 3, 4, 5]), 17 | dict( 18 | type='LoadAnnotations3D', 19 | with_bbox_3d=False, 20 | with_label_3d=False, 21 | with_mask_3d=False, 22 | with_seg_3d=True), 23 | dict( 24 | type='PointSegClassMapping', 25 | valid_cat_ids=tuple(range(len(class_names))), 26 | max_cat_id=13), 27 | dict( 28 | type='IndoorPatchPointSample', 29 | num_points=num_points, 30 | block_size=1.0, 31 | ignore_index=len(class_names), 32 | use_normalized_coord=True, 33 | enlarge_size=0.2, 34 | min_unique_num=None), 35 | dict(type='NormalizePointsColor', 
color_mean=None), 36 | dict(type='DefaultFormatBundle3D', class_names=class_names), 37 | dict(type='Collect3D', keys=['points', 'pts_semantic_mask']) 38 | ] 39 | test_pipeline = [ 40 | dict( 41 | type='LoadPointsFromFile', 42 | coord_type='DEPTH', 43 | shift_height=False, 44 | use_color=True, 45 | load_dim=6, 46 | use_dim=[0, 1, 2, 3, 4, 5]), 47 | dict(type='NormalizePointsColor', color_mean=None), 48 | dict( 49 | # a wrapper in order to successfully call test function 50 | # actually we don't perform test-time-aug 51 | type='MultiScaleFlipAug3D', 52 | img_scale=(1333, 800), 53 | pts_scale_ratio=1, 54 | flip=False, 55 | transforms=[ 56 | dict( 57 | type='GlobalRotScaleTrans', 58 | rot_range=[0, 0], 59 | scale_ratio_range=[1., 1.], 60 | translation_std=[0, 0, 0]), 61 | dict( 62 | type='RandomFlip3D', 63 | sync_2d=False, 64 | flip_ratio_bev_horizontal=0.0, 65 | flip_ratio_bev_vertical=0.0), 66 | dict( 67 | type='DefaultFormatBundle3D', 68 | class_names=class_names, 69 | with_label=False), 70 | dict(type='Collect3D', keys=['points']) 71 | ]) 72 | ] 73 | # construct a pipeline for data and gt loading in show function 74 | # please keep its loading function consistent with test_pipeline (e.g. client) 75 | # we need to load gt seg_mask! 76 | eval_pipeline = [ 77 | dict( 78 | type='LoadPointsFromFile', 79 | coord_type='DEPTH', 80 | shift_height=False, 81 | use_color=True, 82 | load_dim=6, 83 | use_dim=[0, 1, 2, 3, 4, 5]), 84 | dict( 85 | type='LoadAnnotations3D', 86 | with_bbox_3d=False, 87 | with_label_3d=False, 88 | with_mask_3d=False, 89 | with_seg_3d=True), 90 | dict( 91 | type='PointSegClassMapping', 92 | valid_cat_ids=tuple(range(len(class_names))), 93 | max_cat_id=13), 94 | dict( 95 | type='DefaultFormatBundle3D', 96 | with_label=False, 97 | class_names=class_names), 98 | dict(type='Collect3D', keys=['points', 'pts_semantic_mask']) 99 | ] 100 | 101 | data = dict( 102 | samples_per_gpu=8, 103 | workers_per_gpu=4, 104 | # train on area 1, 2, 3, 4, 6 105 | # test on area 5 106 | train=dict( 107 | type=dataset_type, 108 | data_root=data_root, 109 | ann_files=[ 110 | data_root + f's3dis_infos_Area_{i}.pkl' for i in train_area 111 | ], 112 | pipeline=train_pipeline, 113 | classes=class_names, 114 | test_mode=False, 115 | ignore_index=len(class_names), 116 | scene_idxs=[ 117 | data_root + f'seg_info/Area_{i}_resampled_scene_idxs.npy' 118 | for i in train_area 119 | ]), 120 | val=dict( 121 | type=dataset_type, 122 | data_root=data_root, 123 | ann_files=data_root + f's3dis_infos_Area_{test_area}.pkl', 124 | pipeline=test_pipeline, 125 | classes=class_names, 126 | test_mode=True, 127 | ignore_index=len(class_names), 128 | scene_idxs=data_root + 129 | f'seg_info/Area_{test_area}_resampled_scene_idxs.npy'), 130 | test=dict( 131 | type=dataset_type, 132 | data_root=data_root, 133 | ann_files=data_root + f's3dis_infos_Area_{test_area}.pkl', 134 | pipeline=test_pipeline, 135 | classes=class_names, 136 | test_mode=True, 137 | ignore_index=len(class_names))) 138 | 139 | evaluation = dict(pipeline=eval_pipeline) 140 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/coders/nms_free_coder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from mmdet.core.bbox import BaseBBoxCoder 4 | from mmdet.core.bbox.builder import BBOX_CODERS 5 | from projects.mmdet3d_plugin.core.bbox.util import denormalize_bbox 6 | import numpy as np 7 | 8 | 9 | @BBOX_CODERS.register_module() 10 | class 
NMSFreeCoder(BaseBBoxCoder): 11 | """Bbox coder for NMS-free detector. 12 | Args: 13 | pc_range (list[float]): Range of point cloud. 14 | post_center_range (list[float]): Limit of the center. 15 | Default: None. 16 | max_num (int): Max number to be kept. Default: 100. 17 | score_threshold (float): Threshold to filter boxes based on score. 18 | Default: None. 19 | code_size (int): Code size of bboxes. Default: 9 20 | """ 21 | 22 | def __init__(self, 23 | pc_range, 24 | voxel_size=None, 25 | post_center_range=None, 26 | max_num=100, 27 | score_threshold=None, 28 | num_classes=10): 29 | self.pc_range = pc_range 30 | self.voxel_size = voxel_size 31 | self.post_center_range = post_center_range 32 | self.max_num = max_num 33 | self.score_threshold = score_threshold 34 | self.num_classes = num_classes 35 | 36 | def encode(self): 37 | 38 | pass 39 | 40 | def decode_single(self, cls_scores, bbox_preds): 41 | """Decode bboxes. 42 | Args: 43 | cls_scores (Tensor): Outputs from the classification head, \ 44 | shape [num_query, cls_out_channels]. Note \ 45 | cls_out_channels should includes background. 46 | bbox_preds (Tensor): Outputs from the regression \ 47 | head with normalized coordinate format (cx, cy, w, l, cz, h, rot_sine, rot_cosine, vx, vy). \ 48 | Shape [num_query, 9]. 49 | Returns: 50 | list[dict]: Decoded boxes. 51 | """ 52 | max_num = self.max_num 53 | 54 | cls_scores = cls_scores.sigmoid() 55 | scores, indexs = cls_scores.view(-1).topk(max_num) 56 | labels = indexs % self.num_classes 57 | bbox_index = indexs // self.num_classes 58 | bbox_preds = bbox_preds[bbox_index] 59 | 60 | final_box_preds = denormalize_bbox(bbox_preds, self.pc_range) 61 | final_scores = scores 62 | final_preds = labels 63 | 64 | # use score threshold 65 | if self.score_threshold is not None: 66 | thresh_mask = final_scores > self.score_threshold 67 | tmp_score = self.score_threshold 68 | while thresh_mask.sum() == 0: 69 | tmp_score *= 0.9 70 | if tmp_score < 0.01: 71 | thresh_mask = final_scores > -1 72 | break 73 | thresh_mask = final_scores >= tmp_score 74 | 75 | if self.post_center_range is not None: 76 | self.post_center_range = torch.tensor( 77 | self.post_center_range, device=scores.device) 78 | mask = (final_box_preds[..., :3] >= 79 | self.post_center_range[:3]).all(1) 80 | mask &= (final_box_preds[..., :3] <= 81 | self.post_center_range[3:]).all(1) 82 | 83 | if self.score_threshold: 84 | mask &= thresh_mask 85 | 86 | boxes3d = final_box_preds[mask] 87 | scores = final_scores[mask] 88 | 89 | labels = final_preds[mask] 90 | predictions_dict = { 91 | 'bboxes': boxes3d, 92 | 'scores': scores, 93 | 'labels': labels 94 | } 95 | 96 | else: 97 | raise NotImplementedError( 98 | 'Need to reorganize output as a batch, only ' 99 | 'support post_center_range is not None for now!') 100 | return predictions_dict 101 | 102 | def decode(self, preds_dicts): 103 | """Decode bboxes. 104 | Args: 105 | all_cls_scores (Tensor): Outputs from the classification head, \ 106 | shape [nb_dec, bs, num_query, cls_out_channels]. Note \ 107 | cls_out_channels should includes background. 108 | all_bbox_preds (Tensor): Sigmoid outputs from the regression \ 109 | head with normalized coordinate format (cx, cy, w, l, cz, h, rot_sine, rot_cosine, vx, vy). \ 110 | Shape [nb_dec, bs, num_query, 9]. 111 | Returns: 112 | list[dict]: Decoded boxes. 
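        Example (illustrative only; the tensors here are placeholders, not
        produced in this file):
            >>> preds = dict(all_cls_scores=cls_scores, all_bbox_preds=bbox_preds)
            >>> results = coder.decode(preds)
            >>> sorted(results[0].keys())   # ['bboxes', 'labels', 'scores']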
113 | """ 114 | all_cls_scores = preds_dicts['all_cls_scores'][-1] 115 | all_bbox_preds = preds_dicts['all_bbox_preds'][-1] 116 | 117 | batch_size = all_cls_scores.size()[0] 118 | predictions_list = [] 119 | for i in range(batch_size): 120 | predictions_list.append(self.decode_single(all_cls_scores[i], all_bbox_preds[i])) 121 | return predictions_list 122 | 123 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/kitti-3d-car.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'KittiDataset' 3 | data_root = 'data/kitti/' 4 | class_names = ['Car'] 5 | point_cloud_range = [0, -40, -3, 70.4, 40, 1] 6 | input_modality = dict(use_lidar=True, use_camera=False) 7 | db_sampler = dict( 8 | data_root=data_root, 9 | info_path=data_root + 'kitti_dbinfos_train.pkl', 10 | rate=1.0, 11 | prepare=dict(filter_by_difficulty=[-1], filter_by_min_points=dict(Car=5)), 12 | classes=class_names, 13 | sample_groups=dict(Car=15)) 14 | 15 | file_client_args = dict(backend='disk') 16 | # Uncomment the following if use ceph or other file clients. 17 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient 18 | # for more details. 19 | # file_client_args = dict( 20 | # backend='petrel', path_mapping=dict(data='s3://kitti_data/')) 21 | 22 | train_pipeline = [ 23 | dict( 24 | type='LoadPointsFromFile', 25 | coord_type='LIDAR', 26 | load_dim=4, 27 | use_dim=4, 28 | file_client_args=file_client_args), 29 | dict( 30 | type='LoadAnnotations3D', 31 | with_bbox_3d=True, 32 | with_label_3d=True, 33 | file_client_args=file_client_args), 34 | dict(type='ObjectSample', db_sampler=db_sampler), 35 | dict( 36 | type='ObjectNoise', 37 | num_try=100, 38 | translation_std=[1.0, 1.0, 0.5], 39 | global_rot_range=[0.0, 0.0], 40 | rot_range=[-0.78539816, 0.78539816]), 41 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), 42 | dict( 43 | type='GlobalRotScaleTrans', 44 | rot_range=[-0.78539816, 0.78539816], 45 | scale_ratio_range=[0.95, 1.05]), 46 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), 47 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), 48 | dict(type='PointShuffle'), 49 | dict(type='DefaultFormatBundle3D', class_names=class_names), 50 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 51 | ] 52 | test_pipeline = [ 53 | dict( 54 | type='LoadPointsFromFile', 55 | coord_type='LIDAR', 56 | load_dim=4, 57 | use_dim=4, 58 | file_client_args=file_client_args), 59 | dict( 60 | type='MultiScaleFlipAug3D', 61 | img_scale=(1333, 800), 62 | pts_scale_ratio=1, 63 | flip=False, 64 | transforms=[ 65 | dict( 66 | type='GlobalRotScaleTrans', 67 | rot_range=[0, 0], 68 | scale_ratio_range=[1., 1.], 69 | translation_std=[0, 0, 0]), 70 | dict(type='RandomFlip3D'), 71 | dict( 72 | type='PointsRangeFilter', point_cloud_range=point_cloud_range), 73 | dict( 74 | type='DefaultFormatBundle3D', 75 | class_names=class_names, 76 | with_label=False), 77 | dict(type='Collect3D', keys=['points']) 78 | ]) 79 | ] 80 | # construct a pipeline for data and gt loading in show function 81 | # please keep its loading function consistent with test_pipeline (e.g. 
client) 82 | eval_pipeline = [ 83 | dict( 84 | type='LoadPointsFromFile', 85 | coord_type='LIDAR', 86 | load_dim=4, 87 | use_dim=4, 88 | file_client_args=file_client_args), 89 | dict( 90 | type='DefaultFormatBundle3D', 91 | class_names=class_names, 92 | with_label=False), 93 | dict(type='Collect3D', keys=['points']) 94 | ] 95 | 96 | data = dict( 97 | samples_per_gpu=6, 98 | workers_per_gpu=4, 99 | train=dict( 100 | type='RepeatDataset', 101 | times=2, 102 | dataset=dict( 103 | type=dataset_type, 104 | data_root=data_root, 105 | ann_file=data_root + 'kitti_infos_train.pkl', 106 | split='training', 107 | pts_prefix='velodyne_reduced', 108 | pipeline=train_pipeline, 109 | modality=input_modality, 110 | classes=class_names, 111 | test_mode=False, 112 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset 113 | # and box_type_3d='Depth' in sunrgbd and scannet dataset. 114 | box_type_3d='LiDAR')), 115 | val=dict( 116 | type=dataset_type, 117 | data_root=data_root, 118 | ann_file=data_root + 'kitti_infos_val.pkl', 119 | split='training', 120 | pts_prefix='velodyne_reduced', 121 | pipeline=test_pipeline, 122 | modality=input_modality, 123 | classes=class_names, 124 | test_mode=True, 125 | box_type_3d='LiDAR'), 126 | test=dict( 127 | type=dataset_type, 128 | data_root=data_root, 129 | ann_file=data_root + 'kitti_infos_val.pkl', 130 | split='training', 131 | pts_prefix='velodyne_reduced', 132 | pipeline=test_pipeline, 133 | modality=input_modality, 134 | classes=class_names, 135 | test_mode=True, 136 | box_type_3d='LiDAR')) 137 | 138 | evaluation = dict(interval=1, pipeline=eval_pipeline) 139 | -------------------------------------------------------------------------------- /projects/configs/datasets/custom_lyft-3d.py: -------------------------------------------------------------------------------- 1 | # If point cloud range is changed, the models should also change their point 2 | # cloud range accordingly 3 | point_cloud_range = [-80, -80, -5, 80, 80, 3] 4 | # For Lyft we usually do 9-class detection 5 | class_names = [ 6 | 'car', 'truck', 'bus', 'emergency_vehicle', 'other_vehicle', 'motorcycle', 7 | 'bicycle', 'pedestrian', 'animal' 8 | ] 9 | dataset_type = 'CustomLyftDataset' 10 | data_root = 'data/lyft/' 11 | # Input modality for Lyft dataset, this is consistent with the submission 12 | # format which requires the information in input_modality. 13 | input_modality = dict( 14 | use_lidar=True, 15 | use_camera=False, 16 | use_radar=False, 17 | use_map=False, 18 | use_external=True) 19 | file_client_args = dict(backend='disk') 20 | # Uncomment the following if use ceph or other file clients. 21 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient 22 | # for more details. 
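# For intuition (a sketch of the intent only, not verified against a live
# Ceph/petrel setup): with the mapping below, mmcv's petrel backend rewrites
# matching path prefixes before fetching, roughly
#   for src, dst in path_mapping.items():
#       filepath = filepath.replace(src, dst)
# so a path starting with 'data/lyft/' is looked up under 's3://lyft/lyft/'.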
23 | # file_client_args = dict( 24 | # backend='petrel', 25 | # path_mapping=dict({ 26 | # './data/lyft/': 's3://lyft/lyft/', 27 | # 'data/lyft/': 's3://lyft/lyft/' 28 | # })) 29 | train_pipeline = [ 30 | dict( 31 | type='LoadPointsFromFile', 32 | coord_type='LIDAR', 33 | load_dim=5, 34 | use_dim=5, 35 | file_client_args=file_client_args), 36 | dict( 37 | type='LoadPointsFromMultiSweeps', 38 | sweeps_num=10, 39 | file_client_args=file_client_args), 40 | dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True), 41 | dict( 42 | type='GlobalRotScaleTrans', 43 | rot_range=[-0.3925, 0.3925], 44 | scale_ratio_range=[0.95, 1.05], 45 | translation_std=[0, 0, 0]), 46 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), 47 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), 48 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), 49 | dict(type='PointShuffle'), 50 | dict(type='DefaultFormatBundle3D', class_names=class_names), 51 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 52 | ] 53 | test_pipeline = [ 54 | dict( 55 | type='LoadPointsFromFile', 56 | coord_type='LIDAR', 57 | load_dim=5, 58 | use_dim=5, 59 | file_client_args=file_client_args), 60 | dict( 61 | type='LoadPointsFromMultiSweeps', 62 | sweeps_num=10, 63 | file_client_args=file_client_args), 64 | dict( 65 | type='MultiScaleFlipAug3D', 66 | img_scale=(1333, 800), 67 | pts_scale_ratio=1, 68 | flip=False, 69 | transforms=[ 70 | dict( 71 | type='GlobalRotScaleTrans', 72 | rot_range=[0, 0], 73 | scale_ratio_range=[1., 1.], 74 | translation_std=[0, 0, 0]), 75 | dict(type='RandomFlip3D'), 76 | dict( 77 | type='PointsRangeFilter', point_cloud_range=point_cloud_range), 78 | dict( 79 | type='DefaultFormatBundle3D', 80 | class_names=class_names, 81 | with_label=False), 82 | dict(type='Collect3D', keys=['points']) 83 | ]) 84 | ] 85 | # construct a pipeline for data and gt loading in show function 86 | # please keep its loading function consistent with test_pipeline (e.g. client) 87 | eval_pipeline = [ 88 | dict( 89 | type='LoadPointsFromFile', 90 | coord_type='LIDAR', 91 | load_dim=5, 92 | use_dim=5, 93 | file_client_args=file_client_args), 94 | dict( 95 | type='LoadPointsFromMultiSweeps', 96 | sweeps_num=10, 97 | file_client_args=file_client_args), 98 | dict( 99 | type='DefaultFormatBundle3D', 100 | class_names=class_names, 101 | with_label=False), 102 | dict(type='Collect3D', keys=['points']) 103 | ] 104 | 105 | data = dict( 106 | samples_per_gpu=2, 107 | workers_per_gpu=2, 108 | train=dict( 109 | type=dataset_type, 110 | data_root=data_root, 111 | ann_file=data_root + 'lyft_infos_train.pkl', 112 | pipeline=train_pipeline, 113 | classes=class_names, 114 | modality=input_modality, 115 | test_mode=False), 116 | val=dict( 117 | type=dataset_type, 118 | data_root=data_root, 119 | ann_file=data_root + 'lyft_infos_val.pkl', 120 | pipeline=test_pipeline, 121 | classes=class_names, 122 | modality=input_modality, 123 | test_mode=True), 124 | test=dict( 125 | type=dataset_type, 126 | data_root=data_root, 127 | ann_file=data_root + 'lyft_infos_val.pkl', 128 | pipeline=test_pipeline, 129 | classes=class_names, 130 | modality=input_modality, 131 | test_mode=True)) 132 | # For Lyft dataset, we usually evaluate the model at the end of training. 133 | # Since the models are trained by 24 epochs by default, we set evaluation 134 | # interval to be 24. Please change the interval accordingly if you do not 135 | # use a default schedule. 
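# For example (hypothetical numbers, adjust to your own schedule): with a
# 12-epoch schedule the line below would become
#   evaluation = dict(interval=12, pipeline=eval_pipeline)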
136 | evaluation = dict(interval=24, pipeline=eval_pipeline) -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/lyft-3d.py: -------------------------------------------------------------------------------- 1 | # If point cloud range is changed, the models should also change their point 2 | # cloud range accordingly 3 | point_cloud_range = [-80, -80, -5, 80, 80, 3] 4 | # For Lyft we usually do 9-class detection 5 | class_names = [ 6 | 'car', 'truck', 'bus', 'emergency_vehicle', 'other_vehicle', 'motorcycle', 7 | 'bicycle', 'pedestrian', 'animal' 8 | ] 9 | dataset_type = 'LyftDataset' 10 | data_root = 'data/lyft/' 11 | # Input modality for Lyft dataset, this is consistent with the submission 12 | # format which requires the information in input_modality. 13 | input_modality = dict( 14 | use_lidar=True, 15 | use_camera=False, 16 | use_radar=False, 17 | use_map=False, 18 | use_external=False) 19 | file_client_args = dict(backend='disk') 20 | # Uncomment the following if use ceph or other file clients. 21 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient 22 | # for more details. 23 | # file_client_args = dict( 24 | # backend='petrel', 25 | # path_mapping=dict({ 26 | # './data/lyft/': 's3://lyft/lyft/', 27 | # 'data/lyft/': 's3://lyft/lyft/' 28 | # })) 29 | train_pipeline = [ 30 | dict( 31 | type='LoadPointsFromFile', 32 | coord_type='LIDAR', 33 | load_dim=5, 34 | use_dim=5, 35 | file_client_args=file_client_args), 36 | dict( 37 | type='LoadPointsFromMultiSweeps', 38 | sweeps_num=10, 39 | file_client_args=file_client_args), 40 | dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True), 41 | dict( 42 | type='GlobalRotScaleTrans', 43 | rot_range=[-0.3925, 0.3925], 44 | scale_ratio_range=[0.95, 1.05], 45 | translation_std=[0, 0, 0]), 46 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), 47 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), 48 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), 49 | dict(type='PointShuffle'), 50 | dict(type='DefaultFormatBundle3D', class_names=class_names), 51 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 52 | ] 53 | test_pipeline = [ 54 | dict( 55 | type='LoadPointsFromFile', 56 | coord_type='LIDAR', 57 | load_dim=5, 58 | use_dim=5, 59 | file_client_args=file_client_args), 60 | dict( 61 | type='LoadPointsFromMultiSweeps', 62 | sweeps_num=10, 63 | file_client_args=file_client_args), 64 | dict( 65 | type='MultiScaleFlipAug3D', 66 | img_scale=(1333, 800), 67 | pts_scale_ratio=1, 68 | flip=False, 69 | transforms=[ 70 | dict( 71 | type='GlobalRotScaleTrans', 72 | rot_range=[0, 0], 73 | scale_ratio_range=[1., 1.], 74 | translation_std=[0, 0, 0]), 75 | dict(type='RandomFlip3D'), 76 | dict( 77 | type='PointsRangeFilter', point_cloud_range=point_cloud_range), 78 | dict( 79 | type='DefaultFormatBundle3D', 80 | class_names=class_names, 81 | with_label=False), 82 | dict(type='Collect3D', keys=['points']) 83 | ]) 84 | ] 85 | # construct a pipeline for data and gt loading in show function 86 | # please keep its loading function consistent with test_pipeline (e.g. 
client) 87 | eval_pipeline = [ 88 | dict( 89 | type='LoadPointsFromFile', 90 | coord_type='LIDAR', 91 | load_dim=5, 92 | use_dim=5, 93 | file_client_args=file_client_args), 94 | dict( 95 | type='LoadPointsFromMultiSweeps', 96 | sweeps_num=10, 97 | file_client_args=file_client_args), 98 | dict( 99 | type='DefaultFormatBundle3D', 100 | class_names=class_names, 101 | with_label=False), 102 | dict(type='Collect3D', keys=['points']) 103 | ] 104 | 105 | data = dict( 106 | samples_per_gpu=2, 107 | workers_per_gpu=2, 108 | train=dict( 109 | type=dataset_type, 110 | data_root=data_root, 111 | ann_file=data_root + 'lyft_infos_train.pkl', 112 | pipeline=train_pipeline, 113 | classes=class_names, 114 | modality=input_modality, 115 | test_mode=False), 116 | val=dict( 117 | type=dataset_type, 118 | data_root=data_root, 119 | ann_file=data_root + 'lyft_infos_val.pkl', 120 | pipeline=test_pipeline, 121 | classes=class_names, 122 | modality=input_modality, 123 | test_mode=True), 124 | test=dict( 125 | type=dataset_type, 126 | data_root=data_root, 127 | ann_file=data_root + 'lyft_infos_test.pkl', 128 | pipeline=test_pipeline, 129 | classes=class_names, 130 | modality=input_modality, 131 | test_mode=True)) 132 | # For Lyft dataset, we usually evaluate the model at the end of training. 133 | # Since the models are trained by 24 epochs by default, we set evaluation 134 | # interval to be 24. Please change the interval accordingly if you do not 135 | # use a default schedule. 136 | evaluation = dict(interval=24, pipeline=eval_pipeline) 137 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/range100_lyft-3d.py: -------------------------------------------------------------------------------- 1 | # If point cloud range is changed, the models should also change their point 2 | # cloud range accordingly 3 | point_cloud_range = [-100, -100, -5, 100, 100, 3] 4 | # For Lyft we usually do 9-class detection 5 | class_names = [ 6 | 'car', 'truck', 'bus', 'emergency_vehicle', 'other_vehicle', 'motorcycle', 7 | 'bicycle', 'pedestrian', 'animal' 8 | ] 9 | dataset_type = 'LyftDataset' 10 | data_root = 'data/lyft/' 11 | # Input modality for Lyft dataset, this is consistent with the submission 12 | # format which requires the information in input_modality. 13 | input_modality = dict( 14 | use_lidar=True, 15 | use_camera=False, 16 | use_radar=False, 17 | use_map=False, 18 | use_external=False) 19 | file_client_args = dict(backend='disk') 20 | # Uncomment the following if use ceph or other file clients. 21 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient 22 | # for more details. 
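# To sanity-check what this _base_ file expands to, it can be loaded on its
# own (illustrative sketch, assuming mmcv is importable from the repo root):
#   from mmcv import Config
#   cfg = Config.fromfile('projects/configs/_base_/datasets/range100_lyft-3d.py')
#   print(cfg.data.train)           # fully-expanded training dataset settings
#   print(cfg.evaluation.interval)  # 24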
23 | # file_client_args = dict( 24 | # backend='petrel', 25 | # path_mapping=dict({ 26 | # './data/lyft/': 's3://lyft/lyft/', 27 | # 'data/lyft/': 's3://lyft/lyft/' 28 | # })) 29 | train_pipeline = [ 30 | dict( 31 | type='LoadPointsFromFile', 32 | coord_type='LIDAR', 33 | load_dim=5, 34 | use_dim=5, 35 | file_client_args=file_client_args), 36 | dict( 37 | type='LoadPointsFromMultiSweeps', 38 | sweeps_num=10, 39 | file_client_args=file_client_args), 40 | dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True), 41 | dict( 42 | type='GlobalRotScaleTrans', 43 | rot_range=[-0.3925, 0.3925], 44 | scale_ratio_range=[0.95, 1.05], 45 | translation_std=[0, 0, 0]), 46 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), 47 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), 48 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), 49 | dict(type='PointShuffle'), 50 | dict(type='DefaultFormatBundle3D', class_names=class_names), 51 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 52 | ] 53 | test_pipeline = [ 54 | dict( 55 | type='LoadPointsFromFile', 56 | coord_type='LIDAR', 57 | load_dim=5, 58 | use_dim=5, 59 | file_client_args=file_client_args), 60 | dict( 61 | type='LoadPointsFromMultiSweeps', 62 | sweeps_num=10, 63 | file_client_args=file_client_args), 64 | dict( 65 | type='MultiScaleFlipAug3D', 66 | img_scale=(1333, 800), 67 | pts_scale_ratio=1, 68 | flip=False, 69 | transforms=[ 70 | dict( 71 | type='GlobalRotScaleTrans', 72 | rot_range=[0, 0], 73 | scale_ratio_range=[1., 1.], 74 | translation_std=[0, 0, 0]), 75 | dict(type='RandomFlip3D'), 76 | dict( 77 | type='PointsRangeFilter', point_cloud_range=point_cloud_range), 78 | dict( 79 | type='DefaultFormatBundle3D', 80 | class_names=class_names, 81 | with_label=False), 82 | dict(type='Collect3D', keys=['points']) 83 | ]) 84 | ] 85 | # construct a pipeline for data and gt loading in show function 86 | # please keep its loading function consistent with test_pipeline (e.g. client) 87 | eval_pipeline = [ 88 | dict( 89 | type='LoadPointsFromFile', 90 | coord_type='LIDAR', 91 | load_dim=5, 92 | use_dim=5, 93 | file_client_args=file_client_args), 94 | dict( 95 | type='LoadPointsFromMultiSweeps', 96 | sweeps_num=10, 97 | file_client_args=file_client_args), 98 | dict( 99 | type='DefaultFormatBundle3D', 100 | class_names=class_names, 101 | with_label=False), 102 | dict(type='Collect3D', keys=['points']) 103 | ] 104 | 105 | data = dict( 106 | samples_per_gpu=2, 107 | workers_per_gpu=2, 108 | train=dict( 109 | type=dataset_type, 110 | data_root=data_root, 111 | ann_file=data_root + 'lyft_infos_train.pkl', 112 | pipeline=train_pipeline, 113 | classes=class_names, 114 | modality=input_modality, 115 | test_mode=False), 116 | val=dict( 117 | type=dataset_type, 118 | data_root=data_root, 119 | ann_file=data_root + 'lyft_infos_val.pkl', 120 | pipeline=test_pipeline, 121 | classes=class_names, 122 | modality=input_modality, 123 | test_mode=True), 124 | test=dict( 125 | type=dataset_type, 126 | data_root=data_root, 127 | ann_file=data_root + 'lyft_infos_test.pkl', 128 | pipeline=test_pipeline, 129 | classes=class_names, 130 | modality=input_modality, 131 | test_mode=True)) 132 | # For Lyft dataset, we usually evaluate the model at the end of training. 133 | # Since the models are trained by 24 epochs by default, we set evaluation 134 | # interval to be 24. Please change the interval accordingly if you do not 135 | # use a default schedule. 
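# Alternatively, a config that inherits this file via _base_ can override just
# this key and keep everything else, e.g. (illustrative sketch; the relative
# path depends on where the derived config lives):
#   _base_ = ['../_base_/datasets/range100_lyft-3d.py']
#   evaluation = dict(interval=2)   # merged with this file; pipeline is kept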
136 | evaluation = dict(interval=24, pipeline=eval_pipeline) 137 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/kitti-3d-3class.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'KittiDataset' 3 | data_root = 'data/kitti/' 4 | class_names = ['Pedestrian', 'Cyclist', 'Car'] 5 | point_cloud_range = [0, -40, -3, 70.4, 40, 1] 6 | input_modality = dict(use_lidar=True, use_camera=False) 7 | db_sampler = dict( 8 | data_root=data_root, 9 | info_path=data_root + 'kitti_dbinfos_train.pkl', 10 | rate=1.0, 11 | prepare=dict( 12 | filter_by_difficulty=[-1], 13 | filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)), 14 | classes=class_names, 15 | sample_groups=dict(Car=12, Pedestrian=6, Cyclist=6)) 16 | 17 | file_client_args = dict(backend='disk') 18 | # Uncomment the following if use ceph or other file clients. 19 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient 20 | # for more details. 21 | # file_client_args = dict( 22 | # backend='petrel', path_mapping=dict(data='s3://kitti_data/')) 23 | 24 | train_pipeline = [ 25 | dict( 26 | type='LoadPointsFromFile', 27 | coord_type='LIDAR', 28 | load_dim=4, 29 | use_dim=4, 30 | file_client_args=file_client_args), 31 | dict( 32 | type='LoadAnnotations3D', 33 | with_bbox_3d=True, 34 | with_label_3d=True, 35 | file_client_args=file_client_args), 36 | dict(type='ObjectSample', db_sampler=db_sampler), 37 | dict( 38 | type='ObjectNoise', 39 | num_try=100, 40 | translation_std=[1.0, 1.0, 0.5], 41 | global_rot_range=[0.0, 0.0], 42 | rot_range=[-0.78539816, 0.78539816]), 43 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), 44 | dict( 45 | type='GlobalRotScaleTrans', 46 | rot_range=[-0.78539816, 0.78539816], 47 | scale_ratio_range=[0.95, 1.05]), 48 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), 49 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), 50 | dict(type='PointShuffle'), 51 | dict(type='DefaultFormatBundle3D', class_names=class_names), 52 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 53 | ] 54 | test_pipeline = [ 55 | dict( 56 | type='LoadPointsFromFile', 57 | coord_type='LIDAR', 58 | load_dim=4, 59 | use_dim=4, 60 | file_client_args=file_client_args), 61 | dict( 62 | type='MultiScaleFlipAug3D', 63 | img_scale=(1333, 800), 64 | pts_scale_ratio=1, 65 | flip=False, 66 | transforms=[ 67 | dict( 68 | type='GlobalRotScaleTrans', 69 | rot_range=[0, 0], 70 | scale_ratio_range=[1., 1.], 71 | translation_std=[0, 0, 0]), 72 | dict(type='RandomFlip3D'), 73 | dict( 74 | type='PointsRangeFilter', point_cloud_range=point_cloud_range), 75 | dict( 76 | type='DefaultFormatBundle3D', 77 | class_names=class_names, 78 | with_label=False), 79 | dict(type='Collect3D', keys=['points']) 80 | ]) 81 | ] 82 | # construct a pipeline for data and gt loading in show function 83 | # please keep its loading function consistent with test_pipeline (e.g. 
client) 84 | eval_pipeline = [ 85 | dict( 86 | type='LoadPointsFromFile', 87 | coord_type='LIDAR', 88 | load_dim=4, 89 | use_dim=4, 90 | file_client_args=file_client_args), 91 | dict( 92 | type='DefaultFormatBundle3D', 93 | class_names=class_names, 94 | with_label=False), 95 | dict(type='Collect3D', keys=['points']) 96 | ] 97 | 98 | data = dict( 99 | samples_per_gpu=6, 100 | workers_per_gpu=4, 101 | train=dict( 102 | type='RepeatDataset', 103 | times=2, 104 | dataset=dict( 105 | type=dataset_type, 106 | data_root=data_root, 107 | ann_file=data_root + 'kitti_infos_train.pkl', 108 | split='training', 109 | pts_prefix='velodyne_reduced', 110 | pipeline=train_pipeline, 111 | modality=input_modality, 112 | classes=class_names, 113 | test_mode=False, 114 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset 115 | # and box_type_3d='Depth' in sunrgbd and scannet dataset. 116 | box_type_3d='LiDAR')), 117 | val=dict( 118 | type=dataset_type, 119 | data_root=data_root, 120 | ann_file=data_root + 'kitti_infos_val.pkl', 121 | split='training', 122 | pts_prefix='velodyne_reduced', 123 | pipeline=test_pipeline, 124 | modality=input_modality, 125 | classes=class_names, 126 | test_mode=True, 127 | box_type_3d='LiDAR'), 128 | test=dict( 129 | type=dataset_type, 130 | data_root=data_root, 131 | ann_file=data_root + 'kitti_infos_val.pkl', 132 | split='training', 133 | pts_prefix='velodyne_reduced', 134 | pipeline=test_pipeline, 135 | modality=input_modality, 136 | classes=class_names, 137 | test_mode=True, 138 | box_type_3d='LiDAR')) 139 | 140 | evaluation = dict(interval=1, pipeline=eval_pipeline) 141 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/waymoD5-3d-car.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | # D5 in the config name means the whole dataset is divided into 5 folds 3 | # We only use one fold for efficient experiments 4 | dataset_type = 'WaymoDataset' 5 | data_root = 'data/waymo/kitti_format/' 6 | file_client_args = dict(backend='disk') 7 | # Uncomment the following if use ceph or other file clients. 8 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient 9 | # for more details. 
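# A note on the augmentation wiring further down (descriptive only): db_sampler
# configures ground-truth database sampling ("copy-paste" augmentation). It is
# consumed by the dict(type='ObjectSample', db_sampler=db_sampler) step of
# train_pipeline, which pastes extra Car instances into each scene (roughly up
# to the count in sample_groups), after filter_by_min_points has dropped
# database entries with too few lidar points.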
10 | # file_client_args = dict( 11 | # backend='petrel', path_mapping=dict(data='s3://waymo_data/')) 12 | 13 | class_names = ['Car'] 14 | point_cloud_range = [-74.88, -74.88, -2, 74.88, 74.88, 4] 15 | input_modality = dict(use_lidar=True, use_camera=False) 16 | db_sampler = dict( 17 | data_root=data_root, 18 | info_path=data_root + 'waymo_dbinfos_train.pkl', 19 | rate=1.0, 20 | prepare=dict(filter_by_difficulty=[-1], filter_by_min_points=dict(Car=5)), 21 | classes=class_names, 22 | sample_groups=dict(Car=15), 23 | points_loader=dict( 24 | type='LoadPointsFromFile', 25 | coord_type='LIDAR', 26 | load_dim=5, 27 | use_dim=[0, 1, 2, 3, 4], 28 | file_client_args=file_client_args)) 29 | 30 | train_pipeline = [ 31 | dict( 32 | type='LoadPointsFromFile', 33 | coord_type='LIDAR', 34 | load_dim=6, 35 | use_dim=5, 36 | file_client_args=file_client_args), 37 | dict( 38 | type='LoadAnnotations3D', 39 | with_bbox_3d=True, 40 | with_label_3d=True, 41 | file_client_args=file_client_args), 42 | dict(type='ObjectSample', db_sampler=db_sampler), 43 | dict( 44 | type='RandomFlip3D', 45 | sync_2d=False, 46 | flip_ratio_bev_horizontal=0.5, 47 | flip_ratio_bev_vertical=0.5), 48 | dict( 49 | type='GlobalRotScaleTrans', 50 | rot_range=[-0.78539816, 0.78539816], 51 | scale_ratio_range=[0.95, 1.05]), 52 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), 53 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), 54 | dict(type='PointShuffle'), 55 | dict(type='DefaultFormatBundle3D', class_names=class_names), 56 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 57 | ] 58 | test_pipeline = [ 59 | dict( 60 | type='LoadPointsFromFile', 61 | coord_type='LIDAR', 62 | load_dim=6, 63 | use_dim=5, 64 | file_client_args=file_client_args), 65 | dict( 66 | type='MultiScaleFlipAug3D', 67 | img_scale=(1333, 800), 68 | pts_scale_ratio=1, 69 | flip=False, 70 | transforms=[ 71 | dict( 72 | type='GlobalRotScaleTrans', 73 | rot_range=[0, 0], 74 | scale_ratio_range=[1., 1.], 75 | translation_std=[0, 0, 0]), 76 | dict(type='RandomFlip3D'), 77 | dict( 78 | type='PointsRangeFilter', point_cloud_range=point_cloud_range), 79 | dict( 80 | type='DefaultFormatBundle3D', 81 | class_names=class_names, 82 | with_label=False), 83 | dict(type='Collect3D', keys=['points']) 84 | ]) 85 | ] 86 | # construct a pipeline for data and gt loading in show function 87 | # please keep its loading function consistent with test_pipeline (e.g. client) 88 | eval_pipeline = [ 89 | dict( 90 | type='LoadPointsFromFile', 91 | coord_type='LIDAR', 92 | load_dim=6, 93 | use_dim=5, 94 | file_client_args=file_client_args), 95 | dict( 96 | type='DefaultFormatBundle3D', 97 | class_names=class_names, 98 | with_label=False), 99 | dict(type='Collect3D', keys=['points']) 100 | ] 101 | 102 | data = dict( 103 | samples_per_gpu=2, 104 | workers_per_gpu=4, 105 | train=dict( 106 | type='RepeatDataset', 107 | times=2, 108 | dataset=dict( 109 | type=dataset_type, 110 | data_root=data_root, 111 | ann_file=data_root + 'waymo_infos_train.pkl', 112 | split='training', 113 | pipeline=train_pipeline, 114 | modality=input_modality, 115 | classes=class_names, 116 | test_mode=False, 117 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset 118 | # and box_type_3d='Depth' in sunrgbd and scannet dataset. 
119 | box_type_3d='LiDAR', 120 | # load one frame every five frames 121 | load_interval=5)), 122 | val=dict( 123 | type=dataset_type, 124 | data_root=data_root, 125 | ann_file=data_root + 'waymo_infos_val.pkl', 126 | split='training', 127 | pipeline=test_pipeline, 128 | modality=input_modality, 129 | classes=class_names, 130 | test_mode=True, 131 | box_type_3d='LiDAR'), 132 | test=dict( 133 | type=dataset_type, 134 | data_root=data_root, 135 | ann_file=data_root + 'waymo_infos_val.pkl', 136 | split='training', 137 | pipeline=test_pipeline, 138 | modality=input_modality, 139 | classes=class_names, 140 | test_mode=True, 141 | box_type_3d='LiDAR')) 142 | 143 | evaluation = dict(interval=24, pipeline=eval_pipeline) 144 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/waymoD5-3d-3class.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | # D5 in the config name means the whole dataset is divided into 5 folds 3 | # We only use one fold for efficient experiments 4 | dataset_type = 'LidarWaymoDataset' 5 | data_root = 'data/waymo-full/kitti_format/' 6 | file_client_args = dict(backend='disk') 7 | # Uncomment the following if use ceph or other file clients. 8 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient 9 | # for more details. 10 | # file_client_args = dict( 11 | # backend='petrel', path_mapping=dict(data='s3://waymo_data/')) 12 | 13 | class_names = ['Car', 'Pedestrian', 'Cyclist'] 14 | point_cloud_range = [-74.88, -74.88, -2, 74.88, 74.88, 4] 15 | input_modality = dict(use_lidar=True, use_camera=False) 16 | db_sampler = dict( 17 | data_root=data_root, 18 | info_path=data_root + 'waymo_dbinfos_train.pkl', 19 | rate=1.0, 20 | prepare=dict( 21 | filter_by_difficulty=[-1], 22 | filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)), 23 | classes=class_names, 24 | sample_groups=dict(Car=15, Pedestrian=10, Cyclist=10), 25 | points_loader=dict( 26 | type='LoadPointsFromFile', 27 | coord_type='LIDAR', 28 | load_dim=5, 29 | use_dim=[0, 1, 2, 3, 4], 30 | file_client_args=file_client_args)) 31 | 32 | train_pipeline = [ 33 | dict( 34 | type='LoadPointsFromFile', 35 | coord_type='LIDAR', 36 | load_dim=6, 37 | use_dim=5, 38 | file_client_args=file_client_args), 39 | dict( 40 | type='LoadAnnotations3D', 41 | with_bbox_3d=True, 42 | with_label_3d=True, 43 | file_client_args=file_client_args), 44 | dict(type='ObjectSample', db_sampler=db_sampler), 45 | dict( 46 | type='RandomFlip3D', 47 | sync_2d=False, 48 | flip_ratio_bev_horizontal=0.5, 49 | flip_ratio_bev_vertical=0.5), 50 | dict( 51 | type='GlobalRotScaleTrans', 52 | rot_range=[-0.78539816, 0.78539816], 53 | scale_ratio_range=[0.95, 1.05]), 54 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), 55 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), 56 | dict(type='PointShuffle'), 57 | dict(type='DefaultFormatBundle3D', class_names=class_names), 58 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 59 | ] 60 | test_pipeline = [ 61 | dict( 62 | type='LoadPointsFromFile', 63 | coord_type='LIDAR', 64 | load_dim=6, 65 | use_dim=5, 66 | file_client_args=file_client_args), 67 | dict( 68 | type='MultiScaleFlipAug3D', 69 | img_scale=(1333, 800), 70 | pts_scale_ratio=1, 71 | flip=False, 72 | transforms=[ 73 | dict( 74 | type='GlobalRotScaleTrans', 75 | rot_range=[0, 0], 76 | scale_ratio_range=[1., 1.], 77 | translation_std=[0, 0, 0]), 78 | 
dict(type='RandomFlip3D'), 79 | dict( 80 | type='PointsRangeFilter', point_cloud_range=point_cloud_range), 81 | dict( 82 | type='DefaultFormatBundle3D', 83 | class_names=class_names, 84 | with_label=False), 85 | dict(type='Collect3D', keys=['points']) 86 | ]) 87 | ] 88 | # construct a pipeline for data and gt loading in show function 89 | # please keep its loading function consistent with test_pipeline (e.g. client) 90 | eval_pipeline = [ 91 | dict( 92 | type='LoadPointsFromFile', 93 | coord_type='LIDAR', 94 | load_dim=6, 95 | use_dim=5, 96 | file_client_args=file_client_args), 97 | dict( 98 | type='DefaultFormatBundle3D', 99 | class_names=class_names, 100 | with_label=False), 101 | dict(type='Collect3D', keys=['points']) 102 | ] 103 | 104 | data = dict( 105 | samples_per_gpu=2, 106 | workers_per_gpu=4, 107 | train=dict( 108 | type='RepeatDataset', 109 | times=2, 110 | dataset=dict( 111 | type=dataset_type, 112 | data_root=data_root, 113 | ann_file=data_root + 'waymo_infos_train.pkl', 114 | split='training', 115 | pipeline=train_pipeline, 116 | modality=input_modality, 117 | classes=class_names, 118 | test_mode=False, 119 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset 120 | # and box_type_3d='Depth' in sunrgbd and scannet dataset. 121 | box_type_3d='LiDAR', 122 | # load one frame every five frames 123 | load_interval=5)), 124 | val=dict( 125 | type=dataset_type, 126 | data_root=data_root, 127 | ann_file=data_root + 'waymo_infos_val.pkl', 128 | split='training', 129 | pipeline=test_pipeline, 130 | modality=input_modality, 131 | classes=class_names, 132 | test_mode=True, 133 | box_type_3d='LiDAR'), 134 | test=dict( 135 | type=dataset_type, 136 | data_root=data_root, 137 | ann_file=data_root + 'waymo_infos_val.pkl', 138 | split='training', 139 | pipeline=test_pipeline, 140 | modality=input_modality, 141 | classes=class_names, 142 | test_mode=True, 143 | box_type_3d='LiDAR')) 144 | 145 | evaluation = dict(interval=24, pipeline=eval_pipeline) 146 | -------------------------------------------------------------------------------- /projects/configs/datasets/custom_nus-3d.py: -------------------------------------------------------------------------------- 1 | # If point cloud range is changed, the models should also change their point 2 | # cloud range accordingly 3 | point_cloud_range = [-50, -50, -5, 50, 50, 3] 4 | # For nuScenes we usually do 10-class detection 5 | class_names = [ 6 | 'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle', 7 | 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier' 8 | ] 9 | dataset_type = 'NuScenesDataset_eval_modified' 10 | data_root = 'data/nuscenes/' 11 | # Input modality for nuScenes dataset, this is consistent with the submission 12 | # format which requires the information in input_modality. 13 | input_modality = dict( 14 | use_lidar=True, 15 | use_camera=False, 16 | use_radar=False, 17 | use_map=False, 18 | use_external=False) 19 | file_client_args = dict(backend='disk') 20 | # Uncomment the following if use ceph or other file clients. 21 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient 22 | # for more details. 
23 | # file_client_args = dict( 24 | # backend='petrel', 25 | # path_mapping=dict({ 26 | # './data/nuscenes/': 's3://nuscenes/nuscenes/', 27 | # 'data/nuscenes/': 's3://nuscenes/nuscenes/' 28 | # })) 29 | train_pipeline = [ 30 | dict( 31 | type='LoadPointsFromFile', 32 | coord_type='LIDAR', 33 | load_dim=5, 34 | use_dim=5, 35 | file_client_args=file_client_args), 36 | dict( 37 | type='LoadPointsFromMultiSweeps', 38 | sweeps_num=10, 39 | file_client_args=file_client_args), 40 | dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True), 41 | dict( 42 | type='GlobalRotScaleTrans', 43 | rot_range=[-0.3925, 0.3925], 44 | scale_ratio_range=[0.95, 1.05], 45 | translation_std=[0, 0, 0]), 46 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), 47 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), 48 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), 49 | dict(type='ObjectNameFilter', classes=class_names), 50 | dict(type='PointShuffle'), 51 | dict(type='DefaultFormatBundle3D', class_names=class_names), 52 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 53 | ] 54 | test_pipeline = [ 55 | dict( 56 | type='LoadPointsFromFile', 57 | coord_type='LIDAR', 58 | load_dim=5, 59 | use_dim=5, 60 | file_client_args=file_client_args), 61 | dict( 62 | type='LoadPointsFromMultiSweeps', 63 | sweeps_num=10, 64 | file_client_args=file_client_args), 65 | dict( 66 | type='MultiScaleFlipAug3D', 67 | img_scale=(1333, 800), 68 | pts_scale_ratio=1, 69 | flip=False, 70 | transforms=[ 71 | dict( 72 | type='GlobalRotScaleTrans', 73 | rot_range=[0, 0], 74 | scale_ratio_range=[1., 1.], 75 | translation_std=[0, 0, 0]), 76 | dict(type='RandomFlip3D'), 77 | dict( 78 | type='PointsRangeFilter', point_cloud_range=point_cloud_range), 79 | dict( 80 | type='DefaultFormatBundle3D', 81 | class_names=class_names, 82 | with_label=False), 83 | dict(type='Collect3D', keys=['points']) 84 | ]) 85 | ] 86 | # construct a pipeline for data and gt loading in show function 87 | # please keep its loading function consistent with test_pipeline (e.g. client) 88 | eval_pipeline = [ 89 | dict( 90 | type='LoadPointsFromFile', 91 | coord_type='LIDAR', 92 | load_dim=5, 93 | use_dim=5, 94 | file_client_args=file_client_args), 95 | dict( 96 | type='LoadPointsFromMultiSweeps', 97 | sweeps_num=10, 98 | file_client_args=file_client_args), 99 | dict( 100 | type='DefaultFormatBundle3D', 101 | class_names=class_names, 102 | with_label=False), 103 | dict(type='Collect3D', keys=['points']) 104 | ] 105 | 106 | data = dict( 107 | samples_per_gpu=4, 108 | workers_per_gpu=4, 109 | train=dict( 110 | type=dataset_type, 111 | data_root=data_root, 112 | ann_file=data_root + 'nuscenes_infos_train.pkl', 113 | pipeline=train_pipeline, 114 | classes=class_names, 115 | modality=input_modality, 116 | test_mode=False, 117 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset 118 | # and box_type_3d='Depth' in sunrgbd and scannet dataset. 
119 | box_type_3d='LiDAR'), 120 | val=dict( 121 | type=dataset_type, 122 | ann_file=data_root + 'nuscenes_infos_val.pkl', 123 | pipeline=test_pipeline, 124 | classes=class_names, 125 | modality=input_modality, 126 | test_mode=True, 127 | box_type_3d='LiDAR'), 128 | test=dict( 129 | type=dataset_type, 130 | data_root=data_root, 131 | ann_file=data_root + 'nuscenes_infos_val.pkl', 132 | pipeline=test_pipeline, 133 | classes=class_names, 134 | modality=input_modality, 135 | test_mode=True, 136 | box_type_3d='LiDAR')) 137 | # For nuScenes dataset, we usually evaluate the model at the end of training. 138 | # Since the models are trained by 24 epochs by default, we set evaluation 139 | # interval to be 24. Please change the interval accordingly if you do not 140 | # use a default schedule. 141 | evaluation = dict(interval=24, pipeline=eval_pipeline) 142 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/nus-3d.py: -------------------------------------------------------------------------------- 1 | # If point cloud range is changed, the models should also change their point 2 | # cloud range accordingly 3 | point_cloud_range = [-50, -50, -5, 50, 50, 3] 4 | # For nuScenes we usually do 10-class detection 5 | class_names = [ 6 | 'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle', 7 | 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier' 8 | ] 9 | dataset_type = 'NuScenesDataset' 10 | data_root = 'data/nuscenes/' 11 | # Input modality for nuScenes dataset, this is consistent with the submission 12 | # format which requires the information in input_modality. 13 | input_modality = dict( 14 | use_lidar=True, 15 | use_camera=False, 16 | use_radar=False, 17 | use_map=False, 18 | use_external=False) 19 | file_client_args = dict(backend='disk') 20 | # Uncomment the following if use ceph or other file clients. 21 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient 22 | # for more details. 
23 | # file_client_args = dict( 24 | # backend='petrel', 25 | # path_mapping=dict({ 26 | # './data/nuscenes/': 's3://nuscenes/nuscenes/', 27 | # 'data/nuscenes/': 's3://nuscenes/nuscenes/' 28 | # })) 29 | train_pipeline = [ 30 | dict( 31 | type='LoadPointsFromFile', 32 | coord_type='LIDAR', 33 | load_dim=5, 34 | use_dim=5, 35 | file_client_args=file_client_args), 36 | dict( 37 | type='LoadPointsFromMultiSweeps', 38 | sweeps_num=10, 39 | file_client_args=file_client_args), 40 | dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True), 41 | dict( 42 | type='GlobalRotScaleTrans', 43 | rot_range=[-0.3925, 0.3925], 44 | scale_ratio_range=[0.95, 1.05], 45 | translation_std=[0, 0, 0]), 46 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), 47 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), 48 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), 49 | dict(type='ObjectNameFilter', classes=class_names), 50 | dict(type='PointShuffle'), 51 | dict(type='DefaultFormatBundle3D', class_names=class_names), 52 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 53 | ] 54 | test_pipeline = [ 55 | dict( 56 | type='LoadPointsFromFile', 57 | coord_type='LIDAR', 58 | load_dim=5, 59 | use_dim=5, 60 | file_client_args=file_client_args), 61 | dict( 62 | type='LoadPointsFromMultiSweeps', 63 | sweeps_num=10, 64 | file_client_args=file_client_args), 65 | dict( 66 | type='MultiScaleFlipAug3D', 67 | img_scale=(1333, 800), 68 | pts_scale_ratio=1, 69 | flip=False, 70 | transforms=[ 71 | dict( 72 | type='GlobalRotScaleTrans', 73 | rot_range=[0, 0], 74 | scale_ratio_range=[1., 1.], 75 | translation_std=[0, 0, 0]), 76 | dict(type='RandomFlip3D'), 77 | dict( 78 | type='PointsRangeFilter', point_cloud_range=point_cloud_range), 79 | dict( 80 | type='DefaultFormatBundle3D', 81 | class_names=class_names, 82 | with_label=False), 83 | dict(type='Collect3D', keys=['points']) 84 | ]) 85 | ] 86 | # construct a pipeline for data and gt loading in show function 87 | # please keep its loading function consistent with test_pipeline (e.g. client) 88 | eval_pipeline = [ 89 | dict( 90 | type='LoadPointsFromFile', 91 | coord_type='LIDAR', 92 | load_dim=5, 93 | use_dim=5, 94 | file_client_args=file_client_args), 95 | dict( 96 | type='LoadPointsFromMultiSweeps', 97 | sweeps_num=10, 98 | file_client_args=file_client_args), 99 | dict( 100 | type='DefaultFormatBundle3D', 101 | class_names=class_names, 102 | with_label=False), 103 | dict(type='Collect3D', keys=['points']) 104 | ] 105 | 106 | data = dict( 107 | samples_per_gpu=4, 108 | workers_per_gpu=4, 109 | train=dict( 110 | type=dataset_type, 111 | data_root=data_root, 112 | ann_file=data_root + 'nuscenes_infos_train.pkl', 113 | pipeline=train_pipeline, 114 | classes=class_names, 115 | modality=input_modality, 116 | test_mode=False, 117 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset 118 | # and box_type_3d='Depth' in sunrgbd and scannet dataset. 
119 | box_type_3d='LiDAR'), 120 | val=dict( 121 | type=dataset_type, 122 | data_root=data_root, 123 | ann_file=data_root + 'nuscenes_infos_val.pkl', 124 | pipeline=test_pipeline, 125 | classes=class_names, 126 | modality=input_modality, 127 | test_mode=True, 128 | box_type_3d='LiDAR'), 129 | test=dict( 130 | type=dataset_type, 131 | data_root=data_root, 132 | ann_file=data_root + 'nuscenes_infos_val.pkl', 133 | pipeline=test_pipeline, 134 | classes=class_names, 135 | modality=input_modality, 136 | test_mode=True, 137 | box_type_3d='LiDAR')) 138 | # For nuScenes dataset, we usually evaluate the model at the end of training. 139 | # Since the models are trained by 24 epochs by default, we set evaluation 140 | # interval to be 24. Please change the interval accordingly if you do not 141 | # use a default schedule. 142 | evaluation = dict(interval=24, pipeline=eval_pipeline) 143 | -------------------------------------------------------------------------------- /tools/data_converter/indoor_converter.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import mmcv 3 | import numpy as np 4 | import os 5 | import sys 6 | sys.path.insert(0, '/ceph-jd/prod/jupyter/bixiao/notebooks/Workspace/Codes/CV/BEVFormer-master/') 7 | print(sys.path) 8 | import tools 9 | print(tools) 10 | from tools.data_converter.s3dis_data_utils import S3DISData, S3DISSegData 11 | from tools.data_converter.scannet_data_utils import ScanNetData, ScanNetSegData 12 | from tools.data_converter.sunrgbd_data_utils import SUNRGBDData 13 | 14 | 15 | def create_indoor_info_file(data_path, 16 | pkl_prefix='sunrgbd', 17 | save_path=None, 18 | use_v1=False, 19 | workers=4): 20 | """Create indoor information file. 21 | 22 | Get information of the raw data and save it to the pkl file. 23 | 24 | Args: 25 | data_path (str): Path of the data. 26 | pkl_prefix (str): Prefix of the pkl to be saved. Default: 'sunrgbd'. 27 | save_path (str): Path of the pkl to be saved. Default: None. 28 | use_v1 (bool): Whether to use v1. Default: False. 29 | workers (int): Number of threads to be used. Default: 4. 
30 | """ 31 | assert os.path.exists(data_path) 32 | assert pkl_prefix in ['sunrgbd', 'scannet', 's3dis'], \ 33 | f'unsupported indoor dataset {pkl_prefix}' 34 | save_path = data_path if save_path is None else save_path 35 | assert os.path.exists(save_path) 36 | 37 | # generate infos for both detection and segmentation task 38 | if pkl_prefix in ['sunrgbd', 'scannet']: 39 | train_filename = os.path.join(save_path, 40 | f'{pkl_prefix}_infos_train.pkl') 41 | val_filename = os.path.join(save_path, f'{pkl_prefix}_infos_val.pkl') 42 | if pkl_prefix == 'sunrgbd': 43 | # SUN RGB-D has a train-val split 44 | train_dataset = SUNRGBDData( 45 | root_path=data_path, split='train', use_v1=use_v1) 46 | val_dataset = SUNRGBDData( 47 | root_path=data_path, split='val', use_v1=use_v1) 48 | else: 49 | # ScanNet has a train-val-test split 50 | train_dataset = ScanNetData(root_path=data_path, split='train') 51 | val_dataset = ScanNetData(root_path=data_path, split='val') 52 | test_dataset = ScanNetData(root_path=data_path, split='test') 53 | test_filename = os.path.join(save_path, 54 | f'{pkl_prefix}_infos_test.pkl') 55 | 56 | infos_train = train_dataset.get_infos( 57 | num_workers=workers, has_label=True) 58 | mmcv.dump(infos_train, train_filename, 'pkl') 59 | print(f'{pkl_prefix} info train file is saved to {train_filename}') 60 | 61 | infos_val = val_dataset.get_infos(num_workers=workers, has_label=True) 62 | mmcv.dump(infos_val, val_filename, 'pkl') 63 | print(f'{pkl_prefix} info val file is saved to {val_filename}') 64 | 65 | if pkl_prefix == 'scannet': 66 | infos_test = test_dataset.get_infos( 67 | num_workers=workers, has_label=False) 68 | mmcv.dump(infos_test, test_filename, 'pkl') 69 | print(f'{pkl_prefix} info test file is saved to {test_filename}') 70 | 71 | # generate infos for the semantic segmentation task 72 | # e.g. re-sampled scene indexes and label weights 73 | # scene indexes are used to re-sample rooms with different number of points 74 | # label weights are used to balance classes with different number of points 75 | if pkl_prefix == 'scannet': 76 | # label weight computation function is adopted from 77 | # https://github.com/charlesq34/pointnet2/blob/master/scannet/scannet_dataset.py#L24 78 | train_dataset = ScanNetSegData( 79 | data_root=data_path, 80 | ann_file=train_filename, 81 | split='train', 82 | num_points=8192, 83 | label_weight_func=lambda x: 1.0 / np.log(1.2 + x)) 84 | # TODO: do we need to generate on val set? 
85 | val_dataset = ScanNetSegData( 86 | data_root=data_path, 87 | ann_file=val_filename, 88 | split='val', 89 | num_points=8192, 90 | label_weight_func=lambda x: 1.0 / np.log(1.2 + x)) 91 | # no need to generate for test set 92 | train_dataset.get_seg_infos() 93 | val_dataset.get_seg_infos() 94 | elif pkl_prefix == 's3dis': 95 | # S3DIS doesn't have a fixed train-val split 96 | # it has 6 areas instead, so we generate info file for each of them 97 | # in training, we will use dataset to wrap different areas 98 | splits = [f'Area_{i}' for i in [1, 2, 3, 4, 5, 6]] 99 | for split in splits: 100 | dataset = S3DISData(root_path=data_path, split=split) 101 | info = dataset.get_infos(num_workers=workers, has_label=True) 102 | filename = os.path.join(save_path, 103 | f'{pkl_prefix}_infos_{split}.pkl') 104 | mmcv.dump(info, filename, 'pkl') 105 | print(f'{pkl_prefix} info {split} file is saved to {filename}') 106 | seg_dataset = S3DISSegData( 107 | data_root=data_path, 108 | ann_file=filename, 109 | split=split, 110 | num_points=4096, 111 | label_weight_func=lambda x: 1.0 / np.log(1.2 + x)) 112 | seg_dataset.get_seg_infos() 113 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/opt/adamw.py: -------------------------------------------------------------------------------- 1 | try: 2 | from torch.optim import _functional as F 3 | except: 4 | print('WARNING!!!, I recommend using torch>=1.8') 5 | 6 | import torch 7 | from torch.optim.optimizer import Optimizer 8 | from mmcv.runner.optimizer.builder import OPTIMIZERS 9 | 10 | @OPTIMIZERS.register_module() 11 | class AdamW2(Optimizer): 12 | r"""Implements AdamW algorithm. Solve the bug of torch 1.8 13 | 14 | The original Adam algorithm was proposed in `Adam: A Method for Stochastic Optimization`_. 15 | The AdamW variant was proposed in `Decoupled Weight Decay Regularization`_. 16 | 17 | Args: 18 | params (iterable): iterable of parameters to optimize or dicts defining 19 | parameter groups 20 | lr (float, optional): learning rate (default: 1e-3) 21 | betas (Tuple[float, float], optional): coefficients used for computing 22 | running averages of gradient and its square (default: (0.9, 0.999)) 23 | eps (float, optional): term added to the denominator to improve 24 | numerical stability (default: 1e-8) 25 | weight_decay (float, optional): weight decay coefficient (default: 1e-2) 26 | amsgrad (boolean, optional): whether to use the AMSGrad variant of this 27 | algorithm from the paper `On the Convergence of Adam and Beyond`_ 28 | (default: False) 29 | 30 | .. _Adam\: A Method for Stochastic Optimization: 31 | https://arxiv.org/abs/1412.6980 32 | .. _Decoupled Weight Decay Regularization: 33 | https://arxiv.org/abs/1711.05101 34 | .. 
_On the Convergence of Adam and Beyond: 35 | https://openreview.net/forum?id=ryQu7f-RZ 36 | """ 37 | 38 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, 39 | weight_decay=1e-2, amsgrad=False): 40 | if not 0.0 <= lr: 41 | raise ValueError("Invalid learning rate: {}".format(lr)) 42 | if not 0.0 <= eps: 43 | raise ValueError("Invalid epsilon value: {}".format(eps)) 44 | if not 0.0 <= betas[0] < 1.0: 45 | raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0])) 46 | if not 0.0 <= betas[1] < 1.0: 47 | raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1])) 48 | if not 0.0 <= weight_decay: 49 | raise ValueError("Invalid weight_decay value: {}".format(weight_decay)) 50 | defaults = dict(lr=lr, betas=betas, eps=eps, 51 | weight_decay=weight_decay, amsgrad=amsgrad) 52 | super(AdamW2, self).__init__(params, defaults) 53 | 54 | def __setstate__(self, state): 55 | super(AdamW2, self).__setstate__(state) 56 | for group in self.param_groups: 57 | group.setdefault('amsgrad', False) 58 | 59 | @torch.no_grad() 60 | def step(self, closure=None): 61 | """Performs a single optimization step. 62 | 63 | Args: 64 | closure (callable, optional): A closure that reevaluates the model 65 | and returns the loss. 66 | """ 67 | loss = None 68 | if closure is not None: 69 | with torch.enable_grad(): 70 | loss = closure() 71 | 72 | for group in self.param_groups: 73 | params_with_grad = [] 74 | grads = [] 75 | exp_avgs = [] 76 | exp_avg_sqs = [] 77 | state_sums = [] 78 | max_exp_avg_sqs = [] 79 | state_steps = [] 80 | amsgrad = group['amsgrad'] 81 | 82 | # put this line here for solving bug 83 | beta1, beta2 = group['betas'] 84 | 85 | for p in group['params']: 86 | if p.grad is None: 87 | continue 88 | params_with_grad.append(p) 89 | if p.grad.is_sparse: 90 | raise RuntimeError('AdamW does not support sparse gradients') 91 | grads.append(p.grad) 92 | 93 | state = self.state[p] 94 | 95 | # State initialization 96 | if len(state) == 0: 97 | state['step'] = 0 98 | # Exponential moving average of gradient values 99 | state['exp_avg'] = torch.zeros_like(p, memory_format=torch.preserve_format) 100 | # Exponential moving average of squared gradient values 101 | state['exp_avg_sq'] = torch.zeros_like(p, memory_format=torch.preserve_format) 102 | if amsgrad: 103 | # Maintains max of all exp. moving avg. of sq. grad. values 104 | state['max_exp_avg_sq'] = torch.zeros_like(p, memory_format=torch.preserve_format) 105 | 106 | exp_avgs.append(state['exp_avg']) 107 | exp_avg_sqs.append(state['exp_avg_sq']) 108 | 109 | if amsgrad: 110 | max_exp_avg_sqs.append(state['max_exp_avg_sq']) 111 | 112 | 113 | # update the steps for each param group update 114 | state['step'] += 1 115 | # record the step after step update 116 | state_steps.append(state['step']) 117 | 118 | F.adamw(params_with_grad, 119 | grads, 120 | exp_avgs, 121 | exp_avg_sqs, 122 | max_exp_avg_sqs, 123 | state_steps, 124 | amsgrad, 125 | beta1, 126 | beta2, 127 | group['lr'], 128 | group['weight_decay'], 129 | group['eps']) 130 | 131 | return loss -------------------------------------------------------------------------------- /tools/model_converters/convert_votenet_checkpoints.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | import argparse 3 | import tempfile 4 | import torch 5 | from mmcv import Config 6 | from mmcv.runner import load_state_dict 7 | 8 | from mmdet3d.models import build_detector 9 | 10 | 11 | def parse_args(): 12 | parser = argparse.ArgumentParser( 13 | description='MMDet3D upgrade model version(before v0.6.0) of VoteNet') 14 | parser.add_argument('checkpoint', help='checkpoint file') 15 | parser.add_argument('--out', help='path of the output checkpoint file') 16 | args = parser.parse_args() 17 | return args 18 | 19 | 20 | def parse_config(config_strings): 21 | """Parse config from strings. 22 | 23 | Args: 24 | config_strings (string): strings of model config. 25 | 26 | Returns: 27 | Config: model config 28 | """ 29 | temp_file = tempfile.NamedTemporaryFile() 30 | config_path = f'{temp_file.name}.py' 31 | with open(config_path, 'w') as f: 32 | f.write(config_strings) 33 | 34 | config = Config.fromfile(config_path) 35 | 36 | # Update backbone config 37 | if 'pool_mod' in config.model.backbone: 38 | config.model.backbone.pop('pool_mod') 39 | 40 | if 'sa_cfg' not in config.model.backbone: 41 | config.model.backbone['sa_cfg'] = dict( 42 | type='PointSAModule', 43 | pool_mod='max', 44 | use_xyz=True, 45 | normalize_xyz=True) 46 | 47 | if 'type' not in config.model.bbox_head.vote_aggregation_cfg: 48 | config.model.bbox_head.vote_aggregation_cfg['type'] = 'PointSAModule' 49 | 50 | # Update bbox_head config 51 | if 'pred_layer_cfg' not in config.model.bbox_head: 52 | config.model.bbox_head['pred_layer_cfg'] = dict( 53 | in_channels=128, shared_conv_channels=(128, 128), bias=True) 54 | 55 | if 'feat_channels' in config.model.bbox_head: 56 | config.model.bbox_head.pop('feat_channels') 57 | 58 | if 'vote_moudule_cfg' in config.model.bbox_head: 59 | config.model.bbox_head['vote_module_cfg'] = config.model.bbox_head.pop( 60 | 'vote_moudule_cfg') 61 | 62 | if config.model.bbox_head.vote_aggregation_cfg.use_xyz: 63 | config.model.bbox_head.vote_aggregation_cfg.mlp_channels[0] -= 3 64 | 65 | temp_file.close() 66 | 67 | return config 68 | 69 | 70 | def main(): 71 | """Convert keys in checkpoints for VoteNet. 72 | 73 | There can be some breaking changes during the development of mmdetection3d, 74 | and this tool is used for upgrading checkpoints trained with old versions 75 | (before v0.6.0) to the latest one. 
76 | """ 77 | args = parse_args() 78 | checkpoint = torch.load(args.checkpoint) 79 | cfg = parse_config(checkpoint['meta']['config']) 80 | # Build the model and load checkpoint 81 | model = build_detector( 82 | cfg.model, 83 | train_cfg=cfg.get('train_cfg'), 84 | test_cfg=cfg.get('test_cfg')) 85 | orig_ckpt = checkpoint['state_dict'] 86 | converted_ckpt = orig_ckpt.copy() 87 | 88 | if cfg['dataset_type'] == 'ScanNetDataset': 89 | NUM_CLASSES = 18 90 | elif cfg['dataset_type'] == 'SUNRGBDDataset': 91 | NUM_CLASSES = 10 92 | else: 93 | raise NotImplementedError 94 | 95 | RENAME_PREFIX = { 96 | 'bbox_head.conv_pred.0': 'bbox_head.conv_pred.shared_convs.layer0', 97 | 'bbox_head.conv_pred.1': 'bbox_head.conv_pred.shared_convs.layer1' 98 | } 99 | 100 | DEL_KEYS = [ 101 | 'bbox_head.conv_pred.0.bn.num_batches_tracked', 102 | 'bbox_head.conv_pred.1.bn.num_batches_tracked' 103 | ] 104 | 105 | EXTRACT_KEYS = { 106 | 'bbox_head.conv_pred.conv_cls.weight': 107 | ('bbox_head.conv_pred.conv_out.weight', [(0, 2), (-NUM_CLASSES, -1)]), 108 | 'bbox_head.conv_pred.conv_cls.bias': 109 | ('bbox_head.conv_pred.conv_out.bias', [(0, 2), (-NUM_CLASSES, -1)]), 110 | 'bbox_head.conv_pred.conv_reg.weight': 111 | ('bbox_head.conv_pred.conv_out.weight', [(2, -NUM_CLASSES)]), 112 | 'bbox_head.conv_pred.conv_reg.bias': 113 | ('bbox_head.conv_pred.conv_out.bias', [(2, -NUM_CLASSES)]) 114 | } 115 | 116 | # Delete some useless keys 117 | for key in DEL_KEYS: 118 | converted_ckpt.pop(key) 119 | 120 | # Rename keys with specific prefix 121 | RENAME_KEYS = dict() 122 | for old_key in converted_ckpt.keys(): 123 | for rename_prefix in RENAME_PREFIX.keys(): 124 | if rename_prefix in old_key: 125 | new_key = old_key.replace(rename_prefix, 126 | RENAME_PREFIX[rename_prefix]) 127 | RENAME_KEYS[new_key] = old_key 128 | for new_key, old_key in RENAME_KEYS.items(): 129 | converted_ckpt[new_key] = converted_ckpt.pop(old_key) 130 | 131 | # Extract weights and rename the keys 132 | for new_key, (old_key, indices) in EXTRACT_KEYS.items(): 133 | cur_layers = orig_ckpt[old_key] 134 | converted_layers = [] 135 | for (start, end) in indices: 136 | if end != -1: 137 | converted_layers.append(cur_layers[start:end]) 138 | else: 139 | converted_layers.append(cur_layers[start:]) 140 | converted_layers = torch.cat(converted_layers, 0) 141 | converted_ckpt[new_key] = converted_layers 142 | if old_key in converted_ckpt.keys(): 143 | converted_ckpt.pop(old_key) 144 | 145 | # Check the converted checkpoint by loading to the model 146 | load_state_dict(model, converted_ckpt, strict=True) 147 | checkpoint['state_dict'] = converted_ckpt 148 | torch.save(checkpoint, args.out) 149 | 150 | 151 | if __name__ == '__main__': 152 | main() 153 | --------------------------------------------------------------------------------