├── tools
│   ├── __init__.py
│   ├── analysis_tools
│   │   ├── __init__.py
│   │   ├── get_params.py
│   │   └── benchmark.py
│   ├── data_converter
│   │   ├── __init__.py
│   │   ├── lyft_data_fixer.py
│   │   └── indoor_converter.py
│   ├── dist_train.sh
│   ├── dist_test.sh
│   ├── misc
│   │   ├── print_config.py
│   │   ├── visualize_results.py
│   │   └── fuse_conv_bn.py
│   └── model_converters
│       ├── publish_model.py
│       ├── regnet2mmdet.py
│       └── convert_votenet_checkpoints.py
├── projects
│   ├── __init__.py
│   ├── mmdet3d_plugin
│   │   ├── models
│   │   │   ├── opt
│   │   │   │   ├── __init__.py
│   │   │   │   └── adamw.py
│   │   │   ├── hooks
│   │   │   │   ├── __init__.py
│   │   │   │   └── hooks.py
│   │   │   ├── backbones
│   │   │   │   └── __init__.py
│   │   │   └── utils
│   │   │       ├── __init__.py
│   │   │       ├── bricks.py
│   │   │       ├── visual.py
│   │   │       ├── position_embedding.py
│   │   │       └── grid_mask.py
│   │   ├── bevformer
│   │   │   ├── detectors
│   │   │   │   └── __init__.py
│   │   │   ├── dense_heads
│   │   │   │   └── __init__.py
│   │   │   ├── __init__.py
│   │   │   ├── apis
│   │   │   │   ├── __init__.py
│   │   │   │   └── train.py
│   │   │   └── modules
│   │   │       └── __init__.py
│   │   ├── core
│   │   │   ├── evaluation
│   │   │   │   ├── __init__.py
│   │   │   │   └── eval_hooks.py
│   │   │   └── bbox
│   │   │       ├── coders
│   │   │       │   ├── __init__.py
│   │   │       │   └── nms_free_coder.py
│   │   │       ├── assigners
│   │   │       │   └── __init__.py
│   │   │       ├── match_costs
│   │   │       │   ├── __init__.py
│   │   │       │   └── match_cost.py
│   │   │       └── util.py
│   │   ├── datasets
│   │   │   ├── __init__.py
│   │   │   ├── samplers
│   │   │   │   ├── __init__.py
│   │   │   │   ├── sampler.py
│   │   │   │   ├── distributed_sampler.py
│   │   │   │   └── group_sampler.py
│   │   │   └── pipelines
│   │   │       ├── __init__.py
│   │   │       └── formating.py
│   │   └── __init__.py
│   └── configs
│       ├── _base_
│       │   ├── models
│       │   │   ├── paconv_cuda_ssg.py
│       │   │   ├── hv_pointpillars_fpn_lyft.py
│       │   │   ├── hv_pointpillars_fpn_range100_lyft.py
│       │   │   ├── pointnet2_msg.py
│       │   │   ├── pointnet2_ssg.py
│       │   │   ├── paconv_ssg.py
│       │   │   ├── fcos3d.py
│       │   │   ├── votenet.py
│       │   │   ├── groupfree3d.py
│       │   │   ├── hv_second_secfpn_kitti.py
│       │   │   ├── 3dssd.py
│       │   │   ├── hv_pointpillars_secfpn_kitti.py
│       │   │   ├── centerpoint_02pillar_second_secfpn_nus.py
│       │   │   ├── centerpoint_01voxel_second_secfpn_nus.py
│       │   │   ├── hv_pointpillars_fpn_nus.py
│       │   │   ├── hv_second_secfpn_waymo.py
│       │   │   ├── imvotenet_image.py
│       │   │   ├── hv_pointpillars_secfpn_waymo.py
│       │   │   └── mask_rcnn_r50_fpn.py
│       │   ├── schedules
│       │   │   ├── mmdet_schedule_1x.py
│       │   │   ├── seg_cosine_200e.py
│       │   │   ├── seg_cosine_50e.py
│       │   │   ├── seg_cosine_150e.py
│       │   │   ├── schedule_3x.py
│       │   │   ├── schedule_2x.py
│       │   │   ├── cosine.py
│       │   │   ├── cyclic_20e.py
│       │   │   └── cyclic_40e.py
│       │   ├── default_runtime.py
│       │   └── datasets
│       │       ├── coco_instance.py
│       │       ├── nuim_instance.py
│       │       ├── nus-mono3d.py
│       │       ├── sunrgbd-3d-10class.py
│       │       ├── s3dis-3d-5class.py
│       │       ├── scannet-3d-18class.py
│       │       ├── scannet_seg-3d-20class.py
│       │       ├── s3dis_seg-3d-13class.py
│       │       ├── kitti-3d-car.py
│       │       ├── lyft-3d.py
│       │       ├── range100_lyft-3d.py
│       │       ├── kitti-3d-3class.py
│       │       ├── waymoD5-3d-car.py
│       │       ├── waymoD5-3d-3class.py
│       │       └── nus-3d.py
│       └── datasets
│           ├── custom_waymo-3d.py
│           ├── custom_lyft-3d.py
│           └── custom_nus-3d.py
├── figs
│   ├── arch.png
│   └── sota_results.png
├── docs
│   ├── getting_started.md
│   ├── prepare_dataset.md
│   └── install.md
├── README.md
└── hf_guide.md
/tools/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/projects/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tools/analysis_tools/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/projects/mmdet3d_plugin/models/opt/__init__.py:
--------------------------------------------------------------------------------
1 | from .adamw import AdamW2
--------------------------------------------------------------------------------
/projects/mmdet3d_plugin/models/hooks/__init__.py:
--------------------------------------------------------------------------------
1 | from .hooks import GradChecker
--------------------------------------------------------------------------------
/figs/arch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HFAiLab/BEVFormer/HEAD/figs/arch.png
--------------------------------------------------------------------------------
/projects/mmdet3d_plugin/bevformer/detectors/__init__.py:
--------------------------------------------------------------------------------
1 | from .bevformer import BEVFormer
--------------------------------------------------------------------------------
/tools/data_converter/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 |
--------------------------------------------------------------------------------
/projects/mmdet3d_plugin/core/evaluation/__init__.py:
--------------------------------------------------------------------------------
1 | from .eval_hooks import CustomDistEvalHook
--------------------------------------------------------------------------------
/figs/sota_results.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/HFAiLab/BEVFormer/HEAD/figs/sota_results.png
--------------------------------------------------------------------------------
/projects/mmdet3d_plugin/bevformer/dense_heads/__init__.py:
--------------------------------------------------------------------------------
1 | from .bevformer_head import BEVFormerHead
--------------------------------------------------------------------------------
/projects/mmdet3d_plugin/models/backbones/__init__.py:
--------------------------------------------------------------------------------
1 | from .vovnet import VoVNet
2 |
3 | __all__ = ['VoVNet']
--------------------------------------------------------------------------------
/projects/mmdet3d_plugin/core/bbox/coders/__init__.py:
--------------------------------------------------------------------------------
1 | from .nms_free_coder import NMSFreeCoder
2 |
3 | __all__ = ['NMSFreeCoder']
4 |
--------------------------------------------------------------------------------
/projects/mmdet3d_plugin/bevformer/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from .dense_heads import *
3 | from .detectors import *
4 | from .modules import *
5 |
--------------------------------------------------------------------------------
/projects/mmdet3d_plugin/core/bbox/assigners/__init__.py:
--------------------------------------------------------------------------------
1 | from .hungarian_assigner_3d import HungarianAssigner3D
2 |
3 | __all__ = ['HungarianAssigner3D']
4 |
--------------------------------------------------------------------------------
/projects/mmdet3d_plugin/bevformer/apis/__init__.py:
--------------------------------------------------------------------------------
1 | from .train import custom_train_model
2 | from .mmdet_train import custom_train_detector
3 | # from .test import custom_multi_gpu_test
--------------------------------------------------------------------------------
/projects/mmdet3d_plugin/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | from .nuscenes_dataset import CustomNuScenesDataset
2 | from .builder import custom_build_dataset
3 |
4 | __all__ = [
5 | 'CustomNuScenesDataset'
6 | ]
7 |
--------------------------------------------------------------------------------
/projects/mmdet3d_plugin/core/bbox/match_costs/__init__.py:
--------------------------------------------------------------------------------
1 | from mmdet.core.bbox.match_costs import build_match_cost
2 | from .match_cost import BBox3DL1Cost
3 |
4 | __all__ = ['build_match_cost', 'BBox3DL1Cost']
--------------------------------------------------------------------------------
/projects/mmdet3d_plugin/models/utils/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from .bricks import run_time
3 | from .grid_mask import GridMask
4 | from .position_embedding import RelPositionEmbedding
5 | from .visual import save_tensor
--------------------------------------------------------------------------------
/projects/mmdet3d_plugin/datasets/samplers/__init__.py:
--------------------------------------------------------------------------------
1 | from .group_sampler import DistributedGroupSampler
2 | from .distributed_sampler import DistributedSampler
3 | from .sampler import SAMPLER, build_sampler
4 |
5 |
--------------------------------------------------------------------------------
/projects/configs/_base_/models/paconv_cuda_ssg.py:
--------------------------------------------------------------------------------
1 | _base_ = './paconv_ssg.py'
2 |
3 | model = dict(
4 | backbone=dict(
5 | sa_cfg=dict(
6 | type='PAConvCUDASAModule',
7 | scorenet_cfg=dict(mlp_channels=[8, 16, 16]))))
8 |
--------------------------------------------------------------------------------
/tools/dist_train.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | CONFIG=$1
4 | GPUS=$2
5 | PORT=${PORT:-28509}
6 |
7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
 8 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \
9 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} --deterministic
10 |
--------------------------------------------------------------------------------
/projects/mmdet3d_plugin/datasets/samplers/sampler.py:
--------------------------------------------------------------------------------
1 | from mmcv.utils.registry import Registry, build_from_cfg
2 |
3 | SAMPLER = Registry('sampler')
4 |
5 |
6 | def build_sampler(cfg, default_args):
7 | return build_from_cfg(cfg, SAMPLER, default_args)
8 |
--------------------------------------------------------------------------------
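
A minimal usage sketch of the `SAMPLER` registry defined above (not part of the repository; assumes the plugin package is importable and that the `DistributedSampler` shown later in this dump has been imported so its registration runs):

```python
# Hypothetical sketch: build a registered sampler from a config dict.
from projects.mmdet3d_plugin.datasets.samplers.sampler import build_sampler
# Importing the module runs its @SAMPLER.register_module() decorator.
from projects.mmdet3d_plugin.datasets.samplers.distributed_sampler import DistributedSampler  # noqa: F401

sampler = build_sampler(
    dict(type='DistributedSampler', shuffle=False, seed=0),
    default_args=dict(dataset=list(range(100)), num_replicas=8, rank=0))
print(len(list(sampler)))  # 13 indices for this rank (ceil(100 / 8) = 13)
```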
/tools/analysis_tools/get_params.py:
--------------------------------------------------------------------------------
1 | import torch
2 | file_path = './ckpts/bevformer_v4.pth'
3 | model = torch.load(file_path, map_location='cpu')
4 | all = 0
5 | for key in list(model['state_dict'].keys()):
6 | all += model['state_dict'][key].nelement()
7 | print(all)
8 |
9 | # smaller 63374123
10 | # v4 69140395
11 |
--------------------------------------------------------------------------------
/tools/dist_test.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | CONFIG=$1
4 | CHECKPOINT=$2
5 | GPUS=$3
6 | PORT=${PORT:-29503}
7 |
8 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
9 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \
10 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} --eval bbox
11 |
--------------------------------------------------------------------------------
/projects/mmdet3d_plugin/bevformer/modules/__init__.py:
--------------------------------------------------------------------------------
1 | from .transformer import PerceptionTransformer
2 | from .spatial_cross_attention import SpatialCrossAttention, MSDeformableAttention3D
3 | from .temporal_self_attention import TemporalSelfAttention
4 | from .encoder import BEVFormerEncoder, BEVFormerLayer
5 | from .decoder import DetectionTransformerDecoder
6 |
7 |
--------------------------------------------------------------------------------
/projects/configs/_base_/schedules/mmdet_schedule_1x.py:
--------------------------------------------------------------------------------
1 | # optimizer
2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
3 | optimizer_config = dict(grad_clip=None)
4 | # learning policy
5 | lr_config = dict(
6 | policy='step',
7 | warmup='linear',
8 | warmup_iters=500,
9 | warmup_ratio=0.001,
10 | step=[8, 11])
11 | runner = dict(type='EpochBasedRunner', max_epochs=12)
12 |
--------------------------------------------------------------------------------
/projects/configs/_base_/schedules/seg_cosine_200e.py:
--------------------------------------------------------------------------------
1 | # optimizer
2 | # This schedule is mainly used on ScanNet dataset in segmentation task
3 | optimizer = dict(type='Adam', lr=0.001, weight_decay=0.01)
4 | optimizer_config = dict(grad_clip=None)
5 | lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5)
6 | momentum_config = None
7 |
8 | # runtime settings
9 | runner = dict(type='EpochBasedRunner', max_epochs=200)
10 |
--------------------------------------------------------------------------------
/projects/configs/_base_/schedules/seg_cosine_50e.py:
--------------------------------------------------------------------------------
1 | # optimizer
2 | # This schedule is mainly used on S3DIS dataset in segmentation task
3 | optimizer = dict(type='Adam', lr=0.001, weight_decay=0.001)
4 | optimizer_config = dict(grad_clip=None)
5 | lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5)
6 | momentum_config = None
7 |
8 | # runtime settings
9 | runner = dict(type='EpochBasedRunner', max_epochs=50)
10 |
--------------------------------------------------------------------------------
/projects/configs/_base_/schedules/seg_cosine_150e.py:
--------------------------------------------------------------------------------
1 | # optimizer
2 | # This schedule is mainly used on S3DIS dataset in segmentation task
3 | optimizer = dict(type='SGD', lr=0.2, weight_decay=0.0001, momentum=0.9)
4 | optimizer_config = dict(grad_clip=None)
5 | lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=0.002)
6 | momentum_config = None
7 |
8 | # runtime settings
9 | runner = dict(type='EpochBasedRunner', max_epochs=150)
10 |
--------------------------------------------------------------------------------
/projects/configs/_base_/schedules/schedule_3x.py:
--------------------------------------------------------------------------------
1 | # optimizer
2 | # This schedule is mainly used by models on indoor dataset,
3 | # e.g., VoteNet on SUNRGBD and ScanNet
4 | lr = 0.008 # max learning rate
5 | optimizer = dict(type='AdamW', lr=lr, weight_decay=0.01)
6 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
7 | lr_config = dict(policy='step', warmup=None, step=[24, 32])
8 | # runtime settings
9 | runner = dict(type='EpochBasedRunner', max_epochs=36)
10 |
--------------------------------------------------------------------------------
/projects/mmdet3d_plugin/models/hooks/hooks.py:
--------------------------------------------------------------------------------
1 | from mmcv.runner.hooks.hook import HOOKS, Hook
2 | from projects.mmdet3d_plugin.models.utils import run_time
3 |
4 |
5 | @HOOKS.register_module()
6 | class GradChecker(Hook):
7 |
8 | def after_train_iter(self, runner):
9 | for key, val in runner.model.named_parameters():
    10 |             if val.grad is None and val.requires_grad:
    11 |                 print('WARNING: {key}\'s parameters are not used!'.format(key=key))
12 |
13 |
14 |
--------------------------------------------------------------------------------
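
A config-style sketch of enabling this hook (my addition, not taken from the repo's configs; it relies on mmcv's standard `custom_hooks` mechanism):

```python
# Hypothetical config fragment: register GradChecker so that parameters
# receiving no gradient are reported after every training iteration.
custom_hooks = [dict(type='GradChecker', priority='HIGHEST')]
```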
/projects/configs/_base_/schedules/schedule_2x.py:
--------------------------------------------------------------------------------
1 | # optimizer
2 | # This schedule is mainly used by models on nuScenes dataset
3 | optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.01)
4 | # max_norm=10 is better for SECOND
5 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
6 | lr_config = dict(
7 | policy='step',
8 | warmup='linear',
9 | warmup_iters=1000,
10 | warmup_ratio=1.0 / 1000,
11 | step=[20, 23])
12 | momentum_config = None
13 | # runtime settings
14 | runner = dict(type='EpochBasedRunner', max_epochs=24)
15 |
--------------------------------------------------------------------------------
/projects/mmdet3d_plugin/__init__.py:
--------------------------------------------------------------------------------
1 | from .core.bbox.assigners.hungarian_assigner_3d import HungarianAssigner3D
2 | from .core.bbox.coders.nms_free_coder import NMSFreeCoder
3 | from .core.bbox.match_costs import BBox3DL1Cost
4 | from .core.evaluation.eval_hooks import CustomDistEvalHook
5 | from .datasets.pipelines import (
6 | PhotoMetricDistortionMultiViewImage, PadMultiViewImage,
7 | NormalizeMultiviewImage, CustomCollect3D)
8 | from .models.backbones.vovnet import VoVNet
9 | from .models.utils import *
10 | from .models.opt.adamw import AdamW2
11 | from .bevformer import *
12 |
--------------------------------------------------------------------------------
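
The plugin package is normally pulled in through the config rather than imported by hand. A hedged sketch of the usual BEVFormer/DETR3D-style plugin keys (an assumption, since the experiment configs are not shown in this dump):

```python
# Hypothetical config fragment: the custom train/test scripts import the plugin
# package when these keys are present, which triggers the registrations above.
plugin = True
plugin_dir = 'projects/mmdet3d_plugin/'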
/projects/configs/_base_/default_runtime.py:
--------------------------------------------------------------------------------
1 | checkpoint_config = dict(interval=1)
2 | # yapf:disable push
3 | # By default we use textlogger hook and tensorboard
4 | # For more loggers see
5 | # https://mmcv.readthedocs.io/en/latest/api.html#mmcv.runner.LoggerHook
6 | log_config = dict(
7 | interval=50,
8 | hooks=[
9 | dict(type='TextLoggerHook'),
10 | dict(type='TensorboardLoggerHook')
11 | ])
12 | # yapf:enable
13 | dist_params = dict(backend='nccl')
14 | log_level = 'INFO'
15 | work_dir = None
16 | load_from = None
17 | resume_from = None
18 | workflow = [('train', 1)]
19 |
--------------------------------------------------------------------------------
/projects/mmdet3d_plugin/datasets/pipelines/__init__.py:
--------------------------------------------------------------------------------
1 | from .transform_3d import (
2 | PadMultiViewImage, NormalizeMultiviewImage,
3 | PhotoMetricDistortionMultiViewImage, CustomCollect3D, RandomScaleImageMultiViewImage)
4 | from .formating import CustomDefaultFormatBundle3D
5 | from .loading import FFrecordClient, LoadMultiViewImageFromFilesHF
6 | __all__ = [
7 | 'PadMultiViewImage', 'NormalizeMultiviewImage',
8 | 'PhotoMetricDistortionMultiViewImage', 'CustomDefaultFormatBundle3D', 'CustomCollect3D',
9 | 'RandomScaleImageMultiViewImage', 'FFrecordClient', 'LoadMultiViewImageFromFilesHF'
10 | ]
--------------------------------------------------------------------------------
/docs/getting_started.md:
--------------------------------------------------------------------------------
1 | # Prerequisites
2 |
3 | **Please ensure you have prepared the environment and the nuScenes dataset.**
4 |
5 | # Train and Test
6 |
7 | Train BEVFormer with 8 GPUs
8 | ```
9 | ./tools/dist_train.sh ./projects/configs/bevformer/bevformer_base.py 8
10 | ```
11 |
12 | Eval BEVFormer with 8 GPUs
13 | ```
14 | ./tools/dist_test.sh ./projects/configs/bevformer/bevformer_base.py ./path/to/ckpts.pth 8
15 | ```
    16 | Note: evaluating with 1 GPU can yield slightly higher performance, because continuous video sequences may be truncated when split across multiple GPUs. By default we report the scores evaluated with 8 GPUs.
17 |
18 |
19 |
--------------------------------------------------------------------------------
/projects/configs/_base_/schedules/cosine.py:
--------------------------------------------------------------------------------
1 | # This schedule is mainly used by models with dynamic voxelization
2 | # optimizer
3 | lr = 0.003 # max learning rate
4 | optimizer = dict(
5 | type='AdamW',
6 | lr=lr,
     7 |     betas=(0.95, 0.99),  # the momentum is changed during training
8 | weight_decay=0.001)
9 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
10 |
11 | lr_config = dict(
12 | policy='CosineAnnealing',
13 | warmup='linear',
14 | warmup_iters=1000,
15 | warmup_ratio=1.0 / 10,
16 | min_lr_ratio=1e-5)
17 |
18 | momentum_config = None
19 |
20 | runner = dict(type='EpochBasedRunner', max_epochs=40)
21 |
--------------------------------------------------------------------------------
/projects/mmdet3d_plugin/models/utils/bricks.py:
--------------------------------------------------------------------------------
1 | import functools
2 | import time
3 | from collections import defaultdict
     4 | time_maps = defaultdict(lambda: 0.)
     5 | count_maps = defaultdict(lambda: 0.)
     6 | def run_time(name):
     7 |     def middle(fn):
     8 |         def wrapper(*args, **kwargs):
     9 |             start = time.time()
    10 |             res = fn(*args, **kwargs)
    11 |             key = '%s : %s' % (name, fn.__name__)
    12 |             time_maps[key] += time.time() - start
    13 |             count_maps[key] += 1
    14 |             print('%s takes %f s on average' % (key, time_maps[key] / count_maps[key]))
    15 |             return res
    16 |         return wrapper
    17 |     return middle
--------------------------------------------------------------------------------
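
A small usage sketch of the `run_time` decorator above (my example, not part of the file; assumes the plugin is importable):

```python
import torch
from projects.mmdet3d_plugin.models.utils.bricks import run_time

@run_time('demo')
def dummy_forward(x):
    return x @ x.t()

for _ in range(3):
    dummy_forward(torch.rand(512, 512))
# Each call prints the running average wall-clock time for 'demo : dummy_forward'.
```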
/tools/misc/print_config.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import argparse
3 | from mmcv import Config, DictAction
4 |
5 |
6 | def parse_args():
7 | parser = argparse.ArgumentParser(description='Print the whole config')
8 | parser.add_argument('config', help='config file path')
9 | parser.add_argument(
10 | '--options', nargs='+', action=DictAction, help='arguments in dict')
11 | args = parser.parse_args()
12 |
13 | return args
14 |
15 |
16 | def main():
17 | args = parse_args()
18 |
19 | cfg = Config.fromfile(args.config)
20 | if args.options is not None:
21 | cfg.merge_from_dict(args.options)
22 | print(f'Config:\n{cfg.pretty_text}')
23 |
24 |
25 | if __name__ == '__main__':
26 | main()
27 |
--------------------------------------------------------------------------------
/projects/mmdet3d_plugin/models/utils/visual.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torchvision.utils import make_grid
3 | import torchvision
4 | import matplotlib.pyplot as plt
5 | import cv2
6 |
7 |
8 | def convert_color(img_path):
9 | plt.figure()
10 | img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
11 | plt.imsave(img_path, img, cmap=plt.get_cmap('viridis'))
12 | plt.close()
13 |
14 |
15 | def save_tensor(tensor, path, pad_value=254.0,):
16 | print('save_tensor', path)
17 | tensor = tensor.to(torch.float).detach().cpu()
18 | if tensor.type() == 'torch.BoolTensor':
19 | tensor = tensor*255
20 | if len(tensor.shape) == 3:
21 | tensor = tensor.unsqueeze(1)
22 | tensor = make_grid(tensor, pad_value=pad_value, normalize=False).permute(1, 2, 0).numpy().copy()
23 | torchvision.utils.save_image(torch.tensor(tensor).permute(2, 0, 1), path)
24 | convert_color(path)
25 |
--------------------------------------------------------------------------------
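
A quick sketch of `save_tensor` (my example; it writes a padded image grid to disk and then recolors it with the viridis colormap):

```python
import torch
from projects.mmdet3d_plugin.models.utils.visual import save_tensor

# Six single-channel 32x32 maps -> one padded grid image on disk.
feature_maps = torch.rand(6, 32, 32)
save_tensor(feature_maps, 'feature_grid.png')
```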
/projects/configs/_base_/schedules/cyclic_20e.py:
--------------------------------------------------------------------------------
1 | # For nuScenes dataset, we usually evaluate the model at the end of training.
     2 | # Since the models are trained for 20 epochs by default, we set the evaluation
     3 | # interval to 20. Please change the interval accordingly if you do not
4 | # use a default schedule.
5 | # optimizer
6 | # This schedule is mainly used by models on nuScenes dataset
7 | optimizer = dict(type='AdamW', lr=1e-4, weight_decay=0.01)
8 | # max_norm=10 is better for SECOND
9 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
10 | lr_config = dict(
11 | policy='cyclic',
12 | target_ratio=(10, 1e-4),
13 | cyclic_times=1,
14 | step_ratio_up=0.4,
15 | )
16 | momentum_config = dict(
17 | policy='cyclic',
18 | target_ratio=(0.85 / 0.95, 1),
19 | cyclic_times=1,
20 | step_ratio_up=0.4,
21 | )
22 |
23 | # runtime settings
24 | runner = dict(type='EpochBasedRunner', max_epochs=20)
25 |
--------------------------------------------------------------------------------
/projects/mmdet3d_plugin/core/bbox/match_costs/match_cost.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from mmdet.core.bbox.match_costs.builder import MATCH_COST
3 |
4 |
5 | @MATCH_COST.register_module()
6 | class BBox3DL1Cost(object):
7 | """BBox3DL1Cost.
8 | Args:
9 | weight (int | float, optional): loss_weight
10 | """
11 |
12 | def __init__(self, weight=1.):
13 | self.weight = weight
14 |
15 | def __call__(self, bbox_pred, gt_bboxes):
16 | """
17 | Args:
18 | bbox_pred (Tensor): Predicted boxes with normalized coordinates
19 | (cx, cy, w, h), which are all in range [0, 1]. Shape
20 | [num_query, 4].
21 | gt_bboxes (Tensor): Ground truth boxes with normalized
22 | coordinates (x1, y1, x2, y2). Shape [num_gt, 4].
23 | Returns:
24 | torch.Tensor: bbox_cost value with weight
25 | """
26 | bbox_cost = torch.cdist(bbox_pred, gt_bboxes, p=1)
27 | return bbox_cost * self.weight
--------------------------------------------------------------------------------
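
A shape-level sketch of how `BBox3DL1Cost` is called (my example; in BEVFormer the boxes are 10-dimensional normalized codes rather than the 4-dim (cx, cy, w, h) boxes mentioned in the inherited docstring):

```python
import torch
from projects.mmdet3d_plugin.core.bbox.match_costs import BBox3DL1Cost

cost_fn = BBox3DL1Cost(weight=0.25)
bbox_pred = torch.rand(900, 10)   # e.g. 900 queries, 10-dim normalized box code
gt_bboxes = torch.rand(5, 10)     # 5 ground-truth boxes in the same encoding
cost = cost_fn(bbox_pred, gt_bboxes)
print(cost.shape)                 # torch.Size([900, 5]): pairwise weighted L1 cost
```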
/projects/configs/_base_/models/hv_pointpillars_fpn_lyft.py:
--------------------------------------------------------------------------------
1 | _base_ = './hv_pointpillars_fpn_nus.py'
2 |
3 | # model settings (based on nuScenes model settings)
4 | # Voxel size for voxel encoder
5 | # Usually voxel size is changed consistently with the point cloud range
6 | # If point cloud range is modified, do remember to change all related
7 | # keys in the config.
8 | model = dict(
9 | pts_voxel_layer=dict(
10 | max_num_points=20,
11 | point_cloud_range=[-80, -80, -5, 80, 80, 3],
12 | max_voxels=(60000, 60000)),
13 | pts_voxel_encoder=dict(
14 | feat_channels=[64], point_cloud_range=[-80, -80, -5, 80, 80, 3]),
15 | pts_middle_encoder=dict(output_shape=[640, 640]),
16 | pts_bbox_head=dict(
17 | num_classes=9,
18 | anchor_generator=dict(
19 | ranges=[[-80, -80, -1.8, 80, 80, -1.8]], custom_values=[]),
20 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7)),
21 | # model training settings (based on nuScenes model settings)
22 | train_cfg=dict(pts=dict(code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])))
23 |
--------------------------------------------------------------------------------
/projects/configs/_base_/models/hv_pointpillars_fpn_range100_lyft.py:
--------------------------------------------------------------------------------
1 | _base_ = './hv_pointpillars_fpn_nus.py'
2 |
3 | # model settings (based on nuScenes model settings)
4 | # Voxel size for voxel encoder
5 | # Usually voxel size is changed consistently with the point cloud range
6 | # If point cloud range is modified, do remember to change all related
7 | # keys in the config.
8 | model = dict(
9 | pts_voxel_layer=dict(
10 | max_num_points=20,
11 | point_cloud_range=[-100, -100, -5, 100, 100, 3],
12 | max_voxels=(60000, 60000)),
13 | pts_voxel_encoder=dict(
14 | feat_channels=[64], point_cloud_range=[-100, -100, -5, 100, 100, 3]),
15 | pts_middle_encoder=dict(output_shape=[800, 800]),
16 | pts_bbox_head=dict(
17 | num_classes=9,
18 | anchor_generator=dict(
19 | ranges=[[-100, -100, -1.8, 100, 100, -1.8]], custom_values=[]),
20 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7)),
21 | # model training settings (based on nuScenes model settings)
22 | train_cfg=dict(pts=dict(code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])))
23 |
--------------------------------------------------------------------------------
/docs/prepare_dataset.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | ## NuScenes
     4 | Download the nuScenes V1.0 full dataset and the CAN bus expansion data [HERE](https://www.nuscenes.org/download), then prepare the nuScenes data as follows.
5 |
6 |
7 | **Download CAN bus expansion**
8 | ```
9 | # download 'can_bus.zip'
10 | unzip can_bus.zip
11 | # move can_bus to data dir
12 | ```
13 |
14 | **Prepare nuScenes data**
15 |
    16 | *We generate custom annotation files, which are different from mmdet3d's.*
17 | ```
18 | python tools/create_data.py nuscenes --root-path ./data/nuscenes --out-dir ./data/nuscenes --extra-tag nuscenes --version v1.0 --canbus ./data
19 | ```
20 |
21 | Using the above code will generate `nuscenes_infos_temporal_{train,val}.pkl`.
22 |
23 | **Folder structure**
24 | ```
25 | bevformer
26 | ├── projects/
27 | ├── tools/
28 | ├── configs/
29 | ├── ckpts/
30 | │ ├── r101_dcn_fcos3d_pretrain.pth
31 | ├── data/
32 | │ ├── can_bus/
33 | │ ├── nuscenes/
34 | │ │ ├── maps/
35 | │ │ ├── samples/
36 | │ │ ├── sweeps/
37 | │ │ ├── v1.0-test/
    38 | │   │   ├── v1.0-trainval/
    39 | │   │   ├── nuscenes_infos_temporal_train.pkl
    40 | │   │   ├── nuscenes_infos_temporal_val.pkl
41 | ```
42 |
--------------------------------------------------------------------------------
/tools/model_converters/publish_model.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import argparse
3 | import subprocess
4 | import torch
5 |
6 |
7 | def parse_args():
8 | parser = argparse.ArgumentParser(
9 | description='Process a checkpoint to be published')
10 | parser.add_argument('in_file', help='input checkpoint filename')
11 | parser.add_argument('out_file', help='output checkpoint filename')
12 | args = parser.parse_args()
13 | return args
14 |
15 |
16 | def process_checkpoint(in_file, out_file):
17 | checkpoint = torch.load(in_file, map_location='cpu')
18 | # remove optimizer for smaller file size
19 | if 'optimizer' in checkpoint:
20 | del checkpoint['optimizer']
21 | # if it is necessary to remove some sensitive data in checkpoint['meta'],
22 | # add the code here.
23 | torch.save(checkpoint, out_file)
24 | sha = subprocess.check_output(['sha256sum', out_file]).decode()
    25 |     final_file = out_file[:-len('.pth')] + '-{}.pth'.format(sha[:8])
26 | subprocess.Popen(['mv', out_file, final_file])
27 |
28 |
29 | def main():
30 | args = parse_args()
31 | process_checkpoint(args.in_file, args.out_file)
32 |
33 |
34 | if __name__ == '__main__':
35 | main()
36 |
--------------------------------------------------------------------------------
/projects/configs/_base_/models/pointnet2_msg.py:
--------------------------------------------------------------------------------
1 | _base_ = './pointnet2_ssg.py'
2 |
3 | # model settings
4 | model = dict(
5 | backbone=dict(
6 | _delete_=True,
7 | type='PointNet2SAMSG',
8 | in_channels=6, # [xyz, rgb], should be modified with dataset
9 | num_points=(1024, 256, 64, 16),
10 | radii=((0.05, 0.1), (0.1, 0.2), (0.2, 0.4), (0.4, 0.8)),
11 | num_samples=((16, 32), (16, 32), (16, 32), (16, 32)),
12 | sa_channels=(((16, 16, 32), (32, 32, 64)), ((64, 64, 128), (64, 96,
13 | 128)),
14 | ((128, 196, 256), (128, 196, 256)), ((256, 256, 512),
15 | (256, 384, 512))),
16 | aggregation_channels=(None, None, None, None),
17 | fps_mods=(('D-FPS'), ('D-FPS'), ('D-FPS'), ('D-FPS')),
18 | fps_sample_range_lists=((-1), (-1), (-1), (-1)),
19 | dilated_group=(False, False, False, False),
20 | out_indices=(0, 1, 2, 3),
21 | sa_cfg=dict(
22 | type='PointSAModuleMSG',
23 | pool_mod='max',
24 | use_xyz=True,
25 | normalize_xyz=False)),
26 | decode_head=dict(
27 | fp_channels=((1536, 256, 256), (512, 256, 256), (352, 256, 128),
28 | (128, 128, 128, 128))))
29 |
--------------------------------------------------------------------------------
/projects/configs/_base_/models/pointnet2_ssg.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='EncoderDecoder3D',
4 | backbone=dict(
5 | type='PointNet2SASSG',
6 | in_channels=6, # [xyz, rgb], should be modified with dataset
7 | num_points=(1024, 256, 64, 16),
8 | radius=(0.1, 0.2, 0.4, 0.8),
9 | num_samples=(32, 32, 32, 32),
10 | sa_channels=((32, 32, 64), (64, 64, 128), (128, 128, 256), (256, 256,
11 | 512)),
12 | fp_channels=(),
13 | norm_cfg=dict(type='BN2d'),
14 | sa_cfg=dict(
15 | type='PointSAModule',
16 | pool_mod='max',
17 | use_xyz=True,
18 | normalize_xyz=False)),
19 | decode_head=dict(
20 | type='PointNet2Head',
21 | fp_channels=((768, 256, 256), (384, 256, 256), (320, 256, 128),
22 | (128, 128, 128, 128)),
23 | channels=128,
24 | dropout_ratio=0.5,
25 | conv_cfg=dict(type='Conv1d'),
26 | norm_cfg=dict(type='BN1d'),
27 | act_cfg=dict(type='ReLU'),
28 | loss_decode=dict(
29 | type='CrossEntropyLoss',
30 | use_sigmoid=False,
31 | class_weight=None, # should be modified with dataset
32 | loss_weight=1.0)),
33 | # model training and testing settings
34 | train_cfg=dict(),
35 | test_cfg=dict(mode='slide'))
36 |
--------------------------------------------------------------------------------
/projects/mmdet3d_plugin/datasets/samplers/distributed_sampler.py:
--------------------------------------------------------------------------------
1 | import math
2 |
3 | import torch
4 | from torch.utils.data import DistributedSampler as _DistributedSampler
5 | from .sampler import SAMPLER
6 |
7 |
8 | @SAMPLER.register_module()
9 | class DistributedSampler(_DistributedSampler):
10 |
11 | def __init__(self,
12 | dataset=None,
13 | num_replicas=None,
14 | rank=None,
15 | shuffle=True,
16 | seed=0):
17 | super().__init__(
18 | dataset, num_replicas=num_replicas, rank=rank, shuffle=shuffle)
    19 |         # for compatibility with PyTorch 1.3+
20 | self.seed = seed if seed is not None else 0
21 |
22 | def __iter__(self):
23 | # deterministically shuffle based on epoch
24 | if self.shuffle:
25 | assert False
26 | else:
27 | indices = torch.arange(len(self.dataset)).tolist()
28 |
29 | # add extra samples to make it evenly divisible
30 | # in case that indices is shorter than half of total_size
31 | indices = (indices *
32 | math.ceil(self.total_size / len(indices)))[:self.total_size]
33 | assert len(indices) == self.total_size
34 |
35 | # subsample
36 | per_replicas = self.total_size//self.num_replicas
37 | # indices = indices[self.rank:self.total_size:self.num_replicas]
38 | indices = indices[self.rank*per_replicas:(self.rank+1)*per_replicas]
39 | assert len(indices) == self.num_samples
40 |
41 | return iter(indices)
42 |
--------------------------------------------------------------------------------
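
A toy sketch of the contiguous per-rank slicing implemented above (my example; with `shuffle=False` every replica receives an equally sized, contiguous block of indices, padded by wrapping around):

```python
from projects.mmdet3d_plugin.datasets.samplers.distributed_sampler import DistributedSampler

dataset = list(range(10))
for rank in range(4):
    sampler = DistributedSampler(dataset, num_replicas=4, rank=rank, shuffle=False)
    print(rank, list(sampler))
# 0 [0, 1, 2]
# 1 [3, 4, 5]
# 2 [6, 7, 8]
# 3 [9, 0, 1]   # padded so every rank sees the same number of samples
```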
/tools/data_converter/lyft_data_fixer.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import argparse
3 | import numpy as np
4 | import os
5 |
6 |
7 | def fix_lyft(root_folder='./data/lyft', version='v1.01'):
8 | # refer to https://www.kaggle.com/c/3d-object-detection-for-autonomous-vehicles/discussion/110000 # noqa
9 | lidar_path = 'lidar/host-a011_lidar1_1233090652702363606.bin'
10 | root_folder = os.path.join(root_folder, f'{version}-train')
11 | lidar_path = os.path.join(root_folder, lidar_path)
12 | assert os.path.isfile(lidar_path), f'Please download the complete Lyft ' \
13 | f'dataset and make sure {lidar_path} is present.'
14 | points = np.fromfile(lidar_path, dtype=np.float32, count=-1)
15 | try:
16 | points.reshape([-1, 5])
17 | print(f'This fix is not required for version {version}.')
18 | except ValueError:
19 | new_points = np.array(list(points) + [100.0, 1.0], dtype='float32')
20 | new_points.tofile(lidar_path)
21 | print(f'Appended 100.0 and 1.0 to the end of {lidar_path}.')
22 |
23 |
24 | parser = argparse.ArgumentParser(description='Lyft dataset fixer arg parser')
25 | parser.add_argument(
26 | '--root-folder',
27 | type=str,
28 | default='./data/lyft',
29 | help='specify the root path of Lyft dataset')
30 | parser.add_argument(
31 | '--version',
32 | type=str,
33 | default='v1.01',
34 | help='specify Lyft dataset version')
35 | args = parser.parse_args()
36 |
37 | if __name__ == '__main__':
38 | fix_lyft(root_folder=args.root_folder, version=args.version)
39 |
--------------------------------------------------------------------------------
/projects/mmdet3d_plugin/models/utils/position_embedding.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import math
4 |
5 | class RelPositionEmbedding(nn.Module):
6 | def __init__(self, num_pos_feats=64, pos_norm=True):
7 | super().__init__()
8 | self.num_pos_feats = num_pos_feats
9 | self.fc = nn.Linear(4, self.num_pos_feats,bias=False)
10 | #nn.init.orthogonal_(self.fc.weight)
11 | #self.fc.weight.requires_grad = False
12 | self.pos_norm = pos_norm
13 | if self.pos_norm:
14 | self.norm = nn.LayerNorm(self.num_pos_feats)
15 | def forward(self, tensor):
16 | #mask = nesttensor.mask
17 | B,C,H,W = tensor.shape
18 | #print('tensor.shape', tensor.shape)
19 | y_range = (torch.arange(H) / float(H - 1)).to(tensor.device)
20 | #y_axis = torch.stack((y_range, 1-y_range),dim=1)
21 | y_axis = torch.stack((torch.cos(y_range * math.pi), torch.sin(y_range * math.pi)), dim=1)
22 | y_axis = y_axis.reshape(H, 1, 2).repeat(1, W, 1).reshape(H * W, 2)
23 |
24 | x_range = (torch.arange(W) / float(W - 1)).to(tensor.device)
25 | #x_axis =torch.stack((x_range,1-x_range),dim=1)
26 | x_axis = torch.stack((torch.cos(x_range * math.pi), torch.sin(x_range * math.pi)), dim=1)
27 | x_axis = x_axis.reshape(1, W, 2).repeat(H, 1, 1).reshape(H * W, 2)
28 | x_pos = torch.cat((y_axis, x_axis), dim=1)
29 | x_pos = self.fc(x_pos)
30 |
31 | if self.pos_norm:
32 | x_pos = self.norm(x_pos)
33 | #print('xpos,', x_pos.max(),x_pos.min())
34 | return x_pos
--------------------------------------------------------------------------------
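
A shape check for `RelPositionEmbedding` (my sketch; only the spatial size of the input feature map is used):

```python
import torch
from projects.mmdet3d_plugin.models.utils.position_embedding import RelPositionEmbedding

embed = RelPositionEmbedding(num_pos_feats=64)
feat = torch.rand(2, 256, 8, 16)   # (B, C, H, W); B and C are ignored
pos = embed(feat)
print(pos.shape)                   # torch.Size([128, 64]) == (H * W, num_pos_feats)
```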
/tools/misc/visualize_results.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import argparse
3 | import mmcv
4 | from mmcv import Config
5 |
6 | from mmdet3d.datasets import build_dataset
7 |
8 |
9 | def parse_args():
10 | parser = argparse.ArgumentParser(
11 | description='MMDet3D visualize the results')
12 | parser.add_argument('config', help='test config file path')
13 | parser.add_argument('--result', help='results file in pickle format')
14 | parser.add_argument(
15 | '--show-dir', help='directory where visualize results will be saved')
16 | args = parser.parse_args()
17 |
18 | return args
19 |
20 |
21 | def main():
22 | args = parse_args()
23 |
24 | if args.result is not None and \
25 | not args.result.endswith(('.pkl', '.pickle')):
26 | raise ValueError('The results file must be a pkl file.')
27 |
28 | cfg = Config.fromfile(args.config)
29 | cfg.data.test.test_mode = True
30 |
31 | # build the dataset
32 | dataset = build_dataset(cfg.data.test)
33 | results = mmcv.load(args.result)
34 |
35 | if getattr(dataset, 'show', None) is not None:
36 | # data loading pipeline for showing
37 | eval_pipeline = cfg.get('eval_pipeline', {})
38 | if eval_pipeline:
39 | dataset.show(results, args.show_dir, pipeline=eval_pipeline)
40 | else:
41 | dataset.show(results, args.show_dir) # use default pipeline
42 | else:
43 | raise NotImplementedError(
44 | 'Show is not implemented for dataset {}!'.format(
45 | type(dataset).__name__))
46 |
47 |
48 | if __name__ == '__main__':
49 | main()
50 |
--------------------------------------------------------------------------------
/projects/mmdet3d_plugin/core/bbox/util.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | def normalize_bbox(bboxes, pc_range):
5 |
6 | cx = bboxes[..., 0:1]
7 | cy = bboxes[..., 1:2]
8 | cz = bboxes[..., 2:3]
9 | w = bboxes[..., 3:4].log()
10 | l = bboxes[..., 4:5].log()
11 | h = bboxes[..., 5:6].log()
12 |
13 | rot = bboxes[..., 6:7]
14 | if bboxes.size(-1) > 7:
15 | vx = bboxes[..., 7:8]
16 | vy = bboxes[..., 8:9]
17 | normalized_bboxes = torch.cat(
18 | (cx, cy, w, l, cz, h, rot.sin(), rot.cos(), vx, vy), dim=-1
19 | )
20 | else:
21 | normalized_bboxes = torch.cat(
22 | (cx, cy, w, l, cz, h, rot.sin(), rot.cos()), dim=-1
23 | )
24 | return normalized_bboxes
25 |
26 | def denormalize_bbox(normalized_bboxes, pc_range):
27 | # rotation
28 | rot_sine = normalized_bboxes[..., 6:7]
29 |
30 | rot_cosine = normalized_bboxes[..., 7:8]
31 | rot = torch.atan2(rot_sine, rot_cosine)
32 |
33 | # center in the bev
34 | cx = normalized_bboxes[..., 0:1]
35 | cy = normalized_bboxes[..., 1:2]
36 | cz = normalized_bboxes[..., 4:5]
37 |
38 | # size
39 | w = normalized_bboxes[..., 2:3]
40 | l = normalized_bboxes[..., 3:4]
41 | h = normalized_bboxes[..., 5:6]
42 |
43 | w = w.exp()
44 | l = l.exp()
45 | h = h.exp()
46 | if normalized_bboxes.size(-1) > 8:
47 | # velocity
48 | vx = normalized_bboxes[:, 8:9]
49 | vy = normalized_bboxes[:, 9:10]
50 | denormalized_bboxes = torch.cat([cx, cy, cz, w, l, h, rot, vx, vy], dim=-1)
51 | else:
52 | denormalized_bboxes = torch.cat([cx, cy, cz, w, l, h, rot], dim=-1)
53 | return denormalized_bboxes
--------------------------------------------------------------------------------
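
A round-trip sketch for the two helpers above (my example; a 9-dim box (cx, cy, cz, w, l, h, yaw, vx, vy) is encoded into the 10-dim regression target and decoded back):

```python
import torch
from projects.mmdet3d_plugin.core.bbox.util import normalize_bbox, denormalize_bbox

boxes = torch.tensor([[5.0, -2.0, -1.0, 1.9, 4.5, 1.6, 0.3, 1.2, 0.1]])
encoded = normalize_bbox(boxes, pc_range=None)      # pc_range is accepted but unused
decoded = denormalize_bbox(encoded, pc_range=None)
print(torch.allclose(boxes, decoded, atol=1e-5))    # True (yaw recovered via atan2)
```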
/projects/configs/_base_/schedules/cyclic_40e.py:
--------------------------------------------------------------------------------
1 | # The schedule is usually used by models trained on KITTI dataset
2 |
3 | # The learning rate set in the cyclic schedule is the initial learning rate
4 | # rather than the max learning rate. Since the target_ratio is (10, 1e-4),
     5 | # rather than the max learning rate. Since the target_ratio is (10, 1e-4),
6 | lr = 0.0018
7 | # The optimizer follows the setting in SECOND.Pytorch, but here we use
     8 | # the official AdamW optimizer implemented by PyTorch.
9 | optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)
10 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
11 | # We use cyclic learning rate and momentum schedule following SECOND.Pytorch
12 | # https://github.com/traveller59/second.pytorch/blob/3aba19c9688274f75ebb5e576f65cfe54773c021/torchplus/train/learning_schedules_fastai.py#L69 # noqa
13 | # We implement them in mmcv, for more details, please refer to
14 | # https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/lr_updater.py#L327 # noqa
15 | # https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/momentum_updater.py#L130 # noqa
16 | lr_config = dict(
17 | policy='cyclic',
18 | target_ratio=(10, 1e-4),
19 | cyclic_times=1,
20 | step_ratio_up=0.4,
21 | )
22 | momentum_config = dict(
23 | policy='cyclic',
24 | target_ratio=(0.85 / 0.95, 1),
25 | cyclic_times=1,
26 | step_ratio_up=0.4,
27 | )
    28 | # Although the max_epochs is 40, this schedule is usually used with
29 | # RepeatDataset with repeat ratio N, thus the actual max epoch
30 | # number could be Nx40
31 | runner = dict(type='EpochBasedRunner', max_epochs=40)
32 |
--------------------------------------------------------------------------------
/projects/mmdet3d_plugin/datasets/pipelines/formating.py:
--------------------------------------------------------------------------------
1 |
2 | # Copyright (c) OpenMMLab. All rights reserved.
3 | import numpy as np
4 | from mmcv.parallel import DataContainer as DC
5 |
6 | from mmdet3d.core.bbox import BaseInstance3DBoxes
7 | from mmdet3d.core.points import BasePoints
8 | from mmdet.datasets.builder import PIPELINES
9 | from mmdet.datasets.pipelines import to_tensor
10 | from mmdet3d.datasets.pipelines import DefaultFormatBundle3D
11 |
12 | @PIPELINES.register_module()
13 | class CustomDefaultFormatBundle3D(DefaultFormatBundle3D):
14 | """Default formatting bundle.
15 | It simplifies the pipeline of formatting common fields for voxels,
16 | including "proposals", "gt_bboxes", "gt_labels", "gt_masks" and
17 | "gt_semantic_seg".
18 | These fields are formatted as follows.
19 | - img: (1)transpose, (2)to tensor, (3)to DataContainer (stack=True)
20 | - proposals: (1)to tensor, (2)to DataContainer
21 | - gt_bboxes: (1)to tensor, (2)to DataContainer
22 | - gt_bboxes_ignore: (1)to tensor, (2)to DataContainer
23 | - gt_labels: (1)to tensor, (2)to DataContainer
24 | """
25 |
26 | def __call__(self, results):
27 | """Call function to transform and format common fields in results.
28 | Args:
29 | results (dict): Result dict contains the data to convert.
30 | Returns:
31 | dict: The result dict contains the data that is formatted with
32 | default bundle.
33 | """
34 | # Format 3D data
35 | results = super(CustomDefaultFormatBundle3D, self).__call__(results)
36 | results['gt_map_masks'] = DC(
37 | to_tensor(results['gt_map_masks']), stack=True)
38 |
39 | return results
--------------------------------------------------------------------------------
/docs/install.md:
--------------------------------------------------------------------------------
1 | # Step-by-step installation instructions
2 |
     3 | These instructions follow https://mmdetection3d.readthedocs.io/en/latest/getting_started.html#installation
4 |
5 |
6 |
7 | **a. Create a conda virtual environment and activate it.**
8 | ```shell
9 | conda create -n open-mmlab python=3.8 -y
10 | conda activate open-mmlab
11 | ```
12 |
13 | **b. Install PyTorch and torchvision following the [official instructions](https://pytorch.org/).**
14 | ```shell
15 | pip install torch==1.9.1+cu111 torchvision==0.10.1+cu111 torchaudio==0.9.1 -f https://download.pytorch.org/whl/torch_stable.html
16 | # Recommended torch>=1.9
17 |
18 | ```
19 |
20 | **c. Install gcc>=5 in conda env (optional).**
21 | ```shell
22 | conda install -c omgarcia gcc-6 # gcc-6.2
23 | ```
24 |
    25 | **d. Install mmcv-full.**
26 | ```shell
27 | pip install mmcv-full==1.4.0
28 | # pip install mmcv-full==1.4.0 -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html
29 | ```
30 |
    31 | **e. Install mmdet and mmseg.**
32 | ```shell
33 | pip install mmdet==2.14.0
34 | pip install mmsegmentation==0.14.1
35 | ```
36 |
    37 | **f. Install mmdet3d from source code.**
38 | ```shell
39 | git clone https://github.com/open-mmlab/mmdetection3d.git
40 | cd mmdetection3d
41 | git checkout v0.17.1 # Other versions may not be compatible.
42 | python setup.py install
43 | ```
44 |
    45 | **g. Install timm.**
46 | ```shell
47 | pip install timm
48 | ```
49 |
50 |
    51 | **h. Clone BEVFormer.**
    52 | ```shell
53 | git clone https://github.com/zhiqi-li/BEVFormer.git
54 | ```
55 |
    56 | **i. Prepare pretrained models.**
57 | ```shell
58 | cd bevformer
59 | mkdir ckpts
60 |
    61 | cd ckpts && wget https://github.com/zhiqi-li/storage/releases/download/v1.0/r101_dcn_fcos3d_pretrain.pth
62 | ```
63 |
    64 | Note: this pretrained model is the same as the one used in [detr3d](https://github.com/WangYueFt/detr3d).
65 |
--------------------------------------------------------------------------------
/projects/configs/_base_/datasets/coco_instance.py:
--------------------------------------------------------------------------------
1 | dataset_type = 'CocoDataset'
2 | data_root = 'data/coco/'
3 | img_norm_cfg = dict(
4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
5 | train_pipeline = [
6 | dict(type='LoadImageFromFile'),
7 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
8 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
9 | dict(type='RandomFlip', flip_ratio=0.5),
10 | dict(type='Normalize', **img_norm_cfg),
11 | dict(type='Pad', size_divisor=32),
12 | dict(type='DefaultFormatBundle'),
13 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
14 | ]
15 | test_pipeline = [
16 | dict(type='LoadImageFromFile'),
17 | dict(
18 | type='MultiScaleFlipAug',
19 | img_scale=(1333, 800),
20 | flip=False,
21 | transforms=[
22 | dict(type='Resize', keep_ratio=True),
23 | dict(type='RandomFlip'),
24 | dict(type='Normalize', **img_norm_cfg),
25 | dict(type='Pad', size_divisor=32),
26 | dict(type='ImageToTensor', keys=['img']),
27 | dict(type='Collect', keys=['img']),
28 | ])
29 | ]
30 | data = dict(
31 | samples_per_gpu=2,
32 | workers_per_gpu=2,
33 | train=dict(
34 | type=dataset_type,
35 | ann_file=data_root + 'annotations/instances_train2017.json',
36 | img_prefix=data_root + 'train2017/',
37 | pipeline=train_pipeline),
38 | val=dict(
39 | type=dataset_type,
40 | ann_file=data_root + 'annotations/instances_val2017.json',
41 | img_prefix=data_root + 'val2017/',
42 | pipeline=test_pipeline),
43 | test=dict(
44 | type=dataset_type,
45 | ann_file=data_root + 'annotations/instances_val2017.json',
46 | img_prefix=data_root + 'val2017/',
47 | pipeline=test_pipeline))
48 | evaluation = dict(metric=['bbox', 'segm'])
49 |
--------------------------------------------------------------------------------
/projects/mmdet3d_plugin/bevformer/apis/train.py:
--------------------------------------------------------------------------------
1 | # ---------------------------------------------
2 | # Copyright (c) OpenMMLab. All rights reserved.
3 | # ---------------------------------------------
4 | # Modified by Zhiqi Li
5 | # ---------------------------------------------
6 |
7 | from .mmdet_train import custom_train_detector
8 | from mmseg.apis import train_segmentor
9 | from mmdet.apis import train_detector
10 |
11 | def custom_train_model(model,
12 | dataset,
13 | cfg,
14 | distributed=False,
15 | validate=False,
16 | timestamp=None,
17 | meta=None):
18 | """A function wrapper for launching model training according to cfg.
19 |
20 | Because we need different eval_hook in runner. Should be deprecated in the
21 | future.
22 | """
23 | if cfg.model.type in ['EncoderDecoder3D']:
24 | assert False
25 | else:
26 | custom_train_detector(
27 | model,
28 | dataset,
29 | cfg,
30 | distributed=distributed,
31 | validate=validate,
32 | timestamp=timestamp,
33 | meta=meta)
34 |
35 |
36 | def train_model(model,
37 | dataset,
38 | cfg,
39 | distributed=False,
40 | validate=False,
41 | timestamp=None,
42 | meta=None):
43 | """A function wrapper for launching model training according to cfg.
44 |
45 | Because we need different eval_hook in runner. Should be deprecated in the
46 | future.
47 | """
48 | if cfg.model.type in ['EncoderDecoder3D']:
49 | train_segmentor(
50 | model,
51 | dataset,
52 | cfg,
53 | distributed=distributed,
54 | validate=validate,
55 | timestamp=timestamp,
56 | meta=meta)
57 | else:
58 | train_detector(
59 | model,
60 | dataset,
61 | cfg,
62 | distributed=distributed,
63 | validate=validate,
64 | timestamp=timestamp,
65 | meta=meta)
66 |
--------------------------------------------------------------------------------
/projects/configs/_base_/models/paconv_ssg.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='EncoderDecoder3D',
4 | backbone=dict(
5 | type='PointNet2SASSG',
6 | in_channels=9, # [xyz, rgb, normalized_xyz]
7 | num_points=(1024, 256, 64, 16),
8 | radius=(None, None, None, None), # use kNN instead of ball query
9 | num_samples=(32, 32, 32, 32),
10 | sa_channels=((32, 32, 64), (64, 64, 128), (128, 128, 256), (256, 256,
11 | 512)),
12 | fp_channels=(),
13 | norm_cfg=dict(type='BN2d', momentum=0.1),
14 | sa_cfg=dict(
15 | type='PAConvSAModule',
16 | pool_mod='max',
17 | use_xyz=True,
18 | normalize_xyz=False,
19 | paconv_num_kernels=[16, 16, 16],
20 | paconv_kernel_input='w_neighbor',
21 | scorenet_input='w_neighbor_dist',
22 | scorenet_cfg=dict(
23 | mlp_channels=[16, 16, 16],
24 | score_norm='softmax',
25 | temp_factor=1.0,
26 | last_bn=False))),
27 | decode_head=dict(
28 | type='PAConvHead',
    29 |         # PAConv model's decoder takes skip connections from backbone
30 | # different from PointNet++, it also concats input features in the last
31 | # level of decoder, leading to `128 + 6` as the channel number
32 | fp_channels=((768, 256, 256), (384, 256, 256), (320, 256, 128),
33 | (128 + 6, 128, 128, 128)),
34 | channels=128,
35 | dropout_ratio=0.5,
36 | conv_cfg=dict(type='Conv1d'),
37 | norm_cfg=dict(type='BN1d'),
38 | act_cfg=dict(type='ReLU'),
39 | loss_decode=dict(
40 | type='CrossEntropyLoss',
41 | use_sigmoid=False,
42 | class_weight=None, # should be modified with dataset
43 | loss_weight=1.0)),
44 | # correlation loss to regularize PAConv's kernel weights
45 | loss_regularization=dict(
46 | type='PAConvRegularizationLoss', reduction='sum', loss_weight=10.0),
47 | # model training and testing settings
48 | train_cfg=dict(),
49 | test_cfg=dict(mode='slide'))
50 |
--------------------------------------------------------------------------------
/projects/configs/_base_/datasets/nuim_instance.py:
--------------------------------------------------------------------------------
1 | dataset_type = 'CocoDataset'
2 | data_root = 'data/nuimages/'
3 | class_names = [
4 | 'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
5 | 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
6 | ]
7 | img_norm_cfg = dict(
8 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
9 | train_pipeline = [
10 | dict(type='LoadImageFromFile'),
11 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
12 | dict(
13 | type='Resize',
14 | img_scale=[(1280, 720), (1920, 1080)],
15 | multiscale_mode='range',
16 | keep_ratio=True),
17 | dict(type='RandomFlip', flip_ratio=0.5),
18 | dict(type='Normalize', **img_norm_cfg),
19 | dict(type='Pad', size_divisor=32),
20 | dict(type='DefaultFormatBundle'),
21 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
22 | ]
23 | test_pipeline = [
24 | dict(type='LoadImageFromFile'),
25 | dict(
26 | type='MultiScaleFlipAug',
27 | img_scale=(1600, 900),
28 | flip=False,
29 | transforms=[
30 | dict(type='Resize', keep_ratio=True),
31 | dict(type='RandomFlip'),
32 | dict(type='Normalize', **img_norm_cfg),
33 | dict(type='Pad', size_divisor=32),
34 | dict(type='ImageToTensor', keys=['img']),
35 | dict(type='Collect', keys=['img']),
36 | ])
37 | ]
38 | data = dict(
39 | samples_per_gpu=2,
40 | workers_per_gpu=2,
41 | train=dict(
42 | type=dataset_type,
43 | ann_file=data_root + 'annotations/nuimages_v1.0-train.json',
44 | img_prefix=data_root,
45 | classes=class_names,
46 | pipeline=train_pipeline),
47 | val=dict(
48 | type=dataset_type,
49 | ann_file=data_root + 'annotations/nuimages_v1.0-val.json',
50 | img_prefix=data_root,
51 | classes=class_names,
52 | pipeline=test_pipeline),
53 | test=dict(
54 | type=dataset_type,
55 | ann_file=data_root + 'annotations/nuimages_v1.0-val.json',
56 | img_prefix=data_root,
57 | classes=class_names,
58 | pipeline=test_pipeline))
59 | evaluation = dict(metric=['bbox', 'segm'])
60 |
--------------------------------------------------------------------------------
/tools/misc/fuse_conv_bn.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import argparse
3 | import torch
4 | from mmcv.runner import save_checkpoint
5 | from torch import nn as nn
6 |
     7 | from mmdet3d.apis import init_model
8 |
9 |
10 | def fuse_conv_bn(conv, bn):
11 | """During inference, the functionary of batch norm layers is turned off but
12 | only the mean and var alone channels are used, which exposes the chance to
13 | fuse it with the preceding conv layers to save computations and simplify
14 | network structures."""
15 | conv_w = conv.weight
16 | conv_b = conv.bias if conv.bias is not None else torch.zeros_like(
17 | bn.running_mean)
18 |
19 | factor = bn.weight / torch.sqrt(bn.running_var + bn.eps)
20 | conv.weight = nn.Parameter(conv_w *
21 | factor.reshape([conv.out_channels, 1, 1, 1]))
22 | conv.bias = nn.Parameter((conv_b - bn.running_mean) * factor + bn.bias)
23 | return conv
24 |
25 |
26 | def fuse_module(m):
27 | last_conv = None
28 | last_conv_name = None
29 |
30 | for name, child in m.named_children():
31 | if isinstance(child, (nn.BatchNorm2d, nn.SyncBatchNorm)):
32 | if last_conv is None: # only fuse BN that is after Conv
33 | continue
34 | fused_conv = fuse_conv_bn(last_conv, child)
35 | m._modules[last_conv_name] = fused_conv
36 | # To reduce changes, set BN as Identity instead of deleting it.
37 | m._modules[name] = nn.Identity()
38 | last_conv = None
39 | elif isinstance(child, nn.Conv2d):
40 | last_conv = child
41 | last_conv_name = name
42 | else:
43 | fuse_module(child)
44 | return m
45 |
46 |
47 | def parse_args():
48 | parser = argparse.ArgumentParser(
49 | description='fuse Conv and BN layers in a model')
50 | parser.add_argument('config', help='config file path')
51 | parser.add_argument('checkpoint', help='checkpoint file path')
52 | parser.add_argument('out', help='output path of the converted model')
53 | args = parser.parse_args()
54 | return args
55 |
56 |
57 | def main():
58 | args = parse_args()
59 | # build the model from a config file and a checkpoint file
60 | model = init_model(args.config, args.checkpoint)
61 | # fuse conv and bn layers of the model
62 | fused_model = fuse_module(model)
63 | save_checkpoint(fused_model, args.out)
64 |
65 |
66 | if __name__ == '__main__':
67 | main()
68 |
--------------------------------------------------------------------------------
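
A quick numerical sanity sketch for `fuse_conv_bn` (my example; in eval mode the fused conv should reproduce the Conv+BN output; assumes `fuse_conv_bn` from the script above is importable or pasted alongside):

```python
import torch
from torch import nn

conv, bn = nn.Conv2d(3, 8, 3, padding=1), nn.BatchNorm2d(8)
bn.running_mean.uniform_(-1, 1)
bn.running_var.uniform_(0.5, 2.0)
conv.eval(), bn.eval()

x = torch.rand(1, 3, 16, 16)
with torch.no_grad():
    reference = bn(conv(x))
    fused = fuse_conv_bn(conv, bn)      # modifies conv in place and returns it
    print(torch.allclose(reference, fused(x), atol=1e-5))  # True
```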
/projects/configs/_base_/models/fcos3d.py:
--------------------------------------------------------------------------------
1 | model = dict(
2 | type='FCOSMono3D',
3 | pretrained='open-mmlab://detectron2/resnet101_caffe',
4 | backbone=dict(
5 | type='ResNet',
6 | depth=101,
7 | num_stages=4,
8 | out_indices=(0, 1, 2, 3),
9 | frozen_stages=1,
10 | norm_cfg=dict(type='BN', requires_grad=False),
11 | norm_eval=True,
12 | style='caffe'),
13 | neck=dict(
14 | type='FPN',
15 | in_channels=[256, 512, 1024, 2048],
16 | out_channels=256,
17 | start_level=1,
18 | add_extra_convs='on_output',
19 | num_outs=5,
20 | relu_before_extra_convs=True),
21 | bbox_head=dict(
22 | type='FCOSMono3DHead',
23 | num_classes=10,
24 | in_channels=256,
25 | stacked_convs=2,
26 | feat_channels=256,
27 | use_direction_classifier=True,
28 | diff_rad_by_sin=True,
29 | pred_attrs=True,
30 | pred_velo=True,
31 | dir_offset=0.7854, # pi/4
32 | strides=[8, 16, 32, 64, 128],
33 | group_reg_dims=(2, 1, 3, 1, 2), # offset, depth, size, rot, velo
34 | cls_branch=(256, ),
35 | reg_branch=(
36 | (256, ), # offset
37 | (256, ), # depth
38 | (256, ), # size
39 | (256, ), # rot
40 | () # velo
41 | ),
42 | dir_branch=(256, ),
43 | attr_branch=(256, ),
44 | loss_cls=dict(
45 | type='FocalLoss',
46 | use_sigmoid=True,
47 | gamma=2.0,
48 | alpha=0.25,
49 | loss_weight=1.0),
50 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
51 | loss_dir=dict(
52 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
53 | loss_attr=dict(
54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
55 | loss_centerness=dict(
56 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
57 | norm_on_bbox=True,
58 | centerness_on_reg=True,
59 | center_sampling=True,
60 | conv_bias=True,
61 | dcn_on_last_conv=True),
62 | train_cfg=dict(
63 | allowed_border=0,
64 | code_weight=[1.0, 1.0, 0.2, 1.0, 1.0, 1.0, 1.0, 0.05, 0.05],
65 | pos_weight=-1,
66 | debug=False),
67 | test_cfg=dict(
68 | use_rotate_nms=True,
69 | nms_across_levels=False,
70 | nms_pre=1000,
71 | nms_thr=0.8,
72 | score_thr=0.05,
73 | min_bbox_size=0,
74 | max_per_img=200))
75 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | # BEVFormer: a Cutting-edge Baseline for Camera-based Detection
4 |
5 |
6 |
7 | > **BEVFormer: Learning Bird's-Eye-View Representation from Multi-Camera Images via Spatiotemporal Transformers**
8 | > - [Paper](http://arxiv.org/abs/2203.17270) | [Blog](https://www.zhihu.com/question/521842610/answer/2431585901) (in Chinese) | Presentation Slides at CVPR 2022 Workshop (soon) | Live-streaming video on BEV Perception (soon)
9 |
10 |
11 |
12 | # Abstract
13 | In this work, the authors present a new framework termed BEVFormer, which learns unified BEV representations with spatiotemporal transformers to support multiple autonomous driving perception tasks. In a nutshell, BEVFormer exploits both spatial and temporal information by interacting with spatial and temporal space through predefined grid-shaped BEV queries. To aggregate spatial information, the authors design a spatial cross-attention in which each BEV query extracts spatial features from its regions of interest across camera views. For temporal information, the authors propose a temporal self-attention that recurrently fuses historical BEV information.
14 | The proposed approach achieves a new state of the art of **56.9\%** NDS on the nuScenes test set, which is **9.0** points higher than the previous best result and on par with the performance of LiDAR-based baselines.
15 |
16 |
17 | # Methods
18 | 
19 |
20 |
21 | # Getting Started
22 | - [Installation](docs/install.md)
23 | - [Prepare Dataset](docs/prepare_dataset.md)
24 | - [Run and Eval](docs/getting_started.md)
25 |
26 |
27 | # HFai Adaptation
28 |
29 | Follow [hf_guide](./hf_guide.md) to adapt to Fire-Flyer II.
30 |
31 | Train BEVFormer with 10 Nodes
32 | ```
33 | hfai python tools/train.py projects/configs/bevformer/bevformer_base.py --work-dir out/node10_train --cfg-options optimizer.lr=0.0008 -- --nodes 10 --priority 40 --name node10_train
34 | ```
35 |
36 | Eval BEVFormer with 10 Nodes
37 | ```
38 | hfai python tools/test.py projects/configs/bevformer/bevformer_base.py out/node10_train/epoch_24.pth --launcher pytorch --eval bbox -- --nodes 10 --priority 40 --name node10_test
39 | ```
40 |
41 | # Bibtex
42 | If this work is helpful for your research, please consider citing the following BibTeX entry.
43 |
44 | ```
45 | @article{li2022bevformer,
46 | title={BEVFormer: Learning Bird’s-Eye-View Representation from Multi-Camera Images via Spatiotemporal Transformers},
47 | author={Li, Zhiqi and Wang, Wenhai and Li, Hongyang and Xie, Enze and Sima, Chonghao and Lu, Tong and Qiao, Yu and Dai, Jifeng},
48 | journal={arXiv preprint arXiv:2203.17270},
49 | year={2022}
50 | }
51 | ```
52 |
--------------------------------------------------------------------------------
/projects/configs/_base_/models/votenet.py:
--------------------------------------------------------------------------------
1 | model = dict(
2 | type='VoteNet',
3 | backbone=dict(
4 | type='PointNet2SASSG',
5 | in_channels=4,
6 | num_points=(2048, 1024, 512, 256),
7 | radius=(0.2, 0.4, 0.8, 1.2),
8 | num_samples=(64, 32, 16, 16),
9 | sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256),
10 | (128, 128, 256)),
11 | fp_channels=((256, 256), (256, 256)),
12 | norm_cfg=dict(type='BN2d'),
13 | sa_cfg=dict(
14 | type='PointSAModule',
15 | pool_mod='max',
16 | use_xyz=True,
17 | normalize_xyz=True)),
18 | bbox_head=dict(
19 | type='VoteHead',
20 | vote_module_cfg=dict(
21 | in_channels=256,
22 | vote_per_seed=1,
23 | gt_per_seed=3,
24 | conv_channels=(256, 256),
25 | conv_cfg=dict(type='Conv1d'),
26 | norm_cfg=dict(type='BN1d'),
27 | norm_feats=True,
28 | vote_loss=dict(
29 | type='ChamferDistance',
30 | mode='l1',
31 | reduction='none',
32 | loss_dst_weight=10.0)),
33 | vote_aggregation_cfg=dict(
34 | type='PointSAModule',
35 | num_point=256,
36 | radius=0.3,
37 | num_sample=16,
38 | mlp_channels=[256, 128, 128, 128],
39 | use_xyz=True,
40 | normalize_xyz=True),
41 | pred_layer_cfg=dict(
42 | in_channels=128, shared_conv_channels=(128, 128), bias=True),
43 | conv_cfg=dict(type='Conv1d'),
44 | norm_cfg=dict(type='BN1d'),
45 | objectness_loss=dict(
46 | type='CrossEntropyLoss',
47 | class_weight=[0.2, 0.8],
48 | reduction='sum',
49 | loss_weight=5.0),
50 | center_loss=dict(
51 | type='ChamferDistance',
52 | mode='l2',
53 | reduction='sum',
54 | loss_src_weight=10.0,
55 | loss_dst_weight=10.0),
56 | dir_class_loss=dict(
57 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
58 | dir_res_loss=dict(
59 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
60 | size_class_loss=dict(
61 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
62 | size_res_loss=dict(
63 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0 / 3.0),
64 | semantic_loss=dict(
65 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
66 | # model training and testing settings
67 | train_cfg=dict(
68 | pos_distance_thr=0.3, neg_distance_thr=0.6, sample_mod='vote'),
69 | test_cfg=dict(
70 | sample_mod='seed',
71 | nms_thr=0.25,
72 | score_thr=0.05,
73 | per_class_proposal=True))
74 |
--------------------------------------------------------------------------------
/projects/configs/_base_/models/groupfree3d.py:
--------------------------------------------------------------------------------
1 | model = dict(
2 | type='GroupFree3DNet',
3 | backbone=dict(
4 | type='PointNet2SASSG',
5 | in_channels=3,
6 | num_points=(2048, 1024, 512, 256),
7 | radius=(0.2, 0.4, 0.8, 1.2),
8 | num_samples=(64, 32, 16, 16),
9 | sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256),
10 | (128, 128, 256)),
11 | fp_channels=((256, 256), (256, 288)),
12 | norm_cfg=dict(type='BN2d'),
13 | sa_cfg=dict(
14 | type='PointSAModule',
15 | pool_mod='max',
16 | use_xyz=True,
17 | normalize_xyz=True)),
18 | bbox_head=dict(
19 | type='GroupFree3DHead',
20 | in_channels=288,
21 | num_decoder_layers=6,
22 | num_proposal=256,
23 | transformerlayers=dict(
24 | type='BaseTransformerLayer',
25 | attn_cfgs=dict(
26 | type='GroupFree3DMHA',
27 | embed_dims=288,
28 | num_heads=8,
29 | attn_drop=0.1,
30 | dropout_layer=dict(type='Dropout', drop_prob=0.1)),
31 | ffn_cfgs=dict(
32 | embed_dims=288,
33 | feedforward_channels=2048,
34 | ffn_drop=0.1,
35 | act_cfg=dict(type='ReLU', inplace=True)),
36 | operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 'ffn',
37 | 'norm')),
38 | pred_layer_cfg=dict(
39 | in_channels=288, shared_conv_channels=(288, 288), bias=True),
40 | sampling_objectness_loss=dict(
41 | type='FocalLoss',
42 | use_sigmoid=True,
43 | gamma=2.0,
44 | alpha=0.25,
45 | loss_weight=8.0),
46 | objectness_loss=dict(
47 | type='FocalLoss',
48 | use_sigmoid=True,
49 | gamma=2.0,
50 | alpha=0.25,
51 | loss_weight=1.0),
52 | center_loss=dict(
53 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
54 | dir_class_loss=dict(
55 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
56 | dir_res_loss=dict(
57 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
58 | size_class_loss=dict(
59 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
60 | size_res_loss=dict(
61 | type='SmoothL1Loss', beta=1.0, reduction='sum', loss_weight=10.0),
62 | semantic_loss=dict(
63 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
64 | # model training and testing settings
65 | train_cfg=dict(sample_mod='kps'),
66 | test_cfg=dict(
67 | sample_mod='kps',
68 | nms_thr=0.25,
69 | score_thr=0.0,
70 | per_class_proposal=True,
71 | prediction_stages='last'))
72 |
--------------------------------------------------------------------------------
/projects/configs/_base_/models/hv_second_secfpn_kitti.py:
--------------------------------------------------------------------------------
1 | voxel_size = [0.05, 0.05, 0.1]
2 |
3 | model = dict(
4 | type='VoxelNet',
5 | voxel_layer=dict(
6 | max_num_points=5,
7 | point_cloud_range=[0, -40, -3, 70.4, 40, 1],
8 | voxel_size=voxel_size,
9 | max_voxels=(16000, 40000)),
10 | voxel_encoder=dict(type='HardSimpleVFE'),
11 | middle_encoder=dict(
12 | type='SparseEncoder',
13 | in_channels=4,
14 | sparse_shape=[41, 1600, 1408],
15 | order=('conv', 'norm', 'act')),
16 | backbone=dict(
17 | type='SECOND',
18 | in_channels=256,
19 | layer_nums=[5, 5],
20 | layer_strides=[1, 2],
21 | out_channels=[128, 256]),
22 | neck=dict(
23 | type='SECONDFPN',
24 | in_channels=[128, 256],
25 | upsample_strides=[1, 2],
26 | out_channels=[256, 256]),
27 | bbox_head=dict(
28 | type='Anchor3DHead',
29 | num_classes=3,
30 | in_channels=512,
31 | feat_channels=512,
32 | use_direction_classifier=True,
33 | anchor_generator=dict(
34 | type='Anchor3DRangeGenerator',
35 | ranges=[
36 | [0, -40.0, -0.6, 70.4, 40.0, -0.6],
37 | [0, -40.0, -0.6, 70.4, 40.0, -0.6],
38 | [0, -40.0, -1.78, 70.4, 40.0, -1.78],
39 | ],
40 | sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],
41 | rotations=[0, 1.57],
42 | reshape_out=False),
43 | diff_rad_by_sin=True,
44 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
45 | loss_cls=dict(
46 | type='FocalLoss',
47 | use_sigmoid=True,
48 | gamma=2.0,
49 | alpha=0.25,
50 | loss_weight=1.0),
51 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
52 | loss_dir=dict(
53 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
54 | # model training and testing settings
55 | train_cfg=dict(
56 | assigner=[
57 | dict( # for Pedestrian
58 | type='MaxIoUAssigner',
59 | iou_calculator=dict(type='BboxOverlapsNearest3D'),
60 | pos_iou_thr=0.35,
61 | neg_iou_thr=0.2,
62 | min_pos_iou=0.2,
63 | ignore_iof_thr=-1),
64 | dict( # for Cyclist
65 | type='MaxIoUAssigner',
66 | iou_calculator=dict(type='BboxOverlapsNearest3D'),
67 | pos_iou_thr=0.35,
68 | neg_iou_thr=0.2,
69 | min_pos_iou=0.2,
70 | ignore_iof_thr=-1),
71 | dict( # for Car
72 | type='MaxIoUAssigner',
73 | iou_calculator=dict(type='BboxOverlapsNearest3D'),
74 | pos_iou_thr=0.6,
75 | neg_iou_thr=0.45,
76 | min_pos_iou=0.45,
77 | ignore_iof_thr=-1),
78 | ],
79 | allowed_border=0,
80 | pos_weight=-1,
81 | debug=False),
82 | test_cfg=dict(
83 | use_rotate_nms=True,
84 | nms_across_levels=False,
85 | nms_thr=0.01,
86 | score_thr=0.1,
87 | min_bbox_size=0,
88 | nms_pre=100,
89 | max_num=50))
90 |
--------------------------------------------------------------------------------
/projects/configs/_base_/models/3dssd.py:
--------------------------------------------------------------------------------
1 | model = dict(
2 | type='SSD3DNet',
3 | backbone=dict(
4 | type='PointNet2SAMSG',
5 | in_channels=4,
6 | num_points=(4096, 512, (256, 256)),
7 | radii=((0.2, 0.4, 0.8), (0.4, 0.8, 1.6), (1.6, 3.2, 4.8)),
8 | num_samples=((32, 32, 64), (32, 32, 64), (32, 32, 32)),
9 | sa_channels=(((16, 16, 32), (16, 16, 32), (32, 32, 64)),
10 | ((64, 64, 128), (64, 64, 128), (64, 96, 128)),
11 | ((128, 128, 256), (128, 192, 256), (128, 256, 256))),
12 | aggregation_channels=(64, 128, 256),
13 | fps_mods=(('D-FPS'), ('FS'), ('F-FPS', 'D-FPS')),
14 | fps_sample_range_lists=((-1), (-1), (512, -1)),
15 | norm_cfg=dict(type='BN2d', eps=1e-3, momentum=0.1),
16 | sa_cfg=dict(
17 | type='PointSAModuleMSG',
18 | pool_mod='max',
19 | use_xyz=True,
20 | normalize_xyz=False)),
21 | bbox_head=dict(
22 | type='SSD3DHead',
23 | in_channels=256,
24 | vote_module_cfg=dict(
25 | in_channels=256,
26 | num_points=256,
27 | gt_per_seed=1,
28 | conv_channels=(128, ),
29 | conv_cfg=dict(type='Conv1d'),
30 | norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1),
31 | with_res_feat=False,
32 | vote_xyz_range=(3.0, 3.0, 2.0)),
33 | vote_aggregation_cfg=dict(
34 | type='PointSAModuleMSG',
35 | num_point=256,
36 | radii=(4.8, 6.4),
37 | sample_nums=(16, 32),
38 | mlp_channels=((256, 256, 256, 512), (256, 256, 512, 1024)),
39 | norm_cfg=dict(type='BN2d', eps=1e-3, momentum=0.1),
40 | use_xyz=True,
41 | normalize_xyz=False,
42 | bias=True),
43 | pred_layer_cfg=dict(
44 | in_channels=1536,
45 | shared_conv_channels=(512, 128),
46 | cls_conv_channels=(128, ),
47 | reg_conv_channels=(128, ),
48 | conv_cfg=dict(type='Conv1d'),
49 | norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1),
50 | bias=True),
51 | conv_cfg=dict(type='Conv1d'),
52 | norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1),
53 | objectness_loss=dict(
54 | type='CrossEntropyLoss',
55 | use_sigmoid=True,
56 | reduction='sum',
57 | loss_weight=1.0),
58 | center_loss=dict(
59 | type='SmoothL1Loss', reduction='sum', loss_weight=1.0),
60 | dir_class_loss=dict(
61 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
62 | dir_res_loss=dict(
63 | type='SmoothL1Loss', reduction='sum', loss_weight=1.0),
64 | size_res_loss=dict(
65 | type='SmoothL1Loss', reduction='sum', loss_weight=1.0),
66 | corner_loss=dict(
67 | type='SmoothL1Loss', reduction='sum', loss_weight=1.0),
68 | vote_loss=dict(type='SmoothL1Loss', reduction='sum', loss_weight=1.0)),
69 | # model training and testing settings
70 | train_cfg=dict(
71 | sample_mod='spec', pos_distance_thr=10.0, expand_dims_length=0.05),
72 | test_cfg=dict(
73 | nms_cfg=dict(type='nms', iou_thr=0.1),
74 | sample_mod='spec',
75 | score_thr=0.0,
76 | per_class_proposal=True,
77 | max_output_num=100))
78 |
--------------------------------------------------------------------------------
/tools/model_converters/regnet2mmdet.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import argparse
3 | import torch
4 | from collections import OrderedDict
5 |
6 |
7 | def convert_stem(model_key, model_weight, state_dict, converted_names):
8 | new_key = model_key.replace('stem.conv', 'conv1')
9 | new_key = new_key.replace('stem.bn', 'bn1')
10 | state_dict[new_key] = model_weight
11 | converted_names.add(model_key)
12 | print(f'Convert {model_key} to {new_key}')
13 |
14 |
15 | def convert_head(model_key, model_weight, state_dict, converted_names):
16 | new_key = model_key.replace('head.fc', 'fc')
17 | state_dict[new_key] = model_weight
18 | converted_names.add(model_key)
19 | print(f'Convert {model_key} to {new_key}')
20 |
21 |
22 | def convert_reslayer(model_key, model_weight, state_dict, converted_names):
23 | split_keys = model_key.split('.')
24 | layer, block, module = split_keys[:3]
25 | block_id = int(block[1:])
26 | layer_name = f'layer{int(layer[1:])}'
27 | block_name = f'{block_id - 1}'
28 |
29 | if block_id == 1 and module == 'bn':
30 | new_key = f'{layer_name}.{block_name}.downsample.1.{split_keys[-1]}'
31 | elif block_id == 1 and module == 'proj':
32 | new_key = f'{layer_name}.{block_name}.downsample.0.{split_keys[-1]}'
33 | elif module == 'f':
34 | if split_keys[3] == 'a_bn':
35 | module_name = 'bn1'
36 | elif split_keys[3] == 'b_bn':
37 | module_name = 'bn2'
38 | elif split_keys[3] == 'c_bn':
39 | module_name = 'bn3'
40 | elif split_keys[3] == 'a':
41 | module_name = 'conv1'
42 | elif split_keys[3] == 'b':
43 | module_name = 'conv2'
44 | elif split_keys[3] == 'c':
45 | module_name = 'conv3'
46 | new_key = f'{layer_name}.{block_name}.{module_name}.{split_keys[-1]}'
47 | else:
48 | raise ValueError(f'Unsupported conversion of key {model_key}')
49 | print(f'Convert {model_key} to {new_key}')
50 | state_dict[new_key] = model_weight
51 | converted_names.add(model_key)
52 |
53 |
54 | def convert(src, dst):
55 | """Convert keys in pycls pretrained RegNet models to mmdet style."""
56 | # load the pycls model weights
57 | regnet_model = torch.load(src)
58 | blobs = regnet_model['model_state']
59 | # convert to pytorch style
60 | state_dict = OrderedDict()
61 | converted_names = set()
62 | for key, weight in blobs.items():
63 | if 'stem' in key:
64 | convert_stem(key, weight, state_dict, converted_names)
65 | elif 'head' in key:
66 | convert_head(key, weight, state_dict, converted_names)
67 | elif key.startswith('s'):
68 | convert_reslayer(key, weight, state_dict, converted_names)
69 |
70 | # check if all layers are converted
71 | for key in blobs:
72 | if key not in converted_names:
73 | print(f'not converted: {key}')
74 | # save checkpoint
75 | checkpoint = dict()
76 | checkpoint['state_dict'] = state_dict
77 | torch.save(checkpoint, dst)
78 |
79 |
80 | def main():
81 | parser = argparse.ArgumentParser(description='Convert model keys')
82 | parser.add_argument('src', help='src detectron model path')
83 | parser.add_argument('dst', help='save path')
84 | args = parser.parse_args()
85 | convert(args.src, args.dst)
86 |
87 |
88 | if __name__ == '__main__':
89 | main()
90 |
--------------------------------------------------------------------------------
/projects/configs/_base_/models/hv_pointpillars_secfpn_kitti.py:
--------------------------------------------------------------------------------
1 | voxel_size = [0.16, 0.16, 4]
2 |
3 | model = dict(
4 | type='VoxelNet',
5 | voxel_layer=dict(
6 | max_num_points=32, # max_points_per_voxel
7 | point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1],
8 | voxel_size=voxel_size,
9 | max_voxels=(16000, 40000) # (training, testing) max_voxels
10 | ),
11 | voxel_encoder=dict(
12 | type='PillarFeatureNet',
13 | in_channels=4,
14 | feat_channels=[64],
15 | with_distance=False,
16 | voxel_size=voxel_size,
17 | point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1]),
18 | middle_encoder=dict(
19 | type='PointPillarsScatter', in_channels=64, output_shape=[496, 432]),
20 | backbone=dict(
21 | type='SECOND',
22 | in_channels=64,
23 | layer_nums=[3, 5, 5],
24 | layer_strides=[2, 2, 2],
25 | out_channels=[64, 128, 256]),
26 | neck=dict(
27 | type='SECONDFPN',
28 | in_channels=[64, 128, 256],
29 | upsample_strides=[1, 2, 4],
30 | out_channels=[128, 128, 128]),
31 | bbox_head=dict(
32 | type='Anchor3DHead',
33 | num_classes=3,
34 | in_channels=384,
35 | feat_channels=384,
36 | use_direction_classifier=True,
37 | anchor_generator=dict(
38 | type='Anchor3DRangeGenerator',
39 | ranges=[
40 | [0, -39.68, -0.6, 70.4, 39.68, -0.6],
41 | [0, -39.68, -0.6, 70.4, 39.68, -0.6],
42 | [0, -39.68, -1.78, 70.4, 39.68, -1.78],
43 | ],
44 | sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],
45 | rotations=[0, 1.57],
46 | reshape_out=False),
47 | diff_rad_by_sin=True,
48 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
49 | loss_cls=dict(
50 | type='FocalLoss',
51 | use_sigmoid=True,
52 | gamma=2.0,
53 | alpha=0.25,
54 | loss_weight=1.0),
55 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
56 | loss_dir=dict(
57 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
58 | # model training and testing settings
59 | train_cfg=dict(
60 | assigner=[
61 | dict( # for Pedestrian
62 | type='MaxIoUAssigner',
63 | iou_calculator=dict(type='BboxOverlapsNearest3D'),
64 | pos_iou_thr=0.5,
65 | neg_iou_thr=0.35,
66 | min_pos_iou=0.35,
67 | ignore_iof_thr=-1),
68 | dict( # for Cyclist
69 | type='MaxIoUAssigner',
70 | iou_calculator=dict(type='BboxOverlapsNearest3D'),
71 | pos_iou_thr=0.5,
72 | neg_iou_thr=0.35,
73 | min_pos_iou=0.35,
74 | ignore_iof_thr=-1),
75 | dict( # for Car
76 | type='MaxIoUAssigner',
77 | iou_calculator=dict(type='BboxOverlapsNearest3D'),
78 | pos_iou_thr=0.6,
79 | neg_iou_thr=0.45,
80 | min_pos_iou=0.45,
81 | ignore_iof_thr=-1),
82 | ],
83 | allowed_border=0,
84 | pos_weight=-1,
85 | debug=False),
86 | test_cfg=dict(
87 | use_rotate_nms=True,
88 | nms_across_levels=False,
89 | nms_thr=0.01,
90 | score_thr=0.1,
91 | min_bbox_size=0,
92 | nms_pre=100,
93 | max_num=50))
94 |
--------------------------------------------------------------------------------
/projects/configs/_base_/models/centerpoint_02pillar_second_secfpn_nus.py:
--------------------------------------------------------------------------------
1 | voxel_size = [0.2, 0.2, 8]
2 | model = dict(
3 | type='CenterPoint',
4 | pts_voxel_layer=dict(
5 | max_num_points=20, voxel_size=voxel_size, max_voxels=(30000, 40000)),
6 | pts_voxel_encoder=dict(
7 | type='PillarFeatureNet',
8 | in_channels=5,
9 | feat_channels=[64],
10 | with_distance=False,
11 | voxel_size=(0.2, 0.2, 8),
12 | norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
13 | legacy=False),
14 | pts_middle_encoder=dict(
15 | type='PointPillarsScatter', in_channels=64, output_shape=(512, 512)),
16 | pts_backbone=dict(
17 | type='SECOND',
18 | in_channels=64,
19 | out_channels=[64, 128, 256],
20 | layer_nums=[3, 5, 5],
21 | layer_strides=[2, 2, 2],
22 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),
23 | conv_cfg=dict(type='Conv2d', bias=False)),
24 | pts_neck=dict(
25 | type='SECONDFPN',
26 | in_channels=[64, 128, 256],
27 | out_channels=[128, 128, 128],
28 | upsample_strides=[0.5, 1, 2],
29 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),
30 | upsample_cfg=dict(type='deconv', bias=False),
31 | use_conv_for_no_stride=True),
32 | pts_bbox_head=dict(
33 | type='CenterHead',
34 | in_channels=sum([128, 128, 128]),
35 | tasks=[
36 | dict(num_class=1, class_names=['car']),
37 | dict(num_class=2, class_names=['truck', 'construction_vehicle']),
38 | dict(num_class=2, class_names=['bus', 'trailer']),
39 | dict(num_class=1, class_names=['barrier']),
40 | dict(num_class=2, class_names=['motorcycle', 'bicycle']),
41 | dict(num_class=2, class_names=['pedestrian', 'traffic_cone']),
42 | ],
43 | common_heads=dict(
44 | reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)),
45 | share_conv_channel=64,
46 | bbox_coder=dict(
47 | type='CenterPointBBoxCoder',
48 | post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
49 | max_num=500,
50 | score_threshold=0.1,
51 | out_size_factor=4,
52 | voxel_size=voxel_size[:2],
53 | code_size=9),
54 | separate_head=dict(
55 | type='SeparateHead', init_bias=-2.19, final_kernel=3),
56 | loss_cls=dict(type='GaussianFocalLoss', reduction='mean'),
57 | loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25),
58 | norm_bbox=True),
59 | # model training and testing settings
60 | train_cfg=dict(
61 | pts=dict(
62 | grid_size=[512, 512, 1],
63 | voxel_size=voxel_size,
64 | out_size_factor=4,
65 | dense_reg=1,
66 | gaussian_overlap=0.1,
67 | max_objs=500,
68 | min_radius=2,
69 | code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2])),
70 | test_cfg=dict(
71 | pts=dict(
72 | post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
73 | max_per_img=500,
74 | max_pool_nms=False,
75 | min_radius=[4, 12, 10, 1, 0.85, 0.175],
76 | score_threshold=0.1,
77 | pc_range=[-51.2, -51.2],
78 | out_size_factor=4,
79 | voxel_size=voxel_size[:2],
80 | nms_type='rotate',
81 | pre_max_size=1000,
82 | post_max_size=83,
83 | nms_thr=0.2)))
84 |
--------------------------------------------------------------------------------
/projects/configs/_base_/models/centerpoint_01voxel_second_secfpn_nus.py:
--------------------------------------------------------------------------------
1 | voxel_size = [0.1, 0.1, 0.2]
2 | model = dict(
3 | type='CenterPoint',
4 | pts_voxel_layer=dict(
5 | max_num_points=10, voxel_size=voxel_size, max_voxels=(90000, 120000)),
6 | pts_voxel_encoder=dict(type='HardSimpleVFE', num_features=5),
7 | pts_middle_encoder=dict(
8 | type='SparseEncoder',
9 | in_channels=5,
10 | sparse_shape=[41, 1024, 1024],
11 | output_channels=128,
12 | order=('conv', 'norm', 'act'),
13 | encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128,
14 | 128)),
15 | encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, [0, 1, 1]), (0, 0)),
16 | block_type='basicblock'),
17 | pts_backbone=dict(
18 | type='SECOND',
19 | in_channels=256,
20 | out_channels=[128, 256],
21 | layer_nums=[5, 5],
22 | layer_strides=[1, 2],
23 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),
24 | conv_cfg=dict(type='Conv2d', bias=False)),
25 | pts_neck=dict(
26 | type='SECONDFPN',
27 | in_channels=[128, 256],
28 | out_channels=[256, 256],
29 | upsample_strides=[1, 2],
30 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),
31 | upsample_cfg=dict(type='deconv', bias=False),
32 | use_conv_for_no_stride=True),
33 | pts_bbox_head=dict(
34 | type='CenterHead',
35 | in_channels=sum([256, 256]),
36 | tasks=[
37 | dict(num_class=1, class_names=['car']),
38 | dict(num_class=2, class_names=['truck', 'construction_vehicle']),
39 | dict(num_class=2, class_names=['bus', 'trailer']),
40 | dict(num_class=1, class_names=['barrier']),
41 | dict(num_class=2, class_names=['motorcycle', 'bicycle']),
42 | dict(num_class=2, class_names=['pedestrian', 'traffic_cone']),
43 | ],
44 | common_heads=dict(
45 | reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)),
46 | share_conv_channel=64,
47 | bbox_coder=dict(
48 | type='CenterPointBBoxCoder',
49 | post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
50 | max_num=500,
51 | score_threshold=0.1,
52 | out_size_factor=8,
53 | voxel_size=voxel_size[:2],
54 | code_size=9),
55 | separate_head=dict(
56 | type='SeparateHead', init_bias=-2.19, final_kernel=3),
57 | loss_cls=dict(type='GaussianFocalLoss', reduction='mean'),
58 | loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25),
59 | norm_bbox=True),
60 | # model training and testing settings
61 | train_cfg=dict(
62 | pts=dict(
63 | grid_size=[1024, 1024, 40],
64 | voxel_size=voxel_size,
65 | out_size_factor=8,
66 | dense_reg=1,
67 | gaussian_overlap=0.1,
68 | max_objs=500,
69 | min_radius=2,
70 | code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2])),
71 | test_cfg=dict(
72 | pts=dict(
73 | post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
74 | max_per_img=500,
75 | max_pool_nms=False,
76 | min_radius=[4, 12, 10, 1, 0.85, 0.175],
77 | score_threshold=0.1,
78 | out_size_factor=8,
79 | voxel_size=voxel_size[:2],
80 | nms_type='rotate',
81 | pre_max_size=1000,
82 | post_max_size=83,
83 | nms_thr=0.2)))
84 |
--------------------------------------------------------------------------------
/tools/analysis_tools/benchmark.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import argparse
3 | import time
4 | import torch
5 | from mmcv import Config
6 | from mmcv.parallel import MMDataParallel
7 | from mmcv.runner import load_checkpoint, wrap_fp16_model
8 | import sys
9 | sys.path.append('.')
10 | from projects.mmdet3d_plugin.datasets.builder import build_dataloader
11 | from projects.mmdet3d_plugin.datasets import custom_build_dataset
12 | # from mmdet3d.datasets import build_dataloader, build_dataset
13 | from mmdet3d.models import build_detector
14 | #from tools.misc.fuse_conv_bn import fuse_module
15 |
16 |
17 | def parse_args():
18 | parser = argparse.ArgumentParser(description='MMDet benchmark a model')
19 | parser.add_argument('config', help='test config file path')
20 | parser.add_argument('--checkpoint', default=None, help='checkpoint file')
21 | parser.add_argument('--samples', default=2000, type=int, help='samples to benchmark')
22 | parser.add_argument(
23 | '--log-interval', default=50, type=int, help='interval of logging')
24 | parser.add_argument(
25 | '--fuse-conv-bn',
26 | action='store_true',
27 | help='Whether to fuse conv and bn, this will slightly increase '
28 | 'the inference speed')
29 | args = parser.parse_args()
30 | return args
31 |
32 |
33 | def main():
34 | args = parse_args()
35 |
36 | cfg = Config.fromfile(args.config)
37 | # set cudnn_benchmark
38 | if cfg.get('cudnn_benchmark', False):
39 | torch.backends.cudnn.benchmark = True
40 | cfg.model.pretrained = None
41 | cfg.data.test.test_mode = True
42 |
43 | # build the dataloader
44 | # TODO: support multiple images per gpu (only minor changes are needed)
45 | print(cfg.data.test)
46 | dataset = custom_build_dataset(cfg.data.test)
47 | data_loader = build_dataloader(
48 | dataset,
49 | samples_per_gpu=1,
50 | workers_per_gpu=cfg.data.workers_per_gpu,
51 | dist=False,
52 | shuffle=False)
53 |
54 | # build the model and load checkpoint
55 | cfg.model.train_cfg = None
56 | model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))
57 | fp16_cfg = cfg.get('fp16', None)
58 | if fp16_cfg is not None:
59 | wrap_fp16_model(model)
60 | if args.checkpoint is not None:
61 | load_checkpoint(model, args.checkpoint, map_location='cpu')
62 | #if args.fuse_conv_bn:
63 | # model = fuse_module(model)
64 |
65 | model = MMDataParallel(model, device_ids=[0])
66 |
67 | model.eval()
68 |
69 | # the first several iterations may be very slow so skip them
70 | num_warmup = 5
71 | pure_inf_time = 0
72 |
73 | # benchmark with several samples and take the average
74 | for i, data in enumerate(data_loader):
75 | torch.cuda.synchronize()
76 | start_time = time.perf_counter()
77 | with torch.no_grad():
78 | model(return_loss=False, rescale=True, **data)
79 |
80 | torch.cuda.synchronize()
81 | elapsed = time.perf_counter() - start_time
82 |
83 | if i >= num_warmup:
84 | pure_inf_time += elapsed
85 | if (i + 1) % args.log_interval == 0:
86 | fps = (i + 1 - num_warmup) / pure_inf_time
87 | print(f'Done image [{i + 1:<3}/ {args.samples}], '
88 | f'fps: {fps:.1f} img / s')
89 |
90 | if (i + 1) == args.samples:
91 | pure_inf_time += elapsed
92 | fps = (i + 1 - num_warmup) / pure_inf_time
93 | print(f'Overall fps: {fps:.1f} img / s')
94 | break
95 |
96 |
97 | if __name__ == '__main__':
98 | main()
99 |
--------------------------------------------------------------------------------
/projects/configs/_base_/models/hv_pointpillars_fpn_nus.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | # Voxel size for voxel encoder
3 | # Usually voxel size is changed consistently with the point cloud range
4 | # If point cloud range is modified, do remember to change all related
5 | # keys in the config.
6 | voxel_size = [0.25, 0.25, 8]
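# For example, with point_cloud_range = [-50, -50, -5, 50, 50, 3] and this voxel
# size, the BEV grid has (50 - (-50)) / 0.25 = 400 cells per side, which matches
# the output_shape=[400, 400] of pts_middle_encoder below.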
7 | model = dict(
8 | type='MVXFasterRCNN',
9 | pts_voxel_layer=dict(
10 | max_num_points=64,
11 | point_cloud_range=[-50, -50, -5, 50, 50, 3],
12 | voxel_size=voxel_size,
13 | max_voxels=(30000, 40000)),
14 | pts_voxel_encoder=dict(
15 | type='HardVFE',
16 | in_channels=4,
17 | feat_channels=[64, 64],
18 | with_distance=False,
19 | voxel_size=voxel_size,
20 | with_cluster_center=True,
21 | with_voxel_center=True,
22 | point_cloud_range=[-50, -50, -5, 50, 50, 3],
23 | norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)),
24 | pts_middle_encoder=dict(
25 | type='PointPillarsScatter', in_channels=64, output_shape=[400, 400]),
26 | pts_backbone=dict(
27 | type='SECOND',
28 | in_channels=64,
29 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
30 | layer_nums=[3, 5, 5],
31 | layer_strides=[2, 2, 2],
32 | out_channels=[64, 128, 256]),
33 | pts_neck=dict(
34 | type='FPN',
35 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
36 | act_cfg=dict(type='ReLU'),
37 | in_channels=[64, 128, 256],
38 | out_channels=256,
39 | start_level=0,
40 | num_outs=3),
41 | pts_bbox_head=dict(
42 | type='Anchor3DHead',
43 | num_classes=10,
44 | in_channels=256,
45 | feat_channels=256,
46 | use_direction_classifier=True,
47 | anchor_generator=dict(
48 | type='AlignedAnchor3DRangeGenerator',
49 | ranges=[[-50, -50, -1.8, 50, 50, -1.8]],
50 | scales=[1, 2, 4],
51 | sizes=[
52 | [0.8660, 2.5981, 1.], # 1.5/sqrt(3)
53 | [0.5774, 1.7321, 1.], # 1/sqrt(3)
54 | [1., 1., 1.],
55 | [0.4, 0.4, 1],
56 | ],
57 | custom_values=[0, 0],
58 | rotations=[0, 1.57],
59 | reshape_out=True),
60 | assigner_per_size=False,
61 | diff_rad_by_sin=True,
62 | dir_offset=0.7854, # pi/4
63 | dir_limit_offset=0,
64 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9),
65 | loss_cls=dict(
66 | type='FocalLoss',
67 | use_sigmoid=True,
68 | gamma=2.0,
69 | alpha=0.25,
70 | loss_weight=1.0),
71 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
72 | loss_dir=dict(
73 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
74 | # model training and testing settings
75 | train_cfg=dict(
76 | pts=dict(
77 | assigner=dict(
78 | type='MaxIoUAssigner',
79 | iou_calculator=dict(type='BboxOverlapsNearest3D'),
80 | pos_iou_thr=0.6,
81 | neg_iou_thr=0.3,
82 | min_pos_iou=0.3,
83 | ignore_iof_thr=-1),
84 | allowed_border=0,
85 | code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],
86 | pos_weight=-1,
87 | debug=False)),
88 | test_cfg=dict(
89 | pts=dict(
90 | use_rotate_nms=True,
91 | nms_across_levels=False,
92 | nms_pre=1000,
93 | nms_thr=0.2,
94 | score_thr=0.05,
95 | min_bbox_size=0,
96 | max_num=500)))
97 |
--------------------------------------------------------------------------------
/projects/configs/_base_/datasets/nus-mono3d.py:
--------------------------------------------------------------------------------
1 | dataset_type = 'CustomNuScenesMonoDataset'
2 | data_root = 'data/nuscenes/'
3 | class_names = [
4 | 'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
5 | 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
6 | ]
7 | # Input modality for nuScenes dataset, this is consistent with the submission
8 | # format which requires the information in input_modality.
9 | input_modality = dict(
10 | use_lidar=False,
11 | use_camera=True,
12 | use_radar=False,
13 | use_map=False,
14 | use_external=False)
15 | img_norm_cfg = dict(
16 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
17 | train_pipeline = [
18 | dict(type='LoadImageFromFileMono3D'),
19 | dict(
20 | type='LoadAnnotations3D',
21 | with_bbox=True,
22 | with_label=True,
23 | with_attr_label=True,
24 | with_bbox_3d=True,
25 | with_label_3d=True,
26 | with_bbox_depth=True),
27 | dict(type='Resize', img_scale=(1600, 900), keep_ratio=True),
28 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
29 | dict(type='Normalize', **img_norm_cfg),
30 | dict(type='Pad', size_divisor=32),
31 | dict(type='DefaultFormatBundle3D', class_names=class_names),
32 | dict(
33 | type='Collect3D',
34 | keys=[
35 | 'img', 'gt_bboxes', 'gt_labels', 'attr_labels', 'gt_bboxes_3d',
36 | 'gt_labels_3d', 'centers2d', 'depths'
37 | ]),
38 | ]
39 | test_pipeline = [
40 | dict(type='LoadImageFromFileMono3D'),
41 | dict(
42 | type='MultiScaleFlipAug',
43 | scale_factor=1.0,
44 | flip=False,
45 | transforms=[
46 | dict(type='RandomFlip3D'),
47 | dict(type='Normalize', **img_norm_cfg),
48 | dict(type='Pad', size_divisor=32),
49 | dict(
50 | type='DefaultFormatBundle3D',
51 | class_names=class_names,
52 | with_label=False),
53 | dict(type='Collect3D', keys=['img']),
54 | ])
55 | ]
56 | # construct a pipeline for data and gt loading in show function
57 | # please keep its loading function consistent with test_pipeline (e.g. client)
58 | eval_pipeline = [
59 | dict(type='LoadImageFromFileMono3D'),
60 | dict(
61 | type='DefaultFormatBundle3D',
62 | class_names=class_names,
63 | with_label=False),
64 | dict(type='Collect3D', keys=['img'])
65 | ]
66 |
67 | data = dict(
68 | samples_per_gpu=2,
69 | workers_per_gpu=2,
70 | train=dict(
71 | type=dataset_type,
72 | data_root=data_root,
73 | ann_file=data_root + 'nuscenes_infos_train_mono3d.coco.json',
74 | img_prefix=data_root,
75 | classes=class_names,
76 | pipeline=train_pipeline,
77 | modality=input_modality,
78 | test_mode=False,
79 | box_type_3d='Camera'),
80 | val=dict(
81 | type=dataset_type,
82 | data_root=data_root,
83 | ann_file=data_root + 'nuscenes_infos_val_mono3d.coco.json',
84 | img_prefix=data_root,
85 | classes=class_names,
86 | pipeline=test_pipeline,
87 | modality=input_modality,
88 | test_mode=True,
89 | box_type_3d='Camera'),
90 | test=dict(
91 | type=dataset_type,
92 | data_root=data_root,
93 | ann_file=data_root + 'nuscenes_infos_val_mono3d.coco.json',
94 | img_prefix=data_root,
95 | classes=class_names,
96 | pipeline=test_pipeline,
97 | modality=input_modality,
98 | test_mode=True,
99 | box_type_3d='Camera'))
100 | evaluation = dict(interval=2)
101 |
--------------------------------------------------------------------------------
/projects/configs/_base_/datasets/sunrgbd-3d-10class.py:
--------------------------------------------------------------------------------
1 | dataset_type = 'SUNRGBDDataset'
2 | data_root = 'data/sunrgbd/'
3 | class_names = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser',
4 | 'night_stand', 'bookshelf', 'bathtub')
5 | train_pipeline = [
6 | dict(
7 | type='LoadPointsFromFile',
8 | coord_type='DEPTH',
9 | shift_height=True,
10 | load_dim=6,
11 | use_dim=[0, 1, 2]),
12 | dict(type='LoadAnnotations3D'),
13 | dict(
14 | type='RandomFlip3D',
15 | sync_2d=False,
16 | flip_ratio_bev_horizontal=0.5,
17 | ),
18 | dict(
19 | type='GlobalRotScaleTrans',
20 | rot_range=[-0.523599, 0.523599],
21 | scale_ratio_range=[0.85, 1.15],
22 | shift_height=True),
23 | dict(type='PointSample', num_points=20000),
24 | dict(type='DefaultFormatBundle3D', class_names=class_names),
25 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
26 | ]
27 | test_pipeline = [
28 | dict(
29 | type='LoadPointsFromFile',
30 | coord_type='DEPTH',
31 | shift_height=True,
32 | load_dim=6,
33 | use_dim=[0, 1, 2]),
34 | dict(
35 | type='MultiScaleFlipAug3D',
36 | img_scale=(1333, 800),
37 | pts_scale_ratio=1,
38 | flip=False,
39 | transforms=[
40 | dict(
41 | type='GlobalRotScaleTrans',
42 | rot_range=[0, 0],
43 | scale_ratio_range=[1., 1.],
44 | translation_std=[0, 0, 0]),
45 | dict(
46 | type='RandomFlip3D',
47 | sync_2d=False,
48 | flip_ratio_bev_horizontal=0.5,
49 | ),
50 | dict(type='PointSample', num_points=20000),
51 | dict(
52 | type='DefaultFormatBundle3D',
53 | class_names=class_names,
54 | with_label=False),
55 | dict(type='Collect3D', keys=['points'])
56 | ])
57 | ]
58 | # construct a pipeline for data and gt loading in show function
59 | # please keep its loading function consistent with test_pipeline (e.g. client)
60 | eval_pipeline = [
61 | dict(
62 | type='LoadPointsFromFile',
63 | coord_type='DEPTH',
64 | shift_height=False,
65 | load_dim=6,
66 | use_dim=[0, 1, 2]),
67 | dict(
68 | type='DefaultFormatBundle3D',
69 | class_names=class_names,
70 | with_label=False),
71 | dict(type='Collect3D', keys=['points'])
72 | ]
73 |
74 | data = dict(
75 | samples_per_gpu=16,
76 | workers_per_gpu=4,
77 | train=dict(
78 | type='RepeatDataset',
79 | times=5,
80 | dataset=dict(
81 | type=dataset_type,
82 | data_root=data_root,
83 | ann_file=data_root + 'sunrgbd_infos_train.pkl',
84 | pipeline=train_pipeline,
85 | classes=class_names,
86 | filter_empty_gt=False,
87 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
88 | # and box_type_3d='Depth' in sunrgbd and scannet dataset.
89 | box_type_3d='Depth')),
90 | val=dict(
91 | type=dataset_type,
92 | data_root=data_root,
93 | ann_file=data_root + 'sunrgbd_infos_val.pkl',
94 | pipeline=test_pipeline,
95 | classes=class_names,
96 | test_mode=True,
97 | box_type_3d='Depth'),
98 | test=dict(
99 | type=dataset_type,
100 | data_root=data_root,
101 | ann_file=data_root + 'sunrgbd_infos_val.pkl',
102 | pipeline=test_pipeline,
103 | classes=class_names,
104 | test_mode=True,
105 | box_type_3d='Depth'))
106 |
107 | evaluation = dict(pipeline=eval_pipeline)
108 |
--------------------------------------------------------------------------------
/projects/mmdet3d_plugin/core/evaluation/eval_hooks.py:
--------------------------------------------------------------------------------
1 |
2 | # Note: MMCV's EvalHook updated its interface in V1.3.16. To avoid a strong
3 | # version dependency, we do not inherit directly from EvalHook but from
4 | # BaseDistEvalHook.
5 |
6 | import bisect
7 | import os.path as osp
8 |
9 | import mmcv
10 | import torch.distributed as dist
11 | from mmcv.runner import DistEvalHook as BaseDistEvalHook
12 | from mmcv.runner import EvalHook as BaseEvalHook
13 | from torch.nn.modules.batchnorm import _BatchNorm
14 | from mmdet.core.evaluation.eval_hooks import DistEvalHook
15 |
16 |
17 | def _calc_dynamic_intervals(start_interval, dynamic_interval_list):
18 | assert mmcv.is_list_of(dynamic_interval_list, tuple)
19 |
20 | dynamic_milestones = [0]
21 | dynamic_milestones.extend(
22 | [dynamic_interval[0] for dynamic_interval in dynamic_interval_list])
23 | dynamic_intervals = [start_interval]
24 | dynamic_intervals.extend(
25 | [dynamic_interval[1] for dynamic_interval in dynamic_interval_list])
26 | return dynamic_milestones, dynamic_intervals
27 |
28 |
29 | class CustomDistEvalHook(BaseDistEvalHook):
30 |
31 | def __init__(self, *args, dynamic_intervals=None, **kwargs):
32 | super(CustomDistEvalHook, self).__init__(*args, **kwargs)
33 | self.use_dynamic_intervals = dynamic_intervals is not None
34 | if self.use_dynamic_intervals:
35 | self.dynamic_milestones, self.dynamic_intervals = \
36 | _calc_dynamic_intervals(self.interval, dynamic_intervals)
37 |
38 | def _decide_interval(self, runner):
39 | if self.use_dynamic_intervals:
40 | progress = runner.epoch if self.by_epoch else runner.iter
41 | step = bisect.bisect(self.dynamic_milestones, (progress + 1))
42 | # Dynamically modify the evaluation interval
43 | self.interval = self.dynamic_intervals[step - 1]
44 |
45 | def before_train_epoch(self, runner):
46 | """Evaluate the model only at the start of training by epoch."""
47 | self._decide_interval(runner)
48 | super().before_train_epoch(runner)
49 |
50 | def before_train_iter(self, runner):
51 | self._decide_interval(runner)
52 | super().before_train_iter(runner)
53 |
54 | def _do_evaluate(self, runner):
55 | """perform evaluation and save ckpt."""
56 | # Synchronization of BatchNorm's buffer (running_mean
57 | # and running_var) is not supported in the DDP of pytorch,
58 | # which may cause the inconsistent performance of models in
59 | # different ranks, so we broadcast BatchNorm's buffers
60 | # of rank 0 to other ranks to avoid this.
61 | if self.broadcast_bn_buffer:
62 | model = runner.model
63 | for name, module in model.named_modules():
64 | if isinstance(module,
65 | _BatchNorm) and module.track_running_stats:
66 | dist.broadcast(module.running_var, 0)
67 | dist.broadcast(module.running_mean, 0)
68 |
69 | if not self._should_evaluate(runner):
70 | return
71 |
72 | tmpdir = self.tmpdir
73 | if tmpdir is None:
74 | tmpdir = osp.join(runner.work_dir, '.eval_hook')
75 |
76 | from projects.mmdet3d_plugin.bevformer.apis.test import custom_multi_gpu_test  # imported here to avoid a circular import
77 |
78 | results = custom_multi_gpu_test(
79 | runner.model,
80 | self.dataloader,
81 | tmpdir=tmpdir,
82 | gpu_collect=self.gpu_collect)
83 | if runner.rank == 0:
84 | print('\n')
85 | runner.log_buffer.output['eval_iter_num'] = len(self.dataloader)
86 |
87 | key_score = self.evaluate(runner, results)
88 |
89 | if self.save_best:
90 | self._save_ckpt(runner, key_score)
91 |
92 |
--------------------------------------------------------------------------------
/projects/configs/_base_/models/hv_second_secfpn_waymo.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | # Voxel size for voxel encoder
3 | # Usually voxel size is changed consistently with the point cloud range
4 | # If point cloud range is modified, do remember to change all related
5 | # keys in the config.
6 | voxel_size = [0.08, 0.08, 0.1]
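# For example, with point_cloud_range = [-76.8, -51.2, -2, 76.8, 51.2, 4] and this
# voxel size, the grid is 153.6 / 0.08 = 1920 voxels in x, 102.4 / 0.08 = 1280 in y
# and 6 / 0.1 = 60 in z, matching the sparse_shape=[61, 1280, 1920] of the middle
# encoder below (the sparse z dimension carries one extra cell).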
7 | model = dict(
8 | type='VoxelNet',
9 | voxel_layer=dict(
10 | max_num_points=10,
11 | point_cloud_range=[-76.8, -51.2, -2, 76.8, 51.2, 4],
12 | voxel_size=voxel_size,
13 | max_voxels=(80000, 90000)),
14 | voxel_encoder=dict(type='HardSimpleVFE', num_features=5),
15 | middle_encoder=dict(
16 | type='SparseEncoder',
17 | in_channels=5,
18 | sparse_shape=[61, 1280, 1920],
19 | order=('conv', 'norm', 'act')),
20 | backbone=dict(
21 | type='SECOND',
22 | in_channels=384,
23 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
24 | layer_nums=[5, 5],
25 | layer_strides=[1, 2],
26 | out_channels=[128, 256]),
27 | neck=dict(
28 | type='SECONDFPN',
29 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
30 | in_channels=[128, 256],
31 | upsample_strides=[1, 2],
32 | out_channels=[256, 256]),
33 | bbox_head=dict(
34 | type='Anchor3DHead',
35 | num_classes=3,
36 | in_channels=512,
37 | feat_channels=512,
38 | use_direction_classifier=True,
39 | anchor_generator=dict(
40 | type='AlignedAnchor3DRangeGenerator',
41 | ranges=[[-76.8, -51.2, -0.0345, 76.8, 51.2, -0.0345],
42 | [-76.8, -51.2, 0, 76.8, 51.2, 0],
43 | [-76.8, -51.2, -0.1188, 76.8, 51.2, -0.1188]],
44 | sizes=[
45 | [2.08, 4.73, 1.77], # car
46 | [0.84, 0.91, 1.74], # pedestrian
47 | [0.84, 1.81, 1.77] # cyclist
48 | ],
49 | rotations=[0, 1.57],
50 | reshape_out=False),
51 | diff_rad_by_sin=True,
52 | dir_offset=0.7854, # pi/4
53 | dir_limit_offset=0,
54 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7),
55 | loss_cls=dict(
56 | type='FocalLoss',
57 | use_sigmoid=True,
58 | gamma=2.0,
59 | alpha=0.25,
60 | loss_weight=1.0),
61 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
62 | loss_dir=dict(
63 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
64 | # model training and testing settings
65 | train_cfg=dict(
66 | assigner=[
67 | dict( # car
68 | type='MaxIoUAssigner',
69 | iou_calculator=dict(type='BboxOverlapsNearest3D'),
70 | pos_iou_thr=0.55,
71 | neg_iou_thr=0.4,
72 | min_pos_iou=0.4,
73 | ignore_iof_thr=-1),
74 | dict( # pedestrian
75 | type='MaxIoUAssigner',
76 | iou_calculator=dict(type='BboxOverlapsNearest3D'),
77 | pos_iou_thr=0.5,
78 | neg_iou_thr=0.3,
79 | min_pos_iou=0.3,
80 | ignore_iof_thr=-1),
81 | dict( # cyclist
82 | type='MaxIoUAssigner',
83 | iou_calculator=dict(type='BboxOverlapsNearest3D'),
84 | pos_iou_thr=0.5,
85 | neg_iou_thr=0.3,
86 | min_pos_iou=0.3,
87 | ignore_iof_thr=-1)
88 | ],
89 | allowed_border=0,
90 | code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
91 | pos_weight=-1,
92 | debug=False),
93 | test_cfg=dict(
94 | use_rotate_nms=True,
95 | nms_across_levels=False,
96 | nms_pre=4096,
97 | nms_thr=0.25,
98 | score_thr=0.1,
99 | min_bbox_size=0,
100 | max_num=500))
101 |
--------------------------------------------------------------------------------
/projects/configs/_base_/models/imvotenet_image.py:
--------------------------------------------------------------------------------
1 | model = dict(
2 | type='ImVoteNet',
3 | img_backbone=dict(
4 | type='ResNet',
5 | depth=50,
6 | num_stages=4,
7 | out_indices=(0, 1, 2, 3),
8 | frozen_stages=1,
9 | norm_cfg=dict(type='BN', requires_grad=False),
10 | norm_eval=True,
11 | style='caffe'),
12 | img_neck=dict(
13 | type='FPN',
14 | in_channels=[256, 512, 1024, 2048],
15 | out_channels=256,
16 | num_outs=5),
17 | img_rpn_head=dict(
18 | type='RPNHead',
19 | in_channels=256,
20 | feat_channels=256,
21 | anchor_generator=dict(
22 | type='AnchorGenerator',
23 | scales=[8],
24 | ratios=[0.5, 1.0, 2.0],
25 | strides=[4, 8, 16, 32, 64]),
26 | bbox_coder=dict(
27 | type='DeltaXYWHBBoxCoder',
28 | target_means=[.0, .0, .0, .0],
29 | target_stds=[1.0, 1.0, 1.0, 1.0]),
30 | loss_cls=dict(
31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
33 | img_roi_head=dict(
34 | type='StandardRoIHead',
35 | bbox_roi_extractor=dict(
36 | type='SingleRoIExtractor',
37 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
38 | out_channels=256,
39 | featmap_strides=[4, 8, 16, 32]),
40 | bbox_head=dict(
41 | type='Shared2FCBBoxHead',
42 | in_channels=256,
43 | fc_out_channels=1024,
44 | roi_feat_size=7,
45 | num_classes=10,
46 | bbox_coder=dict(
47 | type='DeltaXYWHBBoxCoder',
48 | target_means=[0., 0., 0., 0.],
49 | target_stds=[0.1, 0.1, 0.2, 0.2]),
50 | reg_class_agnostic=False,
51 | loss_cls=dict(
52 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
53 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
54 |
55 | # model training and testing settings
56 | train_cfg=dict(
57 | img_rpn=dict(
58 | assigner=dict(
59 | type='MaxIoUAssigner',
60 | pos_iou_thr=0.7,
61 | neg_iou_thr=0.3,
62 | min_pos_iou=0.3,
63 | match_low_quality=True,
64 | ignore_iof_thr=-1),
65 | sampler=dict(
66 | type='RandomSampler',
67 | num=256,
68 | pos_fraction=0.5,
69 | neg_pos_ub=-1,
70 | add_gt_as_proposals=False),
71 | allowed_border=-1,
72 | pos_weight=-1,
73 | debug=False),
74 | img_rpn_proposal=dict(
75 | nms_across_levels=False,
76 | nms_pre=2000,
77 | nms_post=1000,
78 | max_per_img=1000,
79 | nms=dict(type='nms', iou_threshold=0.7),
80 | min_bbox_size=0),
81 | img_rcnn=dict(
82 | assigner=dict(
83 | type='MaxIoUAssigner',
84 | pos_iou_thr=0.5,
85 | neg_iou_thr=0.5,
86 | min_pos_iou=0.5,
87 | match_low_quality=False,
88 | ignore_iof_thr=-1),
89 | sampler=dict(
90 | type='RandomSampler',
91 | num=512,
92 | pos_fraction=0.25,
93 | neg_pos_ub=-1,
94 | add_gt_as_proposals=True),
95 | pos_weight=-1,
96 | debug=False)),
97 | test_cfg=dict(
98 | img_rpn=dict(
99 | nms_across_levels=False,
100 | nms_pre=1000,
101 | nms_post=1000,
102 | max_per_img=1000,
103 | nms=dict(type='nms', iou_threshold=0.7),
104 | min_bbox_size=0),
105 | img_rcnn=dict(
106 | score_thr=0.05,
107 | nms=dict(type='nms', iou_threshold=0.5),
108 | max_per_img=100)))
109 |
--------------------------------------------------------------------------------
/hf_guide.md:
--------------------------------------------------------------------------------
1 | # Guide to Adapting mmlab Code to Fire-Flyer
2 |
3 | The following uses BEVFormer as an example to walk through the key steps of adapting mmlab code to the Fire-Flyer cluster.
4 |
5 | ## Adaptation 1: Launch Method
6 |
7 | The Fire-Flyer cluster requires `bind_numa` when launching distributed jobs, so launching with `torch.multiprocessing.spawn` is recommended rather than `torch.distributed.launch`.
8 |
9 | Before
10 |
11 | ```
12 | def main():
13 | args = parse_args()
14 | ...
15 |
16 | if __name__ == '__main__':
17 | main()
18 | ```
19 |
20 | After
21 |
22 | ```
23 | import hfai
24 |
25 | def main(local_rank, args):
26 | ...
27 |
28 | if __name__ == '__main__':
29 | args = parse_args()
30 | ngpus = torch.cuda.device_count()
31 | hfai.multiprocessing.spawn(main, args=(args,), nprocs=ngpus, bind_numa=True)
32 | ```
33 |
34 | ## Adaptation 2: Initializing Distributed Parameters
35 | When launching a Fire-Flyer job, the number of nodes is specified with --num-nodes, and each node has 8 GPUs.
36 |
37 | The environment variables on the Fire-Flyer cluster have the following meanings:
38 | - `world_size` is the number of nodes
39 | - `rank` is the id of the current node
40 |
41 | The distributed initialization code therefore needs to be modified accordingly.
42 |
43 | Before
44 |
45 | ```
46 | def main(local_rank, args):
47 | ...
48 | if args.launcher == 'none':
49 | distributed = False
50 | else:
51 | distributed = True
52 | init_dist(args.launcher, **cfg.dist_params)
53 | # re-set gpu_ids with distributed training mode
54 | _, world_size = get_dist_info()
55 | cfg.gpu_ids = range(world_size)
56 | ```
57 |
58 | After
59 |
60 | ```
61 | def main(local_rank, args):
62 | ...
63 | if args.launcher == 'none':
64 | distributed = False
65 | rank = 0
66 | else:
67 | distributed = True
68 | # init distributed env first, since logger depends on the dist info.
69 | ip = os.environ.get("MASTER_ADDR", "127.0.0.1")
70 | port = os.environ.get("MASTER_PORT", "2223")
71 | hosts = int(os.environ.get("WORLD_SIZE", 1)) # number of nodes
72 | rank = int(os.environ.get("RANK", 0)) # node id
73 | gpus = torch.cuda.device_count() # gpus per node
74 | dist.init_process_group(
75 | backend="nccl", init_method=f"tcp://{ip}:{port}", world_size=hosts * gpus, rank=rank * gpus + local_rank
76 | )
77 | torch.cuda.set_device(local_rank)
78 | # re-set gpu_ids with distributed training mode
79 | _, world_size = get_dist_info()
80 | cfg.gpu_ids = range(world_size)
81 | ```
82 |
83 | ## Adaptation 3: Resuming Interrupted Training
84 |
85 | Jobs on the Fire-Flyer cluster take part in time-sharing scheduling, so training must be able to resume after being interrupted. `mmcv.runner` automatically saves a checkpoint at the end of every epoch, so the two steps below are enough to guarantee that training can continue after an interruption.
86 |
87 | 1. Specify the save directory with --work-dir when training
88 | 2. Add the --auto-resume flag when launching (see the example below)
89 |
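For example, starting from the training command in the README, a resumable ten-node run could look like the following sketch (the work directory name is illustrative; the two flags follow the steps above):

```
hfai python tools/train.py projects/configs/bevformer/bevformer_base.py --work-dir out/node10_train --auto-resume -- --nodes 10 --priority 40 --name node10_train
```

When the job is rescheduled, `mmcv.runner` finds the latest checkpoint in --work-dir and continues training from it.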
90 |
91 | ## Adaptation 4: Saving the Config
92 |
93 | To make sure the config file is written only once across the cluster's many processes, check the `local_rank` and `rank` variables before writing.
94 |
95 | Before
96 |
97 | ```
98 | def main(local_rank, args):
99 | ...
100 | # dump config
101 | cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))
102 | ```
103 |
104 | After
105 |
106 | ```
107 | def main(local_rank, args):
108 | ...
109 | # dump config
110 | if local_rank == 0 and rank == 0:
111 | cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))
112 |
113 | ```
114 |
115 | ## Adaptation 5: Converting Data to FFRecord
116 |
117 | For the conversion steps, see [ffrecord_converter](https://github.com/HFAiLab/ffrecord_converters).
118 |
119 | ## Adaptation 6: Using hfai Operators
120 |
121 | High-Flyer AI has re-implemented several commonly used AI operators to improve overall training efficiency. Adding the following code automatically replaces the corresponding operators.
122 |
123 | ```
124 | from hfai.nn import to_hfai
125 | def main(local_rank, args):
126 | ...
127 | model = to_hfai(model, contiguous_param=False, verbose=True, inplace=True)
128 | ```
129 | Note: the speedup is most noticeable when `batch_size` is large.
130 |
131 |
132 |
133 | ## Frequently Asked Questions
134 | Some common issues are collected below for reference.
135 |
136 | ### Issue 1: cannot pickle 'dict_values' object.
137 |
138 | This happens because nuscenes-devkit stores data as `dict_values`, so when the `dataloader` is configured with `num_workers` greater than `0`, the worker processes cannot `pickle` the dataset.
139 |
140 | Fix it as follows:
141 |
142 | 1 - Locate the nuscenes-devkit installation directory `$nuscenes_devkit_path`:
143 | ```
144 | python -c "import nuscenes; print(nuscenes.__file__)"
145 | ```
146 |
147 | 2 - In `$nuscenes_devkit_path/eval/detection/data_classes.py`, change the `dict_values` type as shown below.
148 |
149 | Before
150 | ```
151 | self.class_names = self.class_range.keys()
152 | ```
153 | After
154 | ```
155 | self.class_names = list(self.class_range.keys())
156 | ```
157 |
158 |
--------------------------------------------------------------------------------
/projects/configs/_base_/datasets/s3dis-3d-5class.py:
--------------------------------------------------------------------------------
1 | # dataset settings
2 | dataset_type = 'S3DISDataset'
3 | data_root = './data/s3dis/'
4 | class_names = ('table', 'chair', 'sofa', 'bookcase', 'board')
5 | train_area = [1, 2, 3, 4, 6]
6 | test_area = 5
7 |
8 | train_pipeline = [
9 | dict(
10 | type='LoadPointsFromFile',
11 | coord_type='DEPTH',
12 | shift_height=True,
13 | load_dim=6,
14 | use_dim=[0, 1, 2, 3, 4, 5]),
15 | dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
16 | dict(type='PointSample', num_points=40000),
17 | dict(
18 | type='RandomFlip3D',
19 | sync_2d=False,
20 | flip_ratio_bev_horizontal=0.5,
21 | flip_ratio_bev_vertical=0.5),
22 | dict(
23 | type='GlobalRotScaleTrans',
24 | # following ScanNet dataset the rotation range is 5 degrees
25 | rot_range=[-0.087266, 0.087266],
26 | scale_ratio_range=[1.0, 1.0],
27 | shift_height=True),
28 | dict(type='DefaultFormatBundle3D', class_names=class_names),
29 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
30 | ]
31 | test_pipeline = [
32 | dict(
33 | type='LoadPointsFromFile',
34 | coord_type='DEPTH',
35 | shift_height=True,
36 | load_dim=6,
37 | use_dim=[0, 1, 2, 3, 4, 5]),
38 | dict(
39 | type='MultiScaleFlipAug3D',
40 | img_scale=(1333, 800),
41 | pts_scale_ratio=1,
42 | flip=False,
43 | transforms=[
44 | dict(
45 | type='GlobalRotScaleTrans',
46 | rot_range=[0, 0],
47 | scale_ratio_range=[1., 1.],
48 | translation_std=[0, 0, 0]),
49 | dict(
50 | type='RandomFlip3D',
51 | sync_2d=False,
52 | flip_ratio_bev_horizontal=0.5,
53 | flip_ratio_bev_vertical=0.5),
54 | dict(type='PointSample', num_points=40000),
55 | dict(
56 | type='DefaultFormatBundle3D',
57 | class_names=class_names,
58 | with_label=False),
59 | dict(type='Collect3D', keys=['points'])
60 | ])
61 | ]
62 | # construct a pipeline for data and gt loading in show function
63 | # please keep its loading function consistent with test_pipeline (e.g. client)
64 | eval_pipeline = [
65 | dict(
66 | type='LoadPointsFromFile',
67 | coord_type='DEPTH',
68 | shift_height=False,
69 | load_dim=6,
70 | use_dim=[0, 1, 2, 3, 4, 5]),
71 | dict(
72 | type='DefaultFormatBundle3D',
73 | class_names=class_names,
74 | with_label=False),
75 | dict(type='Collect3D', keys=['points'])
76 | ]
77 |
78 | data = dict(
79 | samples_per_gpu=8,
80 | workers_per_gpu=4,
81 | train=dict(
82 | type='RepeatDataset',
83 | times=5,
84 | dataset=dict(
85 | type='ConcatDataset',
86 | datasets=[
87 | dict(
88 | type=dataset_type,
89 | data_root=data_root,
90 | ann_file=data_root + f's3dis_infos_Area_{i}.pkl',
91 | pipeline=train_pipeline,
92 | filter_empty_gt=False,
93 | classes=class_names,
94 | box_type_3d='Depth') for i in train_area
95 | ],
96 | separate_eval=False)),
97 | val=dict(
98 | type=dataset_type,
99 | data_root=data_root,
100 | ann_file=data_root + f's3dis_infos_Area_{test_area}.pkl',
101 | pipeline=test_pipeline,
102 | classes=class_names,
103 | test_mode=True,
104 | box_type_3d='Depth'),
105 | test=dict(
106 | type=dataset_type,
107 | data_root=data_root,
108 | ann_file=data_root + f's3dis_infos_Area_{test_area}.pkl',
109 | pipeline=test_pipeline,
110 | classes=class_names,
111 | test_mode=True,
112 | box_type_3d='Depth'))
113 |
114 | evaluation = dict(pipeline=eval_pipeline)
115 |
--------------------------------------------------------------------------------
/projects/configs/datasets/custom_waymo-3d.py:
--------------------------------------------------------------------------------
1 | # dataset settings
2 | # D5 in the config name means the whole dataset is divided into 5 folds
3 | # We only use one fold for efficient experiments
4 | dataset_type = 'CustomWaymoDataset'
5 | data_root = 'data/waymo/kitti_format/'
6 | file_client_args = dict(backend='disk')
7 | # Uncomment the following if use ceph or other file clients.
8 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
9 | # for more details.
10 | # file_client_args = dict(
11 | # backend='petrel', path_mapping=dict(data='s3://waymo_data/'))
12 |
13 | img_norm_cfg = dict(
14 | mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
15 | class_names = ['Car', 'Pedestrian', 'Cyclist']
16 | point_cloud_range = [-74.88, -74.88, -2, 74.88, 74.88, 4]
17 | input_modality = dict(use_lidar=False, use_camera=True)
18 | db_sampler = dict(
19 | data_root=data_root,
20 | info_path=data_root + 'waymo_dbinfos_train.pkl',
21 | rate=1.0,
22 | prepare=dict(
23 | filter_by_difficulty=[-1],
24 | filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)),
25 | classes=class_names,
26 | sample_groups=dict(Car=15, Pedestrian=10, Cyclist=10),
27 | points_loader=dict(
28 | type='LoadPointsFromFile',
29 | coord_type='LIDAR',
30 | load_dim=5,
31 | use_dim=[0, 1, 2, 3, 4],
32 | file_client_args=file_client_args))
33 |
34 |
35 |
36 | train_pipeline = [
37 | dict(type='LoadMultiViewImageFromFiles', to_float32=True),
38 | dict(type='PhotoMetricDistortionMultiViewImage'),
39 | dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True, with_attr_label=False),
40 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
41 | dict(type='ObjectNameFilter', classes=class_names),
42 | dict(type='NormalizeMultiviewImage', **img_norm_cfg),
43 | dict(type='PadMultiViewImage', size_divisor=32),
44 | dict(type='DefaultFormatBundle3D', class_names=class_names),
45 | dict(type='CustomCollect3D', keys=['gt_bboxes_3d', 'gt_labels_3d', 'img'])
46 | ]
47 |
48 |
49 | test_pipeline = [
50 | dict(type='LoadMultiViewImageFromFiles', to_float32=True),
51 | dict(type='NormalizeMultiviewImage', **img_norm_cfg),
52 | dict(type='PadMultiViewImage', size_divisor=32),
53 | dict(
54 | type='MultiScaleFlipAug3D',
55 | img_scale=(1920, 1280),
56 | pts_scale_ratio=1,
57 | flip=False,
58 | transforms=[
59 | dict(
60 | type='DefaultFormatBundle3D',
61 | class_names=class_names,
62 | with_label=False),
63 | dict(type='CustomCollect3D', keys=['img'])
64 | ])
65 | ]
66 |
67 |
68 | # construct a pipeline for data and gt loading in show function
69 | # please keep its loading function consistent with test_pipeline (e.g. client)
70 |
71 | data = dict(
72 | samples_per_gpu=2,
73 | workers_per_gpu=4,
74 | train=dict(
75 | type='RepeatDataset',
76 | times=2,
77 | dataset=dict(
78 | type=dataset_type,
79 | data_root=data_root,
80 | ann_file=data_root + 'waymo_infos_train.pkl',
81 | split='training',
82 | pipeline=train_pipeline,
83 | modality=input_modality,
84 | classes=class_names,
85 | test_mode=False,
86 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
87 | # and box_type_3d='Depth' in sunrgbd and scannet dataset.
88 | box_type_3d='LiDAR',
89 | # load one frame every five frames
90 | load_interval=5)),
91 | val=dict(
92 | type=dataset_type,
93 | data_root=data_root,
94 | ann_file=data_root + 'waymo_infos_val.pkl',
95 | split='training',
96 | pipeline=test_pipeline,
97 | modality=input_modality,
98 | classes=class_names,
99 | test_mode=True,
100 | box_type_3d='LiDAR'),
101 | test=dict(
102 | type=dataset_type,
103 | data_root=data_root,
104 | ann_file=data_root + 'waymo_infos_val.pkl',
105 | split='training',
106 | pipeline=test_pipeline,
107 | modality=input_modality,
108 | classes=class_names,
109 | test_mode=True,
110 | box_type_3d='LiDAR'))
111 |
112 | evaluation = dict(interval=24, pipeline=test_pipeline)
--------------------------------------------------------------------------------
/projects/mmdet3d_plugin/datasets/samplers/group_sampler.py:
--------------------------------------------------------------------------------
1 |
2 | # Copyright (c) OpenMMLab. All rights reserved.
3 | import math
4 |
5 | import numpy as np
6 | import torch
7 | from mmcv.runner import get_dist_info
8 | from torch.utils.data import Sampler
9 | from .sampler import SAMPLER
10 | import random
12 |
13 |
14 | @SAMPLER.register_module()
15 | class DistributedGroupSampler(Sampler):
16 | """Sampler that restricts data loading to a subset of the dataset.
17 | It is especially useful in conjunction with
18 | :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each
19 | process can pass a DistributedSampler instance as a DataLoader sampler,
20 | and load a subset of the original dataset that is exclusive to it.
21 | .. note::
22 | Dataset is assumed to be of constant size.
23 | Arguments:
24 | dataset: Dataset used for sampling.
25 | num_replicas (optional): Number of processes participating in
26 | distributed training.
27 | rank (optional): Rank of the current process within num_replicas.
28 | seed (int, optional): random seed used to shuffle the sampler if
29 | ``shuffle=True``. This number should be identical across all
30 | processes in the distributed group. Default: 0.
31 | """
32 |
33 | def __init__(self,
34 | dataset,
35 | samples_per_gpu=1,
36 | num_replicas=None,
37 | rank=None,
38 | seed=0):
39 | _rank, _num_replicas = get_dist_info()
40 | if num_replicas is None:
41 | num_replicas = _num_replicas
42 | if rank is None:
43 | rank = _rank
44 | self.dataset = dataset
45 | self.samples_per_gpu = samples_per_gpu
46 | self.num_replicas = num_replicas
47 | self.rank = rank
48 | self.epoch = 0
49 | self.seed = seed if seed is not None else 0
50 |
51 | assert hasattr(self.dataset, 'flag')
52 | self.flag = self.dataset.flag
53 | self.group_sizes = np.bincount(self.flag)
54 |
55 | self.num_samples = 0
56 | for i, j in enumerate(self.group_sizes):
57 | self.num_samples += int(
58 | math.ceil(self.group_sizes[i] * 1.0 / self.samples_per_gpu /
59 | self.num_replicas)) * self.samples_per_gpu
60 | self.total_size = self.num_samples * self.num_replicas
61 |
62 | def __iter__(self):
63 | # deterministically shuffle based on epoch
64 | g = torch.Generator()
65 | g.manual_seed(self.epoch + self.seed)
66 |
67 | indices = []
68 | for i, size in enumerate(self.group_sizes):
69 | if size > 0:
70 | indice = np.where(self.flag == i)[0]
71 | assert len(indice) == size
72 | # add .numpy() to avoid bug when selecting indice in parrots.
73 | # TODO: check whether torch.randperm() can be replaced by
74 | # numpy.random.permutation().
75 | indice = indice[list(
76 | torch.randperm(int(size), generator=g).numpy())].tolist()
77 | extra = int(
78 | math.ceil(
79 | size * 1.0 / self.samples_per_gpu / self.num_replicas)
80 | ) * self.samples_per_gpu * self.num_replicas - len(indice)
81 | # pad indice
82 | tmp = indice.copy()
83 | for _ in range(extra // size):
84 | indice.extend(tmp)
85 | indice.extend(tmp[:extra % size])
86 | indices.extend(indice)
87 |
88 | assert len(indices) == self.total_size
89 |
90 | indices = [
91 | indices[j] for i in list(
92 | torch.randperm(
93 | len(indices) // self.samples_per_gpu, generator=g))
94 | for j in range(i * self.samples_per_gpu, (i + 1) *
95 | self.samples_per_gpu)
96 | ]
97 |
98 | # subsample
99 | offset = self.num_samples * self.rank
100 | indices = indices[offset:offset + self.num_samples]
101 | assert len(indices) == self.num_samples
102 |
103 | return iter(indices)
104 |
105 | def __len__(self):
106 | return self.num_samples
107 |
108 | def set_epoch(self, epoch):
109 | self.epoch = epoch
110 |
111 |
--------------------------------------------------------------------------------
/projects/configs/_base_/models/hv_pointpillars_secfpn_waymo.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | # Voxel size for voxel encoder
3 | # Usually voxel size is changed consistently with the point cloud range
4 | # If point cloud range is modified, do remember to change all related
5 | # keys in the config.
6 | voxel_size = [0.32, 0.32, 6]
7 | model = dict(
8 | type='MVXFasterRCNN',
9 | pts_voxel_layer=dict(
10 | max_num_points=20,
11 | point_cloud_range=[-74.88, -74.88, -2, 74.88, 74.88, 4],
12 | voxel_size=voxel_size,
13 | max_voxels=(32000, 32000)),
14 | pts_voxel_encoder=dict(
15 | type='HardVFE',
16 | in_channels=5,
17 | feat_channels=[64],
18 | with_distance=False,
19 | voxel_size=voxel_size,
20 | with_cluster_center=True,
21 | with_voxel_center=True,
22 | point_cloud_range=[-74.88, -74.88, -2, 74.88, 74.88, 4],
23 | norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)),
24 | pts_middle_encoder=dict(
25 | type='PointPillarsScatter', in_channels=64, output_shape=[468, 468]),
26 | pts_backbone=dict(
27 | type='SECOND',
28 | in_channels=64,
29 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
30 | layer_nums=[3, 5, 5],
31 | layer_strides=[1, 2, 2],
32 | out_channels=[64, 128, 256]),
33 | pts_neck=dict(
34 | type='SECONDFPN',
35 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
36 | in_channels=[64, 128, 256],
37 | upsample_strides=[1, 2, 4],
38 | out_channels=[128, 128, 128]),
39 | pts_bbox_head=dict(
40 | type='Anchor3DHead',
41 | num_classes=3,
42 | in_channels=384,
43 | feat_channels=384,
44 | use_direction_classifier=True,
45 | anchor_generator=dict(
46 | type='AlignedAnchor3DRangeGenerator',
47 | ranges=[[-74.88, -74.88, -0.0345, 74.88, 74.88, -0.0345],
48 | [-74.88, -74.88, -0.1188, 74.88, 74.88, -0.1188],
49 | [-74.88, -74.88, 0, 74.88, 74.88, 0]],
50 | sizes=[
51 | [2.08, 4.73, 1.77], # car
52 | [0.84, 1.81, 1.77], # cyclist
53 | [0.84, 0.91, 1.74] # pedestrian
54 | ],
55 | rotations=[0, 1.57],
56 | reshape_out=False),
57 | diff_rad_by_sin=True,
58 | dir_offset=0.7854, # pi/4
59 | dir_limit_offset=0,
60 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7),
61 | loss_cls=dict(
62 | type='FocalLoss',
63 | use_sigmoid=True,
64 | gamma=2.0,
65 | alpha=0.25,
66 | loss_weight=1.0),
67 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
68 | loss_dir=dict(
69 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
70 | # model training and testing settings
71 | train_cfg=dict(
72 | pts=dict(
73 | assigner=[
74 | dict( # car
75 | type='MaxIoUAssigner',
76 | iou_calculator=dict(type='BboxOverlapsNearest3D'),
77 | pos_iou_thr=0.55,
78 | neg_iou_thr=0.4,
79 | min_pos_iou=0.4,
80 | ignore_iof_thr=-1),
81 | dict( # cyclist
82 | type='MaxIoUAssigner',
83 | iou_calculator=dict(type='BboxOverlapsNearest3D'),
84 | pos_iou_thr=0.5,
85 | neg_iou_thr=0.3,
86 | min_pos_iou=0.3,
87 | ignore_iof_thr=-1),
88 | dict( # pedestrian
89 | type='MaxIoUAssigner',
90 | iou_calculator=dict(type='BboxOverlapsNearest3D'),
91 | pos_iou_thr=0.5,
92 | neg_iou_thr=0.3,
93 | min_pos_iou=0.3,
94 | ignore_iof_thr=-1),
95 | ],
96 | allowed_border=0,
97 | code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
98 | pos_weight=-1,
99 | debug=False)),
100 | test_cfg=dict(
101 | pts=dict(
102 | use_rotate_nms=True,
103 | nms_across_levels=False,
104 | nms_pre=4096,
105 | nms_thr=0.25,
106 | score_thr=0.1,
107 | min_bbox_size=0,
108 | max_num=500)))
109 |
--------------------------------------------------------------------------------
/projects/mmdet3d_plugin/models/utils/grid_mask.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import numpy as np
4 | from PIL import Image
5 | from mmcv.runner import force_fp32, auto_fp16
6 |
7 | class Grid(object):
8 | def __init__(self, use_h, use_w, rotate = 1, offset=False, ratio = 0.5, mode=0, prob = 1.):
9 | self.use_h = use_h
10 | self.use_w = use_w
11 | self.rotate = rotate
12 | self.offset = offset
13 | self.ratio = ratio
14 | self.mode=mode
15 | self.st_prob = prob
16 | self.prob = prob
17 |
18 | def set_prob(self, epoch, max_epoch):
19 | self.prob = self.st_prob * epoch / max_epoch
20 |
21 | def __call__(self, img, label):
22 | if np.random.rand() > self.prob:
23 | return img, label
24 | h = img.size(1)
25 | w = img.size(2)
26 | self.d1 = 2
27 | self.d2 = min(h, w)
28 | hh = int(1.5*h)
29 | ww = int(1.5*w)
30 | d = np.random.randint(self.d1, self.d2)
31 | if self.ratio == 1:
32 | self.l = np.random.randint(1, d)
33 | else:
34 | self.l = min(max(int(d*self.ratio+0.5),1),d-1)
35 | mask = np.ones((hh, ww), np.float32)
36 | st_h = np.random.randint(d)
37 | st_w = np.random.randint(d)
38 | if self.use_h:
39 | for i in range(hh//d):
40 | s = d*i + st_h
41 | t = min(s+self.l, hh)
42 | mask[s:t,:] *= 0
43 | if self.use_w:
44 | for i in range(ww//d):
45 | s = d*i + st_w
46 | t = min(s+self.l, ww)
47 | mask[:,s:t] *= 0
48 |
49 | r = np.random.randint(self.rotate)
50 | mask = Image.fromarray(np.uint8(mask))
51 | mask = mask.rotate(r)
52 | mask = np.asarray(mask)
53 | mask = mask[(hh-h)//2:(hh-h)//2+h, (ww-w)//2:(ww-w)//2+w]
54 |
55 | mask = torch.from_numpy(mask).float()
56 | if self.mode == 1:
57 | mask = 1-mask
58 |
59 | mask = mask.expand_as(img)
60 | if self.offset:
61 | offset = torch.from_numpy(2 * (np.random.rand(h,w) - 0.5)).float()
62 | offset = (1 - mask) * offset
63 | img = img * mask + offset
64 | else:
65 | img = img * mask
66 |
67 | return img, label
68 |
69 |
70 | class GridMask(nn.Module):
71 | def __init__(self, use_h, use_w, rotate = 1, offset=False, ratio = 0.5, mode=0, prob = 1.):
72 | super(GridMask, self).__init__()
73 | self.use_h = use_h
74 | self.use_w = use_w
75 | self.rotate = rotate
76 | self.offset = offset
77 | self.ratio = ratio
78 | self.mode = mode
79 | self.st_prob = prob
80 | self.prob = prob
81 | self.fp16_enable = False
82 | def set_prob(self, epoch, max_epoch):
83 | self.prob = self.st_prob * epoch / max_epoch #+ 1.#0.5
84 | @auto_fp16()
85 | def forward(self, x):
86 | if np.random.rand() > self.prob or not self.training:
87 | return x
88 | n,c,h,w = x.size()
89 | x = x.view(-1,h,w)
90 | hh = int(1.5*h)
91 | ww = int(1.5*w)
92 | d = np.random.randint(2, h)
93 | self.l = min(max(int(d*self.ratio+0.5),1),d-1)
94 | mask = np.ones((hh, ww), np.float32)
95 | st_h = np.random.randint(d)
96 | st_w = np.random.randint(d)
97 | if self.use_h:
98 | for i in range(hh//d):
99 | s = d*i + st_h
100 | t = min(s+self.l, hh)
101 | mask[s:t,:] *= 0
102 | if self.use_w:
103 | for i in range(ww//d):
104 | s = d*i + st_w
105 | t = min(s+self.l, ww)
106 | mask[:,s:t] *= 0
107 |
108 | r = np.random.randint(self.rotate)
109 | mask = Image.fromarray(np.uint8(mask))
110 | mask = mask.rotate(r)
111 | mask = np.asarray(mask)
112 | mask = mask[(hh-h)//2:(hh-h)//2+h, (ww-w)//2:(ww-w)//2+w]
113 |
114 | mask = torch.from_numpy(mask).to(x.dtype).cuda()
115 | if self.mode == 1:
116 | mask = 1-mask
117 | mask = mask.expand_as(x)
118 | if self.offset:
119 | offset = torch.from_numpy(2 * (np.random.rand(h,w) - 0.5)).to(x.dtype).cuda()
120 | x = x * mask + offset * (1 - mask)
121 | else:
122 | x = x * mask
123 |
124 | return x.view(n,c,h,w)
--------------------------------------------------------------------------------
/projects/configs/_base_/datasets/scannet-3d-18class.py:
--------------------------------------------------------------------------------
1 | # dataset settings
2 | dataset_type = 'ScanNetDataset'
3 | data_root = './data/scannet/'
4 | class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
5 | 'bookshelf', 'picture', 'counter', 'desk', 'curtain',
6 | 'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub',
7 | 'garbagebin')
8 | train_pipeline = [
9 | dict(
10 | type='LoadPointsFromFile',
11 | coord_type='DEPTH',
12 | shift_height=True,
13 | load_dim=6,
14 | use_dim=[0, 1, 2]),
15 | dict(
16 | type='LoadAnnotations3D',
17 | with_bbox_3d=True,
18 | with_label_3d=True,
19 | with_mask_3d=True,
20 | with_seg_3d=True),
21 | dict(type='GlobalAlignment', rotation_axis=2),
22 | dict(
23 | type='PointSegClassMapping',
24 | valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34,
25 | 36, 39),
26 | max_cat_id=40),
27 | dict(type='PointSample', num_points=40000),
28 | dict(
29 | type='RandomFlip3D',
30 | sync_2d=False,
31 | flip_ratio_bev_horizontal=0.5,
32 | flip_ratio_bev_vertical=0.5),
33 | dict(
34 | type='GlobalRotScaleTrans',
35 | rot_range=[-0.087266, 0.087266],
36 | scale_ratio_range=[1.0, 1.0],
37 | shift_height=True),
38 | dict(type='DefaultFormatBundle3D', class_names=class_names),
39 | dict(
40 | type='Collect3D',
41 | keys=[
42 | 'points', 'gt_bboxes_3d', 'gt_labels_3d', 'pts_semantic_mask',
43 | 'pts_instance_mask'
44 | ])
45 | ]
46 | test_pipeline = [
47 | dict(
48 | type='LoadPointsFromFile',
49 | coord_type='DEPTH',
50 | shift_height=True,
51 | load_dim=6,
52 | use_dim=[0, 1, 2]),
53 | dict(type='GlobalAlignment', rotation_axis=2),
54 | dict(
55 | type='MultiScaleFlipAug3D',
56 | img_scale=(1333, 800),
57 | pts_scale_ratio=1,
58 | flip=False,
59 | transforms=[
60 | dict(
61 | type='GlobalRotScaleTrans',
62 | rot_range=[0, 0],
63 | scale_ratio_range=[1., 1.],
64 | translation_std=[0, 0, 0]),
65 | dict(
66 | type='RandomFlip3D',
67 | sync_2d=False,
68 | flip_ratio_bev_horizontal=0.5,
69 | flip_ratio_bev_vertical=0.5),
70 | dict(type='PointSample', num_points=40000),
71 | dict(
72 | type='DefaultFormatBundle3D',
73 | class_names=class_names,
74 | with_label=False),
75 | dict(type='Collect3D', keys=['points'])
76 | ])
77 | ]
78 | # construct a pipeline for data and gt loading in show function
79 | # please keep its loading function consistent with test_pipeline (e.g. client)
80 | eval_pipeline = [
81 | dict(
82 | type='LoadPointsFromFile',
83 | coord_type='DEPTH',
84 | shift_height=False,
85 | load_dim=6,
86 | use_dim=[0, 1, 2]),
87 | dict(type='GlobalAlignment', rotation_axis=2),
88 | dict(
89 | type='DefaultFormatBundle3D',
90 | class_names=class_names,
91 | with_label=False),
92 | dict(type='Collect3D', keys=['points'])
93 | ]
94 |
95 | data = dict(
96 | samples_per_gpu=8,
97 | workers_per_gpu=4,
98 | train=dict(
99 | type='RepeatDataset',
100 | times=5,
101 | dataset=dict(
102 | type=dataset_type,
103 | data_root=data_root,
104 | ann_file=data_root + 'scannet_infos_train.pkl',
105 | pipeline=train_pipeline,
106 | filter_empty_gt=False,
107 | classes=class_names,
108 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
109 | # and box_type_3d='Depth' in sunrgbd and scannet dataset.
110 | box_type_3d='Depth')),
111 | val=dict(
112 | type=dataset_type,
113 | data_root=data_root,
114 | ann_file=data_root + 'scannet_infos_val.pkl',
115 | pipeline=test_pipeline,
116 | classes=class_names,
117 | test_mode=True,
118 | box_type_3d='Depth'),
119 | test=dict(
120 | type=dataset_type,
121 | data_root=data_root,
122 | ann_file=data_root + 'scannet_infos_val.pkl',
123 | pipeline=test_pipeline,
124 | classes=class_names,
125 | test_mode=True,
126 | box_type_3d='Depth'))
127 |
128 | evaluation = dict(pipeline=eval_pipeline)
129 |
--------------------------------------------------------------------------------
/projects/configs/_base_/models/mask_rcnn_r50_fpn.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='MaskRCNN',
4 | pretrained='torchvision://resnet50',
5 | backbone=dict(
6 | type='ResNet',
7 | depth=50,
8 | num_stages=4,
9 | out_indices=(0, 1, 2, 3),
10 | frozen_stages=1,
11 | norm_cfg=dict(type='BN', requires_grad=True),
12 | norm_eval=True,
13 | style='pytorch'),
14 | neck=dict(
15 | type='FPN',
16 | in_channels=[256, 512, 1024, 2048],
17 | out_channels=256,
18 | num_outs=5),
19 | rpn_head=dict(
20 | type='RPNHead',
21 | in_channels=256,
22 | feat_channels=256,
23 | anchor_generator=dict(
24 | type='AnchorGenerator',
25 | scales=[8],
26 | ratios=[0.5, 1.0, 2.0],
27 | strides=[4, 8, 16, 32, 64]),
28 | bbox_coder=dict(
29 | type='DeltaXYWHBBoxCoder',
30 | target_means=[.0, .0, .0, .0],
31 | target_stds=[1.0, 1.0, 1.0, 1.0]),
32 | loss_cls=dict(
33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
35 | roi_head=dict(
36 | type='StandardRoIHead',
37 | bbox_roi_extractor=dict(
38 | type='SingleRoIExtractor',
39 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
40 | out_channels=256,
41 | featmap_strides=[4, 8, 16, 32]),
42 | bbox_head=dict(
43 | type='Shared2FCBBoxHead',
44 | in_channels=256,
45 | fc_out_channels=1024,
46 | roi_feat_size=7,
47 | num_classes=80,
48 | bbox_coder=dict(
49 | type='DeltaXYWHBBoxCoder',
50 | target_means=[0., 0., 0., 0.],
51 | target_stds=[0.1, 0.1, 0.2, 0.2]),
52 | reg_class_agnostic=False,
53 | loss_cls=dict(
54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
55 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
56 | mask_roi_extractor=dict(
57 | type='SingleRoIExtractor',
58 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
59 | out_channels=256,
60 | featmap_strides=[4, 8, 16, 32]),
61 | mask_head=dict(
62 | type='FCNMaskHead',
63 | num_convs=4,
64 | in_channels=256,
65 | conv_out_channels=256,
66 | num_classes=80,
67 | loss_mask=dict(
68 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
69 | # model training and testing settings
70 | train_cfg=dict(
71 | rpn=dict(
72 | assigner=dict(
73 | type='MaxIoUAssigner',
74 | pos_iou_thr=0.7,
75 | neg_iou_thr=0.3,
76 | min_pos_iou=0.3,
77 | match_low_quality=True,
78 | ignore_iof_thr=-1),
79 | sampler=dict(
80 | type='RandomSampler',
81 | num=256,
82 | pos_fraction=0.5,
83 | neg_pos_ub=-1,
84 | add_gt_as_proposals=False),
85 | allowed_border=-1,
86 | pos_weight=-1,
87 | debug=False),
88 | rpn_proposal=dict(
89 | nms_across_levels=False,
90 | nms_pre=2000,
91 | nms_post=1000,
92 | max_num=1000,
93 | nms_thr=0.7,
94 | min_bbox_size=0),
95 | rcnn=dict(
96 | assigner=dict(
97 | type='MaxIoUAssigner',
98 | pos_iou_thr=0.5,
99 | neg_iou_thr=0.5,
100 | min_pos_iou=0.5,
101 | match_low_quality=True,
102 | ignore_iof_thr=-1),
103 | sampler=dict(
104 | type='RandomSampler',
105 | num=512,
106 | pos_fraction=0.25,
107 | neg_pos_ub=-1,
108 | add_gt_as_proposals=True),
109 | mask_size=28,
110 | pos_weight=-1,
111 | debug=False)),
112 | test_cfg=dict(
113 | rpn=dict(
114 | nms_across_levels=False,
115 | nms_pre=1000,
116 | nms_post=1000,
117 | max_num=1000,
118 | nms_thr=0.7,
119 | min_bbox_size=0),
120 | rcnn=dict(
121 | score_thr=0.05,
122 | nms=dict(type='nms', iou_threshold=0.5),
123 | max_per_img=100,
124 | mask_thr_binary=0.5)))
125 |
--------------------------------------------------------------------------------
/projects/configs/_base_/datasets/scannet_seg-3d-20class.py:
--------------------------------------------------------------------------------
1 | # dataset settings
2 | dataset_type = 'ScanNetSegDataset'
3 | data_root = './data/scannet/'
4 | class_names = ('wall', 'floor', 'cabinet', 'bed', 'chair', 'sofa', 'table',
5 | 'door', 'window', 'bookshelf', 'picture', 'counter', 'desk',
6 | 'curtain', 'refrigerator', 'showercurtrain', 'toilet', 'sink',
7 | 'bathtub', 'otherfurniture')
8 | num_points = 8192
9 | train_pipeline = [
10 | dict(
11 | type='LoadPointsFromFile',
12 | coord_type='DEPTH',
13 | shift_height=False,
14 | use_color=True,
15 | load_dim=6,
16 | use_dim=[0, 1, 2, 3, 4, 5]),
17 | dict(
18 | type='LoadAnnotations3D',
19 | with_bbox_3d=False,
20 | with_label_3d=False,
21 | with_mask_3d=False,
22 | with_seg_3d=True),
23 | dict(
24 | type='PointSegClassMapping',
25 | valid_cat_ids=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28,
26 | 33, 34, 36, 39),
27 | max_cat_id=40),
28 | dict(
29 | type='IndoorPatchPointSample',
30 | num_points=num_points,
31 | block_size=1.5,
32 | ignore_index=len(class_names),
33 | use_normalized_coord=False,
34 | enlarge_size=0.2,
35 | min_unique_num=None),
36 | dict(type='NormalizePointsColor', color_mean=None),
37 | dict(type='DefaultFormatBundle3D', class_names=class_names),
38 | dict(type='Collect3D', keys=['points', 'pts_semantic_mask'])
39 | ]
40 | test_pipeline = [
41 | dict(
42 | type='LoadPointsFromFile',
43 | coord_type='DEPTH',
44 | shift_height=False,
45 | use_color=True,
46 | load_dim=6,
47 | use_dim=[0, 1, 2, 3, 4, 5]),
48 | dict(type='NormalizePointsColor', color_mean=None),
49 | dict(
50 | # a wrapper in order to successfully call test function
51 | # actually we don't perform test-time-aug
52 | type='MultiScaleFlipAug3D',
53 | img_scale=(1333, 800),
54 | pts_scale_ratio=1,
55 | flip=False,
56 | transforms=[
57 | dict(
58 | type='GlobalRotScaleTrans',
59 | rot_range=[0, 0],
60 | scale_ratio_range=[1., 1.],
61 | translation_std=[0, 0, 0]),
62 | dict(
63 | type='RandomFlip3D',
64 | sync_2d=False,
65 | flip_ratio_bev_horizontal=0.0,
66 | flip_ratio_bev_vertical=0.0),
67 | dict(
68 | type='DefaultFormatBundle3D',
69 | class_names=class_names,
70 | with_label=False),
71 | dict(type='Collect3D', keys=['points'])
72 | ])
73 | ]
74 | # construct a pipeline for data and gt loading in show function
75 | # please keep its loading function consistent with test_pipeline (e.g. client)
76 | # we need to load gt seg_mask!
77 | eval_pipeline = [
78 | dict(
79 | type='LoadPointsFromFile',
80 | coord_type='DEPTH',
81 | shift_height=False,
82 | use_color=True,
83 | load_dim=6,
84 | use_dim=[0, 1, 2, 3, 4, 5]),
85 | dict(
86 | type='LoadAnnotations3D',
87 | with_bbox_3d=False,
88 | with_label_3d=False,
89 | with_mask_3d=False,
90 | with_seg_3d=True),
91 | dict(
92 | type='PointSegClassMapping',
93 | valid_cat_ids=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28,
94 | 33, 34, 36, 39),
95 | max_cat_id=40),
96 | dict(
97 | type='DefaultFormatBundle3D',
98 | with_label=False,
99 | class_names=class_names),
100 | dict(type='Collect3D', keys=['points', 'pts_semantic_mask'])
101 | ]
102 |
103 | data = dict(
104 | samples_per_gpu=8,
105 | workers_per_gpu=4,
106 | train=dict(
107 | type=dataset_type,
108 | data_root=data_root,
109 | ann_file=data_root + 'scannet_infos_train.pkl',
110 | pipeline=train_pipeline,
111 | classes=class_names,
112 | test_mode=False,
113 | ignore_index=len(class_names),
114 | scene_idxs=data_root + 'seg_info/train_resampled_scene_idxs.npy'),
115 | val=dict(
116 | type=dataset_type,
117 | data_root=data_root,
118 | ann_file=data_root + 'scannet_infos_val.pkl',
119 | pipeline=test_pipeline,
120 | classes=class_names,
121 | test_mode=True,
122 | ignore_index=len(class_names)),
123 | test=dict(
124 | type=dataset_type,
125 | data_root=data_root,
126 | ann_file=data_root + 'scannet_infos_val.pkl',
127 | pipeline=test_pipeline,
128 | classes=class_names,
129 | test_mode=True,
130 | ignore_index=len(class_names)))
131 |
132 | evaluation = dict(pipeline=eval_pipeline)
133 |
--------------------------------------------------------------------------------
/projects/configs/_base_/datasets/s3dis_seg-3d-13class.py:
--------------------------------------------------------------------------------
1 | # dataset settings
2 | dataset_type = 'S3DISSegDataset'
3 | data_root = './data/s3dis/'
4 | class_names = ('ceiling', 'floor', 'wall', 'beam', 'column', 'window', 'door',
5 | 'table', 'chair', 'sofa', 'bookcase', 'board', 'clutter')
6 | num_points = 4096
7 | train_area = [1, 2, 3, 4, 6]
8 | test_area = 5
9 | train_pipeline = [
10 | dict(
11 | type='LoadPointsFromFile',
12 | coord_type='DEPTH',
13 | shift_height=False,
14 | use_color=True,
15 | load_dim=6,
16 | use_dim=[0, 1, 2, 3, 4, 5]),
17 | dict(
18 | type='LoadAnnotations3D',
19 | with_bbox_3d=False,
20 | with_label_3d=False,
21 | with_mask_3d=False,
22 | with_seg_3d=True),
23 | dict(
24 | type='PointSegClassMapping',
25 | valid_cat_ids=tuple(range(len(class_names))),
26 | max_cat_id=13),
27 | dict(
28 | type='IndoorPatchPointSample',
29 | num_points=num_points,
30 | block_size=1.0,
31 | ignore_index=len(class_names),
32 | use_normalized_coord=True,
33 | enlarge_size=0.2,
34 | min_unique_num=None),
35 | dict(type='NormalizePointsColor', color_mean=None),
36 | dict(type='DefaultFormatBundle3D', class_names=class_names),
37 | dict(type='Collect3D', keys=['points', 'pts_semantic_mask'])
38 | ]
39 | test_pipeline = [
40 | dict(
41 | type='LoadPointsFromFile',
42 | coord_type='DEPTH',
43 | shift_height=False,
44 | use_color=True,
45 | load_dim=6,
46 | use_dim=[0, 1, 2, 3, 4, 5]),
47 | dict(type='NormalizePointsColor', color_mean=None),
48 | dict(
49 | # a wrapper in order to successfully call test function
50 | # actually we don't perform test-time-aug
51 | type='MultiScaleFlipAug3D',
52 | img_scale=(1333, 800),
53 | pts_scale_ratio=1,
54 | flip=False,
55 | transforms=[
56 | dict(
57 | type='GlobalRotScaleTrans',
58 | rot_range=[0, 0],
59 | scale_ratio_range=[1., 1.],
60 | translation_std=[0, 0, 0]),
61 | dict(
62 | type='RandomFlip3D',
63 | sync_2d=False,
64 | flip_ratio_bev_horizontal=0.0,
65 | flip_ratio_bev_vertical=0.0),
66 | dict(
67 | type='DefaultFormatBundle3D',
68 | class_names=class_names,
69 | with_label=False),
70 | dict(type='Collect3D', keys=['points'])
71 | ])
72 | ]
73 | # construct a pipeline for data and gt loading in show function
74 | # please keep its loading function consistent with test_pipeline (e.g. client)
75 | # we need to load gt seg_mask!
76 | eval_pipeline = [
77 | dict(
78 | type='LoadPointsFromFile',
79 | coord_type='DEPTH',
80 | shift_height=False,
81 | use_color=True,
82 | load_dim=6,
83 | use_dim=[0, 1, 2, 3, 4, 5]),
84 | dict(
85 | type='LoadAnnotations3D',
86 | with_bbox_3d=False,
87 | with_label_3d=False,
88 | with_mask_3d=False,
89 | with_seg_3d=True),
90 | dict(
91 | type='PointSegClassMapping',
92 | valid_cat_ids=tuple(range(len(class_names))),
93 | max_cat_id=13),
94 | dict(
95 | type='DefaultFormatBundle3D',
96 | with_label=False,
97 | class_names=class_names),
98 | dict(type='Collect3D', keys=['points', 'pts_semantic_mask'])
99 | ]
100 |
101 | data = dict(
102 | samples_per_gpu=8,
103 | workers_per_gpu=4,
104 | # train on area 1, 2, 3, 4, 6
105 | # test on area 5
106 | train=dict(
107 | type=dataset_type,
108 | data_root=data_root,
109 | ann_files=[
110 | data_root + f's3dis_infos_Area_{i}.pkl' for i in train_area
111 | ],
112 | pipeline=train_pipeline,
113 | classes=class_names,
114 | test_mode=False,
115 | ignore_index=len(class_names),
116 | scene_idxs=[
117 | data_root + f'seg_info/Area_{i}_resampled_scene_idxs.npy'
118 | for i in train_area
119 | ]),
120 | val=dict(
121 | type=dataset_type,
122 | data_root=data_root,
123 | ann_files=data_root + f's3dis_infos_Area_{test_area}.pkl',
124 | pipeline=test_pipeline,
125 | classes=class_names,
126 | test_mode=True,
127 | ignore_index=len(class_names),
128 | scene_idxs=data_root +
129 | f'seg_info/Area_{test_area}_resampled_scene_idxs.npy'),
130 | test=dict(
131 | type=dataset_type,
132 | data_root=data_root,
133 | ann_files=data_root + f's3dis_infos_Area_{test_area}.pkl',
134 | pipeline=test_pipeline,
135 | classes=class_names,
136 | test_mode=True,
137 | ignore_index=len(class_names)))
138 |
139 | evaluation = dict(pipeline=eval_pipeline)
140 |
--------------------------------------------------------------------------------
/projects/mmdet3d_plugin/core/bbox/coders/nms_free_coder.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | from mmdet.core.bbox import BaseBBoxCoder
4 | from mmdet.core.bbox.builder import BBOX_CODERS
5 | from projects.mmdet3d_plugin.core.bbox.util import denormalize_bbox
6 | import numpy as np
7 |
8 |
9 | @BBOX_CODERS.register_module()
10 | class NMSFreeCoder(BaseBBoxCoder):
11 | """Bbox coder for NMS-free detector.
12 | Args:
13 | pc_range (list[float]): Range of point cloud.
14 | post_center_range (list[float]): Limit of the center.
15 | Default: None.
16 | max_num (int): Max number to be kept. Default: 100.
17 | score_threshold (float): Threshold to filter boxes based on score.
18 | Default: None.
19 | code_size (int): Code size of bboxes. Default: 9
20 | """
21 |
22 | def __init__(self,
23 | pc_range,
24 | voxel_size=None,
25 | post_center_range=None,
26 | max_num=100,
27 | score_threshold=None,
28 | num_classes=10):
29 | self.pc_range = pc_range
30 | self.voxel_size = voxel_size
31 | self.post_center_range = post_center_range
32 | self.max_num = max_num
33 | self.score_threshold = score_threshold
34 | self.num_classes = num_classes
35 |
36 | def encode(self):
37 |
38 | pass
39 |
40 | def decode_single(self, cls_scores, bbox_preds):
41 | """Decode bboxes.
42 | Args:
43 | cls_scores (Tensor): Outputs from the classification head, \
44 | shape [num_query, cls_out_channels]. Note \
45 | cls_out_channels should includes background.
46 | bbox_preds (Tensor): Outputs from the regression \
47 | head with normalized coordinate format (cx, cy, w, l, cz, h, rot_sine, rot_cosine, vx, vy). \
48 | Shape [num_query, 9].
49 | Returns:
50 | list[dict]: Decoded boxes.
51 | """
52 | max_num = self.max_num
53 |
54 | cls_scores = cls_scores.sigmoid()
55 | scores, indexs = cls_scores.view(-1).topk(max_num)
56 | labels = indexs % self.num_classes
57 | bbox_index = indexs // self.num_classes
58 | bbox_preds = bbox_preds[bbox_index]
59 |
60 | final_box_preds = denormalize_bbox(bbox_preds, self.pc_range)
61 | final_scores = scores
62 | final_preds = labels
63 |
64 | # use score threshold
65 | if self.score_threshold is not None:
66 | thresh_mask = final_scores > self.score_threshold
67 | tmp_score = self.score_threshold
68 | while thresh_mask.sum() == 0:
69 | tmp_score *= 0.9
70 | if tmp_score < 0.01:
71 | thresh_mask = final_scores > -1
72 | break
73 | thresh_mask = final_scores >= tmp_score
74 |
75 | if self.post_center_range is not None:
76 | self.post_center_range = torch.tensor(
77 | self.post_center_range, device=scores.device)
78 | mask = (final_box_preds[..., :3] >=
79 | self.post_center_range[:3]).all(1)
80 | mask &= (final_box_preds[..., :3] <=
81 | self.post_center_range[3:]).all(1)
82 |
83 | if self.score_threshold:
84 | mask &= thresh_mask
85 |
86 | boxes3d = final_box_preds[mask]
87 | scores = final_scores[mask]
88 |
89 | labels = final_preds[mask]
90 | predictions_dict = {
91 | 'bboxes': boxes3d,
92 | 'scores': scores,
93 | 'labels': labels
94 | }
95 |
96 | else:
97 | raise NotImplementedError(
98 | 'Need to reorganize output as a batch, only '
99 | 'support post_center_range is not None for now!')
100 | return predictions_dict
101 |
102 | def decode(self, preds_dicts):
103 | """Decode bboxes.
104 | Args:
105 | all_cls_scores (Tensor): Outputs from the classification head, \
106 | shape [nb_dec, bs, num_query, cls_out_channels]. Note \
107 | cls_out_channels should includes background.
108 | all_bbox_preds (Tensor): Sigmoid outputs from the regression \
109 | head with normalized coordinate format (cx, cy, w, l, cz, h, rot_sine, rot_cosine, vx, vy). \
110 | Shape [nb_dec, bs, num_query, 9].
111 | Returns:
112 | list[dict]: Decoded boxes.
113 | """
114 | all_cls_scores = preds_dicts['all_cls_scores'][-1]
115 | all_bbox_preds = preds_dicts['all_bbox_preds'][-1]
116 |
117 | batch_size = all_cls_scores.size()[0]
118 | predictions_list = []
119 | for i in range(batch_size):
120 | predictions_list.append(self.decode_single(all_cls_scores[i], all_bbox_preds[i]))
121 | return predictions_list
122 |
123 |
--------------------------------------------------------------------------------
/projects/configs/_base_/datasets/kitti-3d-car.py:
--------------------------------------------------------------------------------
1 | # dataset settings
2 | dataset_type = 'KittiDataset'
3 | data_root = 'data/kitti/'
4 | class_names = ['Car']
5 | point_cloud_range = [0, -40, -3, 70.4, 40, 1]
6 | input_modality = dict(use_lidar=True, use_camera=False)
7 | db_sampler = dict(
8 | data_root=data_root,
9 | info_path=data_root + 'kitti_dbinfos_train.pkl',
10 | rate=1.0,
11 | prepare=dict(filter_by_difficulty=[-1], filter_by_min_points=dict(Car=5)),
12 | classes=class_names,
13 | sample_groups=dict(Car=15))
14 |
15 | file_client_args = dict(backend='disk')
16 | # Uncomment the following if use ceph or other file clients.
17 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
18 | # for more details.
19 | # file_client_args = dict(
20 | # backend='petrel', path_mapping=dict(data='s3://kitti_data/'))
21 |
22 | train_pipeline = [
23 | dict(
24 | type='LoadPointsFromFile',
25 | coord_type='LIDAR',
26 | load_dim=4,
27 | use_dim=4,
28 | file_client_args=file_client_args),
29 | dict(
30 | type='LoadAnnotations3D',
31 | with_bbox_3d=True,
32 | with_label_3d=True,
33 | file_client_args=file_client_args),
34 | dict(type='ObjectSample', db_sampler=db_sampler),
35 | dict(
36 | type='ObjectNoise',
37 | num_try=100,
38 | translation_std=[1.0, 1.0, 0.5],
39 | global_rot_range=[0.0, 0.0],
40 | rot_range=[-0.78539816, 0.78539816]),
41 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
42 | dict(
43 | type='GlobalRotScaleTrans',
44 | rot_range=[-0.78539816, 0.78539816],
45 | scale_ratio_range=[0.95, 1.05]),
46 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
47 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
48 | dict(type='PointShuffle'),
49 | dict(type='DefaultFormatBundle3D', class_names=class_names),
50 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
51 | ]
52 | test_pipeline = [
53 | dict(
54 | type='LoadPointsFromFile',
55 | coord_type='LIDAR',
56 | load_dim=4,
57 | use_dim=4,
58 | file_client_args=file_client_args),
59 | dict(
60 | type='MultiScaleFlipAug3D',
61 | img_scale=(1333, 800),
62 | pts_scale_ratio=1,
63 | flip=False,
64 | transforms=[
65 | dict(
66 | type='GlobalRotScaleTrans',
67 | rot_range=[0, 0],
68 | scale_ratio_range=[1., 1.],
69 | translation_std=[0, 0, 0]),
70 | dict(type='RandomFlip3D'),
71 | dict(
72 | type='PointsRangeFilter', point_cloud_range=point_cloud_range),
73 | dict(
74 | type='DefaultFormatBundle3D',
75 | class_names=class_names,
76 | with_label=False),
77 | dict(type='Collect3D', keys=['points'])
78 | ])
79 | ]
80 | # construct a pipeline for data and gt loading in show function
81 | # please keep its loading function consistent with test_pipeline (e.g. client)
82 | eval_pipeline = [
83 | dict(
84 | type='LoadPointsFromFile',
85 | coord_type='LIDAR',
86 | load_dim=4,
87 | use_dim=4,
88 | file_client_args=file_client_args),
89 | dict(
90 | type='DefaultFormatBundle3D',
91 | class_names=class_names,
92 | with_label=False),
93 | dict(type='Collect3D', keys=['points'])
94 | ]
95 |
96 | data = dict(
97 | samples_per_gpu=6,
98 | workers_per_gpu=4,
99 | train=dict(
100 | type='RepeatDataset',
101 | times=2,
102 | dataset=dict(
103 | type=dataset_type,
104 | data_root=data_root,
105 | ann_file=data_root + 'kitti_infos_train.pkl',
106 | split='training',
107 | pts_prefix='velodyne_reduced',
108 | pipeline=train_pipeline,
109 | modality=input_modality,
110 | classes=class_names,
111 | test_mode=False,
112 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
113 | # and box_type_3d='Depth' in sunrgbd and scannet dataset.
114 | box_type_3d='LiDAR')),
115 | val=dict(
116 | type=dataset_type,
117 | data_root=data_root,
118 | ann_file=data_root + 'kitti_infos_val.pkl',
119 | split='training',
120 | pts_prefix='velodyne_reduced',
121 | pipeline=test_pipeline,
122 | modality=input_modality,
123 | classes=class_names,
124 | test_mode=True,
125 | box_type_3d='LiDAR'),
126 | test=dict(
127 | type=dataset_type,
128 | data_root=data_root,
129 | ann_file=data_root + 'kitti_infos_val.pkl',
130 | split='training',
131 | pts_prefix='velodyne_reduced',
132 | pipeline=test_pipeline,
133 | modality=input_modality,
134 | classes=class_names,
135 | test_mode=True,
136 | box_type_3d='LiDAR'))
137 |
138 | evaluation = dict(interval=1, pipeline=eval_pipeline)
139 |
--------------------------------------------------------------------------------
/projects/configs/datasets/custom_lyft-3d.py:
--------------------------------------------------------------------------------
1 | # If point cloud range is changed, the models should also change their point
2 | # cloud range accordingly
3 | point_cloud_range = [-80, -80, -5, 80, 80, 3]
4 | # For Lyft we usually do 9-class detection
5 | class_names = [
6 | 'car', 'truck', 'bus', 'emergency_vehicle', 'other_vehicle', 'motorcycle',
7 | 'bicycle', 'pedestrian', 'animal'
8 | ]
9 | dataset_type = 'CustomLyftDataset'
10 | data_root = 'data/lyft/'
11 | # Input modality for Lyft dataset, this is consistent with the submission
12 | # format which requires the information in input_modality.
13 | input_modality = dict(
14 | use_lidar=True,
15 | use_camera=False,
16 | use_radar=False,
17 | use_map=False,
18 | use_external=True)
19 | file_client_args = dict(backend='disk')
20 | # Uncomment the following if use ceph or other file clients.
21 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
22 | # for more details.
23 | # file_client_args = dict(
24 | # backend='petrel',
25 | # path_mapping=dict({
26 | # './data/lyft/': 's3://lyft/lyft/',
27 | # 'data/lyft/': 's3://lyft/lyft/'
28 | # }))
29 | train_pipeline = [
30 | dict(
31 | type='LoadPointsFromFile',
32 | coord_type='LIDAR',
33 | load_dim=5,
34 | use_dim=5,
35 | file_client_args=file_client_args),
36 | dict(
37 | type='LoadPointsFromMultiSweeps',
38 | sweeps_num=10,
39 | file_client_args=file_client_args),
40 | dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
41 | dict(
42 | type='GlobalRotScaleTrans',
43 | rot_range=[-0.3925, 0.3925],
44 | scale_ratio_range=[0.95, 1.05],
45 | translation_std=[0, 0, 0]),
46 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
47 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
48 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
49 | dict(type='PointShuffle'),
50 | dict(type='DefaultFormatBundle3D', class_names=class_names),
51 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
52 | ]
53 | test_pipeline = [
54 | dict(
55 | type='LoadPointsFromFile',
56 | coord_type='LIDAR',
57 | load_dim=5,
58 | use_dim=5,
59 | file_client_args=file_client_args),
60 | dict(
61 | type='LoadPointsFromMultiSweeps',
62 | sweeps_num=10,
63 | file_client_args=file_client_args),
64 | dict(
65 | type='MultiScaleFlipAug3D',
66 | img_scale=(1333, 800),
67 | pts_scale_ratio=1,
68 | flip=False,
69 | transforms=[
70 | dict(
71 | type='GlobalRotScaleTrans',
72 | rot_range=[0, 0],
73 | scale_ratio_range=[1., 1.],
74 | translation_std=[0, 0, 0]),
75 | dict(type='RandomFlip3D'),
76 | dict(
77 | type='PointsRangeFilter', point_cloud_range=point_cloud_range),
78 | dict(
79 | type='DefaultFormatBundle3D',
80 | class_names=class_names,
81 | with_label=False),
82 | dict(type='Collect3D', keys=['points'])
83 | ])
84 | ]
85 | # construct a pipeline for data and gt loading in show function
86 | # please keep its loading function consistent with test_pipeline (e.g. client)
87 | eval_pipeline = [
88 | dict(
89 | type='LoadPointsFromFile',
90 | coord_type='LIDAR',
91 | load_dim=5,
92 | use_dim=5,
93 | file_client_args=file_client_args),
94 | dict(
95 | type='LoadPointsFromMultiSweeps',
96 | sweeps_num=10,
97 | file_client_args=file_client_args),
98 | dict(
99 | type='DefaultFormatBundle3D',
100 | class_names=class_names,
101 | with_label=False),
102 | dict(type='Collect3D', keys=['points'])
103 | ]
104 |
105 | data = dict(
106 | samples_per_gpu=2,
107 | workers_per_gpu=2,
108 | train=dict(
109 | type=dataset_type,
110 | data_root=data_root,
111 | ann_file=data_root + 'lyft_infos_train.pkl',
112 | pipeline=train_pipeline,
113 | classes=class_names,
114 | modality=input_modality,
115 | test_mode=False),
116 | val=dict(
117 | type=dataset_type,
118 | data_root=data_root,
119 | ann_file=data_root + 'lyft_infos_val.pkl',
120 | pipeline=test_pipeline,
121 | classes=class_names,
122 | modality=input_modality,
123 | test_mode=True),
124 | test=dict(
125 | type=dataset_type,
126 | data_root=data_root,
127 | ann_file=data_root + 'lyft_infos_val.pkl',
128 | pipeline=test_pipeline,
129 | classes=class_names,
130 | modality=input_modality,
131 | test_mode=True))
132 | # For Lyft dataset, we usually evaluate the model at the end of training.
133 | # Since the models are trained by 24 epochs by default, we set evaluation
134 | # interval to be 24. Please change the interval accordingly if you do not
135 | # use a default schedule.
136 | evaluation = dict(interval=24, pipeline=eval_pipeline)
--------------------------------------------------------------------------------
/projects/configs/_base_/datasets/lyft-3d.py:
--------------------------------------------------------------------------------
1 | # If point cloud range is changed, the models should also change their point
2 | # cloud range accordingly
3 | point_cloud_range = [-80, -80, -5, 80, 80, 3]
4 | # For Lyft we usually do 9-class detection
5 | class_names = [
6 | 'car', 'truck', 'bus', 'emergency_vehicle', 'other_vehicle', 'motorcycle',
7 | 'bicycle', 'pedestrian', 'animal'
8 | ]
9 | dataset_type = 'LyftDataset'
10 | data_root = 'data/lyft/'
11 | # Input modality for Lyft dataset, this is consistent with the submission
12 | # format which requires the information in input_modality.
13 | input_modality = dict(
14 | use_lidar=True,
15 | use_camera=False,
16 | use_radar=False,
17 | use_map=False,
18 | use_external=False)
19 | file_client_args = dict(backend='disk')
20 | # Uncomment the following if use ceph or other file clients.
21 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
22 | # for more details.
23 | # file_client_args = dict(
24 | # backend='petrel',
25 | # path_mapping=dict({
26 | # './data/lyft/': 's3://lyft/lyft/',
27 | # 'data/lyft/': 's3://lyft/lyft/'
28 | # }))
29 | train_pipeline = [
30 | dict(
31 | type='LoadPointsFromFile',
32 | coord_type='LIDAR',
33 | load_dim=5,
34 | use_dim=5,
35 | file_client_args=file_client_args),
36 | dict(
37 | type='LoadPointsFromMultiSweeps',
38 | sweeps_num=10,
39 | file_client_args=file_client_args),
40 | dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
41 | dict(
42 | type='GlobalRotScaleTrans',
43 | rot_range=[-0.3925, 0.3925],
44 | scale_ratio_range=[0.95, 1.05],
45 | translation_std=[0, 0, 0]),
46 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
47 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
48 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
49 | dict(type='PointShuffle'),
50 | dict(type='DefaultFormatBundle3D', class_names=class_names),
51 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
52 | ]
53 | test_pipeline = [
54 | dict(
55 | type='LoadPointsFromFile',
56 | coord_type='LIDAR',
57 | load_dim=5,
58 | use_dim=5,
59 | file_client_args=file_client_args),
60 | dict(
61 | type='LoadPointsFromMultiSweeps',
62 | sweeps_num=10,
63 | file_client_args=file_client_args),
64 | dict(
65 | type='MultiScaleFlipAug3D',
66 | img_scale=(1333, 800),
67 | pts_scale_ratio=1,
68 | flip=False,
69 | transforms=[
70 | dict(
71 | type='GlobalRotScaleTrans',
72 | rot_range=[0, 0],
73 | scale_ratio_range=[1., 1.],
74 | translation_std=[0, 0, 0]),
75 | dict(type='RandomFlip3D'),
76 | dict(
77 | type='PointsRangeFilter', point_cloud_range=point_cloud_range),
78 | dict(
79 | type='DefaultFormatBundle3D',
80 | class_names=class_names,
81 | with_label=False),
82 | dict(type='Collect3D', keys=['points'])
83 | ])
84 | ]
85 | # construct a pipeline for data and gt loading in show function
86 | # please keep its loading function consistent with test_pipeline (e.g. client)
87 | eval_pipeline = [
88 | dict(
89 | type='LoadPointsFromFile',
90 | coord_type='LIDAR',
91 | load_dim=5,
92 | use_dim=5,
93 | file_client_args=file_client_args),
94 | dict(
95 | type='LoadPointsFromMultiSweeps',
96 | sweeps_num=10,
97 | file_client_args=file_client_args),
98 | dict(
99 | type='DefaultFormatBundle3D',
100 | class_names=class_names,
101 | with_label=False),
102 | dict(type='Collect3D', keys=['points'])
103 | ]
104 |
105 | data = dict(
106 | samples_per_gpu=2,
107 | workers_per_gpu=2,
108 | train=dict(
109 | type=dataset_type,
110 | data_root=data_root,
111 | ann_file=data_root + 'lyft_infos_train.pkl',
112 | pipeline=train_pipeline,
113 | classes=class_names,
114 | modality=input_modality,
115 | test_mode=False),
116 | val=dict(
117 | type=dataset_type,
118 | data_root=data_root,
119 | ann_file=data_root + 'lyft_infos_val.pkl',
120 | pipeline=test_pipeline,
121 | classes=class_names,
122 | modality=input_modality,
123 | test_mode=True),
124 | test=dict(
125 | type=dataset_type,
126 | data_root=data_root,
127 | ann_file=data_root + 'lyft_infos_test.pkl',
128 | pipeline=test_pipeline,
129 | classes=class_names,
130 | modality=input_modality,
131 | test_mode=True))
132 | # For Lyft dataset, we usually evaluate the model at the end of training.
133 | # Since the models are trained by 24 epochs by default, we set evaluation
134 | # interval to be 24. Please change the interval accordingly if you do not
135 | # use a default schedule.
136 | evaluation = dict(interval=24, pipeline=eval_pipeline)
137 |
--------------------------------------------------------------------------------
/projects/configs/_base_/datasets/range100_lyft-3d.py:
--------------------------------------------------------------------------------
1 | # If point cloud range is changed, the models should also change their point
2 | # cloud range accordingly
3 | point_cloud_range = [-100, -100, -5, 100, 100, 3]
4 | # For Lyft we usually do 9-class detection
5 | class_names = [
6 | 'car', 'truck', 'bus', 'emergency_vehicle', 'other_vehicle', 'motorcycle',
7 | 'bicycle', 'pedestrian', 'animal'
8 | ]
9 | dataset_type = 'LyftDataset'
10 | data_root = 'data/lyft/'
11 | # Input modality for Lyft dataset, this is consistent with the submission
12 | # format which requires the information in input_modality.
13 | input_modality = dict(
14 | use_lidar=True,
15 | use_camera=False,
16 | use_radar=False,
17 | use_map=False,
18 | use_external=False)
19 | file_client_args = dict(backend='disk')
20 | # Uncomment the following if use ceph or other file clients.
21 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
22 | # for more details.
23 | # file_client_args = dict(
24 | # backend='petrel',
25 | # path_mapping=dict({
26 | # './data/lyft/': 's3://lyft/lyft/',
27 | # 'data/lyft/': 's3://lyft/lyft/'
28 | # }))
29 | train_pipeline = [
30 | dict(
31 | type='LoadPointsFromFile',
32 | coord_type='LIDAR',
33 | load_dim=5,
34 | use_dim=5,
35 | file_client_args=file_client_args),
36 | dict(
37 | type='LoadPointsFromMultiSweeps',
38 | sweeps_num=10,
39 | file_client_args=file_client_args),
40 | dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
41 | dict(
42 | type='GlobalRotScaleTrans',
43 | rot_range=[-0.3925, 0.3925],
44 | scale_ratio_range=[0.95, 1.05],
45 | translation_std=[0, 0, 0]),
46 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
47 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
48 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
49 | dict(type='PointShuffle'),
50 | dict(type='DefaultFormatBundle3D', class_names=class_names),
51 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
52 | ]
53 | test_pipeline = [
54 | dict(
55 | type='LoadPointsFromFile',
56 | coord_type='LIDAR',
57 | load_dim=5,
58 | use_dim=5,
59 | file_client_args=file_client_args),
60 | dict(
61 | type='LoadPointsFromMultiSweeps',
62 | sweeps_num=10,
63 | file_client_args=file_client_args),
64 | dict(
65 | type='MultiScaleFlipAug3D',
66 | img_scale=(1333, 800),
67 | pts_scale_ratio=1,
68 | flip=False,
69 | transforms=[
70 | dict(
71 | type='GlobalRotScaleTrans',
72 | rot_range=[0, 0],
73 | scale_ratio_range=[1., 1.],
74 | translation_std=[0, 0, 0]),
75 | dict(type='RandomFlip3D'),
76 | dict(
77 | type='PointsRangeFilter', point_cloud_range=point_cloud_range),
78 | dict(
79 | type='DefaultFormatBundle3D',
80 | class_names=class_names,
81 | with_label=False),
82 | dict(type='Collect3D', keys=['points'])
83 | ])
84 | ]
85 | # construct a pipeline for data and gt loading in show function
86 | # please keep its loading function consistent with test_pipeline (e.g. client)
87 | eval_pipeline = [
88 | dict(
89 | type='LoadPointsFromFile',
90 | coord_type='LIDAR',
91 | load_dim=5,
92 | use_dim=5,
93 | file_client_args=file_client_args),
94 | dict(
95 | type='LoadPointsFromMultiSweeps',
96 | sweeps_num=10,
97 | file_client_args=file_client_args),
98 | dict(
99 | type='DefaultFormatBundle3D',
100 | class_names=class_names,
101 | with_label=False),
102 | dict(type='Collect3D', keys=['points'])
103 | ]
104 |
105 | data = dict(
106 | samples_per_gpu=2,
107 | workers_per_gpu=2,
108 | train=dict(
109 | type=dataset_type,
110 | data_root=data_root,
111 | ann_file=data_root + 'lyft_infos_train.pkl',
112 | pipeline=train_pipeline,
113 | classes=class_names,
114 | modality=input_modality,
115 | test_mode=False),
116 | val=dict(
117 | type=dataset_type,
118 | data_root=data_root,
119 | ann_file=data_root + 'lyft_infos_val.pkl',
120 | pipeline=test_pipeline,
121 | classes=class_names,
122 | modality=input_modality,
123 | test_mode=True),
124 | test=dict(
125 | type=dataset_type,
126 | data_root=data_root,
127 | ann_file=data_root + 'lyft_infos_test.pkl',
128 | pipeline=test_pipeline,
129 | classes=class_names,
130 | modality=input_modality,
131 | test_mode=True))
132 | # For Lyft dataset, we usually evaluate the model at the end of training.
133 | # Since the models are trained by 24 epochs by default, we set evaluation
134 | # interval to be 24. Please change the interval accordingly if you do not
135 | # use a default schedule.
136 | evaluation = dict(interval=24, pipeline=eval_pipeline)
137 |
--------------------------------------------------------------------------------
/projects/configs/_base_/datasets/kitti-3d-3class.py:
--------------------------------------------------------------------------------
1 | # dataset settings
2 | dataset_type = 'KittiDataset'
3 | data_root = 'data/kitti/'
4 | class_names = ['Pedestrian', 'Cyclist', 'Car']
5 | point_cloud_range = [0, -40, -3, 70.4, 40, 1]
6 | input_modality = dict(use_lidar=True, use_camera=False)
7 | db_sampler = dict(
8 | data_root=data_root,
9 | info_path=data_root + 'kitti_dbinfos_train.pkl',
10 | rate=1.0,
11 | prepare=dict(
12 | filter_by_difficulty=[-1],
13 | filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)),
14 | classes=class_names,
15 | sample_groups=dict(Car=12, Pedestrian=6, Cyclist=6))
16 |
17 | file_client_args = dict(backend='disk')
18 | # Uncomment the following if use ceph or other file clients.
19 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
20 | # for more details.
21 | # file_client_args = dict(
22 | # backend='petrel', path_mapping=dict(data='s3://kitti_data/'))
23 |
24 | train_pipeline = [
25 | dict(
26 | type='LoadPointsFromFile',
27 | coord_type='LIDAR',
28 | load_dim=4,
29 | use_dim=4,
30 | file_client_args=file_client_args),
31 | dict(
32 | type='LoadAnnotations3D',
33 | with_bbox_3d=True,
34 | with_label_3d=True,
35 | file_client_args=file_client_args),
36 | dict(type='ObjectSample', db_sampler=db_sampler),
37 | dict(
38 | type='ObjectNoise',
39 | num_try=100,
40 | translation_std=[1.0, 1.0, 0.5],
41 | global_rot_range=[0.0, 0.0],
42 | rot_range=[-0.78539816, 0.78539816]),
43 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
44 | dict(
45 | type='GlobalRotScaleTrans',
46 | rot_range=[-0.78539816, 0.78539816],
47 | scale_ratio_range=[0.95, 1.05]),
48 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
49 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
50 | dict(type='PointShuffle'),
51 | dict(type='DefaultFormatBundle3D', class_names=class_names),
52 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
53 | ]
54 | test_pipeline = [
55 | dict(
56 | type='LoadPointsFromFile',
57 | coord_type='LIDAR',
58 | load_dim=4,
59 | use_dim=4,
60 | file_client_args=file_client_args),
61 | dict(
62 | type='MultiScaleFlipAug3D',
63 | img_scale=(1333, 800),
64 | pts_scale_ratio=1,
65 | flip=False,
66 | transforms=[
67 | dict(
68 | type='GlobalRotScaleTrans',
69 | rot_range=[0, 0],
70 | scale_ratio_range=[1., 1.],
71 | translation_std=[0, 0, 0]),
72 | dict(type='RandomFlip3D'),
73 | dict(
74 | type='PointsRangeFilter', point_cloud_range=point_cloud_range),
75 | dict(
76 | type='DefaultFormatBundle3D',
77 | class_names=class_names,
78 | with_label=False),
79 | dict(type='Collect3D', keys=['points'])
80 | ])
81 | ]
82 | # construct a pipeline for data and GT loading used by the show function
83 | # please keep its loading steps consistent with test_pipeline (e.g. the file client)
84 | eval_pipeline = [
85 | dict(
86 | type='LoadPointsFromFile',
87 | coord_type='LIDAR',
88 | load_dim=4,
89 | use_dim=4,
90 | file_client_args=file_client_args),
91 | dict(
92 | type='DefaultFormatBundle3D',
93 | class_names=class_names,
94 | with_label=False),
95 | dict(type='Collect3D', keys=['points'])
96 | ]
97 |
98 | data = dict(
99 | samples_per_gpu=6,
100 | workers_per_gpu=4,
101 | train=dict(
102 | type='RepeatDataset',
103 | times=2,
104 | dataset=dict(
105 | type=dataset_type,
106 | data_root=data_root,
107 | ann_file=data_root + 'kitti_infos_train.pkl',
108 | split='training',
109 | pts_prefix='velodyne_reduced',
110 | pipeline=train_pipeline,
111 | modality=input_modality,
112 | classes=class_names,
113 | test_mode=False,
114 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
115 | # and box_type_3d='Depth' in sunrgbd and scannet dataset.
116 | box_type_3d='LiDAR')),
117 | val=dict(
118 | type=dataset_type,
119 | data_root=data_root,
120 | ann_file=data_root + 'kitti_infos_val.pkl',
121 | split='training',
122 | pts_prefix='velodyne_reduced',
123 | pipeline=test_pipeline,
124 | modality=input_modality,
125 | classes=class_names,
126 | test_mode=True,
127 | box_type_3d='LiDAR'),
128 | test=dict(
129 | type=dataset_type,
130 | data_root=data_root,
131 | ann_file=data_root + 'kitti_infos_val.pkl',
132 | split='training',
133 | pts_prefix='velodyne_reduced',
134 | pipeline=test_pipeline,
135 | modality=input_modality,
136 | classes=class_names,
137 | test_mode=True,
138 | box_type_3d='LiDAR'))
139 |
140 | evaluation = dict(interval=1, pipeline=eval_pipeline)
141 |
--------------------------------------------------------------------------------
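Because pipelines are plain Python lists, changing a single step in a child config means redefining the whole list; when experimenting it can be easier to edit the loaded config object instead. A minimal sketch, assuming it is run from the repository root:

from mmcv import Config

# drop the ObjectNoise augmentation from the KITTI training pipeline
cfg = Config.fromfile('projects/configs/_base_/datasets/kitti-3d-3class.py')
cfg.data.train.dataset.pipeline = [
    step for step in cfg.data.train.dataset.pipeline
    if step['type'] != 'ObjectNoise'
]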
/projects/configs/_base_/datasets/waymoD5-3d-car.py:
--------------------------------------------------------------------------------
1 | # dataset settings
2 | # D5 in the config name means the whole dataset is divided into 5 folds
3 | # We only use one fold for efficient experiments
4 | dataset_type = 'WaymoDataset'
5 | data_root = 'data/waymo/kitti_format/'
6 | file_client_args = dict(backend='disk')
7 | # Uncomment the following if you use ceph or other file clients.
8 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
9 | # for more details.
10 | # file_client_args = dict(
11 | # backend='petrel', path_mapping=dict(data='s3://waymo_data/'))
12 |
13 | class_names = ['Car']
14 | point_cloud_range = [-74.88, -74.88, -2, 74.88, 74.88, 4]
15 | input_modality = dict(use_lidar=True, use_camera=False)
16 | db_sampler = dict(
17 | data_root=data_root,
18 | info_path=data_root + 'waymo_dbinfos_train.pkl',
19 | rate=1.0,
20 | prepare=dict(filter_by_difficulty=[-1], filter_by_min_points=dict(Car=5)),
21 | classes=class_names,
22 | sample_groups=dict(Car=15),
23 | points_loader=dict(
24 | type='LoadPointsFromFile',
25 | coord_type='LIDAR',
26 | load_dim=5,
27 | use_dim=[0, 1, 2, 3, 4],
28 | file_client_args=file_client_args))
29 |
30 | train_pipeline = [
31 | dict(
32 | type='LoadPointsFromFile',
33 | coord_type='LIDAR',
34 | load_dim=6,
35 | use_dim=5,
36 | file_client_args=file_client_args),
37 | dict(
38 | type='LoadAnnotations3D',
39 | with_bbox_3d=True,
40 | with_label_3d=True,
41 | file_client_args=file_client_args),
42 | dict(type='ObjectSample', db_sampler=db_sampler),
43 | dict(
44 | type='RandomFlip3D',
45 | sync_2d=False,
46 | flip_ratio_bev_horizontal=0.5,
47 | flip_ratio_bev_vertical=0.5),
48 | dict(
49 | type='GlobalRotScaleTrans',
50 | rot_range=[-0.78539816, 0.78539816],
51 | scale_ratio_range=[0.95, 1.05]),
52 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
53 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
54 | dict(type='PointShuffle'),
55 | dict(type='DefaultFormatBundle3D', class_names=class_names),
56 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
57 | ]
58 | test_pipeline = [
59 | dict(
60 | type='LoadPointsFromFile',
61 | coord_type='LIDAR',
62 | load_dim=6,
63 | use_dim=5,
64 | file_client_args=file_client_args),
65 | dict(
66 | type='MultiScaleFlipAug3D',
67 | img_scale=(1333, 800),
68 | pts_scale_ratio=1,
69 | flip=False,
70 | transforms=[
71 | dict(
72 | type='GlobalRotScaleTrans',
73 | rot_range=[0, 0],
74 | scale_ratio_range=[1., 1.],
75 | translation_std=[0, 0, 0]),
76 | dict(type='RandomFlip3D'),
77 | dict(
78 | type='PointsRangeFilter', point_cloud_range=point_cloud_range),
79 | dict(
80 | type='DefaultFormatBundle3D',
81 | class_names=class_names,
82 | with_label=False),
83 | dict(type='Collect3D', keys=['points'])
84 | ])
85 | ]
86 | # construct a pipeline for data and GT loading used by the show function
87 | # please keep its loading steps consistent with test_pipeline (e.g. the file client)
88 | eval_pipeline = [
89 | dict(
90 | type='LoadPointsFromFile',
91 | coord_type='LIDAR',
92 | load_dim=6,
93 | use_dim=5,
94 | file_client_args=file_client_args),
95 | dict(
96 | type='DefaultFormatBundle3D',
97 | class_names=class_names,
98 | with_label=False),
99 | dict(type='Collect3D', keys=['points'])
100 | ]
101 |
102 | data = dict(
103 | samples_per_gpu=2,
104 | workers_per_gpu=4,
105 | train=dict(
106 | type='RepeatDataset',
107 | times=2,
108 | dataset=dict(
109 | type=dataset_type,
110 | data_root=data_root,
111 | ann_file=data_root + 'waymo_infos_train.pkl',
112 | split='training',
113 | pipeline=train_pipeline,
114 | modality=input_modality,
115 | classes=class_names,
116 | test_mode=False,
117 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
118 | # and box_type_3d='Depth' in sunrgbd and scannet dataset.
119 | box_type_3d='LiDAR',
120 | # load one frame every five frames
121 | load_interval=5)),
122 | val=dict(
123 | type=dataset_type,
124 | data_root=data_root,
125 | ann_file=data_root + 'waymo_infos_val.pkl',
126 | split='training',
127 | pipeline=test_pipeline,
128 | modality=input_modality,
129 | classes=class_names,
130 | test_mode=True,
131 | box_type_3d='LiDAR'),
132 | test=dict(
133 | type=dataset_type,
134 | data_root=data_root,
135 | ann_file=data_root + 'waymo_infos_val.pkl',
136 | split='training',
137 | pipeline=test_pipeline,
138 | modality=input_modality,
139 | classes=class_names,
140 | test_mode=True,
141 | box_type_3d='LiDAR'))
142 |
143 | evaluation = dict(interval=24, pipeline=eval_pipeline)
144 |
--------------------------------------------------------------------------------
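With load_interval=5 only every fifth annotated frame is used, and RepeatDataset with times=2 then cycles that subset twice per epoch, so one epoch iterates roughly 2/5 of the training frames. A minimal sketch of a child config that restores the full training split; the _base_ path is an assumption about where the child file lives:

# hypothetical override config; the _base_ path is an assumption
_base_ = ['./waymoD5-3d-car.py']

data = dict(train=dict(dataset=dict(load_interval=1)))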
/projects/configs/_base_/datasets/waymoD5-3d-3class.py:
--------------------------------------------------------------------------------
1 | # dataset settings
2 | # D5 in the config name means the whole dataset is divided into 5 folds
3 | # We only use one fold for efficient experiments
4 | dataset_type = 'LidarWaymoDataset'
5 | data_root = 'data/waymo-full/kitti_format/'
6 | file_client_args = dict(backend='disk')
7 | # Uncomment the following if you use ceph or other file clients.
8 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
9 | # for more details.
10 | # file_client_args = dict(
11 | # backend='petrel', path_mapping=dict(data='s3://waymo_data/'))
12 |
13 | class_names = ['Car', 'Pedestrian', 'Cyclist']
14 | point_cloud_range = [-74.88, -74.88, -2, 74.88, 74.88, 4]
15 | input_modality = dict(use_lidar=True, use_camera=False)
16 | db_sampler = dict(
17 | data_root=data_root,
18 | info_path=data_root + 'waymo_dbinfos_train.pkl',
19 | rate=1.0,
20 | prepare=dict(
21 | filter_by_difficulty=[-1],
22 | filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)),
23 | classes=class_names,
24 | sample_groups=dict(Car=15, Pedestrian=10, Cyclist=10),
25 | points_loader=dict(
26 | type='LoadPointsFromFile',
27 | coord_type='LIDAR',
28 | load_dim=5,
29 | use_dim=[0, 1, 2, 3, 4],
30 | file_client_args=file_client_args))
31 |
32 | train_pipeline = [
33 | dict(
34 | type='LoadPointsFromFile',
35 | coord_type='LIDAR',
36 | load_dim=6,
37 | use_dim=5,
38 | file_client_args=file_client_args),
39 | dict(
40 | type='LoadAnnotations3D',
41 | with_bbox_3d=True,
42 | with_label_3d=True,
43 | file_client_args=file_client_args),
44 | dict(type='ObjectSample', db_sampler=db_sampler),
45 | dict(
46 | type='RandomFlip3D',
47 | sync_2d=False,
48 | flip_ratio_bev_horizontal=0.5,
49 | flip_ratio_bev_vertical=0.5),
50 | dict(
51 | type='GlobalRotScaleTrans',
52 | rot_range=[-0.78539816, 0.78539816],
53 | scale_ratio_range=[0.95, 1.05]),
54 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
55 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
56 | dict(type='PointShuffle'),
57 | dict(type='DefaultFormatBundle3D', class_names=class_names),
58 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
59 | ]
60 | test_pipeline = [
61 | dict(
62 | type='LoadPointsFromFile',
63 | coord_type='LIDAR',
64 | load_dim=6,
65 | use_dim=5,
66 | file_client_args=file_client_args),
67 | dict(
68 | type='MultiScaleFlipAug3D',
69 | img_scale=(1333, 800),
70 | pts_scale_ratio=1,
71 | flip=False,
72 | transforms=[
73 | dict(
74 | type='GlobalRotScaleTrans',
75 | rot_range=[0, 0],
76 | scale_ratio_range=[1., 1.],
77 | translation_std=[0, 0, 0]),
78 | dict(type='RandomFlip3D'),
79 | dict(
80 | type='PointsRangeFilter', point_cloud_range=point_cloud_range),
81 | dict(
82 | type='DefaultFormatBundle3D',
83 | class_names=class_names,
84 | with_label=False),
85 | dict(type='Collect3D', keys=['points'])
86 | ])
87 | ]
88 | # construct a pipeline for data and GT loading used by the show function
89 | # please keep its loading steps consistent with test_pipeline (e.g. the file client)
90 | eval_pipeline = [
91 | dict(
92 | type='LoadPointsFromFile',
93 | coord_type='LIDAR',
94 | load_dim=6,
95 | use_dim=5,
96 | file_client_args=file_client_args),
97 | dict(
98 | type='DefaultFormatBundle3D',
99 | class_names=class_names,
100 | with_label=False),
101 | dict(type='Collect3D', keys=['points'])
102 | ]
103 |
104 | data = dict(
105 | samples_per_gpu=2,
106 | workers_per_gpu=4,
107 | train=dict(
108 | type='RepeatDataset',
109 | times=2,
110 | dataset=dict(
111 | type=dataset_type,
112 | data_root=data_root,
113 | ann_file=data_root + 'waymo_infos_train.pkl',
114 | split='training',
115 | pipeline=train_pipeline,
116 | modality=input_modality,
117 | classes=class_names,
118 | test_mode=False,
119 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
120 | # and box_type_3d='Depth' in sunrgbd and scannet dataset.
121 | box_type_3d='LiDAR',
122 | # load one frame every five frames
123 | load_interval=5)),
124 | val=dict(
125 | type=dataset_type,
126 | data_root=data_root,
127 | ann_file=data_root + 'waymo_infos_val.pkl',
128 | split='training',
129 | pipeline=test_pipeline,
130 | modality=input_modality,
131 | classes=class_names,
132 | test_mode=True,
133 | box_type_3d='LiDAR'),
134 | test=dict(
135 | type=dataset_type,
136 | data_root=data_root,
137 | ann_file=data_root + 'waymo_infos_val.pkl',
138 | split='training',
139 | pipeline=test_pipeline,
140 | modality=input_modality,
141 | classes=class_names,
142 | test_mode=True,
143 | box_type_3d='LiDAR'))
144 |
145 | evaluation = dict(interval=24, pipeline=eval_pipeline)
146 |
--------------------------------------------------------------------------------
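The db_sampler above pastes up to 15 Car, 10 Pedestrian and 10 Cyclist ground-truth boxes from waymo_dbinfos_train.pkl into each training sample, skipping database boxes with fewer than 5/10/10 points respectively. A minimal sketch for inspecting those settings from the loaded config, assuming it is run from the repository root:

from mmcv import Config

cfg = Config.fromfile('projects/configs/_base_/datasets/waymoD5-3d-3class.py')
sampler_step = [s for s in cfg.train_pipeline if s['type'] == 'ObjectSample'][0]
print(sampler_step['db_sampler']['sample_groups'])  # {'Car': 15, 'Pedestrian': 10, 'Cyclist': 10}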
/projects/configs/datasets/custom_nus-3d.py:
--------------------------------------------------------------------------------
1 | # If point cloud range is changed, the models should also change their point
2 | # cloud range accordingly
3 | point_cloud_range = [-50, -50, -5, 50, 50, 3]
4 | # For nuScenes we usually do 10-class detection
5 | class_names = [
6 | 'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
7 | 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
8 | ]
9 | dataset_type = 'NuScenesDataset_eval_modified'
10 | data_root = 'data/nuscenes/'
11 | # Input modality for nuScenes dataset, this is consistent with the submission
12 | # format which requires the information in input_modality.
13 | input_modality = dict(
14 | use_lidar=True,
15 | use_camera=False,
16 | use_radar=False,
17 | use_map=False,
18 | use_external=False)
19 | file_client_args = dict(backend='disk')
20 | # Uncomment the following if you use ceph or other file clients.
21 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
22 | # for more details.
23 | # file_client_args = dict(
24 | # backend='petrel',
25 | # path_mapping=dict({
26 | # './data/nuscenes/': 's3://nuscenes/nuscenes/',
27 | # 'data/nuscenes/': 's3://nuscenes/nuscenes/'
28 | # }))
29 | train_pipeline = [
30 | dict(
31 | type='LoadPointsFromFile',
32 | coord_type='LIDAR',
33 | load_dim=5,
34 | use_dim=5,
35 | file_client_args=file_client_args),
36 | dict(
37 | type='LoadPointsFromMultiSweeps',
38 | sweeps_num=10,
39 | file_client_args=file_client_args),
40 | dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
41 | dict(
42 | type='GlobalRotScaleTrans',
43 | rot_range=[-0.3925, 0.3925],
44 | scale_ratio_range=[0.95, 1.05],
45 | translation_std=[0, 0, 0]),
46 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
47 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
48 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
49 | dict(type='ObjectNameFilter', classes=class_names),
50 | dict(type='PointShuffle'),
51 | dict(type='DefaultFormatBundle3D', class_names=class_names),
52 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
53 | ]
54 | test_pipeline = [
55 | dict(
56 | type='LoadPointsFromFile',
57 | coord_type='LIDAR',
58 | load_dim=5,
59 | use_dim=5,
60 | file_client_args=file_client_args),
61 | dict(
62 | type='LoadPointsFromMultiSweeps',
63 | sweeps_num=10,
64 | file_client_args=file_client_args),
65 | dict(
66 | type='MultiScaleFlipAug3D',
67 | img_scale=(1333, 800),
68 | pts_scale_ratio=1,
69 | flip=False,
70 | transforms=[
71 | dict(
72 | type='GlobalRotScaleTrans',
73 | rot_range=[0, 0],
74 | scale_ratio_range=[1., 1.],
75 | translation_std=[0, 0, 0]),
76 | dict(type='RandomFlip3D'),
77 | dict(
78 | type='PointsRangeFilter', point_cloud_range=point_cloud_range),
79 | dict(
80 | type='DefaultFormatBundle3D',
81 | class_names=class_names,
82 | with_label=False),
83 | dict(type='Collect3D', keys=['points'])
84 | ])
85 | ]
86 | # construct a pipeline for data and GT loading used by the show function
87 | # please keep its loading steps consistent with test_pipeline (e.g. the file client)
88 | eval_pipeline = [
89 | dict(
90 | type='LoadPointsFromFile',
91 | coord_type='LIDAR',
92 | load_dim=5,
93 | use_dim=5,
94 | file_client_args=file_client_args),
95 | dict(
96 | type='LoadPointsFromMultiSweeps',
97 | sweeps_num=10,
98 | file_client_args=file_client_args),
99 | dict(
100 | type='DefaultFormatBundle3D',
101 | class_names=class_names,
102 | with_label=False),
103 | dict(type='Collect3D', keys=['points'])
104 | ]
105 |
106 | data = dict(
107 | samples_per_gpu=4,
108 | workers_per_gpu=4,
109 | train=dict(
110 | type=dataset_type,
111 | data_root=data_root,
112 | ann_file=data_root + 'nuscenes_infos_train.pkl',
113 | pipeline=train_pipeline,
114 | classes=class_names,
115 | modality=input_modality,
116 | test_mode=False,
117 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
118 | # and box_type_3d='Depth' in sunrgbd and scannet dataset.
119 | box_type_3d='LiDAR'),
120 | val=dict(
121 | type=dataset_type,
122 | ann_file=data_root + 'nuscenes_infos_val.pkl',
123 | pipeline=test_pipeline,
124 | classes=class_names,
125 | modality=input_modality,
126 | test_mode=True,
127 | box_type_3d='LiDAR'),
128 | test=dict(
129 | type=dataset_type,
130 | data_root=data_root,
131 | ann_file=data_root + 'nuscenes_infos_val.pkl',
132 | pipeline=test_pipeline,
133 | classes=class_names,
134 | modality=input_modality,
135 | test_mode=True,
136 | box_type_3d='LiDAR'))
137 | # For the nuScenes dataset, we usually evaluate the model at the end of training.
138 | # Since the models are trained for 24 epochs by default, we set the
139 | # evaluation interval to 24. Please change the interval accordingly if you do
140 | # not use the default schedule.
141 | evaluation = dict(interval=24, pipeline=eval_pipeline)
142 |
--------------------------------------------------------------------------------
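The dataset type 'NuScenesDataset_eval_modified' is not part of stock mmdet3d, so the plugin package has to be imported before this config can be built. A minimal sketch, run from the repository root; the plugin module path and the fact that it registers the dataset on import are assumptions:

from mmcv import Config
from mmdet3d.datasets import build_dataset

import projects.mmdet3d_plugin  # noqa: F401  (assumed to register the custom dataset)

cfg = Config.fromfile('projects/configs/datasets/custom_nus-3d.py')
val_set = build_dataset(cfg.data.val)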
/projects/configs/_base_/datasets/nus-3d.py:
--------------------------------------------------------------------------------
1 | # If point cloud range is changed, the models should also change their point
2 | # cloud range accordingly
3 | point_cloud_range = [-50, -50, -5, 50, 50, 3]
4 | # For nuScenes we usually do 10-class detection
5 | class_names = [
6 | 'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
7 | 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
8 | ]
9 | dataset_type = 'NuScenesDataset'
10 | data_root = 'data/nuscenes/'
11 | # Input modality for nuScenes dataset, this is consistent with the submission
12 | # format which requires the information in input_modality.
13 | input_modality = dict(
14 | use_lidar=True,
15 | use_camera=False,
16 | use_radar=False,
17 | use_map=False,
18 | use_external=False)
19 | file_client_args = dict(backend='disk')
20 | # Uncomment the following if you use ceph or other file clients.
21 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
22 | # for more details.
23 | # file_client_args = dict(
24 | # backend='petrel',
25 | # path_mapping=dict({
26 | # './data/nuscenes/': 's3://nuscenes/nuscenes/',
27 | # 'data/nuscenes/': 's3://nuscenes/nuscenes/'
28 | # }))
29 | train_pipeline = [
30 | dict(
31 | type='LoadPointsFromFile',
32 | coord_type='LIDAR',
33 | load_dim=5,
34 | use_dim=5,
35 | file_client_args=file_client_args),
36 | dict(
37 | type='LoadPointsFromMultiSweeps',
38 | sweeps_num=10,
39 | file_client_args=file_client_args),
40 | dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
41 | dict(
42 | type='GlobalRotScaleTrans',
43 | rot_range=[-0.3925, 0.3925],
44 | scale_ratio_range=[0.95, 1.05],
45 | translation_std=[0, 0, 0]),
46 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
47 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
48 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
49 | dict(type='ObjectNameFilter', classes=class_names),
50 | dict(type='PointShuffle'),
51 | dict(type='DefaultFormatBundle3D', class_names=class_names),
52 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
53 | ]
54 | test_pipeline = [
55 | dict(
56 | type='LoadPointsFromFile',
57 | coord_type='LIDAR',
58 | load_dim=5,
59 | use_dim=5,
60 | file_client_args=file_client_args),
61 | dict(
62 | type='LoadPointsFromMultiSweeps',
63 | sweeps_num=10,
64 | file_client_args=file_client_args),
65 | dict(
66 | type='MultiScaleFlipAug3D',
67 | img_scale=(1333, 800),
68 | pts_scale_ratio=1,
69 | flip=False,
70 | transforms=[
71 | dict(
72 | type='GlobalRotScaleTrans',
73 | rot_range=[0, 0],
74 | scale_ratio_range=[1., 1.],
75 | translation_std=[0, 0, 0]),
76 | dict(type='RandomFlip3D'),
77 | dict(
78 | type='PointsRangeFilter', point_cloud_range=point_cloud_range),
79 | dict(
80 | type='DefaultFormatBundle3D',
81 | class_names=class_names,
82 | with_label=False),
83 | dict(type='Collect3D', keys=['points'])
84 | ])
85 | ]
86 | # construct a pipeline for data and GT loading used by the show function
87 | # please keep its loading steps consistent with test_pipeline (e.g. the file client)
88 | eval_pipeline = [
89 | dict(
90 | type='LoadPointsFromFile',
91 | coord_type='LIDAR',
92 | load_dim=5,
93 | use_dim=5,
94 | file_client_args=file_client_args),
95 | dict(
96 | type='LoadPointsFromMultiSweeps',
97 | sweeps_num=10,
98 | file_client_args=file_client_args),
99 | dict(
100 | type='DefaultFormatBundle3D',
101 | class_names=class_names,
102 | with_label=False),
103 | dict(type='Collect3D', keys=['points'])
104 | ]
105 |
106 | data = dict(
107 | samples_per_gpu=4,
108 | workers_per_gpu=4,
109 | train=dict(
110 | type=dataset_type,
111 | data_root=data_root,
112 | ann_file=data_root + 'nuscenes_infos_train.pkl',
113 | pipeline=train_pipeline,
114 | classes=class_names,
115 | modality=input_modality,
116 | test_mode=False,
117 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
118 | # and box_type_3d='Depth' in sunrgbd and scannet dataset.
119 | box_type_3d='LiDAR'),
120 | val=dict(
121 | type=dataset_type,
122 | data_root=data_root,
123 | ann_file=data_root + 'nuscenes_infos_val.pkl',
124 | pipeline=test_pipeline,
125 | classes=class_names,
126 | modality=input_modality,
127 | test_mode=True,
128 | box_type_3d='LiDAR'),
129 | test=dict(
130 | type=dataset_type,
131 | data_root=data_root,
132 | ann_file=data_root + 'nuscenes_infos_val.pkl',
133 | pipeline=test_pipeline,
134 | classes=class_names,
135 | modality=input_modality,
136 | test_mode=True,
137 | box_type_3d='LiDAR'))
138 | # For the nuScenes dataset, we usually evaluate the model at the end of training.
139 | # Since the models are trained for 24 epochs by default, we set the
140 | # evaluation interval to 24. Please change the interval accordingly if you do
141 | # not use the default schedule.
142 | evaluation = dict(interval=24, pipeline=eval_pipeline)
143 |
--------------------------------------------------------------------------------
/tools/data_converter/indoor_converter.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import mmcv
3 | import numpy as np
4 | import os
5 | import sys
6 | # Make the repository root importable when this script is run directly;
7 | # adjust or remove this if the repository is already on PYTHONPATH.
8 | repo_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
9 | sys.path.insert(0, repo_root)
10 | from tools.data_converter.s3dis_data_utils import S3DISData, S3DISSegData
11 | from tools.data_converter.scannet_data_utils import ScanNetData, ScanNetSegData
12 | from tools.data_converter.sunrgbd_data_utils import SUNRGBDData
13 |
14 |
15 | def create_indoor_info_file(data_path,
16 | pkl_prefix='sunrgbd',
17 | save_path=None,
18 | use_v1=False,
19 | workers=4):
20 | """Create indoor information file.
21 |
22 | Get information of the raw data and save it to the pkl file.
23 |
24 | Args:
25 | data_path (str): Path of the data.
26 | pkl_prefix (str): Prefix of the pkl to be saved. Default: 'sunrgbd'.
27 | save_path (str): Path of the pkl to be saved. Default: None.
28 | use_v1 (bool): Whether to use v1. Default: False.
29 | workers (int): Number of threads to be used. Default: 4.
30 | """
31 | assert os.path.exists(data_path)
32 | assert pkl_prefix in ['sunrgbd', 'scannet', 's3dis'], \
33 | f'unsupported indoor dataset {pkl_prefix}'
34 | save_path = data_path if save_path is None else save_path
35 | assert os.path.exists(save_path)
36 |
37 | # generate infos for both detection and segmentation task
38 | if pkl_prefix in ['sunrgbd', 'scannet']:
39 | train_filename = os.path.join(save_path,
40 | f'{pkl_prefix}_infos_train.pkl')
41 | val_filename = os.path.join(save_path, f'{pkl_prefix}_infos_val.pkl')
42 | if pkl_prefix == 'sunrgbd':
43 | # SUN RGB-D has a train-val split
44 | train_dataset = SUNRGBDData(
45 | root_path=data_path, split='train', use_v1=use_v1)
46 | val_dataset = SUNRGBDData(
47 | root_path=data_path, split='val', use_v1=use_v1)
48 | else:
49 | # ScanNet has a train-val-test split
50 | train_dataset = ScanNetData(root_path=data_path, split='train')
51 | val_dataset = ScanNetData(root_path=data_path, split='val')
52 | test_dataset = ScanNetData(root_path=data_path, split='test')
53 | test_filename = os.path.join(save_path,
54 | f'{pkl_prefix}_infos_test.pkl')
55 |
56 | infos_train = train_dataset.get_infos(
57 | num_workers=workers, has_label=True)
58 | mmcv.dump(infos_train, train_filename, 'pkl')
59 | print(f'{pkl_prefix} info train file is saved to {train_filename}')
60 |
61 | infos_val = val_dataset.get_infos(num_workers=workers, has_label=True)
62 | mmcv.dump(infos_val, val_filename, 'pkl')
63 | print(f'{pkl_prefix} info val file is saved to {val_filename}')
64 |
65 | if pkl_prefix == 'scannet':
66 | infos_test = test_dataset.get_infos(
67 | num_workers=workers, has_label=False)
68 | mmcv.dump(infos_test, test_filename, 'pkl')
69 | print(f'{pkl_prefix} info test file is saved to {test_filename}')
70 |
71 | # generate infos for the semantic segmentation task
72 | # e.g. re-sampled scene indexes and label weights
73 | # scene indexes are used to re-sample rooms with different number of points
74 | # label weights are used to balance classes with different number of points
75 | if pkl_prefix == 'scannet':
76 | # label weight computation function is adopted from
77 | # https://github.com/charlesq34/pointnet2/blob/master/scannet/scannet_dataset.py#L24
78 | train_dataset = ScanNetSegData(
79 | data_root=data_path,
80 | ann_file=train_filename,
81 | split='train',
82 | num_points=8192,
83 | label_weight_func=lambda x: 1.0 / np.log(1.2 + x))
84 | # TODO: do we need to generate on val set?
85 | val_dataset = ScanNetSegData(
86 | data_root=data_path,
87 | ann_file=val_filename,
88 | split='val',
89 | num_points=8192,
90 | label_weight_func=lambda x: 1.0 / np.log(1.2 + x))
91 | # no need to generate for test set
92 | train_dataset.get_seg_infos()
93 | val_dataset.get_seg_infos()
94 | elif pkl_prefix == 's3dis':
95 | # S3DIS doesn't have a fixed train-val split
96 | # it has 6 areas instead, so we generate info file for each of them
97 | # in training, we will use dataset to wrap different areas
98 | splits = [f'Area_{i}' for i in [1, 2, 3, 4, 5, 6]]
99 | for split in splits:
100 | dataset = S3DISData(root_path=data_path, split=split)
101 | info = dataset.get_infos(num_workers=workers, has_label=True)
102 | filename = os.path.join(save_path,
103 | f'{pkl_prefix}_infos_{split}.pkl')
104 | mmcv.dump(info, filename, 'pkl')
105 | print(f'{pkl_prefix} info {split} file is saved to {filename}')
106 | seg_dataset = S3DISSegData(
107 | data_root=data_path,
108 | ann_file=filename,
109 | split=split,
110 | num_points=4096,
111 | label_weight_func=lambda x: 1.0 / np.log(1.2 + x))
112 | seg_dataset.get_seg_infos()
113 |
--------------------------------------------------------------------------------
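A minimal usage sketch for create_indoor_info_file; the data paths below are placeholders and assume ScanNet has already been downloaded and preprocessed:

from tools.data_converter.indoor_converter import create_indoor_info_file

create_indoor_info_file(
    data_path='./data/scannet',
    pkl_prefix='scannet',
    save_path='./data/scannet',
    workers=4)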
/projects/mmdet3d_plugin/models/opt/adamw.py:
--------------------------------------------------------------------------------
1 | try:
2 | from torch.optim import _functional as F
3 | except ImportError:
4 | print('WARNING: torch.optim._functional is unavailable; torch>=1.8 is recommended.')
5 |
6 | import torch
7 | from torch.optim.optimizer import Optimizer
8 | from mmcv.runner.optimizer.builder import OPTIMIZERS
9 |
10 | @OPTIMIZERS.register_module()
11 | class AdamW2(Optimizer):
12 | r"""Implements the AdamW algorithm, working around a bug in torch 1.8.
13 |
14 | The original Adam algorithm was proposed in `Adam: A Method for Stochastic Optimization`_.
15 | The AdamW variant was proposed in `Decoupled Weight Decay Regularization`_.
16 |
17 | Args:
18 | params (iterable): iterable of parameters to optimize or dicts defining
19 | parameter groups
20 | lr (float, optional): learning rate (default: 1e-3)
21 | betas (Tuple[float, float], optional): coefficients used for computing
22 | running averages of gradient and its square (default: (0.9, 0.999))
23 | eps (float, optional): term added to the denominator to improve
24 | numerical stability (default: 1e-8)
25 | weight_decay (float, optional): weight decay coefficient (default: 1e-2)
26 | amsgrad (boolean, optional): whether to use the AMSGrad variant of this
27 | algorithm from the paper `On the Convergence of Adam and Beyond`_
28 | (default: False)
29 |
30 | .. _Adam\: A Method for Stochastic Optimization:
31 | https://arxiv.org/abs/1412.6980
32 | .. _Decoupled Weight Decay Regularization:
33 | https://arxiv.org/abs/1711.05101
34 | .. _On the Convergence of Adam and Beyond:
35 | https://openreview.net/forum?id=ryQu7f-RZ
36 | """
37 |
38 | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8,
39 | weight_decay=1e-2, amsgrad=False):
40 | if not 0.0 <= lr:
41 | raise ValueError("Invalid learning rate: {}".format(lr))
42 | if not 0.0 <= eps:
43 | raise ValueError("Invalid epsilon value: {}".format(eps))
44 | if not 0.0 <= betas[0] < 1.0:
45 | raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0]))
46 | if not 0.0 <= betas[1] < 1.0:
47 | raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1]))
48 | if not 0.0 <= weight_decay:
49 | raise ValueError("Invalid weight_decay value: {}".format(weight_decay))
50 | defaults = dict(lr=lr, betas=betas, eps=eps,
51 | weight_decay=weight_decay, amsgrad=amsgrad)
52 | super(AdamW2, self).__init__(params, defaults)
53 |
54 | def __setstate__(self, state):
55 | super(AdamW2, self).__setstate__(state)
56 | for group in self.param_groups:
57 | group.setdefault('amsgrad', False)
58 |
59 | @torch.no_grad()
60 | def step(self, closure=None):
61 | """Performs a single optimization step.
62 |
63 | Args:
64 | closure (callable, optional): A closure that reevaluates the model
65 | and returns the loss.
66 | """
67 | loss = None
68 | if closure is not None:
69 | with torch.enable_grad():
70 | loss = closure()
71 |
72 | for group in self.param_groups:
73 | params_with_grad = []
74 | grads = []
75 | exp_avgs = []
76 | exp_avg_sqs = []
77 | state_sums = []
78 | max_exp_avg_sqs = []
79 | state_steps = []
80 | amsgrad = group['amsgrad']
81 |
82 | # fetch betas once per group, before the per-parameter loop (this placement avoids the torch 1.8 bug)
83 | beta1, beta2 = group['betas']
84 |
85 | for p in group['params']:
86 | if p.grad is None:
87 | continue
88 | params_with_grad.append(p)
89 | if p.grad.is_sparse:
90 | raise RuntimeError('AdamW does not support sparse gradients')
91 | grads.append(p.grad)
92 |
93 | state = self.state[p]
94 |
95 | # State initialization
96 | if len(state) == 0:
97 | state['step'] = 0
98 | # Exponential moving average of gradient values
99 | state['exp_avg'] = torch.zeros_like(p, memory_format=torch.preserve_format)
100 | # Exponential moving average of squared gradient values
101 | state['exp_avg_sq'] = torch.zeros_like(p, memory_format=torch.preserve_format)
102 | if amsgrad:
103 | # Maintains max of all exp. moving avg. of sq. grad. values
104 | state['max_exp_avg_sq'] = torch.zeros_like(p, memory_format=torch.preserve_format)
105 |
106 | exp_avgs.append(state['exp_avg'])
107 | exp_avg_sqs.append(state['exp_avg_sq'])
108 |
109 | if amsgrad:
110 | max_exp_avg_sqs.append(state['max_exp_avg_sq'])
111 |
112 |
113 | # increment the per-parameter step counter
114 | state['step'] += 1
115 | # record the updated step for the functional adamw call below
116 | state_steps.append(state['step'])
117 |
118 | F.adamw(params_with_grad,
119 | grads,
120 | exp_avgs,
121 | exp_avg_sqs,
122 | max_exp_avg_sqs,
123 | state_steps,
124 | amsgrad,
125 | beta1,
126 | beta2,
127 | group['lr'],
128 | group['weight_decay'],
129 | group['eps'])
130 |
131 | return loss
--------------------------------------------------------------------------------
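Because AdamW2 is registered in mmcv's OPTIMIZERS registry, it can be selected from a config exactly like the stock optimizers. A minimal sketch, with placeholder hyper-parameters:

# hypothetical optimizer section of an experiment config
optimizer = dict(
    type='AdamW2',
    lr=2e-4,
    weight_decay=0.01,
    betas=(0.9, 0.999))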
/tools/model_converters/convert_votenet_checkpoints.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import argparse
3 | import tempfile
4 | import torch
5 | from mmcv import Config
6 | from mmcv.runner import load_state_dict
7 |
8 | from mmdet3d.models import build_detector
9 |
10 |
11 | def parse_args():
12 | parser = argparse.ArgumentParser(
13 | description='MMDet3D upgrade model version (before v0.6.0) of VoteNet')
14 | parser.add_argument('checkpoint', help='checkpoint file')
15 | parser.add_argument('--out', help='path of the output checkpoint file')
16 | args = parser.parse_args()
17 | return args
18 |
19 |
20 | def parse_config(config_strings):
21 | """Parse config from strings.
22 |
23 | Args:
24 | config_strings (string): strings of model config.
25 |
26 | Returns:
27 | Config: model config
28 | """
29 | temp_file = tempfile.NamedTemporaryFile()
30 | config_path = f'{temp_file.name}.py'
31 | with open(config_path, 'w') as f:
32 | f.write(config_strings)
33 |
34 | config = Config.fromfile(config_path)
35 |
36 | # Update backbone config
37 | if 'pool_mod' in config.model.backbone:
38 | config.model.backbone.pop('pool_mod')
39 |
40 | if 'sa_cfg' not in config.model.backbone:
41 | config.model.backbone['sa_cfg'] = dict(
42 | type='PointSAModule',
43 | pool_mod='max',
44 | use_xyz=True,
45 | normalize_xyz=True)
46 |
47 | if 'type' not in config.model.bbox_head.vote_aggregation_cfg:
48 | config.model.bbox_head.vote_aggregation_cfg['type'] = 'PointSAModule'
49 |
50 | # Update bbox_head config
51 | if 'pred_layer_cfg' not in config.model.bbox_head:
52 | config.model.bbox_head['pred_layer_cfg'] = dict(
53 | in_channels=128, shared_conv_channels=(128, 128), bias=True)
54 |
55 | if 'feat_channels' in config.model.bbox_head:
56 | config.model.bbox_head.pop('feat_channels')
57 |
58 | if 'vote_moudule_cfg' in config.model.bbox_head:
59 | config.model.bbox_head['vote_module_cfg'] = config.model.bbox_head.pop(
60 | 'vote_moudule_cfg')
61 |
62 | if config.model.bbox_head.vote_aggregation_cfg.use_xyz:
63 | config.model.bbox_head.vote_aggregation_cfg.mlp_channels[0] -= 3
64 |
65 | temp_file.close()
66 |
67 | return config
68 |
69 |
70 | def main():
71 | """Convert keys in checkpoints for VoteNet.
72 |
73 | There can be some breaking changes during the development of mmdetection3d,
74 | and this tool is used for upgrading checkpoints trained with old versions
75 | (before v0.6.0) to the latest one.
76 | """
77 | args = parse_args()
78 | checkpoint = torch.load(args.checkpoint)
79 | cfg = parse_config(checkpoint['meta']['config'])
80 | # Build the model and load checkpoint
81 | model = build_detector(
82 | cfg.model,
83 | train_cfg=cfg.get('train_cfg'),
84 | test_cfg=cfg.get('test_cfg'))
85 | orig_ckpt = checkpoint['state_dict']
86 | converted_ckpt = orig_ckpt.copy()
87 |
88 | if cfg['dataset_type'] == 'ScanNetDataset':
89 | NUM_CLASSES = 18
90 | elif cfg['dataset_type'] == 'SUNRGBDDataset':
91 | NUM_CLASSES = 10
92 | else:
93 | raise NotImplementedError
94 |
95 | RENAME_PREFIX = {
96 | 'bbox_head.conv_pred.0': 'bbox_head.conv_pred.shared_convs.layer0',
97 | 'bbox_head.conv_pred.1': 'bbox_head.conv_pred.shared_convs.layer1'
98 | }
99 |
100 | DEL_KEYS = [
101 | 'bbox_head.conv_pred.0.bn.num_batches_tracked',
102 | 'bbox_head.conv_pred.1.bn.num_batches_tracked'
103 | ]
104 |
105 | EXTRACT_KEYS = {
106 | 'bbox_head.conv_pred.conv_cls.weight':
107 | ('bbox_head.conv_pred.conv_out.weight', [(0, 2), (-NUM_CLASSES, -1)]),
108 | 'bbox_head.conv_pred.conv_cls.bias':
109 | ('bbox_head.conv_pred.conv_out.bias', [(0, 2), (-NUM_CLASSES, -1)]),
110 | 'bbox_head.conv_pred.conv_reg.weight':
111 | ('bbox_head.conv_pred.conv_out.weight', [(2, -NUM_CLASSES)]),
112 | 'bbox_head.conv_pred.conv_reg.bias':
113 | ('bbox_head.conv_pred.conv_out.bias', [(2, -NUM_CLASSES)])
114 | }
115 |
116 | # Delete some useless keys
117 | for key in DEL_KEYS:
118 | converted_ckpt.pop(key)
119 |
120 | # Rename keys with specific prefix
121 | RENAME_KEYS = dict()
122 | for old_key in converted_ckpt.keys():
123 | for rename_prefix in RENAME_PREFIX.keys():
124 | if rename_prefix in old_key:
125 | new_key = old_key.replace(rename_prefix,
126 | RENAME_PREFIX[rename_prefix])
127 | RENAME_KEYS[new_key] = old_key
128 | for new_key, old_key in RENAME_KEYS.items():
129 | converted_ckpt[new_key] = converted_ckpt.pop(old_key)
130 |
131 | # Extract weights and rename the keys
132 | for new_key, (old_key, indices) in EXTRACT_KEYS.items():
133 | cur_layers = orig_ckpt[old_key]
134 | converted_layers = []
135 | for (start, end) in indices:
136 | if end != -1:
137 | converted_layers.append(cur_layers[start:end])
138 | else:
139 | converted_layers.append(cur_layers[start:])
140 | converted_layers = torch.cat(converted_layers, 0)
141 | converted_ckpt[new_key] = converted_layers
142 | if old_key in converted_ckpt.keys():
143 | converted_ckpt.pop(old_key)
144 |
145 | # Check the converted checkpoint by loading to the model
146 | load_state_dict(model, converted_ckpt, strict=True)
147 | checkpoint['state_dict'] = converted_ckpt
148 | torch.save(checkpoint, args.out)
149 |
150 |
151 | if __name__ == '__main__':
152 | main()
153 |
--------------------------------------------------------------------------------
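A typical invocation of the converter, with placeholder file names:

python tools/model_converters/convert_votenet_checkpoints.py \
    old_votenet.pth --out votenet_converted.pth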