├── FreeWorld ├── __init__.py ├── utils │ ├── __init__.py │ ├── coord_transformation.py │ └── ros_coord_to_bev_coord.py ├── can_bus │ ├── __init__.py │ └── can_bus.py ├── map_expansion │ └── __init__.py └── tools │ ├── map2img.py │ ├── ego2img.py │ ├── split_dataset.py │ └── fix_isaac_annotation_coord.py ├── projects ├── __init__.py ├── mmdet3d_plugin │ ├── models │ │ ├── opt │ │ │ └── __init__.py │ │ ├── hooks │ │ │ ├── __init__.py │ │ │ └── hooks.py │ │ ├── backbones │ │ │ └── __init__.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── bricks.py │ │ │ ├── visual.py │ │ │ ├── position_embedding.py │ │ │ ├── embed.py │ │ │ └── grid_mask.py │ ├── bevformer │ │ ├── hooks │ │ │ ├── __init__.py │ │ │ └── custom_hooks.py │ │ ├── dense_heads │ │ │ └── __init__.py │ │ ├── runner │ │ │ ├── __init__.py │ │ │ └── epoch_based_runner.py │ │ ├── detectors │ │ │ ├── __init__.py │ │ │ └── bevformer_fp16.py │ │ ├── apis │ │ │ ├── __init__.py │ │ │ └── train.py │ │ ├── __init__.py │ │ └── modules │ │ │ └── __init__.py │ ├── core │ │ ├── evaluation │ │ │ ├── __init__.py │ │ │ ├── metric_motion.py │ │ │ └── eval_hooks.py │ │ └── bbox │ │ │ ├── structures │ │ │ └── __init__.py │ │ │ ├── match_costs │ │ │ ├── __init__.py │ │ │ └── match_cost.py │ │ │ ├── assigners │ │ │ └── __init__.py │ │ │ ├── coders │ │ │ ├── __init__.py │ │ │ └── nms_free_coder.py │ │ │ └── util.py │ ├── VAD │ │ ├── runner │ │ │ ├── __init__.py │ │ │ └── epoch_based_runner.py │ │ ├── hooks │ │ │ ├── __init__.py │ │ │ └── custom_hooks.py │ │ ├── apis │ │ │ ├── __init__.py │ │ │ └── train.py │ │ ├── __init__.py │ │ ├── modules │ │ │ └── __init__.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── traj_lr_warmup.py │ │ │ └── map_utils.py │ ├── datasets │ │ ├── samplers │ │ │ ├── __init__.py │ │ │ ├── sampler.py │ │ │ ├── distributed_sampler.py │ │ │ └── group_sampler.py │ │ ├── __init__.py │ │ ├── detection_cvpr_2019.json │ │ ├── pipelines │ │ │ ├── __init__.py │ │ │ └── formating.py │ │ └── vad_nusc_detection_cvpr_2019.json │ └── __init__.py └── configs │ ├── _base_ │ ├── models │ │ ├── paconv_cuda_ssg.py │ │ ├── hv_pointpillars_fpn_lyft.py │ │ ├── hv_pointpillars_fpn_range100_lyft.py │ │ ├── pointnet2_msg.py │ │ ├── pointnet2_ssg.py │ │ ├── paconv_ssg.py │ │ ├── fcos3d.py │ │ ├── votenet.py │ │ ├── groupfree3d.py │ │ ├── hv_second_secfpn_kitti.py │ │ ├── 3dssd.py │ │ ├── hv_pointpillars_secfpn_kitti.py │ │ ├── centerpoint_02pillar_second_secfpn_nus.py │ │ ├── centerpoint_01voxel_second_secfpn_nus.py │ │ ├── hv_pointpillars_fpn_nus.py │ │ ├── hv_second_secfpn_waymo.py │ │ ├── imvotenet_image.py │ │ ├── hv_pointpillars_secfpn_waymo.py │ │ └── mask_rcnn_r50_fpn.py │ ├── schedules │ │ ├── mmdet_schedule_1x.py │ │ ├── seg_cosine_200e.py │ │ ├── seg_cosine_50e.py │ │ ├── seg_cosine_150e.py │ │ ├── schedule_3x.py │ │ ├── schedule_2x.py │ │ ├── cosine.py │ │ ├── cyclic_20e.py │ │ └── cyclic_40e.py │ ├── default_runtime.py │ └── datasets │ │ ├── coco_instance.py │ │ ├── nuim_instance.py │ │ ├── nus-mono3d.py │ │ ├── sunrgbd-3d-10class.py │ │ ├── s3dis-3d-5class.py │ │ ├── scannet-3d-18class.py │ │ ├── scannet_seg-3d-20class.py │ │ ├── s3dis_seg-3d-13class.py │ │ └── kitti-3d-car.py │ └── datasets │ └── custom_waymo-3d.py ├── tools ├── analysis_tools │ ├── __init__.py │ ├── get_params.py │ ├── serialize_bev_results.py │ ├── benchmark.py │ └── ros_coord_to_bev_coord.py ├── data_converter │ └── __init__.py ├── dist_train.sh ├── dist_test.sh ├── misc │ ├── print_config.py │ ├── visualize_results.py │ └── fuse_conv_bn.py └── model_converters │ ├── 
publish_model.py │ └── regnet2mmdet.py ├── assets └── arch.png ├── figs ├── ExperimentResults.png ├── qualitative_analysis.jpg └── VAD_qualitative_results.jpg ├── vis.sh ├── docs ├── visualization.md ├── train_eval.md ├── install.md └── prepare_dataset.md ├── prepare_data.sh ├── fine_tune.sh ├── CoordinateSystem.md ├── train_stage_2.sh ├── train_stage_1.sh ├── test.sh └── .gitignore /FreeWorld/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /projects/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /FreeWorld/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /FreeWorld/can_bus/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tools/analysis_tools/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /FreeWorld/map_expansion/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/opt/__init__.py: -------------------------------------------------------------------------------- 1 | from .adamw import AdamW2 -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/hooks/__init__.py: -------------------------------------------------------------------------------- 1 | from .hooks import GradChecker -------------------------------------------------------------------------------- /assets/arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIR-DISCOVER/FreeAD/HEAD/assets/arch.png -------------------------------------------------------------------------------- /tools/data_converter/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/hooks/__init__.py: -------------------------------------------------------------------------------- 1 | from .custom_hooks import TransferWeight -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .eval_hooks import CustomDistEvalHook -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/dense_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .bevformer_head import BEVFormerHead -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/VAD/runner/__init__.py: -------------------------------------------------------------------------------- 1 | from .epoch_based_runner import EpochBasedRunner_video -------------------------------------------------------------------------------- /FreeWorld/utils/coord_transformation.py: -------------------------------------------------------------------------------- 1 | 2 | def RightHandCoord2LeftHandCoord(x, y): 3 | return -y, x -------------------------------------------------------------------------------- /figs/ExperimentResults.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIR-DISCOVER/FreeAD/HEAD/figs/ExperimentResults.png -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/runner/__init__.py: -------------------------------------------------------------------------------- 1 | from .epoch_based_runner import EpochBasedRunner_video -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .vovnet import VoVNet 2 | 3 | __all__ = ['VoVNet'] -------------------------------------------------------------------------------- /figs/qualitative_analysis.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIR-DISCOVER/FreeAD/HEAD/figs/qualitative_analysis.jpg -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/VAD/hooks/__init__.py: -------------------------------------------------------------------------------- 1 | from .custom_hooks import TransferWeight, CustomSetEpochInfoHook -------------------------------------------------------------------------------- /figs/VAD_qualitative_results.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AIR-DISCOVER/FreeAD/HEAD/figs/VAD_qualitative_results.jpg -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | from .bevformer import BEVFormer 2 | from .bevformer_fp16 import BEVFormer_fp16 -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/structures/__init__.py: -------------------------------------------------------------------------------- 1 | from 
.lidar_box3d import CustomLiDARInstance3DBoxes 2 | 3 | __all__ = ['CustomLiDARInstance3DBoxes'] -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/VAD/apis/__init__.py: -------------------------------------------------------------------------------- 1 | from .train import custom_train_model 2 | from .mmdet_train import custom_train_detector 3 | # from .test import custom_multi_gpu_test -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/apis/__init__.py: -------------------------------------------------------------------------------- 1 | from .train import custom_train_model 2 | from .mmdet_train import custom_train_detector 3 | # from .test import custom_multi_gpu_test -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .dense_heads import * 3 | from .detectors import * 4 | from .modules import * 5 | from .runner import * 6 | from .hooks import * 7 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/match_costs/__init__.py: -------------------------------------------------------------------------------- 1 | from mmdet.core.bbox.match_costs import build_match_cost 2 | from .match_cost import BBox3DL1Cost 3 | 4 | __all__ = ['build_match_cost', 'BBox3DL1Cost'] -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from .group_sampler import DistributedGroupSampler 2 | from .distributed_sampler import DistributedSampler 3 | from .sampler import SAMPLER, build_sampler 4 | 5 | -------------------------------------------------------------------------------- /vis.sh: -------------------------------------------------------------------------------- 1 | 2 | python tools/analysis_tools/visualization.py --result-path /home/tsinghuaair/pengyh/FreeAD/my_project/FreeAD/test/FreeAskWorld/Sun_Sep_14_06_19_20_2025/pts_bbox/results_nusc.pkl --save-path path/to/save/visualization/results 3 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .nuscenes_vad_dataset import VADCustomNuScenesDataset 2 | from .FreeAD_dataset import FreeWorldDataset 3 | 4 | __all__ = [ 5 | 'VADCustomNuScenesDataset', 6 | 'FreeWorldDataset' 7 | ] 8 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .bricks import run_time 3 | from .grid_mask import GridMask 4 | from .position_embedding import RelPositionEmbedding 5 | from .visual import save_tensor 6 | from .embed import PatchEmbed -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/assigners/__init__.py: -------------------------------------------------------------------------------- 1 | from .hungarian_assigner_3d import HungarianAssigner3D 2 | from .map_hungarian_assigner_3d import MapHungarianAssigner3D 3 | 4 | __all__ = ['HungarianAssigner3D', 
'MapHungarianAssigner3D'] 5 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/paconv_cuda_ssg.py: -------------------------------------------------------------------------------- 1 | _base_ = './paconv_ssg.py' 2 | 3 | model = dict( 4 | backbone=dict( 5 | sa_cfg=dict( 6 | type='PAConvCUDASAModule', 7 | scorenet_cfg=dict(mlp_channels=[8, 16, 16])))) 8 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/samplers/sampler.py: -------------------------------------------------------------------------------- 1 | from mmcv.utils.registry import Registry, build_from_cfg 2 | 3 | SAMPLER = Registry('sampler') 4 | 5 | 6 | def build_sampler(cfg, default_args): 7 | return build_from_cfg(cfg, SAMPLER, default_args) 8 | -------------------------------------------------------------------------------- /tools/analysis_tools/get_params.py: -------------------------------------------------------------------------------- 1 | import torch 2 | YOUR_CKPT_PATH = None 3 | file_path = YOUR_CKPT_PATH 4 | model = torch.load(file_path, map_location='cpu') 5 | all = 0 6 | for key in list(model['state_dict'].keys()): 7 | all += model['state_dict'][key].nelement() 8 | print(all) 9 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/VAD/__init__.py: -------------------------------------------------------------------------------- 1 | from .modules import * 2 | from .runner import * 3 | from .hooks import * 4 | 5 | from .VAD import VAD 6 | from .VAD_head import VADHead 7 | from .VAD_transformer import VADPerceptionTransformer, \ 8 | CustomTransformerDecoder, MapDetectionTransformerDecoder -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/coders/__init__.py: -------------------------------------------------------------------------------- 1 | from .nms_free_coder import NMSFreeCoder 2 | from .fut_nms_free_coder import CustomNMSFreeCoder 3 | from .map_nms_free_coder import MapNMSFreeCoder 4 | 5 | __all__ = ['NMSFreeCoder', 6 | 'CustomNMSFreeCoder', 7 | 'MapNMSFreeCoder'] 8 | -------------------------------------------------------------------------------- /tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | PORT=${PORT:-28509} 6 | 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 8 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 9 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} --deterministic 10 | -------------------------------------------------------------------------------- /tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | PORT=${PORT:-29503} 7 | 8 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 9 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 10 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} --eval bbox 11 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/VAD/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .transformer import PerceptionTransformer 2 | from .spatial_cross_attention import SpatialCrossAttention, 
MSDeformableAttention3D 3 | from .temporal_self_attention import TemporalSelfAttention 4 | from .encoder import BEVFormerEncoder, BEVFormerLayer 5 | from .decoder import DetectionTransformerDecoder -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .transformer import PerceptionTransformer 2 | from .spatial_cross_attention import SpatialCrossAttention, MSDeformableAttention3D 3 | from .temporal_self_attention import TemporalSelfAttention 4 | from .encoder import BEVFormerEncoder, BEVFormerLayer 5 | from .decoder import DetectionTransformerDecoder 6 | 7 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/VAD/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .map_utils import normalize_2d_bbox, normalize_2d_pts, denormalize_2d_bbox, denormalize_2d_pts 2 | from .CD_loss import ( 3 | MyChamferDistance, MyChamferDistanceCost, 4 | OrderedPtsL1Cost, PtsL1Cost, OrderedPtsSmoothL1Cost, 5 | OrderedPtsL1Loss, PtsL1Loss, PtsDirCosLoss 6 | ) 7 | from .plan_loss import PlanMapBoundLoss, PlanCollisionLoss, PlanMapDirectionLoss -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/mmdet_schedule_1x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[8, 11]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=12) 12 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/seg_cosine_200e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used on ScanNet dataset in segmentation task 3 | optimizer = dict(type='Adam', lr=0.001, weight_decay=0.01) 4 | optimizer_config = dict(grad_clip=None) 5 | lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5) 6 | momentum_config = None 7 | 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=200) 10 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/seg_cosine_50e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used on S3DIS dataset in segmentation task 3 | optimizer = dict(type='Adam', lr=0.001, weight_decay=0.001) 4 | optimizer_config = dict(grad_clip=None) 5 | lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5) 6 | momentum_config = None 7 | 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=50) 10 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/seg_cosine_150e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used on S3DIS dataset in segmentation task 3 | optimizer = dict(type='SGD', lr=0.2, weight_decay=0.0001, momentum=0.9) 4 | optimizer_config = dict(grad_clip=None) 5 | lr_config = 
dict(policy='CosineAnnealing', warmup=None, min_lr=0.002) 6 | momentum_config = None 7 | 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=150) 10 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/VAD/utils/traj_lr_warmup.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | def get_traj_warmup_loss_weight( 4 | cur_epoch, 5 | tot_epoch, 6 | start_pos=0.3, 7 | end_pos=0.35, 8 | scale_weight=1.1 9 | ): 10 | epoch_percentage = cur_epoch / tot_epoch 11 | sigmoid_input = 5 / (end_pos-start_pos) * epoch_percentage - 2.5 * (end_pos+start_pos) / (end_pos - start_pos) 12 | 13 | return scale_weight * torch.sigmoid(torch.tensor(sigmoid_input)) 14 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/schedule_3x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used by models on indoor dataset, 3 | # e.g., VoteNet on SUNRGBD and ScanNet 4 | lr = 0.008 # max learning rate 5 | optimizer = dict(type='AdamW', lr=lr, weight_decay=0.01) 6 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 7 | lr_config = dict(policy='step', warmup=None, step=[24, 32]) 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=36) 10 | -------------------------------------------------------------------------------- /docs/visualization.md: -------------------------------------------------------------------------------- 1 | 4 | 5 | ## Visualize prediction 6 | 7 | ```shell 8 | cd /path/to/FreeAD/ 9 | conda activate freead 10 | sh vis.sh 11 | ``` 12 | 13 | The inference results is a prefix_results_nusc.pkl automaticly saved to the work_dir after running evaluation. It's a list of prediction results for each validation sample. 
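To sanity-check the saved file before visualizing, a minimal sketch like the following can be used (the path is a placeholder and the exact per-sample keys depend on your config):

```python
import mmcv

# Placeholder path: point this at the results file inside your own work_dir.
result_path = 'path/to/work_dir/results_nusc.pkl'

results = mmcv.load(result_path)   # a list with one entry per validation sample
print(len(results))
first = results[0]
print(first.keys() if isinstance(first, dict) else type(first))
```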
14 | -------------------------------------------------------------------------------- /prepare_data.sh: -------------------------------------------------------------------------------- 1 | 2 | # 设置变量 3 | DataSet="FreeWorld" 4 | ROOT_PATH="./data/$DataSet" 5 | OUT_DIR="./data/$DataSet" 6 | EXTRA_TAG="vad_nuscenes" 7 | VERSION="v1.0" 8 | CANBUS_PATH="./data/$DataSet/can_bus" 9 | 10 | export PYTHONPATH=$PYTHONPATH:/home/tsinghuaair/pengyh/FreeAD/my_project/FreeAD 11 | # 提取数据集,生成pkl文件 12 | python tools/data_converter/freead_data_converter.py nuscenes --root-path "$ROOT_PATH" --out-dir "$OUT_DIR" --extra-tag "$EXTRA_TAG" --version "$VERSION" --canbus "$CANBUS_PATH" 13 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/hooks/hooks.py: -------------------------------------------------------------------------------- 1 | from mmcv.runner.hooks.hook import HOOKS, Hook 2 | from projects.mmdet3d_plugin.models.utils import run_time 3 | 4 | 5 | @HOOKS.register_module() 6 | class GradChecker(Hook): 7 | 8 | def after_train_iter(self, runner): 9 | for key, val in runner.model.named_parameters(): 10 | if val.grad == None and val.requires_grad: 11 | print('WARNNING: {key}\'s parameters are not be used!!!!'.format(key=key)) 12 | 13 | 14 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/hooks/custom_hooks.py: -------------------------------------------------------------------------------- 1 | from mmcv.runner.hooks.hook import HOOKS, Hook 2 | from projects.mmdet3d_plugin.models.utils import run_time 3 | 4 | 5 | @HOOKS.register_module() 6 | class TransferWeight(Hook): 7 | 8 | def __init__(self, every_n_inters=1): 9 | self.every_n_inters=every_n_inters 10 | 11 | def after_train_iter(self, runner): 12 | if self.every_n_inner_iters(runner, self.every_n_inters): 13 | runner.eval_model.load_state_dict(runner.model.state_dict()) 14 | 15 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/schedule_2x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used by models on nuScenes dataset 3 | optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.01) 4 | # max_norm=10 is better for SECOND 5 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 6 | lr_config = dict( 7 | policy='step', 8 | warmup='linear', 9 | warmup_iters=1000, 10 | warmup_ratio=1.0 / 1000, 11 | step=[20, 23]) 12 | momentum_config = None 13 | # runtime settings 14 | runner = dict(type='EpochBasedRunner', max_epochs=24) 15 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/detection_cvpr_2019.json: -------------------------------------------------------------------------------- 1 | { 2 | "class_range": { 3 | "car": 50, 4 | "truck": 50, 5 | "bus": 50, 6 | "trailer": 50, 7 | "construction_vehicle": 50, 8 | "pedestrian": 40, 9 | "motorcycle": 40, 10 | "bicycle": 40, 11 | "traffic_cone": 30, 12 | "barrier": 30 13 | }, 14 | "dist_fcn": "center_distance", 15 | "dist_ths": [0.5, 1.0, 2.0, 4.0], 16 | "dist_th_tp": 2.0, 17 | "min_recall": 0.1, 18 | "min_precision": 0.1, 19 | "max_boxes_per_sample": 500, 20 | "mean_ap_weight": 5 21 | } 22 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .core.bbox.assigners.hungarian_assigner_3d import HungarianAssigner3D 2 | from .core.bbox.coders.nms_free_coder import NMSFreeCoder 3 | from .core.bbox.match_costs import BBox3DL1Cost 4 | from .core.evaluation.eval_hooks import CustomDistEvalHook 5 | from .datasets.pipelines import ( 6 | PhotoMetricDistortionMultiViewImage, PadMultiViewImage, 7 | NormalizeMultiviewImage, CustomCollect3D) 8 | from .models.backbones.vovnet import VoVNet 9 | from .models.utils import * 10 | from .models.opt.adamw import AdamW2 11 | from .VAD import * 12 | -------------------------------------------------------------------------------- /projects/configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | checkpoint_config = dict(interval=1) 2 | # yapf:disable push 3 | # By default we use textlogger hook and tensorboard 4 | # For more loggers see 5 | # https://mmcv.readthedocs.io/en/latest/api.html#mmcv.runner.LoggerHook 6 | log_config = dict( 7 | interval=50, 8 | hooks=[ 9 | dict(type='TextLoggerHook'), 10 | dict(type='TensorboardLoggerHook') 11 | ]) 12 | # yapf:enable 13 | dist_params = dict(backend='nccl') 14 | log_level = 'INFO' 15 | work_dir = None 16 | load_from = None 17 | resume_from = None 18 | workflow = [('train', 1)] 19 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/cosine.py: -------------------------------------------------------------------------------- 1 | # This schedule is mainly used by models with dynamic voxelization 2 | # optimizer 3 | lr = 0.003 # max learning rate 4 | optimizer = dict( 5 | type='AdamW', 6 | lr=lr, 7 | betas=(0.95, 0.99), # the momentum is change during training 8 | weight_decay=0.001) 9 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 10 | 11 | lr_config = dict( 12 | policy='CosineAnnealing', 13 | warmup='linear', 14 | warmup_iters=1000, 15 | warmup_ratio=1.0 / 10, 16 | min_lr_ratio=1e-5) 17 | 18 | momentum_config = None 19 | 20 | runner = dict(type='EpochBasedRunner', max_epochs=40) 21 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | from .transform_3d import ( 2 | PadMultiViewImage, NormalizeMultiviewImage, 3 | PhotoMetricDistortionMultiViewImage, CustomCollect3D, 4 | RandomScaleImageMultiViewImage, CustomObjectRangeFilter, CustomObjectNameFilter) 5 | from .formating import CustomDefaultFormatBundle3D 6 | from .loading import CustomLoadPointsFromFile, CustomLoadPointsFromMultiSweeps 7 | 8 | __all__ = [ 9 | 'PadMultiViewImage', 'NormalizeMultiviewImage', 10 | 'PhotoMetricDistortionMultiViewImage', 'CustomDefaultFormatBundle3D', 11 | 'CustomCollect3D', 'RandomScaleImageMultiViewImage', 12 | 'CustomObjectRangeFilter', 'CustomObjectNameFilter', 13 | 'CustomLoadPointsFromFile', 'CustomLoadPointsFromMultiSweeps' 14 | ] -------------------------------------------------------------------------------- /tools/misc/print_config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | import argparse 3 | from mmcv import Config, DictAction 4 | 5 | 6 | def parse_args(): 7 | parser = argparse.ArgumentParser(description='Print the whole config') 8 | parser.add_argument('config', help='config file path') 9 | parser.add_argument( 10 | '--options', nargs='+', action=DictAction, help='arguments in dict') 11 | args = parser.parse_args() 12 | 13 | return args 14 | 15 | 16 | def main(): 17 | args = parse_args() 18 | 19 | cfg = Config.fromfile(args.config) 20 | if args.options is not None: 21 | cfg.merge_from_dict(args.options) 22 | print(f'Config:\n{cfg.pretty_text}') 23 | 24 | 25 | if __name__ == '__main__': 26 | main() 27 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/bricks.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import time 3 | from collections import defaultdict 4 | import torch 5 | time_maps = defaultdict(lambda :0.) 6 | count_maps = defaultdict(lambda :0.) 7 | def run_time(name): 8 | def middle(fn): 9 | def wrapper(*args, **kwargs): 10 | torch.cuda.synchronize() 11 | start = time.time() 12 | res = fn(*args, **kwargs) 13 | torch.cuda.synchronize() 14 | time_maps['%s : %s'%(name, fn.__name__) ] += time.time()-start 15 | count_maps['%s : %s'%(name, fn.__name__) ] +=1 16 | print("%s : %s takes up %f "% (name, fn.__name__,time_maps['%s : %s'%(name, fn.__name__) ] /count_maps['%s : %s'%(name, fn.__name__) ] )) 17 | return res 18 | return wrapper 19 | return middle 20 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/visual.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torchvision.utils import make_grid 3 | import torchvision 4 | import matplotlib.pyplot as plt 5 | import cv2 6 | 7 | 8 | def convert_color(img_path): 9 | plt.figure() 10 | img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE) 11 | plt.imsave(img_path, img, cmap=plt.get_cmap('viridis')) 12 | plt.close() 13 | 14 | 15 | def save_tensor(tensor, path, pad_value=254.0,): 16 | print('save_tensor', path) 17 | tensor = tensor.to(torch.float).detach().cpu() 18 | if tensor.type() == 'torch.BoolTensor': 19 | tensor = tensor*255 20 | if len(tensor.shape) == 3: 21 | tensor = tensor.unsqueeze(1) 22 | tensor = make_grid(tensor, pad_value=pad_value, normalize=False).permute(1, 2, 0).numpy().copy() 23 | torchvision.utils.save_image(torch.tensor(tensor).permute(2, 0, 1), path) 24 | convert_color(path) 25 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/cyclic_20e.py: -------------------------------------------------------------------------------- 1 | # For nuScenes dataset, we usually evaluate the model at the end of training. 2 | # Since the models are trained by 24 epochs by default, we set evaluation 3 | # interval to be 20. Please change the interval accordingly if you do not 4 | # use a default schedule. 
5 | # optimizer 6 | # This schedule is mainly used by models on nuScenes dataset 7 | optimizer = dict(type='AdamW', lr=1e-4, weight_decay=0.01) 8 | # max_norm=10 is better for SECOND 9 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 10 | lr_config = dict( 11 | policy='cyclic', 12 | target_ratio=(10, 1e-4), 13 | cyclic_times=1, 14 | step_ratio_up=0.4, 15 | ) 16 | momentum_config = dict( 17 | policy='cyclic', 18 | target_ratio=(0.85 / 0.95, 1), 19 | cyclic_times=1, 20 | step_ratio_up=0.4, 21 | ) 22 | 23 | # runtime settings 24 | runner = dict(type='EpochBasedRunner', max_epochs=20) 25 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/vad_nusc_detection_cvpr_2019.json: -------------------------------------------------------------------------------- 1 | { 2 | "class_range_x": { 3 | "car": 30, 4 | "truck": 30, 5 | "bus": 30, 6 | "trailer": 30, 7 | "construction_vehicle": 30, 8 | "pedestrian": 30, 9 | "motorcycle": 30, 10 | "bicycle": 30, 11 | "traffic_cone": 30, 12 | "barrier": 30 13 | }, 14 | "class_range_y": { 15 | "car": 15, 16 | "truck": 15, 17 | "bus": 15, 18 | "trailer": 15, 19 | "construction_vehicle": 15, 20 | "pedestrian": 15, 21 | "motorcycle": 15, 22 | "bicycle": 15, 23 | "traffic_cone": 15, 24 | "barrier": 15 25 | }, 26 | "dist_fcn": "center_distance", 27 | "dist_ths": [0.5, 1.0, 2.0, 4.0], 28 | "dist_th_tp": 2.0, 29 | "min_recall": 0.1, 30 | "min_precision": 0.1, 31 | "max_boxes_per_sample": 500, 32 | "mean_ap_weight": 5 33 | } 34 | -------------------------------------------------------------------------------- /fine_tune.sh: -------------------------------------------------------------------------------- 1 | 2 | # train with 8 card 3 | #**NOTE**: We release two types of training configs: the end-to-end configs and the two-stage (stage-1: Perception & Prediction; stage-2: Planning) configs. They should produce similar results. 4 | #The two-stage configs are recommended because you can just train the stage-1 model once and use it as a pre-train model for stage-2. 
5 | #最好选择两个阶段的config,一个阶段一个阶段的训练,前面的可以作为pretrained model 6 | export CUDA_VISIBLE_DEVICES="0,1,2" # 在 多卡的情况下指定 7 | python -m torch.distributed.run --nproc_per_node=3 --master_port=2734 tools/fine_tune.py projects/configs/FreeAD/FreeAD_base_map_fine_tune.py --launcher pytorch --deterministic --work-dir path/to/save/outputs 8 | 9 | # train with 1 card 10 | #python tools/train.py projects/configs/VAD/FreeAD_base.py --deterministic --work-dir path/to/save/outputs 11 | 12 | 13 | # test 14 | #python tools1/check_train_data.py projects/configs/VAD/VAD_base_issac.py --deterministic --work-dir path/to/save/outputs -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/VAD/hooks/custom_hooks.py: -------------------------------------------------------------------------------- 1 | from mmcv.runner.hooks.hook import HOOKS, Hook 2 | from projects.mmdet3d_plugin.models.utils import run_time 3 | from mmcv.parallel import is_module_wrapper 4 | 5 | 6 | @HOOKS.register_module() 7 | class TransferWeight(Hook): 8 | 9 | def __init__(self, every_n_inters=1): 10 | self.every_n_inters=every_n_inters 11 | 12 | def after_train_iter(self, runner): 13 | if self.every_n_inner_iters(runner, self.every_n_inters): 14 | runner.eval_model.load_state_dict(runner.model.state_dict()) 15 | 16 | @HOOKS.register_module() 17 | class CustomSetEpochInfoHook(Hook): 18 | """Set runner's epoch information to the model.""" 19 | 20 | def before_train_epoch(self, runner): 21 | epoch = runner.epoch 22 | model = runner.model 23 | if is_module_wrapper(model): 24 | model = model.module 25 | model.set_epoch(epoch) 26 | 27 | -------------------------------------------------------------------------------- /CoordinateSystem.md: -------------------------------------------------------------------------------- 1 | Camera coordinate system – the coordinate system of most cameras, in which the positive direction of the y-axis points to the ground, the positive direction of the x-axis points to the right, and the positive direction of the z-axis points to the front. 2 | 3 | up z front 4 | | ^ 5 | | / 6 | | / 7 | | / 8 | |/ 9 | left ------ 0 ------> x right 10 | | 11 | | 12 | | 13 | | 14 | v 15 | y down 16 | 17 | 18 | LiDAR coordinate system – the coordinate system of many LiDARs, in which the negative direction of the z-axis points to the ground, the positive direction of the x-axis points to the front, and the positive direction of the y-axis points to the left. 19 | z up x front 20 | ^ ^ 21 | | / 22 | | / 23 | | / 24 | |/ 25 | y left <------ 0 ------ right -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/match_costs/match_cost.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from mmdet.core.bbox.match_costs.builder import MATCH_COST 3 | 4 | 5 | @MATCH_COST.register_module() 6 | class BBox3DL1Cost(object): 7 | """BBox3DL1Cost. 8 | Args: 9 | weight (int | float, optional): loss_weight 10 | """ 11 | 12 | def __init__(self, weight=1.): 13 | self.weight = weight 14 | 15 | def __call__(self, bbox_pred, gt_bboxes): 16 | """ 17 | Args: 18 | bbox_pred (Tensor): Predicted boxes with normalized coordinates 19 | (cx, cy, w, h), which are all in range [0, 1]. Shape 20 | [num_query, 4]. 21 | gt_bboxes (Tensor): Ground truth boxes with normalized 22 | coordinates (x1, y1, x2, y2). Shape [num_gt, 4]. 
23 | Returns: 24 | torch.Tensor: bbox_cost value with weight 25 | """ 26 | bbox_cost = torch.cdist(bbox_pred, gt_bboxes, p=1) 27 | return bbox_cost * self.weight -------------------------------------------------------------------------------- /projects/configs/_base_/models/hv_pointpillars_fpn_lyft.py: -------------------------------------------------------------------------------- 1 | _base_ = './hv_pointpillars_fpn_nus.py' 2 | 3 | # model settings (based on nuScenes model settings) 4 | # Voxel size for voxel encoder 5 | # Usually voxel size is changed consistently with the point cloud range 6 | # If point cloud range is modified, do remember to change all related 7 | # keys in the config. 8 | model = dict( 9 | pts_voxel_layer=dict( 10 | max_num_points=20, 11 | point_cloud_range=[-80, -80, -5, 80, 80, 3], 12 | max_voxels=(60000, 60000)), 13 | pts_voxel_encoder=dict( 14 | feat_channels=[64], point_cloud_range=[-80, -80, -5, 80, 80, 3]), 15 | pts_middle_encoder=dict(output_shape=[640, 640]), 16 | pts_bbox_head=dict( 17 | num_classes=9, 18 | anchor_generator=dict( 19 | ranges=[[-80, -80, -1.8, 80, 80, -1.8]], custom_values=[]), 20 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7)), 21 | # model training settings (based on nuScenes model settings) 22 | train_cfg=dict(pts=dict(code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]))) 23 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/hv_pointpillars_fpn_range100_lyft.py: -------------------------------------------------------------------------------- 1 | _base_ = './hv_pointpillars_fpn_nus.py' 2 | 3 | # model settings (based on nuScenes model settings) 4 | # Voxel size for voxel encoder 5 | # Usually voxel size is changed consistently with the point cloud range 6 | # If point cloud range is modified, do remember to change all related 7 | # keys in the config. 8 | model = dict( 9 | pts_voxel_layer=dict( 10 | max_num_points=20, 11 | point_cloud_range=[-100, -100, -5, 100, 100, 3], 12 | max_voxels=(60000, 60000)), 13 | pts_voxel_encoder=dict( 14 | feat_channels=[64], point_cloud_range=[-100, -100, -5, 100, 100, 3]), 15 | pts_middle_encoder=dict(output_shape=[800, 800]), 16 | pts_bbox_head=dict( 17 | num_classes=9, 18 | anchor_generator=dict( 19 | ranges=[[-100, -100, -1.8, 100, 100, -1.8]], custom_values=[]), 20 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7)), 21 | # model training settings (based on nuScenes model settings) 22 | train_cfg=dict(pts=dict(code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]))) 23 | -------------------------------------------------------------------------------- /train_stage_2.sh: -------------------------------------------------------------------------------- 1 | 2 | # train with 8 card 3 | #**NOTE**: We release two types of training configs: the end-to-end configs and the two-stage (stage-1: Perception & Prediction; stage-2: Planning) configs. They should produce similar results. 4 | #The two-stage configs are recommended because you can just train the stage-1 model once and use it as a pre-train model for stage-2. 
5 | #最好选择两个阶段的config,一个阶段一个阶段的训练,前面的可以作为pretrained model 6 | export CUDA_VISIBLE_DEVICES="0,1" # 在 多卡的情况下指定 7 | #python -m torch.distributed.run --nproc_per_node=3 --master_port=2734 tools/train.py projects/configs/FreeAD/FreeAD_base_map_fine_tune.py --launcher pytorch --deterministic --work-dir path/to/save/outputs 8 | 9 | python -m torch.distributed.run --nproc_per_node=2 --master_port=2734 tools/train.py projects/configs/FreeAD/FreeAD_base_stage_2.py --launcher pytorch --deterministic --work-dir path/to/save/outputs/FreeeAskWorlds1e3s2e1_FreeWorld_s1e1s2e1 10 | 11 | # train with 1 card 12 | #python tools/train.py projects/configs/VAD/FreeAD_base.py --deterministic --work-dir path/to/save/outputs 13 | 14 | 15 | # test 16 | #python tools1/check_train_data.py projects/configs/VAD/VAD_base_issac.py --deterministic --work-dir path/to/save/outputs -------------------------------------------------------------------------------- /train_stage_1.sh: -------------------------------------------------------------------------------- 1 | 2 | # train with 8 card 3 | #**NOTE**: We release two types of training configs: the end-to-end configs and the two-stage (stage-1: Perception & Prediction; stage-2: Planning) configs. They should produce similar results. 4 | #The two-stage configs are recommended because you can just train the stage-1 model once and use it as a pre-train model for stage-2. 5 | #最好选择两个阶段的config,一个阶段一个阶段的训练,前面的可以作为pretrained model 6 | export CUDA_VISIBLE_DEVICES="0,1" # 在 多卡的情况下指定 7 | #python -m torch.distributed.run --nproc_per_node=3 --master_port=2734 tools/train.py projects/configs/FreeAD/FreeAD_base_map_fine_tune.py --launcher pytorch --deterministic --work-dir path/to/save/outputs 8 | 9 | python -m torch.distributed.run --nproc_per_node=2 --master_port=2734 tools/train.py projects/configs/FreeAD/FreeAD_base_stage_1.py --launcher pytorch --deterministic --work-dir path/to/save/outputs/FreeWorld_s1 10 | 11 | # train with 1 card 12 | # python tools/train.py projects/configs/FreeAD/FreeAD_base_stage_1.py --deterministic --work-dir path/to/save/outputs/FreeAskWorld_s1e3s2e1_FreeWorld_s1 13 | 14 | 15 | # test 16 | #python tools1/check_train_data.py projects/configs/VAD/VAD_base_issac.py --deterministic --work-dir path/to/save/outputs -------------------------------------------------------------------------------- /tools/analysis_tools/serialize_bev_results.py: -------------------------------------------------------------------------------- 1 | import json 2 | import numpy as np 3 | import torch 4 | 5 | # 自定义 JSONEncoder 以处理所有可能的 numpy 和 Tensor 数据类型 6 | class NumpyEncoder(json.JSONEncoder): 7 | def default(self, obj): 8 | # 处理 ndarray 类型 9 | if isinstance(obj, np.ndarray): 10 | return obj.tolist() # 将 NumPy 数组转换为列表 11 | 12 | # 处理 numpy 的各种数值类型 13 | if isinstance(obj, (np.int64, np.int32, np.int16, np.int8)): 14 | return int(obj) # 将 numpy 的整数类型转换为普通的 Python int 15 | if isinstance(obj, (np.float64, np.float32, np.float16)): 16 | return float(obj) # 将 numpy 的浮动类型转换为 Python float 17 | 18 | # 处理 Tensor 类型(假设你使用的是 PyTorch) 19 | if isinstance(obj, torch.Tensor): 20 | return obj.cpu().numpy().tolist() # 将 PyTorch tensor 转换为 numpy 数组再转换为列表 21 | 22 | # 如果是 TensorFlow 的 Tensor 23 | try: 24 | import tensorflow as tf 25 | if isinstance(obj, tf.Tensor): 26 | return obj.numpy().tolist() # 将 TensorFlow tensor 转换为 numpy 数组再转换为列表 27 | except ImportError: 28 | pass # 如果没有安装 tensorflow,可以忽略 29 | 30 | return super().default(obj) # 调用父类方法处理其他对象 
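# --- Hypothetical usage sketch (not part of the original script) ---
# NumpyEncoder is meant to be passed to json.dumps via the `cls` argument, so
# result dicts mixing numpy arrays/scalars and torch tensors serialize directly.
if __name__ == '__main__':
    example = {
        'score': np.float32(0.87),       # numpy scalar -> Python float
        'bev_traj': np.zeros((2, 3)),    # ndarray -> nested list
        'query_feat': torch.zeros(4),    # torch tensor -> list
    }
    print(json.dumps(example, cls=NumpyEncoder, indent=2))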
-------------------------------------------------------------------------------- /tools/model_converters/publish_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import subprocess 4 | import torch 5 | 6 | 7 | def parse_args(): 8 | parser = argparse.ArgumentParser( 9 | description='Process a checkpoint to be published') 10 | parser.add_argument('in_file', help='input checkpoint filename') 11 | parser.add_argument('out_file', help='output checkpoint filename') 12 | args = parser.parse_args() 13 | return args 14 | 15 | 16 | def process_checkpoint(in_file, out_file): 17 | checkpoint = torch.load(in_file, map_location='cpu') 18 | # remove optimizer for smaller file size 19 | if 'optimizer' in checkpoint: 20 | del checkpoint['optimizer'] 21 | # if it is necessary to remove some sensitive data in checkpoint['meta'], 22 | # add the code here. 23 | torch.save(checkpoint, out_file) 24 | sha = subprocess.check_output(['sha256sum', out_file]).decode() 25 | final_file = out_file.rstrip('.pth') + '-{}.pth'.format(sha[:8]) 26 | subprocess.Popen(['mv', out_file, final_file]) 27 | 28 | 29 | def main(): 30 | args = parse_args() 31 | process_checkpoint(args.in_file, args.out_file) 32 | 33 | 34 | if __name__ == '__main__': 35 | main() 36 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/pointnet2_msg.py: -------------------------------------------------------------------------------- 1 | _base_ = './pointnet2_ssg.py' 2 | 3 | # model settings 4 | model = dict( 5 | backbone=dict( 6 | _delete_=True, 7 | type='PointNet2SAMSG', 8 | in_channels=6, # [xyz, rgb], should be modified with dataset 9 | num_points=(1024, 256, 64, 16), 10 | radii=((0.05, 0.1), (0.1, 0.2), (0.2, 0.4), (0.4, 0.8)), 11 | num_samples=((16, 32), (16, 32), (16, 32), (16, 32)), 12 | sa_channels=(((16, 16, 32), (32, 32, 64)), ((64, 64, 128), (64, 96, 13 | 128)), 14 | ((128, 196, 256), (128, 196, 256)), ((256, 256, 512), 15 | (256, 384, 512))), 16 | aggregation_channels=(None, None, None, None), 17 | fps_mods=(('D-FPS'), ('D-FPS'), ('D-FPS'), ('D-FPS')), 18 | fps_sample_range_lists=((-1), (-1), (-1), (-1)), 19 | dilated_group=(False, False, False, False), 20 | out_indices=(0, 1, 2, 3), 21 | sa_cfg=dict( 22 | type='PointSAModuleMSG', 23 | pool_mod='max', 24 | use_xyz=True, 25 | normalize_xyz=False)), 26 | decode_head=dict( 27 | fp_channels=((1536, 256, 256), (512, 256, 256), (352, 256, 128), 28 | (128, 128, 128, 128)))) 29 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/pointnet2_ssg.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='EncoderDecoder3D', 4 | backbone=dict( 5 | type='PointNet2SASSG', 6 | in_channels=6, # [xyz, rgb], should be modified with dataset 7 | num_points=(1024, 256, 64, 16), 8 | radius=(0.1, 0.2, 0.4, 0.8), 9 | num_samples=(32, 32, 32, 32), 10 | sa_channels=((32, 32, 64), (64, 64, 128), (128, 128, 256), (256, 256, 11 | 512)), 12 | fp_channels=(), 13 | norm_cfg=dict(type='BN2d'), 14 | sa_cfg=dict( 15 | type='PointSAModule', 16 | pool_mod='max', 17 | use_xyz=True, 18 | normalize_xyz=False)), 19 | decode_head=dict( 20 | type='PointNet2Head', 21 | fp_channels=((768, 256, 256), (384, 256, 256), (320, 256, 128), 22 | (128, 128, 128, 128)), 23 | channels=128, 24 | dropout_ratio=0.5, 25 | 
conv_cfg=dict(type='Conv1d'), 26 | norm_cfg=dict(type='BN1d'), 27 | act_cfg=dict(type='ReLU'), 28 | loss_decode=dict( 29 | type='CrossEntropyLoss', 30 | use_sigmoid=False, 31 | class_weight=None, # should be modified with dataset 32 | loss_weight=1.0)), 33 | # model training and testing settings 34 | train_cfg=dict(), 35 | test_cfg=dict(mode='slide')) 36 | -------------------------------------------------------------------------------- /test.sh: -------------------------------------------------------------------------------- 1 | 2 | # 推理使用eval 3 | #CUDA_VISIBLE_DEVICES=1 python tools/test.py projects/configs/FreeAD/FreeAD_base.py path/to/save/outputs/VAD_base.pth --launcher none --eval bbox --tmpdir tmp 4 | #CUDA_VISIBLE_DEVICES=0 python tools/test.py projects/configs/FreeAD/FreeAD_base.py path/to/save/outputs/epoch_48.pth --launcher none --eval bbox --tmpdir tmp 5 | #CUDA_VISIBLE_DEVICES=0 python tools/test.py projects/configs/FreeAD/FreeAD_base.py ckpts/3dbox数据修复_FreeAD_s1e6_s2e3.pth --launcher none --eval bbox --tmpdir tmp 6 | CUDA_VISIBLE_DEVICES=0 python tools/test.py projects/configs/FreeAD/FreeAskWorld.py ckpts/VAD_FreeAskWorlds1e3s2e1.pth --launcher none --eval bbox --tmpdir tmp 7 | #CUDA_VISIBLE_DEVICES=0 python tools/test.py projects/configs/FreeAD/FreeAD_base.py ckpts/VAD_base.pth --launcher none --eval bbox --tmpdir tmp 8 | #CUDA_VISIBLE_DEVICES=0 python tools/test.py projects/configs/FreeAD/VAD_tiny_e2e.py ckpts/VAD_tiny.pth --launcher none --eval bbox --tmpdir tmp 9 | 10 | # 推理我自己生成pkl文件,直接out不eval,表示直接获取bbox等信息,out代表输出原始(没有eval)推理数据的,format_only代表不eval输出vis的格式数据 11 | ##CUDA_VISIBLE_DEVICES=0 python tools/test.py projects/configs/FreeAD/FreeAD_base.py path/to/save/outputs/epoch_36.pth --launcher none --tmpdir tmp --out path/to/results/my_data_results_no_eval.pkl --format-only 12 | 13 | 14 | #CUDA_VISIBLE_DEVICES=0 python tools/test.py projects/configs/FreeAD/FreeAD_base.py ckpts/VAD_base.pth --launcher none --tmpdir tmp --out path/to/results/my_data_results_no_eval.pkl --format-onlys -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/samplers/distributed_sampler.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | from torch.utils.data import DistributedSampler as _DistributedSampler 5 | from .sampler import SAMPLER 6 | 7 | 8 | @SAMPLER.register_module() 9 | class DistributedSampler(_DistributedSampler): 10 | 11 | def __init__(self, 12 | dataset=None, 13 | num_replicas=None, 14 | rank=None, 15 | shuffle=True, 16 | seed=0): 17 | super().__init__( 18 | dataset, num_replicas=num_replicas, rank=rank, shuffle=shuffle) 19 | # for the compatibility from PyTorch 1.3+ 20 | self.seed = seed if seed is not None else 0 21 | 22 | def __iter__(self): 23 | # deterministically shuffle based on epoch 24 | if self.shuffle: 25 | assert False 26 | else: 27 | indices = torch.arange(len(self.dataset)).tolist() 28 | 29 | # add extra samples to make it evenly divisible 30 | # in case that indices is shorter than half of total_size 31 | indices = (indices * 32 | math.ceil(self.total_size / len(indices)))[:self.total_size] 33 | assert len(indices) == self.total_size 34 | 35 | # subsample 36 | per_replicas = self.total_size//self.num_replicas 37 | # indices = indices[self.rank:self.total_size:self.num_replicas] 38 | indices = indices[self.rank*per_replicas:(self.rank+1)*per_replicas] 39 | assert len(indices) == self.num_samples 40 | 41 | return iter(indices) 42 | 
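# --- Hypothetical usage sketch (not part of the original module) ---
# In this plugin the sampler is normally created through the SAMPLER registry
# (see sampler.py); `val_dataset`, `world_size` and `rank` below are
# placeholders supplied by the data-loader builder:
#
#     from projects.mmdet3d_plugin.datasets.samplers import build_sampler
#
#     sampler = build_sampler(
#         dict(type='DistributedSampler', shuffle=False, seed=0),
#         dict(dataset=val_dataset, num_replicas=world_size, rank=rank))
#
# shuffle=False matches the implementation above, which only supports the
# deterministic (non-shuffled) iteration order.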
-------------------------------------------------------------------------------- /docs/train_eval.md: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | 3 | **Please ensure you have prepared the environment and the nuScenes dataset.** 4 | 5 | # Train and Test 6 | 7 | ## Train VAD with 8 GPUs 8 | Be careful to set the params in config files in /projects/configs/FreeAD, such as epochs 9 | ```shell 10 | cd /path/to/FreeAD 11 | conda activate freead 12 | sh train_stage_1.sh 13 | sh train_stage_2.sh 14 | ``` 15 | 16 | **NOTE**: We release two types of training configs: the end-to-end configs and the two-stage (stage-1: Perception & Prediction; stage-2: Planning) configs. They should produce similar results. The two-stage configs are recommended because you can just train the stage-1 model once and use it as a pre-train model for stage-2. 17 | 18 | ## Eval VAD with 1 GPU 19 | ```shell 20 | cd /path/to/FreeAD 21 | conda activate freead 22 | sh test.sh 23 | ``` 24 | 25 | **NOTE**: Using distributed mode (multi GPUs) for evaluation will lead to inaccurate results, so make sure to use non-distributed mode (1 GPU) for evaluation. 26 | 27 | ## Reproduce results with pre-trained weights 28 | If you want to reproduce results with pre-trained weights, please change the `img_norm_cfg` setting in your config file to following: 29 | 30 | ``` 31 | img_norm_cfg = dict( 32 | mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) 33 | ``` 34 | 35 | this is the original setting when we train the model, but we have update it in the recent commit according to this [issue](https://github.com/hustvl/VAD/issues/9)'s advice. If you use the new `img_norm_cfg` config, you will get wrong metric results and visualizations. -------------------------------------------------------------------------------- /tools/misc/visualize_results.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | import argparse 3 | import mmcv 4 | from mmcv import Config 5 | 6 | from mmdet3d.datasets import build_dataset 7 | 8 | 9 | def parse_args(): 10 | parser = argparse.ArgumentParser( 11 | description='MMDet3D visualize the results') 12 | parser.add_argument('config', help='test config file path') 13 | parser.add_argument('--result', help='results file in pickle format') 14 | parser.add_argument( 15 | '--show-dir', help='directory where visualize results will be saved') 16 | args = parser.parse_args() 17 | 18 | return args 19 | 20 | 21 | def main(): 22 | args = parse_args() 23 | 24 | if args.result is not None and \ 25 | not args.result.endswith(('.pkl', '.pickle')): 26 | raise ValueError('The results file must be a pkl file.') 27 | 28 | cfg = Config.fromfile(args.config) 29 | cfg.data.test.test_mode = True 30 | 31 | # build the dataset 32 | dataset = build_dataset(cfg.data.test) 33 | results = mmcv.load(args.result) 34 | 35 | if getattr(dataset, 'show', None) is not None: 36 | # data loading pipeline for showing 37 | eval_pipeline = cfg.get('eval_pipeline', {}) 38 | if eval_pipeline: 39 | dataset.show(results, args.show_dir, pipeline=eval_pipeline) 40 | else: 41 | dataset.show(results, args.show_dir) # use default pipeline 42 | else: 43 | raise NotImplementedError( 44 | 'Show is not implemented for dataset {}!'.format( 45 | type(dataset).__name__)) 46 | 47 | 48 | if __name__ == '__main__': 49 | main() 50 | -------------------------------------------------------------------------------- /FreeWorld/tools/map2img.py: -------------------------------------------------------------------------------- 1 | # Init nuScenes. Requires the dataset to be stored on disk. 2 | from nuscenes.nuscenes import NuScenes 3 | from FreeWorld.map_expansion.map_api import FreeWorldMap # export PYTHONPATH=$PYTHONPATH:$(pwd) 4 | 5 | import matplotlib.pyplot as plt 6 | import tqdm 7 | import numpy as np 8 | 9 | from nuscenes.map_expansion import arcline_path_utils 10 | from nuscenes.map_expansion.bitmap import BitMap 11 | 12 | FreeWorld_maps = { 13 | #'office_issac': (30, 87.0), # 名称 'office_issac' 对应的地图尺寸 (width, height) 14 | 15 | 'full_warehouse': (36.910000000000004, 74.99), 16 | 'AIR_F1': (93.45, 47.300000000000004), 17 | 'AIR_F11': (52.800000000000004, 58.400000000000006), 18 | 'AIR_B1': (33.35, 48.2), 19 | 'AIR_G': (34.6, 32.15) 20 | } 21 | 22 | data_root = 'data/FreeWorld' 23 | map_name = "AIR_B1" 24 | render_out_path = "map2img" + "_" + map_name + ".png" 25 | nusc = NuScenes(dataroot=data_root, version='v1.0-trainval', verbose=False) 26 | 27 | nusc_map = FreeWorldMap(dataroot=data_root, map_name=map_name, map_dim=FreeWorld_maps[map_name]) 28 | 29 | # render the map on front camera image. 30 | sample_token = nusc.sample[9]['token'] 31 | layer_names = ['road_segment', 'lane', 'ped_crossing', 'walkway', 'stop_line', 'carpark_area'] # 只能渲染polygon 32 | camera_channel = 'CAM_FRONT' 33 | nusc_map.render_map_in_image(nusc, sample_token, layer_names=layer_names, camera_channel=camera_channel, out_path=render_out_path) 34 | 35 | 36 | # Render ego poses. 
37 | # nusc_map_bos = NuScenesMap(dataroot='/Users/lau/data_sets/nuscenes', map_name='boston-seaport') 38 | # ego_poses = nusc_map_bos.render_egoposes_on_fancy_map(nusc, scene_tokens=[nusc.scene[1]['token']], verbose=False) -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/VAD/utils/map_utils.py: -------------------------------------------------------------------------------- 1 | from mmdet.core.bbox.transforms import bbox_xyxy_to_cxcywh, bbox_cxcywh_to_xyxy 2 | 3 | def normalize_2d_bbox(bboxes, pc_range): 4 | 5 | patch_h = pc_range[4]-pc_range[1] 6 | patch_w = pc_range[3]-pc_range[0] 7 | cxcywh_bboxes = bbox_xyxy_to_cxcywh(bboxes) 8 | cxcywh_bboxes[...,0:1] = cxcywh_bboxes[..., 0:1] - pc_range[0] 9 | cxcywh_bboxes[...,1:2] = cxcywh_bboxes[...,1:2] - pc_range[1] 10 | factor = bboxes.new_tensor([patch_w, patch_h,patch_w,patch_h]) 11 | 12 | normalized_bboxes = cxcywh_bboxes / factor 13 | return normalized_bboxes 14 | 15 | def normalize_2d_pts(pts, pc_range): 16 | patch_h = pc_range[4]-pc_range[1] 17 | patch_w = pc_range[3]-pc_range[0] 18 | new_pts = pts.clone() 19 | new_pts[...,0:1] = pts[..., 0:1] - pc_range[0] 20 | new_pts[...,1:2] = pts[...,1:2] - pc_range[1] 21 | factor = pts.new_tensor([patch_w, patch_h]) 22 | normalized_pts = new_pts / factor 23 | return normalized_pts 24 | 25 | def denormalize_2d_bbox(bboxes, pc_range): 26 | 27 | bboxes = bbox_cxcywh_to_xyxy(bboxes) 28 | bboxes[..., 0::2] = (bboxes[..., 0::2]*(pc_range[3] - 29 | pc_range[0]) + pc_range[0]) 30 | bboxes[..., 1::2] = (bboxes[..., 1::2]*(pc_range[4] - 31 | pc_range[1]) + pc_range[1]) 32 | 33 | return bboxes 34 | 35 | def denormalize_2d_pts(pts, pc_range): 36 | new_pts = pts.clone() 37 | new_pts[...,0:1] = (pts[..., 0:1]*(pc_range[3] - 38 | pc_range[0]) + pc_range[0]) 39 | new_pts[...,1:2] = (pts[...,1:2]*(pc_range[4] - 40 | pc_range[1]) + pc_range[1]) 41 | return new_pts -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/util.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def normalize_bbox(bboxes, pc_range): 5 | 6 | cx = bboxes[..., 0:1] 7 | cy = bboxes[..., 1:2] 8 | cz = bboxes[..., 2:3] 9 | w = bboxes[..., 3:4].log() 10 | l = bboxes[..., 4:5].log() 11 | h = bboxes[..., 5:6].log() 12 | 13 | rot = bboxes[..., 6:7] 14 | if bboxes.size(-1) > 7: 15 | vx = bboxes[..., 7:8] 16 | vy = bboxes[..., 8:9] 17 | normalized_bboxes = torch.cat( 18 | (cx, cy, w, l, cz, h, rot.sin(), rot.cos(), vx, vy), dim=-1 19 | ) 20 | else: 21 | normalized_bboxes = torch.cat( 22 | (cx, cy, w, l, cz, h, rot.sin(), rot.cos()), dim=-1 23 | ) 24 | return normalized_bboxes 25 | 26 | def denormalize_bbox(normalized_bboxes, pc_range): 27 | # rotation 28 | rot_sine = normalized_bboxes[..., 6:7] 29 | 30 | rot_cosine = normalized_bboxes[..., 7:8] 31 | rot = torch.atan2(rot_sine, rot_cosine) 32 | 33 | # center in the bev 34 | cx = normalized_bboxes[..., 0:1] 35 | cy = normalized_bboxes[..., 1:2] 36 | cz = normalized_bboxes[..., 4:5] 37 | 38 | # size 39 | w = normalized_bboxes[..., 2:3] 40 | l = normalized_bboxes[..., 3:4] 41 | h = normalized_bboxes[..., 5:6] 42 | 43 | w = w.exp() 44 | l = l.exp() 45 | h = h.exp() 46 | if normalized_bboxes.size(-1) > 8: 47 | # velocity 48 | vx = normalized_bboxes[:, 8:9] 49 | vy = normalized_bboxes[:, 9:10] 50 | denormalized_bboxes = torch.cat([cx, cy, cz, w, l, h, rot, vx, vy], dim=-1) 51 | else: 52 | denormalized_bboxes = torch.cat([cx, cy, 
cz, w, l, h, rot], dim=-1) 53 | return denormalized_bboxes -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/cyclic_40e.py: -------------------------------------------------------------------------------- 1 | # The schedule is usually used by models trained on KITTI dataset 2 | 3 | # The learning rate set in the cyclic schedule is the initial learning rate 4 | # rather than the max learning rate. Since the target_ratio is (10, 1e-4), 5 | # the learning rate will change from 0.0018 to 0.018, then decay to 0.0018*1e-4 6 | lr = 0.0018 7 | # The optimizer follows the setting in SECOND.Pytorch, but here we use 8 | # the official AdamW optimizer implemented by PyTorch. 9 | optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01) 10 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 11 | # We use cyclic learning rate and momentum schedule following SECOND.Pytorch 12 | # https://github.com/traveller59/second.pytorch/blob/3aba19c9688274f75ebb5e576f65cfe54773c021/torchplus/train/learning_schedules_fastai.py#L69 # noqa 13 | # We implement them in mmcv, for more details, please refer to 14 | # https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/lr_updater.py#L327 # noqa 15 | # https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/momentum_updater.py#L130 # noqa 16 | lr_config = dict( 17 | policy='cyclic', 18 | target_ratio=(10, 1e-4), 19 | cyclic_times=1, 20 | step_ratio_up=0.4, 21 | ) 22 | momentum_config = dict( 23 | policy='cyclic', 24 | target_ratio=(0.85 / 0.95, 1), 25 | cyclic_times=1, 26 | step_ratio_up=0.4, 27 | ) 28 | # Although the max_epochs is 40, this schedule is usually used with 29 | # RepeatDataset with repeat ratio N, thus the actual max epoch 30 | # number could be Nx40 31 | runner = dict(type='EpochBasedRunner', max_epochs=40) 32 | -------------------------------------------------------------------------------- /FreeWorld/tools/ego2img.py: -------------------------------------------------------------------------------- 1 | from nuscenes.nuscenes import NuScenes 2 | from FreeWorld.map_expansion.map_api import FreeWorldMap # export PYTHONPATH=$PYTHONPATH:$(pwd) 3 | 4 | import matplotlib.pyplot as plt 5 | import tqdm 6 | import numpy as np 7 | 8 | from nuscenes.map_expansion import arcline_path_utils 9 | from nuscenes.map_expansion.bitmap import BitMap 10 | 11 | 12 | FreeWorld_maps = { 13 | #'office_issac': (30, 87.0), # map size (width, height) for the map named 'office_issac' 14 | 15 | 'full_warehouse': (36.910000000000004, 74.99), 16 | 'AIR_F1': (93.45, 47.300000000000004), 17 | 'AIR_F11': (52.800000000000004, 58.400000000000006), 18 | 'AIR_B1': (33.35, 48.2), 19 | 'AIR_G': (34.6, 32.15) 20 | } 21 | 22 | data_root = '/home/tsinghuaair/pengyh/FreeAD/my_project/FreeAD/data/FreeWorld' 23 | map_name = "AIR_F11" 24 | render_out_path = "ego2img" + "_" + map_name + ".png" 25 | nusc = NuScenes(dataroot=data_root, version='v1.0-trainval', verbose=False) 26 | nusc_map_bos = FreeWorldMap(dataroot=data_root, map_name=map_name, map_dim=FreeWorld_maps[map_name]) 27 | 28 | scene_tokens = [ 29 | scene['token'] for scene in nusc.scene 30 | if nusc.get('log', scene['log_token'])['location'] == map_name 31 | ] 32 | 33 | input_scene_tokens = scene_tokens 34 | # input_scene_tokens = scene_tokens[0] # only vis one scene 35 | 36 | if not input_scene_tokens: 37 | print(f"No scenes found for location: {map_name}") 38 | else: 39 |
print(f"Found {len(scene_tokens)} scenes for location: {map_name}") 40 | 41 | # 渲染 ego 位置并保存图像 42 | ego_poses = nusc_map_bos.render_egoposes_on_fancy_map( 43 | nusc, 44 | scene_tokens=input_scene_tokens, 45 | verbose=False, 46 | out_path=render_out_path, 47 | render_egoposes=False, 48 | render_egoposes_range=False, 49 | render_legend=False 50 | ) 51 | -------------------------------------------------------------------------------- /docs/install.md: -------------------------------------------------------------------------------- 1 | # Step-by-step installation instructions 2 | 3 | Following https://mmdetection3d.readthedocs.io/en/latest/getting_started.html#installation. 4 | 5 | Detailed package versions can be found in [requirements.txt](../requirements.txt). 6 | 7 | 8 | 9 | **a. Create a conda virtual environment and activate it.** 10 | ```shell 11 | conda create -n freead python=3.8 -y 12 | conda activate freead 13 | ``` 14 | 15 | **b. Install PyTorch and torchvision following the [official instructions](https://pytorch.org/).** 16 | ```shell 17 | pip install torch==1.9.1+cu111 torchvision==0.10.1+cu111 torchaudio==0.9.1 -f https://download.pytorch.org/whl/torch_stable.html 18 | # Recommended torch>=1.9 19 | ``` 20 | 21 | **c. Install gcc>=5 in conda env (optional).** 22 | ```shell 23 | conda install -c omgarcia gcc-5 # gcc-6.2 24 | ``` 25 | 26 | **c. Install mmcv-full.** 27 | ```shell 28 | pip install mmcv-full==1.4.0 29 | # pip install mmcv-full==1.4.0 -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html 30 | ``` 31 | 32 | **d. Install mmdet and mmseg.** 33 | ```shell 34 | pip install mmdet==2.14.0 35 | pip install mmsegmentation==0.14.1 36 | ``` 37 | 38 | **e. Install timm.** 39 | ```shell 40 | pip install timm 41 | ``` 42 | 43 | **f. Install mmdet3d.** 44 | ```shell 45 | conda activate freead 46 | git clone https://github.com/open-mmlab/mmdetection3d.git 47 | cd /path/to/mmdetection3d 48 | git checkout -f v0.17.1 49 | python setup.py develop 50 | ``` 51 | 52 | **g. Install nuscenes-devkit.** 53 | ```shell 54 | pip install nuscenes-devkit==1.1.9 55 | ``` 56 | 57 | **h. Clone freead.** 58 | ```shell 59 | git clone https://github.com/hustvl/freead.git 60 | ``` 61 | 62 | **i. 
Prepare pretrained models.** 63 | ```shell 64 | cd /path/to/freead 65 | mkdir ckpts 66 | cd ckpts 67 | wget https://download.pytorch.org/models/resnet50-19c8e357.pth 68 | ``` 69 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/coco_instance.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CocoDataset' 2 | data_root = 'data/coco/' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 8 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 9 | dict(type='RandomFlip', flip_ratio=0.5), 10 | dict(type='Normalize', **img_norm_cfg), 11 | dict(type='Pad', size_divisor=32), 12 | dict(type='DefaultFormatBundle'), 13 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 14 | ] 15 | test_pipeline = [ 16 | dict(type='LoadImageFromFile'), 17 | dict( 18 | type='MultiScaleFlipAug', 19 | img_scale=(1333, 800), 20 | flip=False, 21 | transforms=[ 22 | dict(type='Resize', keep_ratio=True), 23 | dict(type='RandomFlip'), 24 | dict(type='Normalize', **img_norm_cfg), 25 | dict(type='Pad', size_divisor=32), 26 | dict(type='ImageToTensor', keys=['img']), 27 | dict(type='Collect', keys=['img']), 28 | ]) 29 | ] 30 | data = dict( 31 | samples_per_gpu=2, 32 | workers_per_gpu=2, 33 | train=dict( 34 | type=dataset_type, 35 | ann_file=data_root + 'annotations/instances_train2017.json', 36 | img_prefix=data_root + 'train2017/', 37 | pipeline=train_pipeline), 38 | val=dict( 39 | type=dataset_type, 40 | ann_file=data_root + 'annotations/instances_val2017.json', 41 | img_prefix=data_root + 'val2017/', 42 | pipeline=test_pipeline), 43 | test=dict( 44 | type=dataset_type, 45 | ann_file=data_root + 'annotations/instances_val2017.json', 46 | img_prefix=data_root + 'val2017/', 47 | pipeline=test_pipeline)) 48 | evaluation = dict(metric=['bbox', 'segm']) 49 | -------------------------------------------------------------------------------- /docs/prepare_dataset.md: -------------------------------------------------------------------------------- 1 | ## FreeWorld 2 | Download the FreeWorld v1.0 full dataset and CAN bus expansion data [HERE](https://huggingface.co/datasets/doraemon6666/FreeWorld). FreeWorld requires no additional format conversion; you can use it with the FreeWorld and nuScenes APIs right away. 3 | 4 | ## NuScenes 5 | Download the nuScenes V1.0 full dataset and CAN bus expansion data [HERE](https://www.nuscenes.org/download). Prepare the nuScenes data as follows. 6 | 7 | **Download CAN bus expansion** 8 | ``` 9 | # download 'can_bus.zip' 10 | unzip can_bus.zip 11 | # move can_bus to data dir 12 | ``` 13 | 14 | **Prepare nuScenes data** 15 | 16 | *We generate custom annotation files which are different from mmdet3d's.* 17 | 18 | Directly download the [train](https://drive.google.com/file/d/1OVd6Rw2wYjT_ylihCixzF6_olrAQsctx/view?usp=sharing) file and [val](https://drive.google.com/file/d/16DZeA-iepMCaeyi57XSXL3vYyhrOQI9S/view?usp=sharing) file from Google Drive, or generate them yourself: 19 | ``` 20 | python tools/data_converter/vad_nuscenes_converter.py nuscenes --root-path ./data/nuscenes --out-dir ./data/nuscenes --extra-tag vad_nuscenes --version v1.0 --canbus ./data 21 | ``` 22 | 23 | Running the above command will generate `vad_nuscenes_infos_temporal_{train,val}.pkl`.
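A quick way to sanity-check the generated files is to load one and count the samples. This is only a minimal sketch: the `infos`/`metadata` keys are assumed from the usual mmdet3d info-file layout, so adjust them if your converter stores things differently.

```python
import pickle

# Load the temporal info file produced by the converter above.
with open('./data/nuscenes/vad_nuscenes_infos_temporal_train.pkl', 'rb') as f:
    data = pickle.load(f)

# The file is assumed to hold a list of per-sample dicts under 'infos'
# plus a small 'metadata' dict describing the dataset version.
print(data.get('metadata'))
print('number of training samples:', len(data['infos']))
```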
24 | 25 | **Folder structure** 26 | ``` 27 | FreeAD 28 | ├── projects/ 29 | ├── tools/ 30 | ├── configs/ 31 | ├── ckpts/ 32 | │ ├── resnet50-19c8e357.pth 33 | ├── data/ 34 | │ ├── can_bus/ 35 | │ ├── nuscenes/ 36 | │ │ ├── maps/ 37 | │ │ ├── samples/ 38 | │ │ ├── sweeps/ 39 | │ │ ├── v1.0-test/ 40 | │ │ ├── v1.0-trainval/ 41 | │ │ ├── vad_nuscenes_infos_temporal_train.pkl 42 | │ │ ├── vad_nuscenes_infos_temporal_val.pkl 43 | ``` 44 | 45 | 46 | ## Prepare dataset pkl manually 47 | Be careful to set the parameters in prepare_data.sh before running it. 48 | ```shell 49 | cd /path/to/FreeAD/ 50 | conda activate freead 51 | sh prepare_data.sh 52 | ``` -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/VAD/apis/train.py: -------------------------------------------------------------------------------- 1 | from .mmdet_train import custom_train_detector 2 | from mmseg.apis import train_segmentor 3 | from mmdet.apis import train_detector 4 | 5 | def custom_train_model(model, 6 | dataset, 7 | cfg, 8 | distributed=False, 9 | validate=False, 10 | timestamp=None, 11 | eval_model=None, 12 | meta=None): 13 | """A function wrapper for launching model training according to cfg. 14 | 15 | Because we need different eval_hook in runner. Should be deprecated in the 16 | future. 17 | """ 18 | if cfg.model.type in ['EncoderDecoder3D']: 19 | assert False 20 | else: 21 | custom_train_detector( 22 | model, 23 | dataset, 24 | cfg, 25 | distributed=distributed, 26 | validate=validate, 27 | timestamp=timestamp, 28 | eval_model=eval_model, 29 | meta=meta) 30 | 31 | 32 | def train_model(model, 33 | dataset, 34 | cfg, 35 | distributed=False, 36 | validate=False, 37 | timestamp=None, 38 | meta=None): 39 | """A function wrapper for launching model training according to cfg. 40 | 41 | Because we need different eval_hook in runner. Should be deprecated in the 42 | future.
43 | """ 44 | if cfg.model.type in ['EncoderDecoder3D']: 45 | train_segmentor( 46 | model, 47 | dataset, 48 | cfg, 49 | distributed=distributed, 50 | validate=validate, 51 | timestamp=timestamp, 52 | meta=meta) 53 | else: 54 | train_detector( 55 | model, 56 | dataset, 57 | cfg, 58 | distributed=distributed, 59 | validate=validate, 60 | timestamp=timestamp, 61 | meta=meta) 62 | -------------------------------------------------------------------------------- /FreeWorld/tools/split_dataset.py: -------------------------------------------------------------------------------- 1 | import json 2 | import random 3 | 4 | def read_names_from_json(file_path): 5 | # Open and read the JSON file 6 | with open(file_path, 'r') as file: 7 | data = json.load(file) # load the JSON data 8 | 9 | # Extract all 'name' fields 10 | names = [entry['name'] for entry in data] 11 | 12 | return names 13 | 14 | def split_dataset(names, train_ratio=0.7, val_ratio=0.3, test_ratio=0): 15 | # Make sure the ratios sum to 1 16 | assert train_ratio + val_ratio + test_ratio == 1, "Train, val and test ratios must sum to 1" 17 | 18 | # Shuffle the dataset 19 | random.shuffle(names) 20 | 21 | # Compute the split indices 22 | total_count = len(names) 23 | train_end = int(train_ratio * total_count) 24 | val_end = train_end + int(val_ratio * total_count) 25 | 26 | # Split the dataset 27 | train_set = names[:train_end] 28 | val_set = names[train_end:val_end] 29 | test_set = names[val_end:] 30 | 31 | return train_set, val_set, test_set 32 | 33 | def map_names_to_split(names, train_set, val_set, test_set): 34 | # Create the split mapping for each name 35 | name_to_split = {} 36 | for name in names: 37 | if name in train_set: 38 | name_to_split[name] = 'train' 39 | elif name in val_set: 40 | name_to_split[name] = 'val' 41 | else: 42 | name_to_split[name] = 'test' 43 | 44 | return name_to_split 45 | 46 | def save_mapping_to_file(mapping, file_path): 47 | # Save the mapping as a JSON file 48 | with open(file_path, 'w') as file: 49 | json.dump(mapping, file, indent=4) 50 | 51 | data_root = 'data/FreeWorld/' 52 | file_path = data_root + 'v1.0-trainval/' + 'scene.json' # read all scenes 53 | names = read_names_from_json(file_path) 54 | 55 | # Split the dataset 56 | train_set, val_set, test_set = split_dataset(names) 57 | 58 | # Map each scene name to its split 59 | name_to_split = map_names_to_split(names, train_set, val_set, test_set) 60 | 61 | # Save the mapping to a file 62 | mapping_file_path = data_root + 'FreeWorldDataSetSplitMapping.json' # path of the mapping file 63 | save_mapping_to_file(name_to_split, mapping_file_path) 64 | 65 | print(f"Name to split mapping has been saved to {mapping_file_path}") 66 | 67 | # Print the first few entries of the mapping 68 | print("Name to Split Mapping (first 5):", {k: name_to_split[k] for k in list(name_to_split)[:5]}) 69 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/paconv_ssg.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='EncoderDecoder3D', 4 | backbone=dict( 5 | type='PointNet2SASSG', 6 | in_channels=9, # [xyz, rgb, normalized_xyz] 7 | num_points=(1024, 256, 64, 16), 8 | radius=(None, None, None, None), # use kNN instead of ball query 9 | num_samples=(32, 32, 32, 32), 10 | sa_channels=((32, 32, 64), (64, 64, 128), (128, 128, 256), (256, 256, 11 | 512)), 12 | fp_channels=(), 13 | norm_cfg=dict(type='BN2d', momentum=0.1), 14 | sa_cfg=dict( 15 | type='PAConvSAModule', 16 | pool_mod='max', 17 | use_xyz=True, 18 | normalize_xyz=False, 19 | paconv_num_kernels=[16, 16, 16], 20 | paconv_kernel_input='w_neighbor', 21 | scorenet_input='w_neighbor_dist', 22 | scorenet_cfg=dict( 23 |
mlp_channels=[16, 16, 16], 24 | score_norm='softmax', 25 | temp_factor=1.0, 26 | last_bn=False))), 27 | decode_head=dict( 28 | type='PAConvHead', 29 | # PAConv model's decoder takes skip connections from beckbone 30 | # different from PointNet++, it also concats input features in the last 31 | # level of decoder, leading to `128 + 6` as the channel number 32 | fp_channels=((768, 256, 256), (384, 256, 256), (320, 256, 128), 33 | (128 + 6, 128, 128, 128)), 34 | channels=128, 35 | dropout_ratio=0.5, 36 | conv_cfg=dict(type='Conv1d'), 37 | norm_cfg=dict(type='BN1d'), 38 | act_cfg=dict(type='ReLU'), 39 | loss_decode=dict( 40 | type='CrossEntropyLoss', 41 | use_sigmoid=False, 42 | class_weight=None, # should be modified with dataset 43 | loss_weight=1.0)), 44 | # correlation loss to regularize PAConv's kernel weights 45 | loss_regularization=dict( 46 | type='PAConvRegularizationLoss', reduction='sum', loss_weight=10.0), 47 | # model training and testing settings 48 | train_cfg=dict(), 49 | test_cfg=dict(mode='slide')) 50 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/apis/train.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------------- 2 | # Copyright (c) OpenMMLab. All rights reserved. 3 | # --------------------------------------------- 4 | # Modified by Zhiqi Li 5 | # --------------------------------------------- 6 | 7 | from .mmdet_train import custom_train_detector 8 | from mmseg.apis import train_segmentor 9 | from mmdet.apis import train_detector 10 | 11 | def custom_train_model(model, 12 | dataset, 13 | cfg, 14 | distributed=False, 15 | validate=False, 16 | timestamp=None, 17 | eval_model=None, 18 | meta=None): 19 | """A function wrapper for launching model training according to cfg. 20 | 21 | Because we need different eval_hook in runner. Should be deprecated in the 22 | future. 23 | """ 24 | if cfg.model.type in ['EncoderDecoder3D']: 25 | assert False 26 | else: 27 | custom_train_detector( 28 | model, 29 | dataset, 30 | cfg, 31 | distributed=distributed, 32 | validate=validate, 33 | timestamp=timestamp, 34 | eval_model=eval_model, 35 | meta=meta) 36 | 37 | 38 | def train_model(model, 39 | dataset, 40 | cfg, 41 | distributed=False, 42 | validate=False, 43 | timestamp=None, 44 | meta=None): 45 | """A function wrapper for launching model training according to cfg. 46 | 47 | Because we need different eval_hook in runner. Should be deprecated in the 48 | future. 
49 | """ 50 | if cfg.model.type in ['EncoderDecoder3D']: 51 | train_segmentor( 52 | model, 53 | dataset, 54 | cfg, 55 | distributed=distributed, 56 | validate=validate, 57 | timestamp=timestamp, 58 | meta=meta) 59 | else: 60 | train_detector( 61 | model, 62 | dataset, 63 | cfg, 64 | distributed=distributed, 65 | validate=validate, 66 | timestamp=timestamp, 67 | meta=meta) 68 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/nuim_instance.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CocoDataset' 2 | data_root = 'data/nuimages/' 3 | class_names = [ 4 | 'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle', 5 | 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier' 6 | ] 7 | img_norm_cfg = dict( 8 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 9 | train_pipeline = [ 10 | dict(type='LoadImageFromFile'), 11 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 12 | dict( 13 | type='Resize', 14 | img_scale=[(1280, 720), (1920, 1080)], 15 | multiscale_mode='range', 16 | keep_ratio=True), 17 | dict(type='RandomFlip', flip_ratio=0.5), 18 | dict(type='Normalize', **img_norm_cfg), 19 | dict(type='Pad', size_divisor=32), 20 | dict(type='DefaultFormatBundle'), 21 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 22 | ] 23 | test_pipeline = [ 24 | dict(type='LoadImageFromFile'), 25 | dict( 26 | type='MultiScaleFlipAug', 27 | img_scale=(1600, 900), 28 | flip=False, 29 | transforms=[ 30 | dict(type='Resize', keep_ratio=True), 31 | dict(type='RandomFlip'), 32 | dict(type='Normalize', **img_norm_cfg), 33 | dict(type='Pad', size_divisor=32), 34 | dict(type='ImageToTensor', keys=['img']), 35 | dict(type='Collect', keys=['img']), 36 | ]) 37 | ] 38 | data = dict( 39 | samples_per_gpu=2, 40 | workers_per_gpu=2, 41 | train=dict( 42 | type=dataset_type, 43 | ann_file=data_root + 'annotations/nuimages_v1.0-train.json', 44 | img_prefix=data_root, 45 | classes=class_names, 46 | pipeline=train_pipeline), 47 | val=dict( 48 | type=dataset_type, 49 | ann_file=data_root + 'annotations/nuimages_v1.0-val.json', 50 | img_prefix=data_root, 51 | classes=class_names, 52 | pipeline=test_pipeline), 53 | test=dict( 54 | type=dataset_type, 55 | ann_file=data_root + 'annotations/nuimages_v1.0-val.json', 56 | img_prefix=data_root, 57 | classes=class_names, 58 | pipeline=test_pipeline)) 59 | evaluation = dict(metric=['bbox', 'segm']) 60 | -------------------------------------------------------------------------------- /tools/misc/fuse_conv_bn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | import argparse 3 | import torch 4 | from mmcv.runner import save_checkpoint 5 | from torch import nn as nn 6 | 7 | from mmdet.apis import init_model 8 | 9 | 10 | def fuse_conv_bn(conv, bn): 11 | """During inference, the functionary of batch norm layers is turned off but 12 | only the mean and var alone channels are used, which exposes the chance to 13 | fuse it with the preceding conv layers to save computations and simplify 14 | network structures.""" 15 | conv_w = conv.weight 16 | conv_b = conv.bias if conv.bias is not None else torch.zeros_like( 17 | bn.running_mean) 18 | 19 | factor = bn.weight / torch.sqrt(bn.running_var + bn.eps) 20 | conv.weight = nn.Parameter(conv_w * 21 | factor.reshape([conv.out_channels, 1, 1, 1])) 22 | conv.bias = nn.Parameter((conv_b - bn.running_mean) * factor + bn.bias) 23 | return conv 24 | 25 | 26 | def fuse_module(m): 27 | last_conv = None 28 | last_conv_name = None 29 | 30 | for name, child in m.named_children(): 31 | if isinstance(child, (nn.BatchNorm2d, nn.SyncBatchNorm)): 32 | if last_conv is None: # only fuse BN that is after Conv 33 | continue 34 | fused_conv = fuse_conv_bn(last_conv, child) 35 | m._modules[last_conv_name] = fused_conv 36 | # To reduce changes, set BN as Identity instead of deleting it. 37 | m._modules[name] = nn.Identity() 38 | last_conv = None 39 | elif isinstance(child, nn.Conv2d): 40 | last_conv = child 41 | last_conv_name = name 42 | else: 43 | fuse_module(child) 44 | return m 45 | 46 | 47 | def parse_args(): 48 | parser = argparse.ArgumentParser( 49 | description='fuse Conv and BN layers in a model') 50 | parser.add_argument('config', help='config file path') 51 | parser.add_argument('checkpoint', help='checkpoint file path') 52 | parser.add_argument('out', help='output path of the converted model') 53 | args = parser.parse_args() 54 | return args 55 | 56 | 57 | def main(): 58 | args = parse_args() 59 | # build the model from a config file and a checkpoint file 60 | model = init_model(args.config, args.checkpoint) 61 | # fuse conv and bn layers of the model 62 | fused_model = fuse_module(model) 63 | save_checkpoint(fused_model, args.out) 64 | 65 | 66 | if __name__ == '__main__': 67 | main() 68 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/evaluation/metric_motion.py: -------------------------------------------------------------------------------- 1 | # 2 | 3 | """This module evaluates the forecasted trajectories against the ground truth.""" 4 | 5 | import math 6 | from typing import Dict, List, Optional 7 | 8 | import numpy as np 9 | import torch 10 | 11 | LOW_PROB_THRESHOLD_FOR_METRICS = 0.05 12 | 13 | 14 | def get_ade(forecasted_trajectory: torch.Tensor, gt_trajectory: torch.Tensor) -> float: 15 | """Compute Average Displacement Error. 16 | Args: 17 | forecasted_trajectory: Predicted trajectory with shape [fut_ts, 2] 18 | gt_trajectory: Ground truth trajectory with shape [fut_ts, 2] 19 | Returns: 20 | ade: Average Displacement Error 21 | """ 22 | pred_len = forecasted_trajectory.shape[0] 23 | ade = float( 24 | sum( 25 | torch.sqrt( 26 | (forecasted_trajectory[i, 0] - gt_trajectory[i, 0]) ** 2 27 | + (forecasted_trajectory[i, 1] - gt_trajectory[i, 1]) ** 2 28 | ) 29 | for i in range(pred_len) 30 | ) 31 | / pred_len 32 | ) 33 | return ade 34 | 35 | def get_best_preds( 36 | forecasted_trajectory: torch.Tensor, 37 | gt_trajectory: torch.Tensor 38 | ) -> float: 39 | """Compute min Average Displacement Error. 
40 | Args: 41 | forecasted_trajectory: Predicted trajectory with shape [k, fut_ts, 2] 42 | gt_trajectory: Ground truth trajectory with shape [fut_ts, 2] 43 | gt_fut_masks: Ground truth traj mask with shape (fut_ts) 44 | Returns: 45 | best_forecasted_trajectory: Predicted trajectory with shape [fut_ts, 2] 46 | """ 47 | 48 | # [k, fut_ts] 49 | dist = torch.linalg.norm(gt_trajectory[None] - forecasted_trajectory, dim=-1) 50 | dist = dist[..., -1] 51 | dist[torch.isnan(dist)] = 0 52 | min_mode_idx = torch.argmin(dist, dim=-1) 53 | 54 | return forecasted_trajectory[min_mode_idx] 55 | 56 | def get_fde(forecasted_trajectory: torch.Tensor, gt_trajectory: torch.Tensor) -> float: 57 | """Compute Final Displacement Error. 58 | Args: 59 | forecasted_trajectory: Predicted trajectory with shape [fut_ts, 2] 60 | gt_trajectory: Ground truth trajectory with shape [fut_ts, 2] 61 | Returns: 62 | fde: Final Displacement Error 63 | """ 64 | fde = float( 65 | torch.sqrt( 66 | (forecasted_trajectory[-1, 0] - gt_trajectory[-1, 0]) ** 2 67 | + (forecasted_trajectory[-1, 1] - gt_trajectory[-1, 1]) ** 2 68 | ) 69 | ) 70 | return fde 71 | -------------------------------------------------------------------------------- /FreeWorld/tools/fix_isaac_annotation_coord.py: -------------------------------------------------------------------------------- 1 | import json 2 | import numpy as np 3 | from pyquaternion import Quaternion 4 | 5 | def rotate_quaternion_around_z(quat, angle_degrees): 6 | """ 7 | Rotate a quaternion around the Z axis by the given angle. 8 | 9 | Args: 10 | quat: original quaternion in [w, x, y, z] format 11 | angle_degrees: rotation angle in degrees; positive means counterclockwise, negative means clockwise 12 | 13 | Returns: 14 | new_quat: rotated quaternion in [w, x, y, z] format 15 | """ 16 | # Build the original quaternion 17 | original_quat = Quaternion(quat[0], quat[1], quat[2], quat[3]) 18 | 19 | # Build the rotation quaternion about the Z axis 20 | angle_rad = np.radians(angle_degrees) 21 | z_rotation = Quaternion(axis=[0, 0, 1], angle=angle_rad) 22 | 23 | # Apply the rotation 24 | new_quat = z_rotation * original_quat 25 | 26 | return [new_quat.w, new_quat.x, new_quat.y, new_quat.z] 27 | 28 | def process_json_file(json_path, z_rotate_angle, is_xyzw_to_wxyz): 29 | """ 30 | Read a JSON file, rotate the 'rotation' quaternion of every object around the Z axis, and save the result. 31 | 32 | Args: 33 | json_path: path to the JSON file (z_rotate_angle is the Z rotation in degrees; is_xyzw_to_wxyz converts [x, y, z, w] input to [w, x, y, z] first) 34 | """ 35 | # Read the JSON file 36 | with open(json_path, 'r') as f: 37 | data = json.load(f) 38 | 39 | # Make sure the data is a list 40 | if not isinstance(data, list): 41 | print("JSON data is not a list") 42 | return 43 | 44 | # Rotation angle 45 | angle = z_rotate_angle 46 | 47 | # Iterate over every object and rotate its 'rotation' 48 | for item in data: 49 | if 'rotation' in item and isinstance(item['rotation'], list) and len(item['rotation']) == 4: 50 | if is_xyzw_to_wxyz == True: 51 | # xyzw -> wxyz 52 | item['rotation'] = [ 53 | item['rotation'][3], # w 54 | item['rotation'][0], # x 55 | item['rotation'][1], # y 56 | item['rotation'][2] # z 57 | ] 58 | 59 | item['rotation'] = rotate_quaternion_around_z(item['rotation'], angle) 60 | 61 | # Save the modified JSON data 62 | output_path = json_path.replace('.json', '_rotated.json') 63 | with open(output_path, 'w') as f: 64 | json.dump(data, f, indent=2) 65 | 66 | print(f"Done. Results saved to {output_path}") 67 | 68 | # Usage example 69 | if __name__ == "__main__": 70 | dataset_root = "/home/tsinghuaair/pengyh/FreeAD/my_project/FreeAD/data/NuscenesData" + "/v1.0-trainval" 71 | 72 | ego_pose_json_path = dataset_root + "/ego_pose.json" # replace with your JSON file path 73 | sample_annotation_json_path = dataset_root + "/sample_annotation.json" 74 | 75 | transfer_angle = -90 76 | process_json_file(ego_pose_json_path, transfer_angle, False) # no xyzw --> wxyz conversion needed 77 | process_json_file(sample_annotation_json_path, transfer_angle, False)
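    # Illustrative sanity check of the rotation helper (a minimal sketch, not required by
    # the conversion itself): rotating the identity quaternion [1, 0, 0, 0] by -90 degrees
    # about Z should give roughly [0.7071, 0, 0, -0.7071] in [w, x, y, z] order.
    identity_check = rotate_quaternion_around_z([1.0, 0.0, 0.0, 0.0], transfer_angle)
    print("identity quaternion rotated by", transfer_angle, "deg:", identity_check)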
-------------------------------------------------------------------------------- /projects/configs/_base_/models/fcos3d.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='FCOSMono3D', 3 | pretrained='open-mmlab://detectron2/resnet101_caffe', 4 | backbone=dict( 5 | type='ResNet', 6 | depth=101, 7 | num_stages=4, 8 | out_indices=(0, 1, 2, 3), 9 | frozen_stages=1, 10 | norm_cfg=dict(type='BN', requires_grad=False), 11 | norm_eval=True, 12 | style='caffe'), 13 | neck=dict( 14 | type='FPN', 15 | in_channels=[256, 512, 1024, 2048], 16 | out_channels=256, 17 | start_level=1, 18 | add_extra_convs='on_output', 19 | num_outs=5, 20 | relu_before_extra_convs=True), 21 | bbox_head=dict( 22 | type='FCOSMono3DHead', 23 | num_classes=10, 24 | in_channels=256, 25 | stacked_convs=2, 26 | feat_channels=256, 27 | use_direction_classifier=True, 28 | diff_rad_by_sin=True, 29 | pred_attrs=True, 30 | pred_velo=True, 31 | dir_offset=0.7854, # pi/4 32 | strides=[8, 16, 32, 64, 128], 33 | group_reg_dims=(2, 1, 3, 1, 2), # offset, depth, size, rot, velo 34 | cls_branch=(256, ), 35 | reg_branch=( 36 | (256, ), # offset 37 | (256, ), # depth 38 | (256, ), # size 39 | (256, ), # rot 40 | () # velo 41 | ), 42 | dir_branch=(256, ), 43 | attr_branch=(256, ), 44 | loss_cls=dict( 45 | type='FocalLoss', 46 | use_sigmoid=True, 47 | gamma=2.0, 48 | alpha=0.25, 49 | loss_weight=1.0), 50 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), 51 | loss_dir=dict( 52 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 53 | loss_attr=dict( 54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 55 | loss_centerness=dict( 56 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 57 | norm_on_bbox=True, 58 | centerness_on_reg=True, 59 | center_sampling=True, 60 | conv_bias=True, 61 | dcn_on_last_conv=True), 62 | train_cfg=dict( 63 | allowed_border=0, 64 | code_weight=[1.0, 1.0, 0.2, 1.0, 1.0, 1.0, 1.0, 0.05, 0.05], 65 | pos_weight=-1, 66 | debug=False), 67 | test_cfg=dict( 68 | use_rotate_nms=True, 69 | nms_across_levels=False, 70 | nms_pre=1000, 71 | nms_thr=0.8, 72 | score_thr=0.05, 73 | min_bbox_size=0, 74 | max_per_img=200)) 75 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | 132 | # other folder don't have to be submitted 133 | ckpts/ 134 | data/ 135 | path/ 136 | val/ 137 | maps/ 138 | bevformer_results.json 139 | output_results.json 140 | model_structure.txt 141 | test/ -------------------------------------------------------------------------------- /projects/configs/_base_/models/votenet.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='VoteNet', 3 | backbone=dict( 4 | type='PointNet2SASSG', 5 | in_channels=4, 6 | num_points=(2048, 1024, 512, 256), 7 | radius=(0.2, 0.4, 0.8, 1.2), 8 | num_samples=(64, 32, 16, 16), 9 | sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256), 10 | (128, 128, 256)), 11 | fp_channels=((256, 256), (256, 256)), 12 | norm_cfg=dict(type='BN2d'), 13 | sa_cfg=dict( 14 | type='PointSAModule', 15 | pool_mod='max', 16 | use_xyz=True, 17 | normalize_xyz=True)), 18 | bbox_head=dict( 19 | type='VoteHead', 20 | vote_module_cfg=dict( 21 | in_channels=256, 22 | vote_per_seed=1, 23 | gt_per_seed=3, 24 | conv_channels=(256, 256), 25 | conv_cfg=dict(type='Conv1d'), 26 | norm_cfg=dict(type='BN1d'), 27 | norm_feats=True, 28 | vote_loss=dict( 29 | type='ChamferDistance', 30 | mode='l1', 31 | reduction='none', 32 | loss_dst_weight=10.0)), 33 | vote_aggregation_cfg=dict( 34 | type='PointSAModule', 35 | num_point=256, 36 | radius=0.3, 37 | num_sample=16, 38 | mlp_channels=[256, 128, 128, 128], 39 | use_xyz=True, 40 | normalize_xyz=True), 41 | pred_layer_cfg=dict( 42 | in_channels=128, shared_conv_channels=(128, 128), bias=True), 43 | conv_cfg=dict(type='Conv1d'), 44 | norm_cfg=dict(type='BN1d'), 45 | objectness_loss=dict( 46 | type='CrossEntropyLoss', 47 | class_weight=[0.2, 0.8], 48 | 
reduction='sum', 49 | loss_weight=5.0), 50 | center_loss=dict( 51 | type='ChamferDistance', 52 | mode='l2', 53 | reduction='sum', 54 | loss_src_weight=10.0, 55 | loss_dst_weight=10.0), 56 | dir_class_loss=dict( 57 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 58 | dir_res_loss=dict( 59 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0), 60 | size_class_loss=dict( 61 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 62 | size_res_loss=dict( 63 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0 / 3.0), 64 | semantic_loss=dict( 65 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)), 66 | # model training and testing settings 67 | train_cfg=dict( 68 | pos_distance_thr=0.3, neg_distance_thr=0.6, sample_mod='vote'), 69 | test_cfg=dict( 70 | sample_mod='seed', 71 | nms_thr=0.25, 72 | score_thr=0.05, 73 | per_class_proposal=True)) 74 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/groupfree3d.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='GroupFree3DNet', 3 | backbone=dict( 4 | type='PointNet2SASSG', 5 | in_channels=3, 6 | num_points=(2048, 1024, 512, 256), 7 | radius=(0.2, 0.4, 0.8, 1.2), 8 | num_samples=(64, 32, 16, 16), 9 | sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256), 10 | (128, 128, 256)), 11 | fp_channels=((256, 256), (256, 288)), 12 | norm_cfg=dict(type='BN2d'), 13 | sa_cfg=dict( 14 | type='PointSAModule', 15 | pool_mod='max', 16 | use_xyz=True, 17 | normalize_xyz=True)), 18 | bbox_head=dict( 19 | type='GroupFree3DHead', 20 | in_channels=288, 21 | num_decoder_layers=6, 22 | num_proposal=256, 23 | transformerlayers=dict( 24 | type='BaseTransformerLayer', 25 | attn_cfgs=dict( 26 | type='GroupFree3DMHA', 27 | embed_dims=288, 28 | num_heads=8, 29 | attn_drop=0.1, 30 | dropout_layer=dict(type='Dropout', drop_prob=0.1)), 31 | ffn_cfgs=dict( 32 | embed_dims=288, 33 | feedforward_channels=2048, 34 | ffn_drop=0.1, 35 | act_cfg=dict(type='ReLU', inplace=True)), 36 | operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 'ffn', 37 | 'norm')), 38 | pred_layer_cfg=dict( 39 | in_channels=288, shared_conv_channels=(288, 288), bias=True), 40 | sampling_objectness_loss=dict( 41 | type='FocalLoss', 42 | use_sigmoid=True, 43 | gamma=2.0, 44 | alpha=0.25, 45 | loss_weight=8.0), 46 | objectness_loss=dict( 47 | type='FocalLoss', 48 | use_sigmoid=True, 49 | gamma=2.0, 50 | alpha=0.25, 51 | loss_weight=1.0), 52 | center_loss=dict( 53 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0), 54 | dir_class_loss=dict( 55 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 56 | dir_res_loss=dict( 57 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0), 58 | size_class_loss=dict( 59 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 60 | size_res_loss=dict( 61 | type='SmoothL1Loss', beta=1.0, reduction='sum', loss_weight=10.0), 62 | semantic_loss=dict( 63 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)), 64 | # model training and testing settings 65 | train_cfg=dict(sample_mod='kps'), 66 | test_cfg=dict( 67 | sample_mod='kps', 68 | nms_thr=0.25, 69 | score_thr=0.0, 70 | per_class_proposal=True, 71 | prediction_stages='last')) 72 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/position_embedding.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import 
torch.nn as nn 3 | import math 4 | 5 | class RelPositionEmbedding(nn.Module): 6 | def __init__(self, num_pos_feats=64, pos_norm=True): 7 | super().__init__() 8 | self.num_pos_feats = num_pos_feats 9 | self.fc = nn.Linear(4, self.num_pos_feats,bias=False) 10 | #nn.init.orthogonal_(self.fc.weight) 11 | #self.fc.weight.requires_grad = False 12 | self.pos_norm = pos_norm 13 | if self.pos_norm: 14 | self.norm = nn.LayerNorm(self.num_pos_feats) 15 | def forward(self, tensor): 16 | #mask = nesttensor.mask 17 | B,C,H,W = tensor.shape 18 | #print('tensor.shape', tensor.shape) 19 | y_range = (torch.arange(H) / float(H - 1)).to(tensor.device) 20 | #y_axis = torch.stack((y_range, 1-y_range),dim=1) 21 | y_axis = torch.stack((torch.cos(y_range * math.pi), torch.sin(y_range * math.pi)), dim=1) 22 | y_axis = y_axis.reshape(H, 1, 2).repeat(1, W, 1).reshape(H * W, 2) 23 | 24 | x_range = (torch.arange(W) / float(W - 1)).to(tensor.device) 25 | #x_axis =torch.stack((x_range,1-x_range),dim=1) 26 | x_axis = torch.stack((torch.cos(x_range * math.pi), torch.sin(x_range * math.pi)), dim=1) 27 | x_axis = x_axis.reshape(1, W, 2).repeat(H, 1, 1).reshape(H * W, 2) 28 | x_pos = torch.cat((y_axis, x_axis), dim=1) 29 | x_pos = self.fc(x_pos) 30 | 31 | if self.pos_norm: 32 | x_pos = self.norm(x_pos) 33 | #print('xpos,', x_pos.max(),x_pos.min()) 34 | return x_pos 35 | 36 | 37 | class SineEmbedding(nn.Module): 38 | def __init__(self, in_channels, N_freqs, logscale=True): 39 | """ 40 | Defines a function that embeds x to (x, sin(2^k x), cos(2^k x), ...) 41 | in_channels: number of input channels 42 | """ 43 | super(SineEmbedding, self).__init__() 44 | self.N_freqs = N_freqs 45 | self.in_channels = in_channels 46 | self.funcs = [torch.sin, torch.cos] 47 | self.out_channels = in_channels*(len(self.funcs)*N_freqs) 48 | 49 | if logscale: 50 | self.freq_bands = 2**torch.linspace(0, N_freqs-1, N_freqs) 51 | else: 52 | self.freq_bands = torch.linspace(1, 2**(N_freqs-1), N_freqs) 53 | 54 | def forward(self, x): 55 | """ 56 | Embeds x to (sin(2^k x), cos(2^k x), ...) 57 | Inputs: 58 | x: (B, self.in_channels) 59 | Outputs: 60 | out: (B, self.out_channels) 61 | """ 62 | out = [] 63 | for freq in self.freq_bands: 64 | for func in self.funcs: 65 | out += [func(freq*x)] 66 | 67 | return torch.cat(out, -1) 68 | 69 | 70 | # if __name__ == '__main__': 71 | # pe = Embedding(in_channels=2, N_freqs=64) 72 | # x_pe = pe(torch.randn(1, 4, 2)) 73 | # a = 0 -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/pipelines/formating.py: -------------------------------------------------------------------------------- 1 | 2 | # Copyright (c) OpenMMLab. All rights reserved. 3 | import numpy as np 4 | from mmcv.parallel import DataContainer as DC 5 | 6 | from mmdet3d.core.bbox import BaseInstance3DBoxes 7 | from mmdet3d.core.points import BasePoints 8 | from mmdet.datasets.builder import PIPELINES 9 | from mmdet.datasets.pipelines import to_tensor 10 | from mmdet3d.datasets.pipelines import DefaultFormatBundle3D 11 | 12 | @PIPELINES.register_module() 13 | class CustomDefaultFormatBundle3D(DefaultFormatBundle3D): 14 | """Default formatting bundle. 15 | It simplifies the pipeline of formatting common fields for voxels, 16 | including "proposals", "gt_bboxes", "gt_labels", "gt_masks" and 17 | "gt_semantic_seg". 18 | These fields are formatted as follows. 
19 | - img: (1)transpose, (2)to tensor, (3)to DataContainer (stack=True) 20 | - proposals: (1)to tensor, (2)to DataContainer 21 | - gt_bboxes: (1)to tensor, (2)to DataContainer 22 | - gt_bboxes_ignore: (1)to tensor, (2)to DataContainer 23 | - gt_labels: (1)to tensor, (2)to DataContainer 24 | """ 25 | def __init__(self, class_names, with_gt=True, with_label=True, with_ego=True): 26 | super(CustomDefaultFormatBundle3D, self).__init__(class_names, with_gt, with_label) 27 | self.with_ego = with_ego 28 | 29 | 30 | def __call__(self, results): 31 | """Call function to transform and format common fields in results. 32 | Args: 33 | results (dict): Result dict contains the data to convert. 34 | Returns: 35 | dict: The result dict contains the data that is formatted with 36 | default bundle. 37 | """ 38 | # Format 3D data 39 | results = super(CustomDefaultFormatBundle3D, self).__call__(results) 40 | # results['gt_map_masks'] = DC(to_tensor(results['gt_map_masks']), stack=True) 41 | if self.with_ego: 42 | if 'ego_his_trajs' in results: 43 | results['ego_his_trajs'] = DC(to_tensor(results['ego_his_trajs'][None, ...]), stack=True) 44 | if 'ego_fut_trajs' in results: 45 | results['ego_fut_trajs'] = DC(to_tensor(results['ego_fut_trajs'][None, ...]), stack=True) 46 | if 'ego_fut_masks' in results: 47 | results['ego_fut_masks'] = DC(to_tensor(results['ego_fut_masks'][None, None, ...]), stack=True) 48 | if 'ego_fut_cmd' in results: 49 | results['ego_fut_cmd'] = DC(to_tensor(results['ego_fut_cmd'][None, None, ...]), stack=True) 50 | if 'ego_lcf_feat' in results: 51 | results['ego_lcf_feat'] = DC(to_tensor(results['ego_lcf_feat'][None, None, ...]), stack=True) 52 | if 'gt_attr_labels' in results: 53 | results['gt_attr_labels'] = DC(to_tensor(results['gt_attr_labels']), cpu_only=False) 54 | 55 | return results -------------------------------------------------------------------------------- /FreeWorld/utils/ros_coord_to_bev_coord.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from pyquaternion import Quaternion 3 | from FreeWorld.utils.coord_transformation import * 4 | 5 | def ROSCoord2NuscenesBEVCoord(data): 6 | """ 7 | Convert all BEV, map, and plan results from ROS(x, y) -> (-y, x) NuscenesBEV coordinate. 8 | Also converts the rotation quaternion for BEV and adjusts relevant points in map and plan data. 
9 | """ 10 | 11 | # Helper function to transform a quaternion for a 90-degree counterclockwise rotation 12 | def transform_quaternion(quat): 13 | q = Quaternion(quat) # Create a quaternion object from the input 14 | rotate_90_deg = Quaternion(axis=[0, 0, 1], angle=-np.pi / 2) # 90-degree clockwise rotation around Z (left-hand) 15 | new_quat = rotate_90_deg * q # Apply the rotation 16 | return new_quat.elements # Return the rotated quaternion 17 | 18 | # Process BEV results (handles box translation and rotation) 19 | if 'results' in data: 20 | for sample_token, prediction in data['results'].items(): 21 | for obj in prediction: 22 | # Convert the box center from (x, y) to (-y, x) 23 | obj['translation'][0], obj['translation'][1] = -obj['translation'][1], obj['translation'][0] 24 | 25 | # Convert the rotation quaternion using the defined matrix 26 | if 'rotation' in obj: 27 | q = Quaternion(obj['rotation']) # Create a quaternion object from the rotation 28 | # Apply a 90-degree rotation to the quaternion (clockwise around Z-axis) 29 | rotate_90_deg = Quaternion(axis=[0, 0, 1], angle=-np.pi / 2) # Clockwise 90 degrees for left-hand coord 30 | new_rotation = rotate_90_deg * q 31 | obj['rotation'] = new_rotation.elements # Update the rotation with the new quaternion 32 | 33 | # Process map results (handles points defining boundaries or dividers) 34 | if 'map_results' in data: 35 | map_results = data['map_results'] 36 | for sample_token, vectors in map_results.items(): 37 | for vector in vectors['vectors']: 38 | if 'pts' in vector: 39 | for pt in vector['pts']: 40 | if len(pt) >= 2: 41 | # Convert map points: from right-hand to left-hand system 42 | pt[0], pt[1] = RightHandCoord2LeftHandCoord(pt[0], pt[1]) # Apply the coordinate transformation 43 | 44 | # Process plan results (handles nested tensors with planning points) 45 | if 'plan_results' in data: 46 | plan_results = data['plan_results'] 47 | for sample_id, tensors in plan_results.items(): 48 | for tensor in tensors: 49 | for points in tensor: 50 | for point in points: 51 | if len(point) >= 2: 52 | # Convert plan points: from right-hand to left-hand system 53 | point[0], point[1] = RightHandCoord2LeftHandCoord(point[0], point[1]) # Apply the coordinate transformation 54 | 55 | return data 56 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/hv_second_secfpn_kitti.py: -------------------------------------------------------------------------------- 1 | voxel_size = [0.05, 0.05, 0.1] 2 | 3 | model = dict( 4 | type='VoxelNet', 5 | voxel_layer=dict( 6 | max_num_points=5, 7 | point_cloud_range=[0, -40, -3, 70.4, 40, 1], 8 | voxel_size=voxel_size, 9 | max_voxels=(16000, 40000)), 10 | voxel_encoder=dict(type='HardSimpleVFE'), 11 | middle_encoder=dict( 12 | type='SparseEncoder', 13 | in_channels=4, 14 | sparse_shape=[41, 1600, 1408], 15 | order=('conv', 'norm', 'act')), 16 | backbone=dict( 17 | type='SECOND', 18 | in_channels=256, 19 | layer_nums=[5, 5], 20 | layer_strides=[1, 2], 21 | out_channels=[128, 256]), 22 | neck=dict( 23 | type='SECONDFPN', 24 | in_channels=[128, 256], 25 | upsample_strides=[1, 2], 26 | out_channels=[256, 256]), 27 | bbox_head=dict( 28 | type='Anchor3DHead', 29 | num_classes=3, 30 | in_channels=512, 31 | feat_channels=512, 32 | use_direction_classifier=True, 33 | anchor_generator=dict( 34 | type='Anchor3DRangeGenerator', 35 | ranges=[ 36 | [0, -40.0, -0.6, 70.4, 40.0, -0.6], 37 | [0, -40.0, -0.6, 70.4, 40.0, -0.6], 38 | [0, -40.0, -1.78, 70.4, 40.0, -1.78], 39 
| ], 40 | sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]], 41 | rotations=[0, 1.57], 42 | reshape_out=False), 43 | diff_rad_by_sin=True, 44 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'), 45 | loss_cls=dict( 46 | type='FocalLoss', 47 | use_sigmoid=True, 48 | gamma=2.0, 49 | alpha=0.25, 50 | loss_weight=1.0), 51 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0), 52 | loss_dir=dict( 53 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 54 | # model training and testing settings 55 | train_cfg=dict( 56 | assigner=[ 57 | dict( # for Pedestrian 58 | type='MaxIoUAssigner', 59 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 60 | pos_iou_thr=0.35, 61 | neg_iou_thr=0.2, 62 | min_pos_iou=0.2, 63 | ignore_iof_thr=-1), 64 | dict( # for Cyclist 65 | type='MaxIoUAssigner', 66 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 67 | pos_iou_thr=0.35, 68 | neg_iou_thr=0.2, 69 | min_pos_iou=0.2, 70 | ignore_iof_thr=-1), 71 | dict( # for Car 72 | type='MaxIoUAssigner', 73 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 74 | pos_iou_thr=0.6, 75 | neg_iou_thr=0.45, 76 | min_pos_iou=0.45, 77 | ignore_iof_thr=-1), 78 | ], 79 | allowed_border=0, 80 | pos_weight=-1, 81 | debug=False), 82 | test_cfg=dict( 83 | use_rotate_nms=True, 84 | nms_across_levels=False, 85 | nms_thr=0.01, 86 | score_thr=0.1, 87 | min_bbox_size=0, 88 | nms_pre=100, 89 | max_num=50)) 90 | -------------------------------------------------------------------------------- /FreeWorld/can_bus/can_bus.py: -------------------------------------------------------------------------------- 1 | import json 2 | from typing import List, Dict, Optional 3 | from bisect import bisect_right 4 | 5 | class CanBusData: 6 | """ 7 | 自定义 CAN 总线数据管理类,读取 can_bus.json 并提供数据访问接口。 8 | """ 9 | def __init__(self, json_path: str): 10 | """ 11 | 初始化 RobotCanBus 实例,读取并组织 JSON 数据。 12 | 13 | 参数: 14 | json_path (str): can_bus.json 文件的路径。 15 | """ 16 | self.data = self._load_json(json_path) 17 | self._sort_messages() 18 | 19 | def _load_json(self, json_path: str) -> Dict[str, List[Dict]]: 20 | """ 21 | 读取 JSON 文件并返回数据字典。 22 | 23 | 参数: 24 | json_path (str): JSON 文件路径。 25 | 26 | 返回: 27 | Dict[str, List[Dict]]: 按场景名称组织的消息列表。 28 | """ 29 | try: 30 | with open(json_path, 'r') as f: 31 | data = json.load(f) 32 | return data 33 | except Exception as e: 34 | print(f"Error loading JSON file {json_path}: {e}") 35 | return {} 36 | 37 | def _sort_messages(self): 38 | """ 39 | 对每个场景的消息按 utime 进行排序,确保消息按时间顺序排列。 40 | """ 41 | for scene, scene_data in self.data.items(): 42 | if 'data' in scene_data: 43 | scene_data['data'].sort(key=lambda msg: msg['utime']) 44 | 45 | def get_messages(self, scene_name: str, msg_type: str) -> List[Dict]: 46 | """ 47 | 获取指定场景中指定类型的所有消息。 48 | 49 | 参数: 50 | scene_name (str): 场景名称。 51 | msg_type (str): 消息类型,如 'pose' 或 'steer'。 52 | 53 | 返回: 54 | List[Dict]: 指定类型的消息列表。 55 | """ 56 | scene = self.data.get(scene_name, None) 57 | if not scene or 'data' not in scene or not scene['data']: 58 | print(f"Warning: No data found for scene {scene_name}.") 59 | return [] 60 | 61 | # 过滤出指定类型的消息 62 | return [msg for msg in scene['data'] if msg.get('type') == msg_type] 63 | 64 | def get_latest_before(self, scene_name: str, timestamp: int) -> Optional[Dict]: 65 | """ 66 | 获取指定场景中,时间戳小于等于给定 timestamp 的最新消息。 67 | 68 | 参数: 69 | scene_name (str): 场景名称。 70 | timestamp (int): 时间戳(微秒)。 71 | 72 | 返回: 73 | Optional[Dict]: 最新的消息,如果不存在则返回 None。 74 | """ 75 | scene = self.data.get(scene_name, None) 76 | if not scene or 'data' not in 
scene or not scene['data']: 77 | print(f"Warning: No data found for scene {scene_name}.") 78 | return None 79 | 80 | # 使用二分查找优化查找过程,寻找跟时间戳最匹配的数据 81 | utimes = [msg['utime'] for msg in scene['data']] 82 | # 使用 bisect_right 查找第一个大于 timestamp 的位置 83 | index = bisect_right(utimes, timestamp) - 1 84 | 85 | if index >= 0: 86 | # 确保找到的时间戳小于等于给定的 timestamp 87 | if utimes[index] <= timestamp: 88 | return scene['data'][index] 89 | else: 90 | print(f"Warning: No valid can_bus messages found for timestamp {timestamp} in scene {scene_name}.") 91 | return None 92 | else: 93 | print(f"Warning: No can_bus messages found for timestamp {timestamp} in scene {scene_name}.") 94 | return None 95 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/3dssd.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='SSD3DNet', 3 | backbone=dict( 4 | type='PointNet2SAMSG', 5 | in_channels=4, 6 | num_points=(4096, 512, (256, 256)), 7 | radii=((0.2, 0.4, 0.8), (0.4, 0.8, 1.6), (1.6, 3.2, 4.8)), 8 | num_samples=((32, 32, 64), (32, 32, 64), (32, 32, 32)), 9 | sa_channels=(((16, 16, 32), (16, 16, 32), (32, 32, 64)), 10 | ((64, 64, 128), (64, 64, 128), (64, 96, 128)), 11 | ((128, 128, 256), (128, 192, 256), (128, 256, 256))), 12 | aggregation_channels=(64, 128, 256), 13 | fps_mods=(('D-FPS'), ('FS'), ('F-FPS', 'D-FPS')), 14 | fps_sample_range_lists=((-1), (-1), (512, -1)), 15 | norm_cfg=dict(type='BN2d', eps=1e-3, momentum=0.1), 16 | sa_cfg=dict( 17 | type='PointSAModuleMSG', 18 | pool_mod='max', 19 | use_xyz=True, 20 | normalize_xyz=False)), 21 | bbox_head=dict( 22 | type='SSD3DHead', 23 | in_channels=256, 24 | vote_module_cfg=dict( 25 | in_channels=256, 26 | num_points=256, 27 | gt_per_seed=1, 28 | conv_channels=(128, ), 29 | conv_cfg=dict(type='Conv1d'), 30 | norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1), 31 | with_res_feat=False, 32 | vote_xyz_range=(3.0, 3.0, 2.0)), 33 | vote_aggregation_cfg=dict( 34 | type='PointSAModuleMSG', 35 | num_point=256, 36 | radii=(4.8, 6.4), 37 | sample_nums=(16, 32), 38 | mlp_channels=((256, 256, 256, 512), (256, 256, 512, 1024)), 39 | norm_cfg=dict(type='BN2d', eps=1e-3, momentum=0.1), 40 | use_xyz=True, 41 | normalize_xyz=False, 42 | bias=True), 43 | pred_layer_cfg=dict( 44 | in_channels=1536, 45 | shared_conv_channels=(512, 128), 46 | cls_conv_channels=(128, ), 47 | reg_conv_channels=(128, ), 48 | conv_cfg=dict(type='Conv1d'), 49 | norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1), 50 | bias=True), 51 | conv_cfg=dict(type='Conv1d'), 52 | norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1), 53 | objectness_loss=dict( 54 | type='CrossEntropyLoss', 55 | use_sigmoid=True, 56 | reduction='sum', 57 | loss_weight=1.0), 58 | center_loss=dict( 59 | type='SmoothL1Loss', reduction='sum', loss_weight=1.0), 60 | dir_class_loss=dict( 61 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 62 | dir_res_loss=dict( 63 | type='SmoothL1Loss', reduction='sum', loss_weight=1.0), 64 | size_res_loss=dict( 65 | type='SmoothL1Loss', reduction='sum', loss_weight=1.0), 66 | corner_loss=dict( 67 | type='SmoothL1Loss', reduction='sum', loss_weight=1.0), 68 | vote_loss=dict(type='SmoothL1Loss', reduction='sum', loss_weight=1.0)), 69 | # model training and testing settings 70 | train_cfg=dict( 71 | sample_mod='spec', pos_distance_thr=10.0, expand_dims_length=0.05), 72 | test_cfg=dict( 73 | nms_cfg=dict(type='nms', iou_thr=0.1), 74 | sample_mod='spec', 75 | score_thr=0.0, 76 | 
per_class_proposal=True, 77 | max_output_num=100)) 78 | -------------------------------------------------------------------------------- /tools/model_converters/regnet2mmdet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import torch 4 | from collections import OrderedDict 5 | 6 | 7 | def convert_stem(model_key, model_weight, state_dict, converted_names): 8 | new_key = model_key.replace('stem.conv', 'conv1') 9 | new_key = new_key.replace('stem.bn', 'bn1') 10 | state_dict[new_key] = model_weight 11 | converted_names.add(model_key) 12 | print(f'Convert {model_key} to {new_key}') 13 | 14 | 15 | def convert_head(model_key, model_weight, state_dict, converted_names): 16 | new_key = model_key.replace('head.fc', 'fc') 17 | state_dict[new_key] = model_weight 18 | converted_names.add(model_key) 19 | print(f'Convert {model_key} to {new_key}') 20 | 21 | 22 | def convert_reslayer(model_key, model_weight, state_dict, converted_names): 23 | split_keys = model_key.split('.') 24 | layer, block, module = split_keys[:3] 25 | block_id = int(block[1:]) 26 | layer_name = f'layer{int(layer[1:])}' 27 | block_name = f'{block_id - 1}' 28 | 29 | if block_id == 1 and module == 'bn': 30 | new_key = f'{layer_name}.{block_name}.downsample.1.{split_keys[-1]}' 31 | elif block_id == 1 and module == 'proj': 32 | new_key = f'{layer_name}.{block_name}.downsample.0.{split_keys[-1]}' 33 | elif module == 'f': 34 | if split_keys[3] == 'a_bn': 35 | module_name = 'bn1' 36 | elif split_keys[3] == 'b_bn': 37 | module_name = 'bn2' 38 | elif split_keys[3] == 'c_bn': 39 | module_name = 'bn3' 40 | elif split_keys[3] == 'a': 41 | module_name = 'conv1' 42 | elif split_keys[3] == 'b': 43 | module_name = 'conv2' 44 | elif split_keys[3] == 'c': 45 | module_name = 'conv3' 46 | new_key = f'{layer_name}.{block_name}.{module_name}.{split_keys[-1]}' 47 | else: 48 | raise ValueError(f'Unsupported conversion of key {model_key}') 49 | print(f'Convert {model_key} to {new_key}') 50 | state_dict[new_key] = model_weight 51 | converted_names.add(model_key) 52 | 53 | 54 | def convert(src, dst): 55 | """Convert keys in pycls pretrained RegNet models to mmdet style.""" 56 | # load caffe model 57 | regnet_model = torch.load(src) 58 | blobs = regnet_model['model_state'] 59 | # convert to pytorch style 60 | state_dict = OrderedDict() 61 | converted_names = set() 62 | for key, weight in blobs.items(): 63 | if 'stem' in key: 64 | convert_stem(key, weight, state_dict, converted_names) 65 | elif 'head' in key: 66 | convert_head(key, weight, state_dict, converted_names) 67 | elif key.startswith('s'): 68 | convert_reslayer(key, weight, state_dict, converted_names) 69 | 70 | # check if all layers are converted 71 | for key in blobs: 72 | if key not in converted_names: 73 | print(f'not converted: {key}') 74 | # save checkpoint 75 | checkpoint = dict() 76 | checkpoint['state_dict'] = state_dict 77 | torch.save(checkpoint, dst) 78 | 79 | 80 | def main(): 81 | parser = argparse.ArgumentParser(description='Convert model keys') 82 | parser.add_argument('src', help='src detectron model path') 83 | parser.add_argument('dst', help='save path') 84 | args = parser.parse_args() 85 | convert(args.src, args.dst) 86 | 87 | 88 | if __name__ == '__main__': 89 | main() 90 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/hv_pointpillars_secfpn_kitti.py: 
-------------------------------------------------------------------------------- 1 | voxel_size = [0.16, 0.16, 4] 2 | 3 | model = dict( 4 | type='VoxelNet', 5 | voxel_layer=dict( 6 | max_num_points=32, # max_points_per_voxel 7 | point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1], 8 | voxel_size=voxel_size, 9 | max_voxels=(16000, 40000) # (training, testing) max_voxels 10 | ), 11 | voxel_encoder=dict( 12 | type='PillarFeatureNet', 13 | in_channels=4, 14 | feat_channels=[64], 15 | with_distance=False, 16 | voxel_size=voxel_size, 17 | point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1]), 18 | middle_encoder=dict( 19 | type='PointPillarsScatter', in_channels=64, output_shape=[496, 432]), 20 | backbone=dict( 21 | type='SECOND', 22 | in_channels=64, 23 | layer_nums=[3, 5, 5], 24 | layer_strides=[2, 2, 2], 25 | out_channels=[64, 128, 256]), 26 | neck=dict( 27 | type='SECONDFPN', 28 | in_channels=[64, 128, 256], 29 | upsample_strides=[1, 2, 4], 30 | out_channels=[128, 128, 128]), 31 | bbox_head=dict( 32 | type='Anchor3DHead', 33 | num_classes=3, 34 | in_channels=384, 35 | feat_channels=384, 36 | use_direction_classifier=True, 37 | anchor_generator=dict( 38 | type='Anchor3DRangeGenerator', 39 | ranges=[ 40 | [0, -39.68, -0.6, 70.4, 39.68, -0.6], 41 | [0, -39.68, -0.6, 70.4, 39.68, -0.6], 42 | [0, -39.68, -1.78, 70.4, 39.68, -1.78], 43 | ], 44 | sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]], 45 | rotations=[0, 1.57], 46 | reshape_out=False), 47 | diff_rad_by_sin=True, 48 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'), 49 | loss_cls=dict( 50 | type='FocalLoss', 51 | use_sigmoid=True, 52 | gamma=2.0, 53 | alpha=0.25, 54 | loss_weight=1.0), 55 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0), 56 | loss_dir=dict( 57 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 58 | # model training and testing settings 59 | train_cfg=dict( 60 | assigner=[ 61 | dict( # for Pedestrian 62 | type='MaxIoUAssigner', 63 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 64 | pos_iou_thr=0.5, 65 | neg_iou_thr=0.35, 66 | min_pos_iou=0.35, 67 | ignore_iof_thr=-1), 68 | dict( # for Cyclist 69 | type='MaxIoUAssigner', 70 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 71 | pos_iou_thr=0.5, 72 | neg_iou_thr=0.35, 73 | min_pos_iou=0.35, 74 | ignore_iof_thr=-1), 75 | dict( # for Car 76 | type='MaxIoUAssigner', 77 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 78 | pos_iou_thr=0.6, 79 | neg_iou_thr=0.45, 80 | min_pos_iou=0.45, 81 | ignore_iof_thr=-1), 82 | ], 83 | allowed_border=0, 84 | pos_weight=-1, 85 | debug=False), 86 | test_cfg=dict( 87 | use_rotate_nms=True, 88 | nms_across_levels=False, 89 | nms_thr=0.01, 90 | score_thr=0.1, 91 | min_bbox_size=0, 92 | nms_pre=100, 93 | max_num=50)) 94 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/centerpoint_02pillar_second_secfpn_nus.py: -------------------------------------------------------------------------------- 1 | voxel_size = [0.2, 0.2, 8] 2 | model = dict( 3 | type='CenterPoint', 4 | pts_voxel_layer=dict( 5 | max_num_points=20, voxel_size=voxel_size, max_voxels=(30000, 40000)), 6 | pts_voxel_encoder=dict( 7 | type='PillarFeatureNet', 8 | in_channels=5, 9 | feat_channels=[64], 10 | with_distance=False, 11 | voxel_size=(0.2, 0.2, 8), 12 | norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01), 13 | legacy=False), 14 | pts_middle_encoder=dict( 15 | type='PointPillarsScatter', in_channels=64, output_shape=(512, 512)), 16 | pts_backbone=dict( 17 | 
type='SECOND', 18 | in_channels=64, 19 | out_channels=[64, 128, 256], 20 | layer_nums=[3, 5, 5], 21 | layer_strides=[2, 2, 2], 22 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), 23 | conv_cfg=dict(type='Conv2d', bias=False)), 24 | pts_neck=dict( 25 | type='SECONDFPN', 26 | in_channels=[64, 128, 256], 27 | out_channels=[128, 128, 128], 28 | upsample_strides=[0.5, 1, 2], 29 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), 30 | upsample_cfg=dict(type='deconv', bias=False), 31 | use_conv_for_no_stride=True), 32 | pts_bbox_head=dict( 33 | type='CenterHead', 34 | in_channels=sum([128, 128, 128]), 35 | tasks=[ 36 | dict(num_class=1, class_names=['car']), 37 | dict(num_class=2, class_names=['truck', 'construction_vehicle']), 38 | dict(num_class=2, class_names=['bus', 'trailer']), 39 | dict(num_class=1, class_names=['barrier']), 40 | dict(num_class=2, class_names=['motorcycle', 'bicycle']), 41 | dict(num_class=2, class_names=['pedestrian', 'traffic_cone']), 42 | ], 43 | common_heads=dict( 44 | reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)), 45 | share_conv_channel=64, 46 | bbox_coder=dict( 47 | type='CenterPointBBoxCoder', 48 | post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], 49 | max_num=500, 50 | score_threshold=0.1, 51 | out_size_factor=4, 52 | voxel_size=voxel_size[:2], 53 | code_size=9), 54 | separate_head=dict( 55 | type='SeparateHead', init_bias=-2.19, final_kernel=3), 56 | loss_cls=dict(type='GaussianFocalLoss', reduction='mean'), 57 | loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25), 58 | norm_bbox=True), 59 | # model training and testing settings 60 | train_cfg=dict( 61 | pts=dict( 62 | grid_size=[512, 512, 1], 63 | voxel_size=voxel_size, 64 | out_size_factor=4, 65 | dense_reg=1, 66 | gaussian_overlap=0.1, 67 | max_objs=500, 68 | min_radius=2, 69 | code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2])), 70 | test_cfg=dict( 71 | pts=dict( 72 | post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], 73 | max_per_img=500, 74 | max_pool_nms=False, 75 | min_radius=[4, 12, 10, 1, 0.85, 0.175], 76 | score_threshold=0.1, 77 | pc_range=[-51.2, -51.2], 78 | out_size_factor=4, 79 | voxel_size=voxel_size[:2], 80 | nms_type='rotate', 81 | pre_max_size=1000, 82 | post_max_size=83, 83 | nms_thr=0.2))) 84 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/centerpoint_01voxel_second_secfpn_nus.py: -------------------------------------------------------------------------------- 1 | voxel_size = [0.1, 0.1, 0.2] 2 | model = dict( 3 | type='CenterPoint', 4 | pts_voxel_layer=dict( 5 | max_num_points=10, voxel_size=voxel_size, max_voxels=(90000, 120000)), 6 | pts_voxel_encoder=dict(type='HardSimpleVFE', num_features=5), 7 | pts_middle_encoder=dict( 8 | type='SparseEncoder', 9 | in_channels=5, 10 | sparse_shape=[41, 1024, 1024], 11 | output_channels=128, 12 | order=('conv', 'norm', 'act'), 13 | encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128, 14 | 128)), 15 | encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, [0, 1, 1]), (0, 0)), 16 | block_type='basicblock'), 17 | pts_backbone=dict( 18 | type='SECOND', 19 | in_channels=256, 20 | out_channels=[128, 256], 21 | layer_nums=[5, 5], 22 | layer_strides=[1, 2], 23 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), 24 | conv_cfg=dict(type='Conv2d', bias=False)), 25 | pts_neck=dict( 26 | type='SECONDFPN', 27 | in_channels=[128, 256], 28 | out_channels=[256, 256], 29 | upsample_strides=[1, 2], 30 | norm_cfg=dict(type='BN', 
eps=1e-3, momentum=0.01), 31 | upsample_cfg=dict(type='deconv', bias=False), 32 | use_conv_for_no_stride=True), 33 | pts_bbox_head=dict( 34 | type='CenterHead', 35 | in_channels=sum([256, 256]), 36 | tasks=[ 37 | dict(num_class=1, class_names=['car']), 38 | dict(num_class=2, class_names=['truck', 'construction_vehicle']), 39 | dict(num_class=2, class_names=['bus', 'trailer']), 40 | dict(num_class=1, class_names=['barrier']), 41 | dict(num_class=2, class_names=['motorcycle', 'bicycle']), 42 | dict(num_class=2, class_names=['pedestrian', 'traffic_cone']), 43 | ], 44 | common_heads=dict( 45 | reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)), 46 | share_conv_channel=64, 47 | bbox_coder=dict( 48 | type='CenterPointBBoxCoder', 49 | post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], 50 | max_num=500, 51 | score_threshold=0.1, 52 | out_size_factor=8, 53 | voxel_size=voxel_size[:2], 54 | code_size=9), 55 | separate_head=dict( 56 | type='SeparateHead', init_bias=-2.19, final_kernel=3), 57 | loss_cls=dict(type='GaussianFocalLoss', reduction='mean'), 58 | loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25), 59 | norm_bbox=True), 60 | # model training and testing settings 61 | train_cfg=dict( 62 | pts=dict( 63 | grid_size=[1024, 1024, 40], 64 | voxel_size=voxel_size, 65 | out_size_factor=8, 66 | dense_reg=1, 67 | gaussian_overlap=0.1, 68 | max_objs=500, 69 | min_radius=2, 70 | code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2])), 71 | test_cfg=dict( 72 | pts=dict( 73 | post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], 74 | max_per_img=500, 75 | max_pool_nms=False, 76 | min_radius=[4, 12, 10, 1, 0.85, 0.175], 77 | score_threshold=0.1, 78 | out_size_factor=8, 79 | voxel_size=voxel_size[:2], 80 | nms_type='rotate', 81 | pre_max_size=1000, 82 | post_max_size=83, 83 | nms_thr=0.2))) 84 | -------------------------------------------------------------------------------- /tools/analysis_tools/benchmark.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
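# The script below measures pure inference throughput: it builds the test dataloader with one sample per GPU,
# skips the first 5 iterations as warm-up, synchronizes CUDA before and after each forward to time it accurately,
# and prints images/s every --log-interval iterations until --samples iterations are reached.
# Minimal usage sketch (the config and checkpoint paths are hypothetical examples):
#   python tools/analysis_tools/benchmark.py projects/configs/VAD/VAD_base.py --checkpoint ckpts/vad_base.pth
# Note: --samples and --log-interval are declared without type=int, so values passed on the command line arrive
# as strings; rely on the defaults or add type=int before overriding them.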
2 | import argparse 3 | import time 4 | import torch 5 | from mmcv import Config 6 | from mmcv.parallel import MMDataParallel 7 | from mmcv.runner import load_checkpoint, wrap_fp16_model 8 | import sys 9 | sys.path.append('.') 10 | from projects.mmdet3d_plugin.datasets.builder import build_dataloader 11 | from projects.mmdet3d_plugin.datasets import custom_build_dataset 12 | # from mmdet3d.datasets import build_dataloader, build_dataset 13 | from mmdet3d.models import build_detector 14 | #from tools.misc.fuse_conv_bn import fuse_module 15 | 16 | 17 | def parse_args(): 18 | parser = argparse.ArgumentParser(description='MMDet benchmark a model') 19 | parser.add_argument('config', help='test config file path') 20 | parser.add_argument('--checkpoint', default=None, help='checkpoint file') 21 | parser.add_argument('--samples', default=2000, help='samples to benchmark') 22 | parser.add_argument( 23 | '--log-interval', default=50, help='interval of logging') 24 | parser.add_argument( 25 | '--fuse-conv-bn', 26 | action='store_true', 27 | help='Whether to fuse conv and bn, this will slightly increase' 28 | 'the inference speed') 29 | args = parser.parse_args() 30 | return args 31 | 32 | 33 | def main(): 34 | args = parse_args() 35 | 36 | cfg = Config.fromfile(args.config) 37 | # set cudnn_benchmark 38 | if cfg.get('cudnn_benchmark', False): 39 | torch.backends.cudnn.benchmark = True 40 | cfg.model.pretrained = None 41 | cfg.data.test.test_mode = True 42 | 43 | # build the dataloader 44 | # TODO: support multiple images per gpu (only minor changes are needed) 45 | print(cfg.data.test) 46 | dataset = custom_build_dataset(cfg.data.test) 47 | data_loader = build_dataloader( 48 | dataset, 49 | samples_per_gpu=1, 50 | workers_per_gpu=cfg.data.workers_per_gpu, 51 | dist=False, 52 | shuffle=False) 53 | 54 | # build the model and load checkpoint 55 | cfg.model.train_cfg = None 56 | model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg')) 57 | fp16_cfg = cfg.get('fp16', None) 58 | if fp16_cfg is not None: 59 | wrap_fp16_model(model) 60 | if args.checkpoint is not None: 61 | load_checkpoint(model, args.checkpoint, map_location='cpu') 62 | #if args.fuse_conv_bn: 63 | # model = fuse_module(model) 64 | 65 | model = MMDataParallel(model, device_ids=[0]) 66 | 67 | model.eval() 68 | 69 | # the first several iterations may be very slow so skip them 70 | num_warmup = 5 71 | pure_inf_time = 0 72 | 73 | # benchmark with several samples and take the average 74 | for i, data in enumerate(data_loader): 75 | torch.cuda.synchronize() 76 | start_time = time.perf_counter() 77 | with torch.no_grad(): 78 | model(return_loss=False, rescale=True, **data) 79 | 80 | torch.cuda.synchronize() 81 | elapsed = time.perf_counter() - start_time 82 | 83 | if i >= num_warmup: 84 | pure_inf_time += elapsed 85 | if (i + 1) % args.log_interval == 0: 86 | fps = (i + 1 - num_warmup) / pure_inf_time 87 | print(f'Done image [{i + 1:<3}/ {args.samples}], ' 88 | f'fps: {fps:.1f} img / s') 89 | 90 | if (i + 1) == args.samples: 91 | pure_inf_time += elapsed 92 | fps = (i + 1 - num_warmup) / pure_inf_time 93 | print(f'Overall fps: {fps:.1f} img / s') 94 | break 95 | 96 | 97 | if __name__ == '__main__': 98 | main() 99 | -------------------------------------------------------------------------------- /tools/analysis_tools/ros_coord_to_bev_coord.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from pyquaternion import Quaternion 3 | 4 | 5 | def ROSCoord2NuscenesBEVCoord(data): 6 | """ 7 
| right hand coord to left hand coord 8 | 9 | Convert all BEV, map, and plan results from ROS(x, y) -> (-y, x) NuscenesBEV coordinate. 10 | Also converts the rotation quaternion for BEV and adjusts relevant points in map and plan data. 11 | """ 12 | 13 | # Helper function to transform a point (x, y) -> (-y, x) in place 14 | def right_hand_coord_to_left_hand_coord(point): 15 | if len(point) < 2: 16 | raise ValueError(f"Point does not have enough dimensions for transformation: {point}") 17 | x = point[0] 18 | y = point[1] 19 | point[0], point[1] = -y, x # Write back so the point is modified in place 20 | 21 | # Helper function to transform a quaternion for a 90-degree counterclockwise rotation 22 | def transform_quaternion(quat): 23 | q = Quaternion(quat) # Create a quaternion object from the input 24 | rotate_90_deg = Quaternion(axis=[0, 0, 1], angle=np.pi / 2) # 90-degree CCW rotation around Z 25 | new_quat = rotate_90_deg * q # Apply the rotation 26 | return new_quat.elements # Return the rotated quaternion 27 | 28 | # Process BEV results (handles box translation and rotation) 29 | if 'results' in data: 30 | # Iterate over the BEV results to apply the coordinate transformation 31 | for sample_token, prediction in data['results'].items(): 32 | for obj in prediction: 33 | # Convert the box center from (x, y) to (-y, x) 34 | obj['translation'][0], obj['translation'][1] = -obj['translation'][1], obj['translation'][0] 35 | 36 | # Convert the rotation quaternion with a 90-degree yaw rotation 37 | # Assuming the rotation quaternion is in obj['rotation'] 38 | if 'rotation' in obj: 39 | q = Quaternion(obj['rotation']) # Create a quaternion object from the rotation 40 | # Apply a 90-degree rotation to the quaternion 41 | # First, construct the quaternion corresponding to the 90-degree counterclockwise rotation in 2D. 42 | rotate_90_deg = Quaternion(axis=[0, 0, 1], angle=np.pi / 2) 43 | # Multiply the current quaternion by the 90-degree quaternion (apply the transformation) 44 | new_rotation = rotate_90_deg * q 45 | obj['rotation'] = new_rotation.elements # Update the rotation with the new quaternion 46 | 47 | 48 | # Process map results (handles points defining boundaries or dividers) 49 | if 'map_results' in data: 50 | map_results = data['map_results'] 51 | for sample_token, vectors in map_results.items(): 52 | for vector in vectors['vectors']: 53 | # Transform points defining boundaries or dividers 54 | if 'pts' in vector: 55 | for pt in vector['pts']: 56 | if len(pt) >= 2: 57 | right_hand_coord_to_left_hand_coord(pt) # Modify the point in place 58 | 59 | # Process plan results (handles nested tensors with planning points) 60 | if 'plan_results' in data: 61 | plan_results = data['plan_results'] 62 | for sample_id, tensors in plan_results.items(): 63 | for tensor in tensors: 64 | for points in tensor: 65 | for point in points: 66 | if len(point) >= 2: 67 | right_hand_coord_to_left_hand_coord(point) # Only the first two dimensions are modified in place 68 | 69 | return data 70 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/hv_pointpillars_fpn_nus.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | # Voxel size for voxel encoder 3 | # Usually voxel size is changed consistently with the point cloud range 4 | # If point cloud range is modified, do remember to change all related 5 | # keys in the config. 
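# A quick sanity check of how these keys relate in the config below (values taken from this file):
# point_cloud_range = [-50, -50, -5, 50, 50, 3] with voxel_size = [0.25, 0.25, 8] gives
# (50 - (-50)) / 0.25 = 400 bins in x and in y and a single 8 m pillar in z, which is why
# PointPillarsScatter uses output_shape=[400, 400]; changing the range or voxel size means
# updating output_shape (and the anchor ranges) to match.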
6 | voxel_size = [0.25, 0.25, 8] 7 | model = dict( 8 | type='MVXFasterRCNN', 9 | pts_voxel_layer=dict( 10 | max_num_points=64, 11 | point_cloud_range=[-50, -50, -5, 50, 50, 3], 12 | voxel_size=voxel_size, 13 | max_voxels=(30000, 40000)), 14 | pts_voxel_encoder=dict( 15 | type='HardVFE', 16 | in_channels=4, 17 | feat_channels=[64, 64], 18 | with_distance=False, 19 | voxel_size=voxel_size, 20 | with_cluster_center=True, 21 | with_voxel_center=True, 22 | point_cloud_range=[-50, -50, -5, 50, 50, 3], 23 | norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)), 24 | pts_middle_encoder=dict( 25 | type='PointPillarsScatter', in_channels=64, output_shape=[400, 400]), 26 | pts_backbone=dict( 27 | type='SECOND', 28 | in_channels=64, 29 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 30 | layer_nums=[3, 5, 5], 31 | layer_strides=[2, 2, 2], 32 | out_channels=[64, 128, 256]), 33 | pts_neck=dict( 34 | type='FPN', 35 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 36 | act_cfg=dict(type='ReLU'), 37 | in_channels=[64, 128, 256], 38 | out_channels=256, 39 | start_level=0, 40 | num_outs=3), 41 | pts_bbox_head=dict( 42 | type='Anchor3DHead', 43 | num_classes=10, 44 | in_channels=256, 45 | feat_channels=256, 46 | use_direction_classifier=True, 47 | anchor_generator=dict( 48 | type='AlignedAnchor3DRangeGenerator', 49 | ranges=[[-50, -50, -1.8, 50, 50, -1.8]], 50 | scales=[1, 2, 4], 51 | sizes=[ 52 | [0.8660, 2.5981, 1.], # 1.5/sqrt(3) 53 | [0.5774, 1.7321, 1.], # 1/sqrt(3) 54 | [1., 1., 1.], 55 | [0.4, 0.4, 1], 56 | ], 57 | custom_values=[0, 0], 58 | rotations=[0, 1.57], 59 | reshape_out=True), 60 | assigner_per_size=False, 61 | diff_rad_by_sin=True, 62 | dir_offset=0.7854, # pi/4 63 | dir_limit_offset=0, 64 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9), 65 | loss_cls=dict( 66 | type='FocalLoss', 67 | use_sigmoid=True, 68 | gamma=2.0, 69 | alpha=0.25, 70 | loss_weight=1.0), 71 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), 72 | loss_dir=dict( 73 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 74 | # model training and testing settings 75 | train_cfg=dict( 76 | pts=dict( 77 | assigner=dict( 78 | type='MaxIoUAssigner', 79 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 80 | pos_iou_thr=0.6, 81 | neg_iou_thr=0.3, 82 | min_pos_iou=0.3, 83 | ignore_iof_thr=-1), 84 | allowed_border=0, 85 | code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2], 86 | pos_weight=-1, 87 | debug=False)), 88 | test_cfg=dict( 89 | pts=dict( 90 | use_rotate_nms=True, 91 | nms_across_levels=False, 92 | nms_pre=1000, 93 | nms_thr=0.2, 94 | score_thr=0.05, 95 | min_bbox_size=0, 96 | max_num=500))) 97 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/nus-mono3d.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CustomNuScenesMonoDataset' 2 | data_root = 'data/nuscenes/' 3 | class_names = [ 4 | 'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle', 5 | 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier' 6 | ] 7 | # Input modality for nuScenes dataset, this is consistent with the submission 8 | # format which requires the information in input_modality. 
9 | input_modality = dict( 10 | use_lidar=False, 11 | use_camera=True, 12 | use_radar=False, 13 | use_map=False, 14 | use_external=False) 15 | img_norm_cfg = dict( 16 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 17 | train_pipeline = [ 18 | dict(type='LoadImageFromFileMono3D'), 19 | dict( 20 | type='LoadAnnotations3D', 21 | with_bbox=True, 22 | with_label=True, 23 | with_attr_label=True, 24 | with_bbox_3d=True, 25 | with_label_3d=True, 26 | with_bbox_depth=True), 27 | dict(type='Resize', img_scale=(1600, 900), keep_ratio=True), 28 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='Pad', size_divisor=32), 31 | dict(type='DefaultFormatBundle3D', class_names=class_names), 32 | dict( 33 | type='Collect3D', 34 | keys=[ 35 | 'img', 'gt_bboxes', 'gt_labels', 'attr_labels', 'gt_bboxes_3d', 36 | 'gt_labels_3d', 'centers2d', 'depths' 37 | ]), 38 | ] 39 | test_pipeline = [ 40 | dict(type='LoadImageFromFileMono3D'), 41 | dict( 42 | type='MultiScaleFlipAug', 43 | scale_factor=1.0, 44 | flip=False, 45 | transforms=[ 46 | dict(type='RandomFlip3D'), 47 | dict(type='Normalize', **img_norm_cfg), 48 | dict(type='Pad', size_divisor=32), 49 | dict( 50 | type='DefaultFormatBundle3D', 51 | class_names=class_names, 52 | with_label=False), 53 | dict(type='Collect3D', keys=['img']), 54 | ]) 55 | ] 56 | # construct a pipeline for data and gt loading in show function 57 | # please keep its loading function consistent with test_pipeline (e.g. client) 58 | eval_pipeline = [ 59 | dict(type='LoadImageFromFileMono3D'), 60 | dict( 61 | type='DefaultFormatBundle3D', 62 | class_names=class_names, 63 | with_label=False), 64 | dict(type='Collect3D', keys=['img']) 65 | ] 66 | 67 | data = dict( 68 | samples_per_gpu=2, 69 | workers_per_gpu=2, 70 | train=dict( 71 | type=dataset_type, 72 | data_root=data_root, 73 | ann_file=data_root + 'nuscenes_infos_train_mono3d.coco.json', 74 | img_prefix=data_root, 75 | classes=class_names, 76 | pipeline=train_pipeline, 77 | modality=input_modality, 78 | test_mode=False, 79 | box_type_3d='Camera'), 80 | val=dict( 81 | type=dataset_type, 82 | data_root=data_root, 83 | ann_file=data_root + 'nuscenes_infos_val_mono3d.coco.json', 84 | img_prefix=data_root, 85 | classes=class_names, 86 | pipeline=test_pipeline, 87 | modality=input_modality, 88 | test_mode=True, 89 | box_type_3d='Camera'), 90 | test=dict( 91 | type=dataset_type, 92 | data_root=data_root, 93 | ann_file=data_root + 'nuscenes_infos_val_mono3d.coco.json', 94 | img_prefix=data_root, 95 | classes=class_names, 96 | pipeline=test_pipeline, 97 | modality=input_modality, 98 | test_mode=True, 99 | box_type_3d='Camera')) 100 | evaluation = dict(interval=2) 101 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/sunrgbd-3d-10class.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'SUNRGBDDataset' 2 | data_root = 'data/sunrgbd/' 3 | class_names = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser', 4 | 'night_stand', 'bookshelf', 'bathtub') 5 | train_pipeline = [ 6 | dict( 7 | type='LoadPointsFromFile', 8 | coord_type='DEPTH', 9 | shift_height=True, 10 | load_dim=6, 11 | use_dim=[0, 1, 2]), 12 | dict(type='LoadAnnotations3D'), 13 | dict( 14 | type='RandomFlip3D', 15 | sync_2d=False, 16 | flip_ratio_bev_horizontal=0.5, 17 | ), 18 | dict( 19 | type='GlobalRotScaleTrans', 20 | rot_range=[-0.523599, 0.523599], 21 | 
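# 0.523599 rad is approximately pi/6, i.e. a random rotation drawn uniformly from about [-30, +30] degrees.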
scale_ratio_range=[0.85, 1.15], 22 | shift_height=True), 23 | dict(type='PointSample', num_points=20000), 24 | dict(type='DefaultFormatBundle3D', class_names=class_names), 25 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 26 | ] 27 | test_pipeline = [ 28 | dict( 29 | type='LoadPointsFromFile', 30 | coord_type='DEPTH', 31 | shift_height=True, 32 | load_dim=6, 33 | use_dim=[0, 1, 2]), 34 | dict( 35 | type='MultiScaleFlipAug3D', 36 | img_scale=(1333, 800), 37 | pts_scale_ratio=1, 38 | flip=False, 39 | transforms=[ 40 | dict( 41 | type='GlobalRotScaleTrans', 42 | rot_range=[0, 0], 43 | scale_ratio_range=[1., 1.], 44 | translation_std=[0, 0, 0]), 45 | dict( 46 | type='RandomFlip3D', 47 | sync_2d=False, 48 | flip_ratio_bev_horizontal=0.5, 49 | ), 50 | dict(type='PointSample', num_points=20000), 51 | dict( 52 | type='DefaultFormatBundle3D', 53 | class_names=class_names, 54 | with_label=False), 55 | dict(type='Collect3D', keys=['points']) 56 | ]) 57 | ] 58 | # construct a pipeline for data and gt loading in show function 59 | # please keep its loading function consistent with test_pipeline (e.g. client) 60 | eval_pipeline = [ 61 | dict( 62 | type='LoadPointsFromFile', 63 | coord_type='DEPTH', 64 | shift_height=False, 65 | load_dim=6, 66 | use_dim=[0, 1, 2]), 67 | dict( 68 | type='DefaultFormatBundle3D', 69 | class_names=class_names, 70 | with_label=False), 71 | dict(type='Collect3D', keys=['points']) 72 | ] 73 | 74 | data = dict( 75 | samples_per_gpu=16, 76 | workers_per_gpu=4, 77 | train=dict( 78 | type='RepeatDataset', 79 | times=5, 80 | dataset=dict( 81 | type=dataset_type, 82 | data_root=data_root, 83 | ann_file=data_root + 'sunrgbd_infos_train.pkl', 84 | pipeline=train_pipeline, 85 | classes=class_names, 86 | filter_empty_gt=False, 87 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset 88 | # and box_type_3d='Depth' in sunrgbd and scannet dataset. 89 | box_type_3d='Depth')), 90 | val=dict( 91 | type=dataset_type, 92 | data_root=data_root, 93 | ann_file=data_root + 'sunrgbd_infos_val.pkl', 94 | pipeline=test_pipeline, 95 | classes=class_names, 96 | test_mode=True, 97 | box_type_3d='Depth'), 98 | test=dict( 99 | type=dataset_type, 100 | data_root=data_root, 101 | ann_file=data_root + 'sunrgbd_infos_val.pkl', 102 | pipeline=test_pipeline, 103 | classes=class_names, 104 | test_mode=True, 105 | box_type_3d='Depth')) 106 | 107 | evaluation = dict(pipeline=eval_pipeline) 108 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/evaluation/eval_hooks.py: -------------------------------------------------------------------------------- 1 | 2 | # Note: Considering that MMCV's EvalHook updated its interface in V1.3.16, 3 | # in order to avoid strong version dependency, we did not directly 4 | # inherit EvalHook but BaseDistEvalHook. 
5 | 6 | import bisect 7 | import os.path as osp 8 | 9 | import mmcv 10 | import torch.distributed as dist 11 | from mmcv.runner import DistEvalHook as BaseDistEvalHook 12 | from mmcv.runner import EvalHook as BaseEvalHook 13 | from torch.nn.modules.batchnorm import _BatchNorm 14 | from mmdet.core.evaluation.eval_hooks import DistEvalHook 15 | 16 | 17 | def _calc_dynamic_intervals(start_interval, dynamic_interval_list): 18 | assert mmcv.is_list_of(dynamic_interval_list, tuple) 19 | 20 | dynamic_milestones = [0] 21 | dynamic_milestones.extend( 22 | [dynamic_interval[0] for dynamic_interval in dynamic_interval_list]) 23 | dynamic_intervals = [start_interval] 24 | dynamic_intervals.extend( 25 | [dynamic_interval[1] for dynamic_interval in dynamic_interval_list]) 26 | return dynamic_milestones, dynamic_intervals 27 | 28 | 29 | class CustomDistEvalHook(BaseDistEvalHook): 30 | 31 | def __init__(self, *args, dynamic_intervals=None, **kwargs): 32 | super(CustomDistEvalHook, self).__init__(*args, **kwargs) 33 | self.use_dynamic_intervals = dynamic_intervals is not None 34 | if self.use_dynamic_intervals: 35 | self.dynamic_milestones, self.dynamic_intervals = \ 36 | _calc_dynamic_intervals(self.interval, dynamic_intervals) 37 | 38 | def _decide_interval(self, runner): 39 | if self.use_dynamic_intervals: 40 | progress = runner.epoch if self.by_epoch else runner.iter 41 | step = bisect.bisect(self.dynamic_milestones, (progress + 1)) 42 | # Dynamically modify the evaluation interval 43 | self.interval = self.dynamic_intervals[step - 1] 44 | 45 | def before_train_epoch(self, runner): 46 | """Evaluate the model only at the start of training by epoch.""" 47 | self._decide_interval(runner) 48 | super().before_train_epoch(runner) 49 | 50 | def before_train_iter(self, runner): 51 | self._decide_interval(runner) 52 | super().before_train_iter(runner) 53 | 54 | def _do_evaluate(self, runner): 55 | """perform evaluation and save ckpt.""" 56 | # Synchronization of BatchNorm's buffer (running_mean 57 | # and running_var) is not supported in the DDP of pytorch, 58 | # which may cause the inconsistent performance of models in 59 | # different ranks, so we broadcast BatchNorm's buffers 60 | # of rank 0 to other ranks to avoid this. 
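# In effect, the loop below copies rank 0's running_mean / running_var into every other process,
# so all ranks evaluate with identical BatchNorm statistics.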
61 | if self.broadcast_bn_buffer: 62 | model = runner.model 63 | for name, module in model.named_modules(): 64 | if isinstance(module, 65 | _BatchNorm) and module.track_running_stats: 66 | dist.broadcast(module.running_var, 0) 67 | dist.broadcast(module.running_mean, 0) 68 | 69 | if not self._should_evaluate(runner): 70 | return 71 | 72 | tmpdir = self.tmpdir 73 | if tmpdir is None: 74 | tmpdir = osp.join(runner.work_dir, '.eval_hook') 75 | 76 | # from projects.mmdet3d_plugin.bevformer.apis.test import custom_multi_gpu_test # to solve circlur import 77 | from projects.mmdet3d_plugin.VAD.apis.test import custom_multi_gpu_test # to solve circlur import 78 | 79 | results = custom_multi_gpu_test( 80 | runner.model, 81 | self.dataloader, 82 | tmpdir=tmpdir, 83 | gpu_collect=self.gpu_collect) 84 | if runner.rank == 0: 85 | print('\n') 86 | runner.log_buffer.output['eval_iter_num'] = len(self.dataloader) 87 | 88 | key_score = self.evaluate(runner, results) 89 | 90 | if self.save_best: 91 | self._save_ckpt(runner, key_score) 92 | 93 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/VAD/runner/epoch_based_runner.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import torch 3 | import mmcv 4 | from mmcv.runner.base_runner import BaseRunner 5 | from mmcv.runner.epoch_based_runner import EpochBasedRunner 6 | from mmcv.runner.builder import RUNNERS 7 | from mmcv.runner.checkpoint import save_checkpoint 8 | from mmcv.runner.utils import get_host_info 9 | from pprint import pprint 10 | from mmcv.parallel.data_container import DataContainer 11 | 12 | 13 | @RUNNERS.register_module() 14 | class EpochBasedRunner_video(EpochBasedRunner): 15 | 16 | ''' 17 | # basic logic 18 | 19 | input_sequence = [a, b, c] # given a sequence of samples 20 | 21 | prev_bev = None 22 | for each in input_sequcene[:-1] 23 | prev_bev = eval_model(each, prev_bev)) # inference only. 24 | 25 | model(input_sequcene[-1], prev_bev) # train the last sample. 
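# Frames before the last one are pushed through `eval_model` under torch.no_grad() just to produce
# prev_bev; only the final frame's train_step contributes gradients.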
26 | ''' 27 | 28 | def __init__(self, 29 | model, 30 | eval_model=None, 31 | batch_processor=None, 32 | optimizer=None, 33 | work_dir=None, 34 | logger=None, 35 | meta=None, 36 | keys=['gt_bboxes_3d', 'gt_labels_3d', 'img'], 37 | max_iters=None, 38 | max_epochs=None): 39 | super().__init__(model, 40 | batch_processor, 41 | optimizer, 42 | work_dir, 43 | logger, 44 | meta, 45 | max_iters, 46 | max_epochs) 47 | keys.append('img_metas') 48 | self.keys = keys 49 | self.eval_model = eval_model 50 | self.eval_model.eval() 51 | 52 | def run_iter(self, data_batch, train_mode, **kwargs): 53 | if self.batch_processor is not None: 54 | assert False 55 | # outputs = self.batch_processor( 56 | # self.model, data_batch, train_mode=train_mode, **kwargs) 57 | elif train_mode: 58 | 59 | num_samples = data_batch['img'].data[0].size(1) 60 | data_list = [] 61 | prev_bev = None 62 | for i in range(num_samples): 63 | data = {} 64 | for key in self.keys: 65 | if key not in ['img_metas', 'img', 'points']: 66 | data[key] = data_batch[key] 67 | else: 68 | if key == 'img': 69 | data['img'] = DataContainer(data=[data_batch['img'].data[0][:, i]], cpu_only=data_batch['img'].cpu_only, stack=True) 70 | elif key == 'img_metas': 71 | data['img_metas'] = DataContainer(data=[[each[i] for each in data_batch['img_metas'].data[0]]], cpu_only=data_batch['img_metas'].cpu_only) 72 | else: 73 | assert False 74 | data_list.append(data) 75 | with torch.no_grad(): 76 | for i in range(num_samples-1): 77 | if i>0: data_list[i]['prev_bev'] = DataContainer(data=[prev_bev], cpu_only=False) 78 | prev_bev = self.eval_model.val_step(data_list[i], self.optimizer, **kwargs) 79 | 80 | data_list[-1]['prev_bev'] = DataContainer(data=[prev_bev], cpu_only=False) 81 | outputs = self.model.train_step(data_list[-1], self.optimizer, **kwargs) 82 | else: 83 | assert False 84 | # outputs = self.model.val_step(data_batch, self.optimizer, **kwargs) 85 | 86 | if not isinstance(outputs, dict): 87 | raise TypeError('"batch_processor()" or "model.train_step()"' 88 | 'and "model.val_step()" must return a dict') 89 | if 'log_vars' in outputs: 90 | self.log_buffer.update(outputs['log_vars'], outputs['num_samples']) 91 | self.outputs = outputs -------------------------------------------------------------------------------- /projects/configs/_base_/models/hv_second_secfpn_waymo.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | # Voxel size for voxel encoder 3 | # Usually voxel size is changed consistently with the point cloud range 4 | # If point cloud range is modified, do remember to change all related 5 | # keys in the config. 
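# Sanity check of the grid used below (values from this file): point_cloud_range [-76.8, -51.2, -2, 76.8, 51.2, 4]
# with voxel_size [0.08, 0.08, 0.1] yields 153.6 / 0.08 = 1920 voxels in x, 102.4 / 0.08 = 1280 in y and
# 6 / 0.1 = 60 in z, matching sparse_shape [61, 1280, 1920] (the extra +1 in z follows the usual
# SECOND sparse-encoder convention).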
6 | voxel_size = [0.08, 0.08, 0.1] 7 | model = dict( 8 | type='VoxelNet', 9 | voxel_layer=dict( 10 | max_num_points=10, 11 | point_cloud_range=[-76.8, -51.2, -2, 76.8, 51.2, 4], 12 | voxel_size=voxel_size, 13 | max_voxels=(80000, 90000)), 14 | voxel_encoder=dict(type='HardSimpleVFE', num_features=5), 15 | middle_encoder=dict( 16 | type='SparseEncoder', 17 | in_channels=5, 18 | sparse_shape=[61, 1280, 1920], 19 | order=('conv', 'norm', 'act')), 20 | backbone=dict( 21 | type='SECOND', 22 | in_channels=384, 23 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 24 | layer_nums=[5, 5], 25 | layer_strides=[1, 2], 26 | out_channels=[128, 256]), 27 | neck=dict( 28 | type='SECONDFPN', 29 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 30 | in_channels=[128, 256], 31 | upsample_strides=[1, 2], 32 | out_channels=[256, 256]), 33 | bbox_head=dict( 34 | type='Anchor3DHead', 35 | num_classes=3, 36 | in_channels=512, 37 | feat_channels=512, 38 | use_direction_classifier=True, 39 | anchor_generator=dict( 40 | type='AlignedAnchor3DRangeGenerator', 41 | ranges=[[-76.8, -51.2, -0.0345, 76.8, 51.2, -0.0345], 42 | [-76.8, -51.2, 0, 76.8, 51.2, 0], 43 | [-76.8, -51.2, -0.1188, 76.8, 51.2, -0.1188]], 44 | sizes=[ 45 | [2.08, 4.73, 1.77], # car 46 | [0.84, 0.91, 1.74], # pedestrian 47 | [0.84, 1.81, 1.77] # cyclist 48 | ], 49 | rotations=[0, 1.57], 50 | reshape_out=False), 51 | diff_rad_by_sin=True, 52 | dir_offset=0.7854, # pi/4 53 | dir_limit_offset=0, 54 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7), 55 | loss_cls=dict( 56 | type='FocalLoss', 57 | use_sigmoid=True, 58 | gamma=2.0, 59 | alpha=0.25, 60 | loss_weight=1.0), 61 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), 62 | loss_dir=dict( 63 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 64 | # model training and testing settings 65 | train_cfg=dict( 66 | assigner=[ 67 | dict( # car 68 | type='MaxIoUAssigner', 69 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 70 | pos_iou_thr=0.55, 71 | neg_iou_thr=0.4, 72 | min_pos_iou=0.4, 73 | ignore_iof_thr=-1), 74 | dict( # pedestrian 75 | type='MaxIoUAssigner', 76 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 77 | pos_iou_thr=0.5, 78 | neg_iou_thr=0.3, 79 | min_pos_iou=0.3, 80 | ignore_iof_thr=-1), 81 | dict( # cyclist 82 | type='MaxIoUAssigner', 83 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 84 | pos_iou_thr=0.5, 85 | neg_iou_thr=0.3, 86 | min_pos_iou=0.3, 87 | ignore_iof_thr=-1) 88 | ], 89 | allowed_border=0, 90 | code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 91 | pos_weight=-1, 92 | debug=False), 93 | test_cfg=dict( 94 | use_rotate_nms=True, 95 | nms_across_levels=False, 96 | nms_pre=4096, 97 | nms_thr=0.25, 98 | score_thr=0.1, 99 | min_bbox_size=0, 100 | max_num=500)) 101 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/detectors/bevformer_fp16.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------------- 2 | # Copyright (c) OpenMMLab. All rights reserved. 
3 | # --------------------------------------------- 4 | # Modified by Zhiqi Li 5 | # --------------------------------------------- 6 | 7 | from tkinter.messagebox import NO 8 | import torch 9 | from mmcv.runner import force_fp32, auto_fp16 10 | from mmdet.models import DETECTORS 11 | from mmdet3d.core import bbox3d2result 12 | from mmdet3d.models.detectors.mvx_two_stage import MVXTwoStageDetector 13 | from projects.mmdet3d_plugin.models.utils.grid_mask import GridMask 14 | from projects.mmdet3d_plugin.bevformer.detectors.bevformer import BEVFormer 15 | import time 16 | import copy 17 | import numpy as np 18 | import mmdet3d 19 | from projects.mmdet3d_plugin.models.utils.bricks import run_time 20 | 21 | 22 | @DETECTORS.register_module() 23 | class BEVFormer_fp16(BEVFormer): 24 | """ 25 | The default version BEVFormer currently can not support FP16. 26 | We provide this version to resolve this issue. 27 | """ 28 | 29 | @auto_fp16(apply_to=('img', 'prev_bev', 'points')) 30 | def forward_train(self, 31 | points=None, 32 | img_metas=None, 33 | gt_bboxes_3d=None, 34 | gt_labels_3d=None, 35 | gt_labels=None, 36 | gt_bboxes=None, 37 | img=None, 38 | proposals=None, 39 | gt_bboxes_ignore=None, 40 | img_depth=None, 41 | img_mask=None, 42 | prev_bev=None, 43 | ): 44 | """Forward training function. 45 | Args: 46 | points (list[torch.Tensor], optional): Points of each sample. 47 | Defaults to None. 48 | img_metas (list[dict], optional): Meta information of each sample. 49 | Defaults to None. 50 | gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`], optional): 51 | Ground truth 3D boxes. Defaults to None. 52 | gt_labels_3d (list[torch.Tensor], optional): Ground truth labels 53 | of 3D boxes. Defaults to None. 54 | gt_labels (list[torch.Tensor], optional): Ground truth labels 55 | of 2D boxes in images. Defaults to None. 56 | gt_bboxes (list[torch.Tensor], optional): Ground truth 2D boxes in 57 | images. Defaults to None. 58 | img (torch.Tensor optional): Images of each sample with shape 59 | (N, C, H, W). Defaults to None. 60 | proposals ([list[torch.Tensor], optional): Predicted proposals 61 | used for training Fast RCNN. Defaults to None. 62 | gt_bboxes_ignore (list[torch.Tensor], optional): Ground truth 63 | 2D boxes in images to be ignored. Defaults to None. 64 | Returns: 65 | dict: Losses of different branches. 66 | """ 67 | 68 | img_feats = self.extract_feat(img=img, img_metas=img_metas) 69 | 70 | losses = dict() 71 | losses_pts = self.forward_pts_train(img_feats, gt_bboxes_3d, 72 | gt_labels_3d, img_metas, 73 | gt_bboxes_ignore, prev_bev=prev_bev) 74 | losses.update(losses_pts) 75 | return losses 76 | 77 | 78 | def val_step(self, data, optimizer): 79 | """ 80 | In BEVFormer_fp16, we use this `val_step` function to inference the `prev_pev`. 81 | This is not the standard function of `val_step`. 82 | """ 83 | 84 | img = data['img'] 85 | img_metas = data['img_metas'] 86 | img_feats = self.extract_feat(img=img, img_metas=img_metas) 87 | prev_bev = data.get('prev_bev', None) 88 | prev_bev = self.pts_bbox_head(img_feats, img_metas, prev_bev=prev_bev, only_bev=True) 89 | return prev_bev -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/embed.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | import torch.nn.functional as F 3 | from mmcv.cnn import build_conv_layer, build_norm_layer 4 | from mmcv.runner.base_module import BaseModule 5 | from torch.nn.modules.utils import _pair as to_2tuple 6 | 7 | 8 | # Modified from Pytorch-Image-Models 9 | class PatchEmbed(BaseModule): 10 | """Image to Patch Embedding V2. 11 | 12 | We use a conv layer to implement PatchEmbed. 13 | Args: 14 | in_channels (int): The num of input channels. Default: 3 15 | embed_dims (int): The dimensions of embedding. Default: 768 16 | conv_type (dict, optional): The config dict for conv layers type 17 | selection. Default: None. 18 | kernel_size (int): The kernel_size of embedding conv. Default: 16. 19 | stride (int): The slide stride of embedding conv. 20 | Default: None (Default to be equal with kernel_size). 21 | padding (int): The padding length of embedding conv. Default: 0. 22 | dilation (int): The dilation rate of embedding conv. Default: 1. 23 | pad_to_patch_size (bool, optional): Whether to pad feature map shape 24 | to multiple patch size. Default: True. 25 | norm_cfg (dict, optional): Config dict for normalization layer. 26 | init_cfg (`mmcv.ConfigDict`, optional): The Config for initialization. 27 | Default: None. 28 | """ 29 | 30 | def __init__(self, 31 | in_channels=3, 32 | embed_dims=768, 33 | conv_type=None, 34 | kernel_size=16, 35 | stride=16, 36 | padding=0, 37 | dilation=1, 38 | pad_to_patch_size=True, 39 | norm_cfg=None, 40 | init_cfg=None): 41 | super(PatchEmbed, self).__init__() 42 | 43 | self.embed_dims = embed_dims 44 | self.init_cfg = init_cfg 45 | 46 | if stride is None: 47 | stride = kernel_size 48 | 49 | self.pad_to_patch_size = pad_to_patch_size 50 | 51 | # The default setting of patch size is equal to kernel size. 52 | patch_size = kernel_size 53 | if isinstance(patch_size, int): 54 | patch_size = to_2tuple(patch_size) 55 | elif isinstance(patch_size, tuple): 56 | if len(patch_size) == 1: 57 | patch_size = to_2tuple(patch_size[0]) 58 | assert len(patch_size) == 2, \ 59 | f'The size of patch should have length 1 or 2, ' \ 60 | f'but got {len(patch_size)}' 61 | 62 | self.patch_size = patch_size 63 | 64 | # Use conv layer to embed 65 | conv_type = conv_type or 'Conv2d' 66 | self.projection = build_conv_layer( 67 | dict(type=conv_type), 68 | in_channels=in_channels, 69 | out_channels=embed_dims, 70 | kernel_size=kernel_size, 71 | stride=stride, 72 | padding=padding, 73 | dilation=dilation) 74 | 75 | if norm_cfg is not None: 76 | self.norm = build_norm_layer(norm_cfg, embed_dims)[1] 77 | else: 78 | self.norm = None 79 | 80 | def forward(self, x): 81 | H, W = x.shape[2], x.shape[3] 82 | 83 | # TODO: Process overlapping op 84 | if self.pad_to_patch_size: 85 | # Modify H, W to multiple of patch size. 
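# Worked example (hypothetical input size): with the default 16x16 patches, a 900x1600 image is padded
# to 912x1600 (900 % 16 == 4, so 12 rows are added), projected to a 57x100 feature map, and flattened
# into 57 * 100 = 5700 patch tokens of dimension embed_dims.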
86 | if H % self.patch_size[0] != 0: 87 | x = F.pad( 88 | x, (0, 0, 0, self.patch_size[0] - H % self.patch_size[0])) 89 | if W % self.patch_size[1] != 0: 90 | x = F.pad( 91 | x, (0, self.patch_size[1] - W % self.patch_size[1], 0, 0)) 92 | 93 | x = self.projection(x) 94 | self.DH, self.DW = x.shape[2], x.shape[3] 95 | x = x.flatten(2).transpose(1, 2) 96 | 97 | if self.norm is not None: 98 | x = self.norm(x) 99 | 100 | return x -------------------------------------------------------------------------------- /projects/configs/_base_/models/imvotenet_image.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='ImVoteNet', 3 | img_backbone=dict( 4 | type='ResNet', 5 | depth=50, 6 | num_stages=4, 7 | out_indices=(0, 1, 2, 3), 8 | frozen_stages=1, 9 | norm_cfg=dict(type='BN', requires_grad=False), 10 | norm_eval=True, 11 | style='caffe'), 12 | img_neck=dict( 13 | type='FPN', 14 | in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | num_outs=5), 17 | img_rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=256, 20 | feat_channels=256, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | scales=[8], 24 | ratios=[0.5, 1.0, 2.0], 25 | strides=[4, 8, 16, 32, 64]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | img_roi_head=dict( 34 | type='StandardRoIHead', 35 | bbox_roi_extractor=dict( 36 | type='SingleRoIExtractor', 37 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 38 | out_channels=256, 39 | featmap_strides=[4, 8, 16, 32]), 40 | bbox_head=dict( 41 | type='Shared2FCBBoxHead', 42 | in_channels=256, 43 | fc_out_channels=1024, 44 | roi_feat_size=7, 45 | num_classes=10, 46 | bbox_coder=dict( 47 | type='DeltaXYWHBBoxCoder', 48 | target_means=[0., 0., 0., 0.], 49 | target_stds=[0.1, 0.1, 0.2, 0.2]), 50 | reg_class_agnostic=False, 51 | loss_cls=dict( 52 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 53 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 54 | 55 | # model training and testing settings 56 | train_cfg=dict( 57 | img_rpn=dict( 58 | assigner=dict( 59 | type='MaxIoUAssigner', 60 | pos_iou_thr=0.7, 61 | neg_iou_thr=0.3, 62 | min_pos_iou=0.3, 63 | match_low_quality=True, 64 | ignore_iof_thr=-1), 65 | sampler=dict( 66 | type='RandomSampler', 67 | num=256, 68 | pos_fraction=0.5, 69 | neg_pos_ub=-1, 70 | add_gt_as_proposals=False), 71 | allowed_border=-1, 72 | pos_weight=-1, 73 | debug=False), 74 | img_rpn_proposal=dict( 75 | nms_across_levels=False, 76 | nms_pre=2000, 77 | nms_post=1000, 78 | max_per_img=1000, 79 | nms=dict(type='nms', iou_threshold=0.7), 80 | min_bbox_size=0), 81 | img_rcnn=dict( 82 | assigner=dict( 83 | type='MaxIoUAssigner', 84 | pos_iou_thr=0.5, 85 | neg_iou_thr=0.5, 86 | min_pos_iou=0.5, 87 | match_low_quality=False, 88 | ignore_iof_thr=-1), 89 | sampler=dict( 90 | type='RandomSampler', 91 | num=512, 92 | pos_fraction=0.25, 93 | neg_pos_ub=-1, 94 | add_gt_as_proposals=True), 95 | pos_weight=-1, 96 | debug=False)), 97 | test_cfg=dict( 98 | img_rpn=dict( 99 | nms_across_levels=False, 100 | nms_pre=1000, 101 | nms_post=1000, 102 | max_per_img=1000, 103 | nms=dict(type='nms', iou_threshold=0.7), 104 | min_bbox_size=0), 105 | img_rcnn=dict( 106 | score_thr=0.05, 107 | nms=dict(type='nms', iou_threshold=0.5), 108 | max_per_img=100))) 109 | 
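# Note: the config above defines only the 2D image branch of ImVoteNet (a caffe-style ResNet-50 + FPN with a
# standard RPN/RoI head over 10 classes); the point branch and the fusion settings are expected to live in the
# full ImVoteNet config that inherits from this base.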
-------------------------------------------------------------------------------- /projects/configs/_base_/datasets/s3dis-3d-5class.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'S3DISDataset' 3 | data_root = './data/s3dis/' 4 | class_names = ('table', 'chair', 'sofa', 'bookcase', 'board') 5 | train_area = [1, 2, 3, 4, 6] 6 | test_area = 5 7 | 8 | train_pipeline = [ 9 | dict( 10 | type='LoadPointsFromFile', 11 | coord_type='DEPTH', 12 | shift_height=True, 13 | load_dim=6, 14 | use_dim=[0, 1, 2, 3, 4, 5]), 15 | dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True), 16 | dict(type='PointSample', num_points=40000), 17 | dict( 18 | type='RandomFlip3D', 19 | sync_2d=False, 20 | flip_ratio_bev_horizontal=0.5, 21 | flip_ratio_bev_vertical=0.5), 22 | dict( 23 | type='GlobalRotScaleTrans', 24 | # following ScanNet dataset the rotation range is 5 degrees 25 | rot_range=[-0.087266, 0.087266], 26 | scale_ratio_range=[1.0, 1.0], 27 | shift_height=True), 28 | dict(type='DefaultFormatBundle3D', class_names=class_names), 29 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 30 | ] 31 | test_pipeline = [ 32 | dict( 33 | type='LoadPointsFromFile', 34 | coord_type='DEPTH', 35 | shift_height=True, 36 | load_dim=6, 37 | use_dim=[0, 1, 2, 3, 4, 5]), 38 | dict( 39 | type='MultiScaleFlipAug3D', 40 | img_scale=(1333, 800), 41 | pts_scale_ratio=1, 42 | flip=False, 43 | transforms=[ 44 | dict( 45 | type='GlobalRotScaleTrans', 46 | rot_range=[0, 0], 47 | scale_ratio_range=[1., 1.], 48 | translation_std=[0, 0, 0]), 49 | dict( 50 | type='RandomFlip3D', 51 | sync_2d=False, 52 | flip_ratio_bev_horizontal=0.5, 53 | flip_ratio_bev_vertical=0.5), 54 | dict(type='PointSample', num_points=40000), 55 | dict( 56 | type='DefaultFormatBundle3D', 57 | class_names=class_names, 58 | with_label=False), 59 | dict(type='Collect3D', keys=['points']) 60 | ]) 61 | ] 62 | # construct a pipeline for data and gt loading in show function 63 | # please keep its loading function consistent with test_pipeline (e.g. 
client) 64 | eval_pipeline = [ 65 | dict( 66 | type='LoadPointsFromFile', 67 | coord_type='DEPTH', 68 | shift_height=False, 69 | load_dim=6, 70 | use_dim=[0, 1, 2, 3, 4, 5]), 71 | dict( 72 | type='DefaultFormatBundle3D', 73 | class_names=class_names, 74 | with_label=False), 75 | dict(type='Collect3D', keys=['points']) 76 | ] 77 | 78 | data = dict( 79 | samples_per_gpu=8, 80 | workers_per_gpu=4, 81 | train=dict( 82 | type='RepeatDataset', 83 | times=5, 84 | dataset=dict( 85 | type='ConcatDataset', 86 | datasets=[ 87 | dict( 88 | type=dataset_type, 89 | data_root=data_root, 90 | ann_file=data_root + f's3dis_infos_Area_{i}.pkl', 91 | pipeline=train_pipeline, 92 | filter_empty_gt=False, 93 | classes=class_names, 94 | box_type_3d='Depth') for i in train_area 95 | ], 96 | separate_eval=False)), 97 | val=dict( 98 | type=dataset_type, 99 | data_root=data_root, 100 | ann_file=data_root + f's3dis_infos_Area_{test_area}.pkl', 101 | pipeline=test_pipeline, 102 | classes=class_names, 103 | test_mode=True, 104 | box_type_3d='Depth'), 105 | test=dict( 106 | type=dataset_type, 107 | data_root=data_root, 108 | ann_file=data_root + f's3dis_infos_Area_{test_area}.pkl', 109 | pipeline=test_pipeline, 110 | classes=class_names, 111 | test_mode=True, 112 | box_type_3d='Depth')) 113 | 114 | evaluation = dict(pipeline=eval_pipeline) 115 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/runner/epoch_based_runner.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | # --------------------------------------------- 3 | # Modified by Zhiqi Li 4 | # --------------------------------------------- 5 | 6 | import os.path as osp 7 | import torch 8 | import mmcv 9 | from mmcv.runner.base_runner import BaseRunner 10 | from mmcv.runner.epoch_based_runner import EpochBasedRunner 11 | from mmcv.runner.builder import RUNNERS 12 | from mmcv.runner.checkpoint import save_checkpoint 13 | from mmcv.runner.utils import get_host_info 14 | from pprint import pprint 15 | from mmcv.parallel.data_container import DataContainer 16 | 17 | 18 | @RUNNERS.register_module() 19 | class EpochBasedRunner_video(EpochBasedRunner): 20 | 21 | ''' 22 | # basic logic 23 | 24 | input_sequence = [a, b, c] # given a sequence of samples 25 | 26 | prev_bev = None 27 | for each in input_sequcene[:-1] 28 | prev_bev = eval_model(each, prev_bev)) # inference only. 29 | 30 | model(input_sequcene[-1], prev_bev) # train the last sample. 
31 | ''' 32 | 33 | def __init__(self, 34 | model, 35 | eval_model=None, 36 | batch_processor=None, 37 | optimizer=None, 38 | work_dir=None, 39 | logger=None, 40 | meta=None, 41 | keys=['gt_bboxes_3d', 'gt_labels_3d', 'img'], 42 | max_iters=None, 43 | max_epochs=None): 44 | super().__init__(model, 45 | batch_processor, 46 | optimizer, 47 | work_dir, 48 | logger, 49 | meta, 50 | max_iters, 51 | max_epochs) 52 | keys.append('img_metas') 53 | self.keys = keys 54 | self.eval_model = eval_model 55 | self.eval_model.eval() 56 | 57 | def run_iter(self, data_batch, train_mode, **kwargs): 58 | if self.batch_processor is not None: 59 | assert False 60 | # outputs = self.batch_processor( 61 | # self.model, data_batch, train_mode=train_mode, **kwargs) 62 | elif train_mode: 63 | 64 | num_samples = data_batch['img'].data[0].size(1) 65 | data_list = [] 66 | prev_bev = None 67 | for i in range(num_samples): 68 | data = {} 69 | for key in self.keys: 70 | if key not in ['img_metas', 'img', 'points']: 71 | data[key] = data_batch[key] 72 | else: 73 | if key == 'img': 74 | data['img'] = DataContainer(data=[data_batch['img'].data[0][:, i]], cpu_only=data_batch['img'].cpu_only, stack=True) 75 | elif key == 'img_metas': 76 | data['img_metas'] = DataContainer(data=[[each[i] for each in data_batch['img_metas'].data[0]]], cpu_only=data_batch['img_metas'].cpu_only) 77 | else: 78 | assert False 79 | data_list.append(data) 80 | with torch.no_grad(): 81 | for i in range(num_samples-1): 82 | if i>0: data_list[i]['prev_bev'] = DataContainer(data=[prev_bev], cpu_only=False) 83 | prev_bev = self.eval_model.val_step(data_list[i], self.optimizer, **kwargs) 84 | 85 | data_list[-1]['prev_bev'] = DataContainer(data=[prev_bev], cpu_only=False) 86 | outputs = self.model.train_step(data_list[-1], self.optimizer, **kwargs) 87 | else: 88 | assert False 89 | # outputs = self.model.val_step(data_batch, self.optimizer, **kwargs) 90 | 91 | if not isinstance(outputs, dict): 92 | raise TypeError('"batch_processor()" or "model.train_step()"' 93 | 'and "model.val_step()" must return a dict') 94 | if 'log_vars' in outputs: 95 | self.log_buffer.update(outputs['log_vars'], outputs['num_samples']) 96 | self.outputs = outputs -------------------------------------------------------------------------------- /projects/configs/datasets/custom_waymo-3d.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | # D5 in the config name means the whole dataset is divided into 5 folds 3 | # We only use one fold for efficient experiments 4 | dataset_type = 'CustomWaymoDataset' 5 | data_root = 'data/waymo/kitti_format/' 6 | file_client_args = dict(backend='disk') 7 | # Uncomment the following if use ceph or other file clients. 8 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient 9 | # for more details. 
10 | # file_client_args = dict( 11 | # backend='petrel', path_mapping=dict(data='s3://waymo_data/')) 12 | 13 | img_norm_cfg = dict( 14 | mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) 15 | class_names = ['Car', 'Pedestrian', 'Cyclist'] 16 | point_cloud_range = [-74.88, -74.88, -2, 74.88, 74.88, 4] 17 | input_modality = dict(use_lidar=False, use_camera=True) 18 | db_sampler = dict( 19 | data_root=data_root, 20 | info_path=data_root + 'waymo_dbinfos_train.pkl', 21 | rate=1.0, 22 | prepare=dict( 23 | filter_by_difficulty=[-1], 24 | filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)), 25 | classes=class_names, 26 | sample_groups=dict(Car=15, Pedestrian=10, Cyclist=10), 27 | points_loader=dict( 28 | type='LoadPointsFromFile', 29 | coord_type='LIDAR', 30 | load_dim=5, 31 | use_dim=[0, 1, 2, 3, 4], 32 | file_client_args=file_client_args)) 33 | 34 | 35 | 36 | train_pipeline = [ 37 | dict(type='LoadMultiViewImageFromFiles', to_float32=True), 38 | dict(type='PhotoMetricDistortionMultiViewImage'), 39 | dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True, with_attr_label=False), 40 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), 41 | dict(type='ObjectNameFilter', classes=class_names), 42 | dict(type='NormalizeMultiviewImage', **img_norm_cfg), 43 | dict(type='PadMultiViewImage', size_divisor=32), 44 | dict(type='DefaultFormatBundle3D', class_names=class_names), 45 | dict(type='CustomCollect3D', keys=['gt_bboxes_3d', 'gt_labels_3d', 'img']) 46 | ] 47 | 48 | 49 | test_pipeline = [ 50 | dict(type='LoadMultiViewImageFromFiles', to_float32=True), 51 | dict(type='NormalizeMultiviewImage', **img_norm_cfg), 52 | dict(type='PadMultiViewImage', size_divisor=32), 53 | dict( 54 | type='MultiScaleFlipAug3D', 55 | img_scale=(1920, 1280), 56 | pts_scale_ratio=1, 57 | flip=False, 58 | transforms=[ 59 | dict( 60 | type='DefaultFormatBundle3D', 61 | class_names=class_names, 62 | with_label=False), 63 | dict(type='CustomCollect3D', keys=['img']) 64 | ]) 65 | ] 66 | 67 | 68 | # construct a pipeline for data and gt loading in show function 69 | # please keep its loading function consistent with test_pipeline (e.g. client) 70 | 71 | data = dict( 72 | samples_per_gpu=2, 73 | workers_per_gpu=4, 74 | train=dict( 75 | type='RepeatDataset', 76 | times=2, 77 | dataset=dict( 78 | type=dataset_type, 79 | data_root=data_root, 80 | ann_file=data_root + 'waymo_infos_train.pkl', 81 | split='training', 82 | pipeline=train_pipeline, 83 | modality=input_modality, 84 | classes=class_names, 85 | test_mode=False, 86 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset 87 | # and box_type_3d='Depth' in sunrgbd and scannet dataset. 
88 | box_type_3d='LiDAR', 89 | # load one frame every five frames 90 | load_interval=5)), 91 | val=dict( 92 | type=dataset_type, 93 | data_root=data_root, 94 | ann_file=data_root + 'waymo_infos_val.pkl', 95 | split='training', 96 | pipeline=test_pipeline, 97 | modality=input_modality, 98 | classes=class_names, 99 | test_mode=True, 100 | box_type_3d='LiDAR'), 101 | test=dict( 102 | type=dataset_type, 103 | data_root=data_root, 104 | ann_file=data_root + 'waymo_infos_val.pkl', 105 | split='training', 106 | pipeline=test_pipeline, 107 | modality=input_modality, 108 | classes=class_names, 109 | test_mode=True, 110 | box_type_3d='LiDAR')) 111 | 112 | evaluation = dict(interval=24, pipeline=test_pipeline) -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/samplers/group_sampler.py: -------------------------------------------------------------------------------- 1 | 2 | # Copyright (c) OpenMMLab. All rights reserved. 3 | import math 4 | 5 | import numpy as np 6 | import torch 7 | from mmcv.runner import get_dist_info 8 | from torch.utils.data import Sampler 9 | from .sampler import SAMPLER 10 | import random 11 | from IPython import embed 12 | 13 | 14 | @SAMPLER.register_module() 15 | class DistributedGroupSampler(Sampler): 16 | """Sampler that restricts data loading to a subset of the dataset. 17 | It is especially useful in conjunction with 18 | :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each 19 | process can pass a DistributedSampler instance as a DataLoader sampler, 20 | and load a subset of the original dataset that is exclusive to it. 21 | .. note:: 22 | Dataset is assumed to be of constant size. 23 | Arguments: 24 | dataset: Dataset used for sampling. 25 | num_replicas (optional): Number of processes participating in 26 | distributed training. 27 | rank (optional): Rank of the current process within num_replicas. 28 | seed (int, optional): random seed used to shuffle the sampler if 29 | ``shuffle=True``. This number should be identical across all 30 | processes in the distributed group. Default: 0. 31 | """ 32 | 33 | def __init__(self, 34 | dataset, 35 | samples_per_gpu=1, 36 | num_replicas=None, 37 | rank=None, 38 | seed=0): 39 | _rank, _num_replicas = get_dist_info() 40 | if num_replicas is None: 41 | num_replicas = _num_replicas 42 | if rank is None: 43 | rank = _rank 44 | self.dataset = dataset 45 | self.samples_per_gpu = samples_per_gpu 46 | self.num_replicas = num_replicas 47 | self.rank = rank 48 | self.epoch = 0 49 | self.seed = seed if seed is not None else 0 50 | 51 | assert hasattr(self.dataset, 'flag') 52 | self.flag = self.dataset.flag 53 | self.group_sizes = np.bincount(self.flag) 54 | 55 | self.num_samples = 0 56 | for i, j in enumerate(self.group_sizes): 57 | self.num_samples += int( 58 | math.ceil(self.group_sizes[i] * 1.0 / self.samples_per_gpu / 59 | self.num_replicas)) * self.samples_per_gpu 60 | self.total_size = self.num_samples * self.num_replicas 61 | 62 | def __iter__(self): 63 | # deterministically shuffle based on epoch 64 | g = torch.Generator() 65 | g.manual_seed(self.epoch + self.seed) 66 | 67 | indices = [] 68 | for i, size in enumerate(self.group_sizes): 69 | if size > 0: 70 | indice = np.where(self.flag == i)[0] 71 | assert len(indice) == size 72 | # add .numpy() to avoid bug when selecting indice in parrots. 73 | # TODO: check whether torch.randperm() can be replaced by 74 | # numpy.random.permutation(). 
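# The block below first shuffles the indices of this group with the seeded generator, then pads the list by
# repeating itself until its length is a multiple of samples_per_gpu * num_replicas, so that every GPU batch
# can be filled from a single group.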
75 | indice = indice[list( 76 | torch.randperm(int(size), generator=g).numpy())].tolist() 77 | extra = int( 78 | math.ceil( 79 | size * 1.0 / self.samples_per_gpu / self.num_replicas) 80 | ) * self.samples_per_gpu * self.num_replicas - len(indice) 81 | # pad indice 82 | tmp = indice.copy() 83 | for _ in range(extra // size): 84 | indice.extend(tmp) 85 | indice.extend(tmp[:extra % size]) 86 | indices.extend(indice) 87 | 88 | assert len(indices) == self.total_size 89 | 90 | indices = [ 91 | indices[j] for i in list( 92 | torch.randperm( 93 | len(indices) // self.samples_per_gpu, generator=g)) 94 | for j in range(i * self.samples_per_gpu, (i + 1) * 95 | self.samples_per_gpu) 96 | ] 97 | 98 | # subsample 99 | offset = self.num_samples * self.rank 100 | indices = indices[offset:offset + self.num_samples] 101 | assert len(indices) == self.num_samples 102 | 103 | return iter(indices) 104 | 105 | def __len__(self): 106 | return self.num_samples 107 | 108 | def set_epoch(self, epoch): 109 | self.epoch = epoch 110 | 111 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/hv_pointpillars_secfpn_waymo.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | # Voxel size for voxel encoder 3 | # Usually voxel size is changed consistently with the point cloud range 4 | # If point cloud range is modified, do remember to change all related 5 | # keys in the config. 6 | voxel_size = [0.32, 0.32, 6] 7 | model = dict( 8 | type='MVXFasterRCNN', 9 | pts_voxel_layer=dict( 10 | max_num_points=20, 11 | point_cloud_range=[-74.88, -74.88, -2, 74.88, 74.88, 4], 12 | voxel_size=voxel_size, 13 | max_voxels=(32000, 32000)), 14 | pts_voxel_encoder=dict( 15 | type='HardVFE', 16 | in_channels=5, 17 | feat_channels=[64], 18 | with_distance=False, 19 | voxel_size=voxel_size, 20 | with_cluster_center=True, 21 | with_voxel_center=True, 22 | point_cloud_range=[-74.88, -74.88, -2, 74.88, 74.88, 4], 23 | norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)), 24 | pts_middle_encoder=dict( 25 | type='PointPillarsScatter', in_channels=64, output_shape=[468, 468]), 26 | pts_backbone=dict( 27 | type='SECOND', 28 | in_channels=64, 29 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 30 | layer_nums=[3, 5, 5], 31 | layer_strides=[1, 2, 2], 32 | out_channels=[64, 128, 256]), 33 | pts_neck=dict( 34 | type='SECONDFPN', 35 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 36 | in_channels=[64, 128, 256], 37 | upsample_strides=[1, 2, 4], 38 | out_channels=[128, 128, 128]), 39 | pts_bbox_head=dict( 40 | type='Anchor3DHead', 41 | num_classes=3, 42 | in_channels=384, 43 | feat_channels=384, 44 | use_direction_classifier=True, 45 | anchor_generator=dict( 46 | type='AlignedAnchor3DRangeGenerator', 47 | ranges=[[-74.88, -74.88, -0.0345, 74.88, 74.88, -0.0345], 48 | [-74.88, -74.88, -0.1188, 74.88, 74.88, -0.1188], 49 | [-74.88, -74.88, 0, 74.88, 74.88, 0]], 50 | sizes=[ 51 | [2.08, 4.73, 1.77], # car 52 | [0.84, 1.81, 1.77], # cyclist 53 | [0.84, 0.91, 1.74] # pedestrian 54 | ], 55 | rotations=[0, 1.57], 56 | reshape_out=False), 57 | diff_rad_by_sin=True, 58 | dir_offset=0.7854, # pi/4 59 | dir_limit_offset=0, 60 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7), 61 | loss_cls=dict( 62 | type='FocalLoss', 63 | use_sigmoid=True, 64 | gamma=2.0, 65 | alpha=0.25, 66 | loss_weight=1.0), 67 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), 68 | loss_dir=dict( 69 | 
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 70 | # model training and testing settings 71 | train_cfg=dict( 72 | pts=dict( 73 | assigner=[ 74 | dict( # car 75 | type='MaxIoUAssigner', 76 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 77 | pos_iou_thr=0.55, 78 | neg_iou_thr=0.4, 79 | min_pos_iou=0.4, 80 | ignore_iof_thr=-1), 81 | dict( # cyclist 82 | type='MaxIoUAssigner', 83 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 84 | pos_iou_thr=0.5, 85 | neg_iou_thr=0.3, 86 | min_pos_iou=0.3, 87 | ignore_iof_thr=-1), 88 | dict( # pedestrian 89 | type='MaxIoUAssigner', 90 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 91 | pos_iou_thr=0.5, 92 | neg_iou_thr=0.3, 93 | min_pos_iou=0.3, 94 | ignore_iof_thr=-1), 95 | ], 96 | allowed_border=0, 97 | code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 98 | pos_weight=-1, 99 | debug=False)), 100 | test_cfg=dict( 101 | pts=dict( 102 | use_rotate_nms=True, 103 | nms_across_levels=False, 104 | nms_pre=4096, 105 | nms_thr=0.25, 106 | score_thr=0.1, 107 | min_bbox_size=0, 108 | max_num=500))) 109 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/grid_mask.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | from PIL import Image 5 | from mmcv.runner import force_fp32, auto_fp16 6 | 7 | class Grid(object): 8 | def __init__(self, use_h, use_w, rotate = 1, offset=False, ratio = 0.5, mode=0, prob = 1.): 9 | self.use_h = use_h 10 | self.use_w = use_w 11 | self.rotate = rotate 12 | self.offset = offset 13 | self.ratio = ratio 14 | self.mode=mode 15 | self.st_prob = prob 16 | self.prob = prob 17 | 18 | def set_prob(self, epoch, max_epoch): 19 | self.prob = self.st_prob * epoch / max_epoch 20 | 21 | def __call__(self, img, label): 22 | if np.random.rand() > self.prob: 23 | return img, label 24 | h = img.size(1) 25 | w = img.size(2) 26 | self.d1 = 2 27 | self.d2 = min(h, w) 28 | hh = int(1.5*h) 29 | ww = int(1.5*w) 30 | d = np.random.randint(self.d1, self.d2) 31 | if self.ratio == 1: 32 | self.l = np.random.randint(1, d) 33 | else: 34 | self.l = min(max(int(d*self.ratio+0.5),1),d-1) 35 | mask = np.ones((hh, ww), np.float32) 36 | st_h = np.random.randint(d) 37 | st_w = np.random.randint(d) 38 | if self.use_h: 39 | for i in range(hh//d): 40 | s = d*i + st_h 41 | t = min(s+self.l, hh) 42 | mask[s:t,:] *= 0 43 | if self.use_w: 44 | for i in range(ww//d): 45 | s = d*i + st_w 46 | t = min(s+self.l, ww) 47 | mask[:,s:t] *= 0 48 | 49 | r = np.random.randint(self.rotate) 50 | mask = Image.fromarray(np.uint8(mask)) 51 | mask = mask.rotate(r) 52 | mask = np.asarray(mask) 53 | mask = mask[(hh-h)//2:(hh-h)//2+h, (ww-w)//2:(ww-w)//2+w] 54 | 55 | mask = torch.from_numpy(mask).float() 56 | if self.mode == 1: 57 | mask = 1-mask 58 | 59 | mask = mask.expand_as(img) 60 | if self.offset: 61 | offset = torch.from_numpy(2 * (np.random.rand(h,w) - 0.5)).float() 62 | offset = (1 - mask) * offset 63 | img = img * mask + offset 64 | else: 65 | img = img * mask 66 | 67 | return img, label 68 | 69 | 70 | class GridMask(nn.Module): 71 | def __init__(self, use_h, use_w, rotate = 1, offset=False, ratio = 0.5, mode=0, prob = 1.): 72 | super(GridMask, self).__init__() 73 | self.use_h = use_h 74 | self.use_w = use_w 75 | self.rotate = rotate 76 | self.offset = offset 77 | self.ratio = ratio 78 | self.mode = mode 79 | self.st_prob = prob 80 | self.prob = prob 81 | self.fp16_enable = False 82 | def 
set_prob(self, epoch, max_epoch): 83 | self.prob = self.st_prob * epoch / max_epoch #+ 1.#0.5 84 | @auto_fp16() 85 | def forward(self, x): 86 | if np.random.rand() > self.prob or not self.training: 87 | return x 88 | n,c,h,w = x.size() 89 | x = x.view(-1,h,w) 90 | hh = int(1.5*h) 91 | ww = int(1.5*w) 92 | d = np.random.randint(2, h) 93 | self.l = min(max(int(d*self.ratio+0.5),1),d-1) 94 | mask = np.ones((hh, ww), np.float32) 95 | st_h = np.random.randint(d) 96 | st_w = np.random.randint(d) 97 | if self.use_h: 98 | for i in range(hh//d): 99 | s = d*i + st_h 100 | t = min(s+self.l, hh) 101 | mask[s:t,:] *= 0 102 | if self.use_w: 103 | for i in range(ww//d): 104 | s = d*i + st_w 105 | t = min(s+self.l, ww) 106 | mask[:,s:t] *= 0 107 | 108 | r = np.random.randint(self.rotate) 109 | mask = Image.fromarray(np.uint8(mask)) 110 | mask = mask.rotate(r) 111 | mask = np.asarray(mask) 112 | mask = mask[(hh-h)//2:(hh-h)//2+h, (ww-w)//2:(ww-w)//2+w] 113 | 114 | mask = torch.from_numpy(mask).to(x.dtype).cuda() 115 | if self.mode == 1: 116 | mask = 1-mask 117 | mask = mask.expand_as(x) 118 | if self.offset: 119 | offset = torch.from_numpy(2 * (np.random.rand(h,w) - 0.5)).to(x.dtype).cuda() 120 | x = x * mask + offset * (1 - mask) 121 | else: 122 | x = x * mask 123 | 124 | return x.view(n,c,h,w) -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/scannet-3d-18class.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'ScanNetDataset' 3 | data_root = './data/scannet/' 4 | class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window', 5 | 'bookshelf', 'picture', 'counter', 'desk', 'curtain', 6 | 'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub', 7 | 'garbagebin') 8 | train_pipeline = [ 9 | dict( 10 | type='LoadPointsFromFile', 11 | coord_type='DEPTH', 12 | shift_height=True, 13 | load_dim=6, 14 | use_dim=[0, 1, 2]), 15 | dict( 16 | type='LoadAnnotations3D', 17 | with_bbox_3d=True, 18 | with_label_3d=True, 19 | with_mask_3d=True, 20 | with_seg_3d=True), 21 | dict(type='GlobalAlignment', rotation_axis=2), 22 | dict( 23 | type='PointSegClassMapping', 24 | valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 25 | 36, 39), 26 | max_cat_id=40), 27 | dict(type='PointSample', num_points=40000), 28 | dict( 29 | type='RandomFlip3D', 30 | sync_2d=False, 31 | flip_ratio_bev_horizontal=0.5, 32 | flip_ratio_bev_vertical=0.5), 33 | dict( 34 | type='GlobalRotScaleTrans', 35 | rot_range=[-0.087266, 0.087266], 36 | scale_ratio_range=[1.0, 1.0], 37 | shift_height=True), 38 | dict(type='DefaultFormatBundle3D', class_names=class_names), 39 | dict( 40 | type='Collect3D', 41 | keys=[ 42 | 'points', 'gt_bboxes_3d', 'gt_labels_3d', 'pts_semantic_mask', 43 | 'pts_instance_mask' 44 | ]) 45 | ] 46 | test_pipeline = [ 47 | dict( 48 | type='LoadPointsFromFile', 49 | coord_type='DEPTH', 50 | shift_height=True, 51 | load_dim=6, 52 | use_dim=[0, 1, 2]), 53 | dict(type='GlobalAlignment', rotation_axis=2), 54 | dict( 55 | type='MultiScaleFlipAug3D', 56 | img_scale=(1333, 800), 57 | pts_scale_ratio=1, 58 | flip=False, 59 | transforms=[ 60 | dict( 61 | type='GlobalRotScaleTrans', 62 | rot_range=[0, 0], 63 | scale_ratio_range=[1., 1.], 64 | translation_std=[0, 0, 0]), 65 | dict( 66 | type='RandomFlip3D', 67 | sync_2d=False, 68 | flip_ratio_bev_horizontal=0.5, 69 | flip_ratio_bev_vertical=0.5), 70 | dict(type='PointSample', num_points=40000), 71 | dict( 72 | 
type='DefaultFormatBundle3D', 73 | class_names=class_names, 74 | with_label=False), 75 | dict(type='Collect3D', keys=['points']) 76 | ]) 77 | ] 78 | # construct a pipeline for data and gt loading in show function 79 | # please keep its loading function consistent with test_pipeline (e.g. client) 80 | eval_pipeline = [ 81 | dict( 82 | type='LoadPointsFromFile', 83 | coord_type='DEPTH', 84 | shift_height=False, 85 | load_dim=6, 86 | use_dim=[0, 1, 2]), 87 | dict(type='GlobalAlignment', rotation_axis=2), 88 | dict( 89 | type='DefaultFormatBundle3D', 90 | class_names=class_names, 91 | with_label=False), 92 | dict(type='Collect3D', keys=['points']) 93 | ] 94 | 95 | data = dict( 96 | samples_per_gpu=8, 97 | workers_per_gpu=4, 98 | train=dict( 99 | type='RepeatDataset', 100 | times=5, 101 | dataset=dict( 102 | type=dataset_type, 103 | data_root=data_root, 104 | ann_file=data_root + 'scannet_infos_train.pkl', 105 | pipeline=train_pipeline, 106 | filter_empty_gt=False, 107 | classes=class_names, 108 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset 109 | # and box_type_3d='Depth' in sunrgbd and scannet dataset. 110 | box_type_3d='Depth')), 111 | val=dict( 112 | type=dataset_type, 113 | data_root=data_root, 114 | ann_file=data_root + 'scannet_infos_val.pkl', 115 | pipeline=test_pipeline, 116 | classes=class_names, 117 | test_mode=True, 118 | box_type_3d='Depth'), 119 | test=dict( 120 | type=dataset_type, 121 | data_root=data_root, 122 | ann_file=data_root + 'scannet_infos_val.pkl', 123 | pipeline=test_pipeline, 124 | classes=class_names, 125 | test_mode=True, 126 | box_type_3d='Depth')) 127 | 128 | evaluation = dict(pipeline=eval_pipeline) 129 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/mask_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='MaskRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 35 | roi_head=dict( 36 | type='StandardRoIHead', 37 | bbox_roi_extractor=dict( 38 | type='SingleRoIExtractor', 39 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 40 | out_channels=256, 41 | featmap_strides=[4, 8, 16, 32]), 42 | bbox_head=dict( 43 | type='Shared2FCBBoxHead', 44 | in_channels=256, 45 | fc_out_channels=1024, 46 | roi_feat_size=7, 47 | num_classes=80, 48 | bbox_coder=dict( 49 | type='DeltaXYWHBBoxCoder', 50 | target_means=[0., 0., 0., 0.], 51 | target_stds=[0.1, 0.1, 0.2, 0.2]), 52 | reg_class_agnostic=False, 53 | loss_cls=dict( 54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 55 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 56 | mask_roi_extractor=dict( 57 | 
type='SingleRoIExtractor', 58 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 59 | out_channels=256, 60 | featmap_strides=[4, 8, 16, 32]), 61 | mask_head=dict( 62 | type='FCNMaskHead', 63 | num_convs=4, 64 | in_channels=256, 65 | conv_out_channels=256, 66 | num_classes=80, 67 | loss_mask=dict( 68 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 69 | # model training and testing settings 70 | train_cfg=dict( 71 | rpn=dict( 72 | assigner=dict( 73 | type='MaxIoUAssigner', 74 | pos_iou_thr=0.7, 75 | neg_iou_thr=0.3, 76 | min_pos_iou=0.3, 77 | match_low_quality=True, 78 | ignore_iof_thr=-1), 79 | sampler=dict( 80 | type='RandomSampler', 81 | num=256, 82 | pos_fraction=0.5, 83 | neg_pos_ub=-1, 84 | add_gt_as_proposals=False), 85 | allowed_border=-1, 86 | pos_weight=-1, 87 | debug=False), 88 | rpn_proposal=dict( 89 | nms_across_levels=False, 90 | nms_pre=2000, 91 | nms_post=1000, 92 | max_num=1000, 93 | nms_thr=0.7, 94 | min_bbox_size=0), 95 | rcnn=dict( 96 | assigner=dict( 97 | type='MaxIoUAssigner', 98 | pos_iou_thr=0.5, 99 | neg_iou_thr=0.5, 100 | min_pos_iou=0.5, 101 | match_low_quality=True, 102 | ignore_iof_thr=-1), 103 | sampler=dict( 104 | type='RandomSampler', 105 | num=512, 106 | pos_fraction=0.25, 107 | neg_pos_ub=-1, 108 | add_gt_as_proposals=True), 109 | mask_size=28, 110 | pos_weight=-1, 111 | debug=False)), 112 | test_cfg=dict( 113 | rpn=dict( 114 | nms_across_levels=False, 115 | nms_pre=1000, 116 | nms_post=1000, 117 | max_num=1000, 118 | nms_thr=0.7, 119 | min_bbox_size=0), 120 | rcnn=dict( 121 | score_thr=0.05, 122 | nms=dict(type='nms', iou_threshold=0.5), 123 | max_per_img=100, 124 | mask_thr_binary=0.5))) 125 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/scannet_seg-3d-20class.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'ScanNetSegDataset' 3 | data_root = './data/scannet/' 4 | class_names = ('wall', 'floor', 'cabinet', 'bed', 'chair', 'sofa', 'table', 5 | 'door', 'window', 'bookshelf', 'picture', 'counter', 'desk', 6 | 'curtain', 'refrigerator', 'showercurtrain', 'toilet', 'sink', 7 | 'bathtub', 'otherfurniture') 8 | num_points = 8192 9 | train_pipeline = [ 10 | dict( 11 | type='LoadPointsFromFile', 12 | coord_type='DEPTH', 13 | shift_height=False, 14 | use_color=True, 15 | load_dim=6, 16 | use_dim=[0, 1, 2, 3, 4, 5]), 17 | dict( 18 | type='LoadAnnotations3D', 19 | with_bbox_3d=False, 20 | with_label_3d=False, 21 | with_mask_3d=False, 22 | with_seg_3d=True), 23 | dict( 24 | type='PointSegClassMapping', 25 | valid_cat_ids=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 26 | 33, 34, 36, 39), 27 | max_cat_id=40), 28 | dict( 29 | type='IndoorPatchPointSample', 30 | num_points=num_points, 31 | block_size=1.5, 32 | ignore_index=len(class_names), 33 | use_normalized_coord=False, 34 | enlarge_size=0.2, 35 | min_unique_num=None), 36 | dict(type='NormalizePointsColor', color_mean=None), 37 | dict(type='DefaultFormatBundle3D', class_names=class_names), 38 | dict(type='Collect3D', keys=['points', 'pts_semantic_mask']) 39 | ] 40 | test_pipeline = [ 41 | dict( 42 | type='LoadPointsFromFile', 43 | coord_type='DEPTH', 44 | shift_height=False, 45 | use_color=True, 46 | load_dim=6, 47 | use_dim=[0, 1, 2, 3, 4, 5]), 48 | dict(type='NormalizePointsColor', color_mean=None), 49 | dict( 50 | # a wrapper in order to successfully call test function 51 | # actually we don't perform test-time-aug 52 | 
type='MultiScaleFlipAug3D', 53 | img_scale=(1333, 800), 54 | pts_scale_ratio=1, 55 | flip=False, 56 | transforms=[ 57 | dict( 58 | type='GlobalRotScaleTrans', 59 | rot_range=[0, 0], 60 | scale_ratio_range=[1., 1.], 61 | translation_std=[0, 0, 0]), 62 | dict( 63 | type='RandomFlip3D', 64 | sync_2d=False, 65 | flip_ratio_bev_horizontal=0.0, 66 | flip_ratio_bev_vertical=0.0), 67 | dict( 68 | type='DefaultFormatBundle3D', 69 | class_names=class_names, 70 | with_label=False), 71 | dict(type='Collect3D', keys=['points']) 72 | ]) 73 | ] 74 | # construct a pipeline for data and gt loading in show function 75 | # please keep its loading function consistent with test_pipeline (e.g. client) 76 | # we need to load gt seg_mask! 77 | eval_pipeline = [ 78 | dict( 79 | type='LoadPointsFromFile', 80 | coord_type='DEPTH', 81 | shift_height=False, 82 | use_color=True, 83 | load_dim=6, 84 | use_dim=[0, 1, 2, 3, 4, 5]), 85 | dict( 86 | type='LoadAnnotations3D', 87 | with_bbox_3d=False, 88 | with_label_3d=False, 89 | with_mask_3d=False, 90 | with_seg_3d=True), 91 | dict( 92 | type='PointSegClassMapping', 93 | valid_cat_ids=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 94 | 33, 34, 36, 39), 95 | max_cat_id=40), 96 | dict( 97 | type='DefaultFormatBundle3D', 98 | with_label=False, 99 | class_names=class_names), 100 | dict(type='Collect3D', keys=['points', 'pts_semantic_mask']) 101 | ] 102 | 103 | data = dict( 104 | samples_per_gpu=8, 105 | workers_per_gpu=4, 106 | train=dict( 107 | type=dataset_type, 108 | data_root=data_root, 109 | ann_file=data_root + 'scannet_infos_train.pkl', 110 | pipeline=train_pipeline, 111 | classes=class_names, 112 | test_mode=False, 113 | ignore_index=len(class_names), 114 | scene_idxs=data_root + 'seg_info/train_resampled_scene_idxs.npy'), 115 | val=dict( 116 | type=dataset_type, 117 | data_root=data_root, 118 | ann_file=data_root + 'scannet_infos_val.pkl', 119 | pipeline=test_pipeline, 120 | classes=class_names, 121 | test_mode=True, 122 | ignore_index=len(class_names)), 123 | test=dict( 124 | type=dataset_type, 125 | data_root=data_root, 126 | ann_file=data_root + 'scannet_infos_val.pkl', 127 | pipeline=test_pipeline, 128 | classes=class_names, 129 | test_mode=True, 130 | ignore_index=len(class_names))) 131 | 132 | evaluation = dict(pipeline=eval_pipeline) 133 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/s3dis_seg-3d-13class.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'S3DISSegDataset' 3 | data_root = './data/s3dis/' 4 | class_names = ('ceiling', 'floor', 'wall', 'beam', 'column', 'window', 'door', 5 | 'table', 'chair', 'sofa', 'bookcase', 'board', 'clutter') 6 | num_points = 4096 7 | train_area = [1, 2, 3, 4, 6] 8 | test_area = 5 9 | train_pipeline = [ 10 | dict( 11 | type='LoadPointsFromFile', 12 | coord_type='DEPTH', 13 | shift_height=False, 14 | use_color=True, 15 | load_dim=6, 16 | use_dim=[0, 1, 2, 3, 4, 5]), 17 | dict( 18 | type='LoadAnnotations3D', 19 | with_bbox_3d=False, 20 | with_label_3d=False, 21 | with_mask_3d=False, 22 | with_seg_3d=True), 23 | dict( 24 | type='PointSegClassMapping', 25 | valid_cat_ids=tuple(range(len(class_names))), 26 | max_cat_id=13), 27 | dict( 28 | type='IndoorPatchPointSample', 29 | num_points=num_points, 30 | block_size=1.0, 31 | ignore_index=len(class_names), 32 | use_normalized_coord=True, 33 | enlarge_size=0.2, 34 | min_unique_num=None), 35 | dict(type='NormalizePointsColor', 
color_mean=None), 36 | dict(type='DefaultFormatBundle3D', class_names=class_names), 37 | dict(type='Collect3D', keys=['points', 'pts_semantic_mask']) 38 | ] 39 | test_pipeline = [ 40 | dict( 41 | type='LoadPointsFromFile', 42 | coord_type='DEPTH', 43 | shift_height=False, 44 | use_color=True, 45 | load_dim=6, 46 | use_dim=[0, 1, 2, 3, 4, 5]), 47 | dict(type='NormalizePointsColor', color_mean=None), 48 | dict( 49 | # a wrapper in order to successfully call test function 50 | # actually we don't perform test-time-aug 51 | type='MultiScaleFlipAug3D', 52 | img_scale=(1333, 800), 53 | pts_scale_ratio=1, 54 | flip=False, 55 | transforms=[ 56 | dict( 57 | type='GlobalRotScaleTrans', 58 | rot_range=[0, 0], 59 | scale_ratio_range=[1., 1.], 60 | translation_std=[0, 0, 0]), 61 | dict( 62 | type='RandomFlip3D', 63 | sync_2d=False, 64 | flip_ratio_bev_horizontal=0.0, 65 | flip_ratio_bev_vertical=0.0), 66 | dict( 67 | type='DefaultFormatBundle3D', 68 | class_names=class_names, 69 | with_label=False), 70 | dict(type='Collect3D', keys=['points']) 71 | ]) 72 | ] 73 | # construct a pipeline for data and gt loading in show function 74 | # please keep its loading function consistent with test_pipeline (e.g. client) 75 | # we need to load gt seg_mask! 76 | eval_pipeline = [ 77 | dict( 78 | type='LoadPointsFromFile', 79 | coord_type='DEPTH', 80 | shift_height=False, 81 | use_color=True, 82 | load_dim=6, 83 | use_dim=[0, 1, 2, 3, 4, 5]), 84 | dict( 85 | type='LoadAnnotations3D', 86 | with_bbox_3d=False, 87 | with_label_3d=False, 88 | with_mask_3d=False, 89 | with_seg_3d=True), 90 | dict( 91 | type='PointSegClassMapping', 92 | valid_cat_ids=tuple(range(len(class_names))), 93 | max_cat_id=13), 94 | dict( 95 | type='DefaultFormatBundle3D', 96 | with_label=False, 97 | class_names=class_names), 98 | dict(type='Collect3D', keys=['points', 'pts_semantic_mask']) 99 | ] 100 | 101 | data = dict( 102 | samples_per_gpu=8, 103 | workers_per_gpu=4, 104 | # train on area 1, 2, 3, 4, 6 105 | # test on area 5 106 | train=dict( 107 | type=dataset_type, 108 | data_root=data_root, 109 | ann_files=[ 110 | data_root + f's3dis_infos_Area_{i}.pkl' for i in train_area 111 | ], 112 | pipeline=train_pipeline, 113 | classes=class_names, 114 | test_mode=False, 115 | ignore_index=len(class_names), 116 | scene_idxs=[ 117 | data_root + f'seg_info/Area_{i}_resampled_scene_idxs.npy' 118 | for i in train_area 119 | ]), 120 | val=dict( 121 | type=dataset_type, 122 | data_root=data_root, 123 | ann_files=data_root + f's3dis_infos_Area_{test_area}.pkl', 124 | pipeline=test_pipeline, 125 | classes=class_names, 126 | test_mode=True, 127 | ignore_index=len(class_names), 128 | scene_idxs=data_root + 129 | f'seg_info/Area_{test_area}_resampled_scene_idxs.npy'), 130 | test=dict( 131 | type=dataset_type, 132 | data_root=data_root, 133 | ann_files=data_root + f's3dis_infos_Area_{test_area}.pkl', 134 | pipeline=test_pipeline, 135 | classes=class_names, 136 | test_mode=True, 137 | ignore_index=len(class_names))) 138 | 139 | evaluation = dict(pipeline=eval_pipeline) 140 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/coders/nms_free_coder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from mmdet.core.bbox import BaseBBoxCoder 4 | from mmdet.core.bbox.builder import BBOX_CODERS 5 | from projects.mmdet3d_plugin.core.bbox.util import denormalize_bbox 6 | import numpy as np 7 | 8 | 9 | @BBOX_CODERS.register_module() 10 | class 
NMSFreeCoder(BaseBBoxCoder): 11 | """Bbox coder for NMS-free detector. 12 | Args: 13 | pc_range (list[float]): Range of point cloud. 14 | post_center_range (list[float]): Limit of the center. 15 | Default: None. 16 | max_num (int): Max number to be kept. Default: 100. 17 | score_threshold (float): Threshold to filter boxes based on score. 18 | Default: None. 19 | code_size (int): Code size of bboxes. Default: 9 20 | """ 21 | 22 | def __init__(self, 23 | pc_range, 24 | voxel_size=None, 25 | post_center_range=None, 26 | max_num=100, 27 | score_threshold=None, 28 | num_classes=10): 29 | self.pc_range = pc_range 30 | self.voxel_size = voxel_size 31 | self.post_center_range = post_center_range 32 | self.max_num = max_num 33 | self.score_threshold = score_threshold 34 | self.num_classes = num_classes 35 | 36 | def encode(self): 37 | 38 | pass 39 | 40 | def decode_single(self, cls_scores, bbox_preds): 41 | """Decode bboxes. 42 | Args: 43 | cls_scores (Tensor): Outputs from the classification head, \ 44 | shape [num_query, cls_out_channels]. Note \ 45 | cls_out_channels should includes background. 46 | bbox_preds (Tensor): Outputs from the regression \ 47 | head with normalized coordinate format (cx, cy, w, l, cz, h, rot_sine, rot_cosine, vx, vy). \ 48 | Shape [num_query, 9]. 49 | Returns: 50 | list[dict]: Decoded boxes. 51 | """ 52 | max_num = self.max_num 53 | 54 | cls_scores = cls_scores.sigmoid() 55 | scores, indexs = cls_scores.view(-1).topk(max_num) 56 | labels = indexs % self.num_classes 57 | bbox_index = indexs // self.num_classes 58 | bbox_preds = bbox_preds[bbox_index] 59 | 60 | final_box_preds = denormalize_bbox(bbox_preds, self.pc_range) 61 | final_scores = scores 62 | final_preds = labels 63 | 64 | # use score threshold 65 | if self.score_threshold is not None: 66 | thresh_mask = final_scores > self.score_threshold 67 | tmp_score = self.score_threshold 68 | while thresh_mask.sum() == 0: 69 | tmp_score *= 0.9 70 | if tmp_score < 0.01: 71 | thresh_mask = final_scores > -1 72 | break 73 | thresh_mask = final_scores >= tmp_score 74 | 75 | if self.post_center_range is not None: 76 | self.post_center_range = torch.tensor( 77 | self.post_center_range, device=scores.device) 78 | mask = (final_box_preds[..., :3] >= 79 | self.post_center_range[:3]).all(1) 80 | mask &= (final_box_preds[..., :3] <= 81 | self.post_center_range[3:]).all(1) 82 | 83 | if self.score_threshold: 84 | mask &= thresh_mask 85 | 86 | boxes3d = final_box_preds[mask] 87 | scores = final_scores[mask] 88 | 89 | labels = final_preds[mask] 90 | predictions_dict = { 91 | 'bboxes': boxes3d, 92 | 'scores': scores, 93 | 'labels': labels 94 | } 95 | 96 | else: 97 | raise NotImplementedError( 98 | 'Need to reorganize output as a batch, only ' 99 | 'support post_center_range is not None for now!') 100 | return predictions_dict 101 | 102 | def decode(self, preds_dicts): 103 | """Decode bboxes. 104 | Args: 105 | all_cls_scores (Tensor): Outputs from the classification head, \ 106 | shape [nb_dec, bs, num_query, cls_out_channels]. Note \ 107 | cls_out_channels should includes background. 108 | all_bbox_preds (Tensor): Sigmoid outputs from the regression \ 109 | head with normalized coordinate format (cx, cy, w, l, cz, h, rot_sine, rot_cosine, vx, vy). \ 110 | Shape [nb_dec, bs, num_query, 9]. 111 | Returns: 112 | list[dict]: Decoded boxes. 
113 | """ 114 | all_cls_scores = preds_dicts['all_cls_scores'][-1] 115 | all_bbox_preds = preds_dicts['all_bbox_preds'][-1] 116 | 117 | batch_size = all_cls_scores.size()[0] 118 | predictions_list = [] 119 | for i in range(batch_size): 120 | predictions_list.append(self.decode_single(all_cls_scores[i], all_bbox_preds[i])) 121 | return predictions_list 122 | 123 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/kitti-3d-car.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'KittiDataset' 3 | data_root = 'data/kitti/' 4 | class_names = ['Car'] 5 | point_cloud_range = [0, -40, -3, 70.4, 40, 1] 6 | input_modality = dict(use_lidar=True, use_camera=False) 7 | db_sampler = dict( 8 | data_root=data_root, 9 | info_path=data_root + 'kitti_dbinfos_train.pkl', 10 | rate=1.0, 11 | prepare=dict(filter_by_difficulty=[-1], filter_by_min_points=dict(Car=5)), 12 | classes=class_names, 13 | sample_groups=dict(Car=15)) 14 | 15 | file_client_args = dict(backend='disk') 16 | # Uncomment the following if use ceph or other file clients. 17 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient 18 | # for more details. 19 | # file_client_args = dict( 20 | # backend='petrel', path_mapping=dict(data='s3://kitti_data/')) 21 | 22 | train_pipeline = [ 23 | dict( 24 | type='LoadPointsFromFile', 25 | coord_type='LIDAR', 26 | load_dim=4, 27 | use_dim=4, 28 | file_client_args=file_client_args), 29 | dict( 30 | type='LoadAnnotations3D', 31 | with_bbox_3d=True, 32 | with_label_3d=True, 33 | file_client_args=file_client_args), 34 | dict(type='ObjectSample', db_sampler=db_sampler), 35 | dict( 36 | type='ObjectNoise', 37 | num_try=100, 38 | translation_std=[1.0, 1.0, 0.5], 39 | global_rot_range=[0.0, 0.0], 40 | rot_range=[-0.78539816, 0.78539816]), 41 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), 42 | dict( 43 | type='GlobalRotScaleTrans', 44 | rot_range=[-0.78539816, 0.78539816], 45 | scale_ratio_range=[0.95, 1.05]), 46 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), 47 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), 48 | dict(type='PointShuffle'), 49 | dict(type='DefaultFormatBundle3D', class_names=class_names), 50 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 51 | ] 52 | test_pipeline = [ 53 | dict( 54 | type='LoadPointsFromFile', 55 | coord_type='LIDAR', 56 | load_dim=4, 57 | use_dim=4, 58 | file_client_args=file_client_args), 59 | dict( 60 | type='MultiScaleFlipAug3D', 61 | img_scale=(1333, 800), 62 | pts_scale_ratio=1, 63 | flip=False, 64 | transforms=[ 65 | dict( 66 | type='GlobalRotScaleTrans', 67 | rot_range=[0, 0], 68 | scale_ratio_range=[1., 1.], 69 | translation_std=[0, 0, 0]), 70 | dict(type='RandomFlip3D'), 71 | dict( 72 | type='PointsRangeFilter', point_cloud_range=point_cloud_range), 73 | dict( 74 | type='DefaultFormatBundle3D', 75 | class_names=class_names, 76 | with_label=False), 77 | dict(type='Collect3D', keys=['points']) 78 | ]) 79 | ] 80 | # construct a pipeline for data and gt loading in show function 81 | # please keep its loading function consistent with test_pipeline (e.g. 
client) 82 | eval_pipeline = [ 83 | dict( 84 | type='LoadPointsFromFile', 85 | coord_type='LIDAR', 86 | load_dim=4, 87 | use_dim=4, 88 | file_client_args=file_client_args), 89 | dict( 90 | type='DefaultFormatBundle3D', 91 | class_names=class_names, 92 | with_label=False), 93 | dict(type='Collect3D', keys=['points']) 94 | ] 95 | 96 | data = dict( 97 | samples_per_gpu=6, 98 | workers_per_gpu=4, 99 | train=dict( 100 | type='RepeatDataset', 101 | times=2, 102 | dataset=dict( 103 | type=dataset_type, 104 | data_root=data_root, 105 | ann_file=data_root + 'kitti_infos_train.pkl', 106 | split='training', 107 | pts_prefix='velodyne_reduced', 108 | pipeline=train_pipeline, 109 | modality=input_modality, 110 | classes=class_names, 111 | test_mode=False, 112 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset 113 | # and box_type_3d='Depth' in sunrgbd and scannet dataset. 114 | box_type_3d='LiDAR')), 115 | val=dict( 116 | type=dataset_type, 117 | data_root=data_root, 118 | ann_file=data_root + 'kitti_infos_val.pkl', 119 | split='training', 120 | pts_prefix='velodyne_reduced', 121 | pipeline=test_pipeline, 122 | modality=input_modality, 123 | classes=class_names, 124 | test_mode=True, 125 | box_type_3d='LiDAR'), 126 | test=dict( 127 | type=dataset_type, 128 | data_root=data_root, 129 | ann_file=data_root + 'kitti_infos_val.pkl', 130 | split='training', 131 | pts_prefix='velodyne_reduced', 132 | pipeline=test_pipeline, 133 | modality=input_modality, 134 | classes=class_names, 135 | test_mode=True, 136 | box_type_3d='LiDAR')) 137 | 138 | evaluation = dict(interval=1, pipeline=eval_pipeline) 139 | --------------------------------------------------------------------------------
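Note: the dataset and model files above are standard mmcv-style config fragments; they are not imported directly but composed through `_base_` inheritance and loaded via `mmcv.Config`. As a minimal sketch of how to inspect one of them (the relative path below is an assumption based on the repository tree shown earlier, not a documented entry point; adjust it to your checkout):

    # Minimal sketch: load a _base_ dataset config with mmcv (pre-2.0 API)
    # and walk the resolved training pipeline. Path is assumed, not verified.
    from mmcv import Config

    cfg = Config.fromfile('projects/configs/_base_/datasets/kitti-3d-car.py')

    # The config behaves like a nested dict; keys mirror the file contents.
    print(cfg.data.train.type)                    # 'RepeatDataset'
    for step in cfg.data.train.dataset.pipeline:  # the train_pipeline list
        print(step['type'])

Full training configs in this project typically pull such fragments in through their `_base_` list and then override keys (e.g. `data`, `model`, `evaluation`) in place.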