├── media └── framework.png ├── tools ├── data_converter │ ├── __init__.py │ ├── lyft_data_fixer.py │ ├── indoor_converter.py │ ├── nuimage_converter.py │ ├── sunrgbd_data_utils.py │ ├── s3dis_data_utils.py │ └── lyft_converter.py ├── build-dataset.py ├── dist_train.sh ├── dist_test.sh ├── create_data.sh ├── slurm_test.sh ├── slurm_train.sh ├── misc │ ├── print_config.py │ ├── visualize_results.py │ ├── fuse_conv_bn.py │ └── browse_dataset.py ├── model_converters │ ├── publish_model.py │ ├── regnet2mmdet.py │ └── convert_votenet_checkpoints.py ├── rename_depth_cache.py ├── analysis_tools │ ├── benchmark.py │ ├── get_flops.py │ └── analyze_logs.py ├── test.py └── train.py ├── projects ├── mmdet3d_plugin │ ├── models │ │ ├── losses │ │ │ ├── __init__.py │ │ │ └── Sigmoid_ce_loss.py │ │ ├── dense_heads │ │ │ └── __init__.py │ │ ├── necks │ │ │ ├── __init__.py │ │ │ └── cp_fpn.py │ │ ├── detectors │ │ │ ├── __init__.py │ │ │ └── vedet.py │ │ ├── backbones │ │ │ └── __init__.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── grid_mask.py │ │ │ └── vedet_transformer.py │ ├── core │ │ └── bbox │ │ │ ├── assigners │ │ │ ├── __init__.py │ │ │ └── hungarian_assigner_3d.py │ │ │ ├── coders │ │ │ ├── __init__.py │ │ │ └── nms_free_coder.py │ │ │ ├── match_costs │ │ │ ├── __init__.py │ │ │ └── match_cost.py │ │ │ ├── iou_calculators │ │ │ └── __init__.py │ │ │ └── util.py │ ├── datasets │ │ ├── __init__.py │ │ ├── pipelines │ │ │ ├── __init__.py │ │ │ └── loading.py │ │ └── nuscenes_dataset.py │ └── __init__.py └── configs │ └── vedet_vovnet_p4_1600x640_2vview_2frame.py ├── Makefile ├── .gitignore ├── docker └── Dockerfile-mmlab-cu111 ├── docs └── INSTALL.md └── README.md /media/framework.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/VEDet/HEAD/media/framework.png -------------------------------------------------------------------------------- /tools/data_converter/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .Sigmoid_ce_loss import Sigmoid_ce_loss 2 | 3 | __all__ = ['Sigmoid_ce_loss'] 4 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/assigners/__init__.py: -------------------------------------------------------------------------------- 1 | from .hungarian_assigner_3d import HungarianAssigner3D 2 | 3 | __all__ = ['HungarianAssigner3D'] 4 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/coders/__init__.py: -------------------------------------------------------------------------------- 1 | from .nms_free_coder import NMSFreeCoder, NMSFreeClsCoder 2 | __all__ = ['NMSFreeCoder', 'NMSFreeClsCoder'] 3 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/match_costs/__init__.py: -------------------------------------------------------------------------------- 1 | from mmdet.core.bbox.match_costs import build_match_cost 2 | from .match_cost import BBox3DL1Cost 3 | 4 | __all__ = ['build_match_cost', 'BBox3DL1Cost'] 5 | -------------------------------------------------------------------------------- /tools/build-dataset.py: -------------------------------------------------------------------------------- 1 | from data_converter.nuscenes_converter_seg import create_nuscenes_infos 2 | 3 | 4 | 5 | if __name__ == '__main__': 6 | # Training settings 7 | create_nuscenes_infos( '/data/Dataset/nuScenes/','HDmaps-nocovers') 8 | 9 | -------------------------------------------------------------------------------- /tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | PORT=${PORT:-28500} 6 | 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 8 | python3 -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 9 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} 10 | -------------------------------------------------------------------------------- /tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | PORT=${PORT:-29500} 7 | 8 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 9 | python3 -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 10 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} 11 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/dense_heads/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2022 Toyota Research Institute, Dian Chen. All Rights Reserved. 
3 | # ------------------------------------------------------------------------ 4 | from .vedet_head import VEDetHead 5 | 6 | __all__ = ['VEDetHead'] 7 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/necks/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2022 megvii-model. All Rights Reserved. 3 | # ------------------------------------------------------------------------ 4 | # Modified from mmdetection (https://github.com/open-mmlab/mmdetection) 5 | # Copyright (c) OpenMMLab. All rights reserved. 6 | # ------------------------------------------------------------------------ 7 | from .cp_fpn import CPFPN 8 | __all__ = ['CPFPN'] 9 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/iou_calculators/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .iou3d_calculator import (AxisAlignedBboxOverlaps3D, BboxOverlaps3D, 3 | BboxOverlapsNearest3D, 4 | axis_aligned_bbox_overlaps_3d, bbox_overlaps_3d, 5 | bbox_overlaps_nearest_3d) 6 | 7 | __all__ = [ 8 | 'BboxOverlapsNearest3D', 'BboxOverlaps3D', 'bbox_overlaps_nearest_3d', 9 | 'bbox_overlaps_3d', 'AxisAlignedBboxOverlaps3D', 10 | 'axis_aligned_bbox_overlaps_3d' 11 | ] -------------------------------------------------------------------------------- /tools/create_data.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | export PYTHONPATH=`pwd`:$PYTHONPATH 5 | 6 | PARTITION=$1 7 | JOB_NAME=$2 8 | CONFIG=$3 9 | WORK_DIR=$4 10 | GPUS=${GPUS:-1} 11 | GPUS_PER_NODE=${GPUS_PER_NODE:-1} 12 | SRUN_ARGS=${SRUN_ARGS:-""} 13 | JOB_NAME=create_data 14 | 15 | srun -p ${PARTITION} \ 16 | --job-name=${JOB_NAME} \ 17 | --gres=gpu:${GPUS_PER_NODE} \ 18 | --ntasks=${GPUS} \ 19 | --ntasks-per-node=${GPUS_PER_NODE} \ 20 | --kill-on-bad-exit=1 \ 21 | ${SRUN_ARGS} \ 22 | python -u tools/create_data.py kitti \ 23 | --root-path ./data/kitti \ 24 | --out-dir ./data/kitti \ 25 | --extra-tag kitti 26 | -------------------------------------------------------------------------------- /tools/slurm_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | CHECKPOINT=$4 9 | GPUS=${GPUS:-8} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | PY_ARGS=${@:5} 13 | SRUN_ARGS=${SRUN_ARGS:-""} 14 | 15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 16 | srun -p ${PARTITION} \ 17 | --job-name=${JOB_NAME} \ 18 | --gres=gpu:${GPUS_PER_NODE} \ 19 | --ntasks=${GPUS} \ 20 | --ntasks-per-node=${GPUS_PER_NODE} \ 21 | --cpus-per-task=${CPUS_PER_TASK} \ 22 | --kill-on-bad-exit=1 \ 23 | ${SRUN_ARGS} \ 24 | python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS} 25 | -------------------------------------------------------------------------------- /tools/slurm_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | WORK_DIR=$4 9 | GPUS=${GPUS:-8} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | SRUN_ARGS=${SRUN_ARGS:-""} 
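# Note: the variables above are overridable at call time, e.g. (values are illustrative):
#   GPUS=16 GPUS_PER_NODE=8 CPUS_PER_TASK=4 tools/slurm_train.sh <partition> <job_name> <config> <work_dir>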
13 | PY_ARGS=${@:5} 14 | 15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 16 | srun -p ${PARTITION} \ 17 | --job-name=${JOB_NAME} \ 18 | --gres=gpu:${GPUS_PER_NODE} \ 19 | --ntasks=${GPUS} \ 20 | --ntasks-per-node=${GPUS_PER_NODE} \ 21 | --cpus-per-task=${CPUS_PER_TASK} \ 22 | --kill-on-bad-exit=1 \ 23 | ${SRUN_ARGS} \ 24 | python -u tools/train.py ${CONFIG} --work-dir=${WORK_DIR} --launcher="slurm" ${PY_ARGS} 25 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2022 Toyota Research Institute, Dian Chen. All Rights Reserved. 3 | # ------------------------------------------------------------------------ 4 | # Modified from DETR3D (https://github.com/WangYueFt/detr3d) 5 | # Copyright (c) 2021 Wang, Yue 6 | # ------------------------------------------------------------------------ 7 | # Modified from mmdetection3d (https://github.com/open-mmlab/mmdetection3d) 8 | # Copyright (c) OpenMMLab. All rights reserved. 9 | # ------------------------------------------------------------------------ 10 | from .vedet import VEDet 11 | 12 | __all__ = ['VEDet'] 13 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2022 megvii-model. All Rights Reserved. 3 | # ------------------------------------------------------------------------ 4 | # Modified from DETR3D (https://github.com/WangYueFt/detr3d) 5 | # Copyright (c) 2021 Wang, Yue 6 | # ------------------------------------------------------------------------ 7 | # Modified from mmdetection3d (https://github.com/open-mmlab/mmdetection3d) 8 | # Copyright (c) OpenMMLab. All rights reserved. 9 | # ------------------------------------------------------------------------ 10 | from .nuscenes_dataset import CustomNuScenesDataset 11 | 12 | __all__ = ['CustomNuScenesDataset'] 13 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2022 Toyota Research Institute, Dian Chen. All Rights Reserved. 3 | # ------------------------------------------------------------------------ 4 | # Copyright (c) 2022 megvii-model. All Rights Reserved. 5 | # ------------------------------------------------------------------------ 6 | # Modified from DETR3D (https://github.com/WangYueFt/detr3d) 7 | # Copyright (c) 2021 Wang, Yue 8 | # ------------------------------------------------------------------------ 9 | from .vovnet import VoVNet 10 | from .vovnetcp import VoVNetCP 11 | from .convnext import ConvNeXt 12 | 13 | __all__ = ['VoVNet', 'VoVNetCP', 'ConvNeXt'] 14 | -------------------------------------------------------------------------------- /tools/misc/print_config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
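# Example usage (the config path follows this repo's layout; the --options key is illustrative):
#   python tools/misc/print_config.py projects/configs/vedet_vovnet_p4_1600x640_2vview_2frame.py \
#       --options data.samples_per_gpu=1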
2 | import argparse
3 | from mmcv import Config, DictAction
4 | 
5 | 
6 | def parse_args():
7 |     parser = argparse.ArgumentParser(description='Print the whole config')
8 |     parser.add_argument('config', help='config file path')
9 |     parser.add_argument(
10 |         '--options', nargs='+', action=DictAction, help='arguments in dict')
11 |     args = parser.parse_args()
12 | 
13 |     return args
14 | 
15 | 
16 | def main():
17 |     args = parse_args()
18 | 
19 |     cfg = Config.fromfile(args.config)
20 |     if args.options is not None:
21 |         cfg.merge_from_dict(args.options)
22 |     print(f'Config:\n{cfg.pretty_text}')
23 | 
24 | 
25 | if __name__ == '__main__':
26 |     main()
27 | 
--------------------------------------------------------------------------------
/projects/mmdet3d_plugin/models/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------
2 | # Copyright (c) 2022 Toyota Research Institute, Dian Chen. All Rights Reserved.
3 | # ------------------------------------------------------------------------
4 | # Copyright (c) 2022 megvii-model. All Rights Reserved.
5 | # ------------------------------------------------------------------------
6 | # Modified from DETR3D (https://github.com/WangYueFt/detr3d)
7 | # Copyright (c) 2021 Wang, Yue
8 | # ------------------------------------------------------------------------
9 | from .petr_transformer import PETRTransformer, PETRMultiheadAttention, PETRTransformerEncoder, PETRTransformerDecoder
10 | from .vedet_transformer import VETransformer
11 | from .positional_encoding import FourierMLPEncoding
12 | 
13 | __all__ = [
14 |     'PETRTransformer', 'PETRMultiheadAttention', 'PETRTransformerEncoder', 'PETRTransformerDecoder', 'VETransformer',
15 |     'FourierMLPEncoding'
16 | ]
17 | 
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | WORK_DIR=${PWD}
2 | PROJECT=vedet
3 | DOCKER_IMAGE=${PROJECT}:latest
4 | DOCKER_FILE=docker/Dockerfile-mmlab-cu111
5 | DATA_ROOT?=/mnt/fsx-2/datasets
6 | CKPTS_ROOT?=/mnt/fsx-2/ckpts
7 | SAVE_ROOT?=/mnt/fsx-2/experiments
8 | 
9 | DOCKER_OPTS = \
10 | 	-it \
11 | 	--rm \
12 | 	-e DISPLAY=${DISPLAY} \
13 | 	-v /tmp:/tmp \
14 | 	-v /tmp/.X11-unix:/tmp/.X11-unix \
15 | 	-v ~/.ssh:/root/.ssh \
16 | 	-v ~/.aws:/root/.aws \
17 | 	-v ${WORK_DIR}:/workspace/${PROJECT} \
18 | 	-v ${DATA_ROOT}:/workspace/${PROJECT}/data \
19 | 	-v ${CKPTS_ROOT}:/workspace/${PROJECT}/ckpts \
20 | 	-v ${SAVE_ROOT}:/workspace/${PROJECT}/work_dirs \
21 | 	--shm-size=8G \
22 | 	--ipc=host \
23 | 	--network=host \
24 | 	--pid=host \
25 | 	--privileged
26 | 
27 | DOCKER_BUILD_ARGS = \
28 | 	--build-arg WANDB_ENTITY \
29 | 	--build-arg WANDB_API_KEY \
30 | 
31 | docker-build:
32 | 	nvidia-docker image build -f $(DOCKER_FILE) -t $(DOCKER_IMAGE) \
33 | 	$(DOCKER_BUILD_ARGS) .
34 | 
35 | docker-dev:
36 | 	nvidia-docker run --name $(PROJECT) \
37 | 	$(DOCKER_OPTS) \
38 | 	$(DOCKER_IMAGE) bash
39 | 
40 | clean:
41 | 	find . -name '*.pyc' | xargs sudo rm -f && \
42 | 	find . -name '__pycache__' | xargs sudo rm -rf
43 | 
--------------------------------------------------------------------------------
/projects/mmdet3d_plugin/__init__.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------
2 | # Copyright (c) 2022 megvii-model. All Rights Reserved.
3 | # ------------------------------------------------------------------------ 4 | # Modified from DETR3D (https://github.com/WangYueFt/detr3d) 5 | # Copyright (c) 2021 Wang, Yue 6 | # ------------------------------------------------------------------------ 7 | # Modified from mmdetection3d (https://github.com/open-mmlab/mmdetection3d) 8 | # Copyright (c) OpenMMLab. All rights reserved. 9 | # ------------------------------------------------------------------------ 10 | from .core.bbox.assigners.hungarian_assigner_3d import HungarianAssigner3D 11 | from .core.bbox.coders.nms_free_coder import NMSFreeCoder 12 | from .core.bbox.match_costs import BBox3DL1Cost 13 | from .datasets import CustomNuScenesDataset 14 | from .datasets.pipelines import (PhotoMetricDistortionMultiViewImage, PadMultiViewImage, NormalizeMultiviewImage) 15 | from .models.backbones.vovnet import VoVNet 16 | from .models.dense_heads import * 17 | from .models.detectors import * 18 | from .models.necks import * 19 | from .models.losses import * 20 | from .models.utils import * 21 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2022 megvii-model. All Rights Reserved. 3 | # ------------------------------------------------------------------------ 4 | # Modified from DETR3D (https://github.com/WangYueFt/detr3d) 5 | # Copyright (c) 2021 Wang, Yue 6 | # ------------------------------------------------------------------------ 7 | # Modified from mmdetection3d (https://github.com/open-mmlab/mmdetection3d) 8 | # Copyright (c) OpenMMLab. All rights reserved. 9 | # ------------------------------------------------------------------------ 10 | from .transform_3d import (PadMultiViewImage, NormalizeMultiviewImage, PhotoMetricDistortionMultiViewImage, 11 | ResizeMultiview3D, AlbuMultiview3D, ResizeCropFlipImage, GlobalRotScaleTransImage) 12 | from .loading import LoadMultiViewImageFromMultiSweepsFiles, LoadMapsFromFiles 13 | 14 | __all__ = [ 15 | 'PadMultiViewImage', 'NormalizeMultiviewImage', 'PhotoMetricDistortionMultiViewImage', 16 | 'LoadMultiViewImageFromMultiSweepsFiles', 'LoadMapsFromFiles', 'ResizeMultiview3D', 'AlbuMultiview3D', 17 | 'ResizeCropFlipImage', 'GlobalRotScaleTransImage' 18 | ] 19 | -------------------------------------------------------------------------------- /tools/model_converters/publish_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import subprocess 4 | import torch 5 | 6 | 7 | def parse_args(): 8 | parser = argparse.ArgumentParser( 9 | description='Process a checkpoint to be published') 10 | parser.add_argument('in_file', help='input checkpoint filename') 11 | parser.add_argument('out_file', help='output checkpoint filename') 12 | args = parser.parse_args() 13 | return args 14 | 15 | 16 | def process_checkpoint(in_file, out_file): 17 | checkpoint = torch.load(in_file, map_location='cpu') 18 | # remove optimizer for smaller file size 19 | if 'optimizer' in checkpoint: 20 | del checkpoint['optimizer'] 21 | # if it is necessary to remove some sensitive data in checkpoint['meta'], 22 | # add the code here. 
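    # For example, a hedged sketch (which keys are actually present in
    # checkpoint['meta'] depends on how the checkpoint was produced):
    # for key in ('config', 'env_info', 'hook_msgs'):
    #     checkpoint.get('meta', {}).pop(key, None)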
23 | torch.save(checkpoint, out_file) 24 | sha = subprocess.check_output(['sha256sum', out_file]).decode() 25 | final_file = out_file.rstrip('.pth') + '-{}.pth'.format(sha[:8]) 26 | subprocess.Popen(['mv', out_file, final_file]) 27 | 28 | 29 | def main(): 30 | args = parse_args() 31 | process_checkpoint(args.in_file, args.out_file) 32 | 33 | 34 | if __name__ == '__main__': 35 | main() 36 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/match_costs/match_cost.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2022 Toyota Research Institute, Dian Chen. All Rights Reserved. 3 | # ------------------------------------------------------------------------ 4 | import torch 5 | from mmdet.core.bbox.match_costs.builder import MATCH_COST 6 | from mmdet.core.bbox.iou_calculators import bbox_overlaps 7 | 8 | 9 | @MATCH_COST.register_module() 10 | class BBox3DL1Cost(object): 11 | """BBox3DL1Cost. 12 | Args: 13 | weight (int | float, optional): loss_weight 14 | """ 15 | 16 | def __init__(self, weight=1.): 17 | self.weight = weight 18 | 19 | def __call__(self, bbox_pred, gt_bboxes): 20 | """ 21 | Args: 22 | bbox_pred (Tensor): Predicted boxes with normalized coordinates 23 | (cx, cy, w, h), which are all in range [0, 1]. Shape 24 | [num_query, 4]. 25 | gt_bboxes (Tensor): Ground truth boxes with normalized 26 | coordinates (x1, y1, x2, y2). Shape [num_gt, 4]. 27 | Returns: 28 | torch.Tensor: bbox_cost value with weight 29 | """ 30 | bbox_cost = torch.cdist(bbox_pred, gt_bboxes, p=1) 31 | return bbox_cost * self.weight 32 | -------------------------------------------------------------------------------- /tools/rename_depth_cache.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from glob import glob 3 | import os 4 | from tqdm import tqdm 5 | 6 | if __name__ == "__main__": 7 | parser = argparse.ArgumentParser(description='Rename cache from dd3dv2') 8 | parser.add_argument('--source-dir', type=str, required=True, help='source dir of cache') 9 | parser.add_argument('--target-dir', type=str, required=True, help='source dir of cache') 10 | args = parser.parse_args() 11 | 12 | source_dir = args.source_dir 13 | target_dir = args.target_dir 14 | os.makedirs(target_dir, exist_ok=True) 15 | 16 | cache_files = sorted(glob(os.path.join(source_dir, "*.npz")), key=lambda x: int(os.path.basename(x).split('_')[1])) 17 | visited = set() 18 | for cache_file in tqdm(cache_files): 19 | cache_name = os.path.basename(cache_file) 20 | components = cache_name.split('_') 21 | scene_name, global_idx = components[:2] 22 | if scene_name not in visited: 23 | global_start_idx = int(global_idx) 24 | visited.add(scene_name) 25 | 26 | sample_id = int(global_idx) - int(global_start_idx) 27 | components[1] = f"{sample_id:03d}" 28 | cache_name = "_".join(components) 29 | symlink_path = os.path.join(target_dir, cache_name) 30 | os.system(f'ln -s {cache_file} {symlink_path}') 31 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/losses/Sigmoid_ce_loss.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2022 Toyota Research Institute, Dian Chen. All Rights Reserved. 
3 | # ------------------------------------------------------------------------
4 | # Copyright (c) 2022 megvii-model. All Rights Reserved.
5 | # ------------------------------------------------------------------------
6 | import torch.nn as nn
7 | import torch.nn.functional as F
8 | from mmdet.models.builder import LOSSES
9 | 
10 | 
11 | @LOSSES.register_module()
12 | class Sigmoid_ce_loss(nn.Module):
13 | 
14 |     def __init__(self, loss_weight=1.0):
15 |         super(Sigmoid_ce_loss, self).__init__()
16 |         self.loss_weight = loss_weight
17 | 
18 |     def forward(
19 |         self,
20 |         inputs,
21 |         targets,
22 |     ):
23 |         """Compute a class-balanced binary cross-entropy loss.
24 | 
25 |         Args:
26 |             inputs (torch.Tensor): Raw (pre-sigmoid) predictions.
27 |             targets (torch.Tensor): Binary targets with the same shape as ``inputs``.
28 | 
29 |         Returns:
30 |             torch.Tensor: The weighted BCE loss, scaled by ``loss_weight``.
31 |         """
32 |         # inputs=inputs.sigmoid()
33 |         pos_weight = (targets == 0).float().sum(dim=1) / (targets == 1).float().sum(dim=1).clamp(min=1.0)
34 |         pos_weight = pos_weight.unsqueeze(1)
35 |         weight_loss = targets * pos_weight + (1 - targets)
36 |         loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction="mean", weight=weight_loss)
37 |         return self.loss_weight * loss
38 | 
--------------------------------------------------------------------------------
/tools/data_converter/lyft_data_fixer.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import argparse
3 | import numpy as np
4 | import os
5 | 
6 | 
7 | def fix_lyft(root_folder='./data/lyft', version='v1.01'):
8 |     # refer to https://www.kaggle.com/c/3d-object-detection-for-autonomous-vehicles/discussion/110000 # noqa
9 |     lidar_path = 'lidar/host-a011_lidar1_1233090652702363606.bin'
10 |     root_folder = os.path.join(root_folder, f'{version}-train')
11 |     lidar_path = os.path.join(root_folder, lidar_path)
12 |     assert os.path.isfile(lidar_path), f'Please download the complete Lyft ' \
13 |         f'dataset and make sure {lidar_path} is present.'
14 |     points = np.fromfile(lidar_path, dtype=np.float32, count=-1)
15 |     try:
16 |         points.reshape([-1, 5])
17 |         print(f'This fix is not required for version {version}.')
18 |     except ValueError:
19 |         new_points = np.array(list(points) + [100.0, 1.0], dtype='float32')
20 |         new_points.tofile(lidar_path)
21 |         print(f'Appended 100.0 and 1.0 to the end of {lidar_path}.')
22 | 
23 | 
24 | parser = argparse.ArgumentParser(description='Lyft dataset fixer arg parser')
25 | parser.add_argument(
26 |     '--root-folder',
27 |     type=str,
28 |     default='./data/lyft',
29 |     help='specify the root path of Lyft dataset')
30 | parser.add_argument(
31 |     '--version',
32 |     type=str,
33 |     default='v1.01',
34 |     help='specify Lyft dataset version')
35 | args = parser.parse_args()
36 | 
37 | if __name__ == '__main__':
38 |     fix_lyft(root_folder=args.root_folder, version=args.version)
39 | 
--------------------------------------------------------------------------------
/tools/misc/visualize_results.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
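# Example usage (paths are illustrative):
#   python tools/misc/visualize_results.py projects/configs/vedet_vovnet_p4_1600x640_2vview_2frame.py \
#       --result work_dirs/results.pkl --show-dir work_dirs/vis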
2 | import argparse 3 | import mmcv 4 | from mmcv import Config 5 | 6 | from mmdet3d.datasets import build_dataset 7 | 8 | 9 | def parse_args(): 10 | parser = argparse.ArgumentParser( 11 | description='MMDet3D visualize the results') 12 | parser.add_argument('config', help='test config file path') 13 | parser.add_argument('--result', help='results file in pickle format') 14 | parser.add_argument( 15 | '--show-dir', help='directory where visualize results will be saved') 16 | args = parser.parse_args() 17 | 18 | return args 19 | 20 | 21 | def main(): 22 | args = parse_args() 23 | 24 | if args.result is not None and \ 25 | not args.result.endswith(('.pkl', '.pickle')): 26 | raise ValueError('The results file must be a pkl file.') 27 | 28 | cfg = Config.fromfile(args.config) 29 | cfg.data.test.test_mode = True 30 | 31 | # build the dataset 32 | dataset = build_dataset(cfg.data.test) 33 | results = mmcv.load(args.result) 34 | 35 | if getattr(dataset, 'show', None) is not None: 36 | # data loading pipeline for showing 37 | eval_pipeline = cfg.get('eval_pipeline', {}) 38 | if eval_pipeline: 39 | dataset.show(results, args.show_dir, pipeline=eval_pipeline) 40 | else: 41 | dataset.show(results, args.show_dir) # use default pipeline 42 | else: 43 | raise NotImplementedError( 44 | 'Show is not implemented for dataset {}!'.format( 45 | type(dataset).__name__)) 46 | 47 | 48 | if __name__ == '__main__': 49 | main() 50 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.png 2 | *.jpg 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | *.ipynb 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | 63 | # Flask stuff: 64 | instance/ 65 | .webassets-cache 66 | 67 | # Scrapy stuff: 68 | .scrapy 69 | 70 | # Sphinx documentation 71 | docs/en/_build/ 72 | docs/zh_cn/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # pyenv 81 | .python-version 82 | 83 | # celery beat schedule file 84 | celerybeat-schedule 85 | 86 | # SageMath parsed files 87 | *.sage.py 88 | 89 | # Environments 90 | .env 91 | .venv 92 | env/ 93 | venv/ 94 | ENV/ 95 | env.bak/ 96 | venv.bak/ 97 | 98 | # Spyder project settings 99 | .spyderproject 100 | .spyproject 101 | 102 | # Rope project settings 103 | .ropeproject 104 | 105 | # mkdocs documentation 106 | /site 107 | 108 | # mypy 109 | .mypy_cache/ 110 | 111 | # cython generated cpp 112 | data 113 | .vscode 114 | .idea 115 | 116 | # custom 117 | *.pkl 118 | *.pkl.json 119 | *.log.json 120 | work_dirs/ 121 | ckpts/ 122 | data/ 123 | mmdetection3d/ 124 | exps/ 125 | *~ 126 | mmdet3d/.mim 127 | wandb/ 128 | 129 | # Pytorch 130 | *.pth 131 | 132 | # demo 133 | *.jpg 134 | data/s3dis/Stanford3dDataset_v1.2_Aligned_Version/ 135 | data/scannet/scans/ 136 | data/sunrgbd/OFFICIAL_SUNRGBD/ 137 | *.obj 138 | *.ply 139 | 140 | # Waymo evaluation 141 | mmdet3d/core/evaluation/waymo_utils/compute_detection_metrics_main -------------------------------------------------------------------------------- /tools/misc/fuse_conv_bn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import torch 4 | from mmcv.runner import save_checkpoint 5 | from torch import nn as nn 6 | 7 | from mmdet.apis import init_model 8 | 9 | 10 | def fuse_conv_bn(conv, bn): 11 | """During inference, the functionary of batch norm layers is turned off but 12 | only the mean and var alone channels are used, which exposes the chance to 13 | fuse it with the preceding conv layers to save computations and simplify 14 | network structures.""" 15 | conv_w = conv.weight 16 | conv_b = conv.bias if conv.bias is not None else torch.zeros_like( 17 | bn.running_mean) 18 | 19 | factor = bn.weight / torch.sqrt(bn.running_var + bn.eps) 20 | conv.weight = nn.Parameter(conv_w * 21 | factor.reshape([conv.out_channels, 1, 1, 1])) 22 | conv.bias = nn.Parameter((conv_b - bn.running_mean) * factor + bn.bias) 23 | return conv 24 | 25 | 26 | def fuse_module(m): 27 | last_conv = None 28 | last_conv_name = None 29 | 30 | for name, child in m.named_children(): 31 | if isinstance(child, (nn.BatchNorm2d, nn.SyncBatchNorm)): 32 | if last_conv is None: # only fuse BN that is after Conv 33 | continue 34 | fused_conv = fuse_conv_bn(last_conv, child) 35 | m._modules[last_conv_name] = fused_conv 36 | # To reduce changes, set BN as Identity instead of deleting it. 
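            # (After fusion the conv already carries BN's scale and shift, so the
            # BN layer becomes a no-op at inference time.)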
37 | m._modules[name] = nn.Identity() 38 | last_conv = None 39 | elif isinstance(child, nn.Conv2d): 40 | last_conv = child 41 | last_conv_name = name 42 | else: 43 | fuse_module(child) 44 | return m 45 | 46 | 47 | def parse_args(): 48 | parser = argparse.ArgumentParser( 49 | description='fuse Conv and BN layers in a model') 50 | parser.add_argument('config', help='config file path') 51 | parser.add_argument('checkpoint', help='checkpoint file path') 52 | parser.add_argument('out', help='output path of the converted model') 53 | args = parser.parse_args() 54 | return args 55 | 56 | 57 | def main(): 58 | args = parse_args() 59 | # build the model from a config file and a checkpoint file 60 | model = init_model(args.config, args.checkpoint) 61 | # fuse conv and bn layers of the model 62 | fused_model = fuse_module(model) 63 | save_checkpoint(fused_model, args.out) 64 | 65 | 66 | if __name__ == '__main__': 67 | main() 68 | -------------------------------------------------------------------------------- /docker/Dockerfile-mmlab-cu111: -------------------------------------------------------------------------------- 1 | # Base image 2 | ARG PYTORCH="1.9.0" 3 | ARG CUDA="11.1" 4 | ARG CUDNN="8" 5 | 6 | FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel 7 | 8 | ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX" 9 | ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all" 10 | ENV CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" 11 | 12 | # To fix GPG key error when running apt-get update 13 | RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub 14 | RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub 15 | 16 | # Core tools 17 | RUN apt-get update && apt-get install -y \ 18 | cmake \ 19 | curl \ 20 | docker.io \ 21 | ffmpeg \ 22 | git \ 23 | htop \ 24 | libsm6 \ 25 | libxext6 \ 26 | libglib2.0-0 \ 27 | libsm6 \ 28 | libxrender-dev \ 29 | libxext6 \ 30 | ninja-build \ 31 | unzip \ 32 | vim \ 33 | wget \ 34 | sudo \ 35 | && apt-get clean \ 36 | && rm -rf /var/lib/apt/lists/* 37 | 38 | # ------------------------- 39 | # Optional: W&B credentials 40 | # ------------------------- 41 | ARG WANDB_ENTITY 42 | ENV WANDB_ENTITY=${WANDB_ENTITY} 43 | 44 | ARG WANDB_API_KEY 45 | ENV WANDB_API_KEY=${WANDB_API_KEY} 46 | 47 | # Python tools 48 | RUN pip install \ 49 | wandb==0.12.17 \ 50 | einops==0.4.1 \ 51 | pytorch3d==0.3.0 \ 52 | pycocotools==2.0.4 \ 53 | nuscenes-devkit==1.1.7 \ 54 | timm==0.6.11 55 | 56 | # Install OpenMMLab packages 57 | ARG MMCV="1.4.0" 58 | ARG MMDET="v2.25.0" 59 | ARG MMSEG="v0.20.2" 60 | ARG MMDET3D="v0.17.1" 61 | ENV FORCE_CUDA="1" 62 | 63 | RUN pip install mmcv-full==${MMCV} -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html 64 | 65 | WORKDIR /workspace/mmlab 66 | RUN git clone https://github.com/open-mmlab/mmdetection.git && cd mmdetection && \ 67 | git checkout ${MMDET} && \ 68 | pip install -r requirements/build.txt && pip install -e . 69 | ENV PYTHONPATH="/workspace/mmlab/mmdetection:$PYTHONPATH" 70 | 71 | RUN git clone https://github.com/open-mmlab/mmsegmentation.git && cd mmsegmentation && \ 72 | git checkout ${MMSEG} && \ 73 | pip install -e . 74 | ENV PYTHONPATH="/workspace/mmlab/mmsegmentation:$PYTHONPATH" 75 | 76 | RUN git clone https://github.com/open-mmlab/mmdetection3d.git && cd mmdetection3d && \ 77 | git checkout ${MMDET3D} && \ 78 | pip install -e . 
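# Keep the cloned source tree on PYTHONPATH so it is importable from any working directory.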
79 | ENV PYTHONPATH="/workspace/mmlab/mmdetection3d:$PYTHONPATH" 80 | 81 | WORKDIR /workspace/vedet 82 | RUN git config --global --add safe.directory /workspace/vedet 83 | -------------------------------------------------------------------------------- /docs/INSTALL.md: -------------------------------------------------------------------------------- 1 | ## Use Docker Environment 2 | We provide a self-contained dockerfile and recommend preparing the environment using docker. To build the image, run the following command in this directory: 3 | ```bash 4 | make docker-build 5 | ``` 6 | 7 | We use [Weights & Biases](https://wandb.ai/site) to log the training. To optionally build your wandb credentials into the docker, run (if you don't build credentials into the docker, you can still manually log into wandb after entering the docker container): 8 | ```bash 9 | make docker-build WANDB_API_KEY= WANDB_ENTITY= 10 | ``` 11 | 12 | After the image is built, run the following command with your paths on the host machine for data, checkpoints, and logging, to enter the dockerized environment: 13 | ```bash 14 | # DATA_ROOT will be mounted as /workspace/vedet/data 15 | # CKPTS_ROOT will be mounted as /workspace/vedet/ckpts 16 | # SAVE_ROOT will be mounted as /workspace/vedet/work_dirs 17 | make docker-dev DATA_ROOT= CKPTS_ROOT= SAVE_ROOT= 18 | ``` 19 | 20 | Inside the docker the folder structure will look like this, with data, checkpoints, logging paths mounted under `/workspace/vedet/`: 21 | ``` 22 | /workspace/ 23 | |-- mmlab 24 | | |-- mmdetection 25 | | |-- mmdetection3d 26 | | `-- mmsegmentation 27 | `-- vedet 28 | |-- LICENSE.md 29 | |-- Makefile 30 | |-- README.md 31 | |-- ckpts 32 | |-- data 33 | |-- docker 34 | |-- docs 35 | |-- projects 36 | |-- requirements.txt 37 | |-- tools 38 | ``` 39 | 40 | ## Use Pip/Conda Environment 41 | The pytorch version we use in this project is `1.9.0` with CUDA `11.1`, CUDNN `8`. After install the right version in your environment, please install the following dependencies. 42 | 43 | ### Python tools 44 | ```bash 45 | pip install \ 46 | wandb==0.12.17 \ 47 | einops==0.4.1 \ 48 | pytorch3d==0.3.0 \ 49 | pycocotools==2.0.4 \ 50 | nuscenes-devkit==1.1.7 \ 51 | timm==0.6.11 52 | ``` 53 | 54 | ### OpenMMLab packages 55 | ```bash 56 | export MMCV="1.4.0" 57 | export MMDET="v2.25.0" 58 | export MMSEG="v0.20.2" 59 | export MMDET3D="v0.17.1" 60 | export FORCE_CUDA="1" 61 | 62 | # install mmcv 63 | pip install mmcv-full==${MMCV} -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html 64 | 65 | # install mmdetection 66 | git clone https://github.com/open-mmlab/mmdetection.git 67 | cd mmdetection && git checkout ${MMDET} 68 | pip install -r requirements/build.txt && pip install -e . 69 | 70 | # install mmsegmentation 71 | git clone https://github.com/open-mmlab/mmsegmentation.git 72 | cd mmsegmentation && git checkout ${MMSEG} 73 | pip install -e . 74 | 75 | # install mmdetection3d 76 | git clone https://github.com/open-mmlab/mmdetection3d.git 77 | cd mmdetection3d && git checkout ${MMDET3D} 78 | pip install -e . 
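
# (optional) sanity-check the installs above; assumes all four packages built successfully
python -c "import mmcv, mmdet, mmseg, mmdet3d; print(mmcv.__version__, mmdet.__version__, mmseg.__version__, mmdet3d.__version__)"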
79 | ``` 80 | 81 | ### Data, checkpoints, logging paths 82 | ```bash 83 | # enter the project top-level directory 84 | cd vedet 85 | ln -s $DATA_ROOT data/ 86 | ln -s $CKPTS_ROOT ckpts/ 87 | ln -s $SAVE_ROOT work_dirs/ 88 | ``` 89 | -------------------------------------------------------------------------------- /tools/analysis_tools/benchmark.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import time 4 | import torch 5 | from mmcv import Config 6 | from mmcv.parallel import MMDataParallel 7 | from mmcv.runner import load_checkpoint, wrap_fp16_model 8 | 9 | from mmdet3d.datasets import build_dataloader, build_dataset 10 | from mmdet3d.models import build_detector 11 | from tools.misc.fuse_conv_bn import fuse_module 12 | 13 | 14 | def parse_args(): 15 | parser = argparse.ArgumentParser(description='MMDet benchmark a model') 16 | parser.add_argument('config', help='test config file path') 17 | parser.add_argument('checkpoint', help='checkpoint file') 18 | parser.add_argument('--samples', default=2000, help='samples to benchmark') 19 | parser.add_argument( 20 | '--log-interval', default=50, help='interval of logging') 21 | parser.add_argument( 22 | '--fuse-conv-bn', 23 | action='store_true', 24 | help='Whether to fuse conv and bn, this will slightly increase' 25 | 'the inference speed') 26 | args = parser.parse_args() 27 | return args 28 | 29 | 30 | def main(): 31 | args = parse_args() 32 | 33 | cfg = Config.fromfile(args.config) 34 | # set cudnn_benchmark 35 | if cfg.get('cudnn_benchmark', False): 36 | torch.backends.cudnn.benchmark = True 37 | cfg.model.pretrained = None 38 | cfg.data.test.test_mode = True 39 | 40 | # build the dataloader 41 | # TODO: support multiple images per gpu (only minor changes are needed) 42 | dataset = build_dataset(cfg.data.test) 43 | data_loader = build_dataloader( 44 | dataset, 45 | samples_per_gpu=1, 46 | workers_per_gpu=cfg.data.workers_per_gpu, 47 | dist=False, 48 | shuffle=False) 49 | 50 | # build the model and load checkpoint 51 | cfg.model.train_cfg = None 52 | model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg')) 53 | fp16_cfg = cfg.get('fp16', None) 54 | if fp16_cfg is not None: 55 | wrap_fp16_model(model) 56 | load_checkpoint(model, args.checkpoint, map_location='cpu') 57 | if args.fuse_conv_bn: 58 | model = fuse_module(model) 59 | 60 | model = MMDataParallel(model, device_ids=[0]) 61 | 62 | model.eval() 63 | 64 | # the first several iterations may be very slow so skip them 65 | num_warmup = 5 66 | pure_inf_time = 0 67 | 68 | # benchmark with several samples and take the average 69 | for i, data in enumerate(data_loader): 70 | 71 | torch.cuda.synchronize() 72 | start_time = time.perf_counter() 73 | 74 | with torch.no_grad(): 75 | model(return_loss=False, rescale=True, **data) 76 | 77 | torch.cuda.synchronize() 78 | elapsed = time.perf_counter() - start_time 79 | 80 | if i >= num_warmup: 81 | pure_inf_time += elapsed 82 | if (i + 1) % args.log_interval == 0: 83 | fps = (i + 1 - num_warmup) / pure_inf_time 84 | print(f'Done image [{i + 1:<3}/ {args.samples}], ' 85 | f'fps: {fps:.1f} img / s') 86 | 87 | if (i + 1) == args.samples: 88 | pure_inf_time += elapsed 89 | fps = (i + 1 - num_warmup) / pure_inf_time 90 | print(f'Overall fps: {fps:.1f} img / s') 91 | break 92 | 93 | 94 | if __name__ == '__main__': 95 | main() 96 | -------------------------------------------------------------------------------- 
/tools/model_converters/regnet2mmdet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import torch 4 | from collections import OrderedDict 5 | 6 | 7 | def convert_stem(model_key, model_weight, state_dict, converted_names): 8 | new_key = model_key.replace('stem.conv', 'conv1') 9 | new_key = new_key.replace('stem.bn', 'bn1') 10 | state_dict[new_key] = model_weight 11 | converted_names.add(model_key) 12 | print(f'Convert {model_key} to {new_key}') 13 | 14 | 15 | def convert_head(model_key, model_weight, state_dict, converted_names): 16 | new_key = model_key.replace('head.fc', 'fc') 17 | state_dict[new_key] = model_weight 18 | converted_names.add(model_key) 19 | print(f'Convert {model_key} to {new_key}') 20 | 21 | 22 | def convert_reslayer(model_key, model_weight, state_dict, converted_names): 23 | split_keys = model_key.split('.') 24 | layer, block, module = split_keys[:3] 25 | block_id = int(block[1:]) 26 | layer_name = f'layer{int(layer[1:])}' 27 | block_name = f'{block_id - 1}' 28 | 29 | if block_id == 1 and module == 'bn': 30 | new_key = f'{layer_name}.{block_name}.downsample.1.{split_keys[-1]}' 31 | elif block_id == 1 and module == 'proj': 32 | new_key = f'{layer_name}.{block_name}.downsample.0.{split_keys[-1]}' 33 | elif module == 'f': 34 | if split_keys[3] == 'a_bn': 35 | module_name = 'bn1' 36 | elif split_keys[3] == 'b_bn': 37 | module_name = 'bn2' 38 | elif split_keys[3] == 'c_bn': 39 | module_name = 'bn3' 40 | elif split_keys[3] == 'a': 41 | module_name = 'conv1' 42 | elif split_keys[3] == 'b': 43 | module_name = 'conv2' 44 | elif split_keys[3] == 'c': 45 | module_name = 'conv3' 46 | new_key = f'{layer_name}.{block_name}.{module_name}.{split_keys[-1]}' 47 | else: 48 | raise ValueError(f'Unsupported conversion of key {model_key}') 49 | print(f'Convert {model_key} to {new_key}') 50 | state_dict[new_key] = model_weight 51 | converted_names.add(model_key) 52 | 53 | 54 | def convert(src, dst): 55 | """Convert keys in pycls pretrained RegNet models to mmdet style.""" 56 | # load caffe model 57 | regnet_model = torch.load(src) 58 | blobs = regnet_model['model_state'] 59 | # convert to pytorch style 60 | state_dict = OrderedDict() 61 | converted_names = set() 62 | for key, weight in blobs.items(): 63 | if 'stem' in key: 64 | convert_stem(key, weight, state_dict, converted_names) 65 | elif 'head' in key: 66 | convert_head(key, weight, state_dict, converted_names) 67 | elif key.startswith('s'): 68 | convert_reslayer(key, weight, state_dict, converted_names) 69 | 70 | # check if all layers are converted 71 | for key in blobs: 72 | if key not in converted_names: 73 | print(f'not converted: {key}') 74 | # save checkpoint 75 | checkpoint = dict() 76 | checkpoint['state_dict'] = state_dict 77 | torch.save(checkpoint, dst) 78 | 79 | 80 | def main(): 81 | parser = argparse.ArgumentParser(description='Convert model keys') 82 | parser.add_argument('src', help='src detectron model path') 83 | parser.add_argument('dst', help='save path') 84 | args = parser.parse_args() 85 | convert(args.src, args.dst) 86 | 87 | 88 | if __name__ == '__main__': 89 | main() 90 | -------------------------------------------------------------------------------- /tools/analysis_tools/get_flops.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
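# Example usage (arguments are illustrative; for a camera-based model use the image modality):
#   python tools/analysis_tools/get_flops.py <config> --modality image --shape 640 1600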
2 | import argparse 3 | import torch 4 | from mmcv import Config, DictAction 5 | 6 | from mmdet3d.models import build_model 7 | 8 | try: 9 | from mmcv.cnn import get_model_complexity_info 10 | except ImportError: 11 | raise ImportError('Please upgrade mmcv to >0.6.2') 12 | 13 | 14 | def parse_args(): 15 | parser = argparse.ArgumentParser(description='Train a detector') 16 | parser.add_argument('config', help='train config file path') 17 | parser.add_argument( 18 | '--shape', 19 | type=int, 20 | nargs='+', 21 | default=[40000, 4], 22 | help='input point cloud size') 23 | parser.add_argument( 24 | '--modality', 25 | type=str, 26 | default='point', 27 | choices=['point', 'image', 'multi'], 28 | help='input data modality') 29 | parser.add_argument( 30 | '--cfg-options', 31 | nargs='+', 32 | action=DictAction, 33 | help='override some settings in the used config, the key-value pair ' 34 | 'in xxx=yyy format will be merged into config file. If the value to ' 35 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 36 | 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' 37 | 'Note that the quotation marks are necessary and that no white space ' 38 | 'is allowed.') 39 | args = parser.parse_args() 40 | return args 41 | 42 | 43 | def main(): 44 | 45 | args = parse_args() 46 | 47 | if args.modality == 'point': 48 | assert len(args.shape) == 2, 'invalid input shape' 49 | input_shape = tuple(args.shape) 50 | elif args.modality == 'image': 51 | if len(args.shape) == 1: 52 | input_shape = (3, args.shape[0], args.shape[0]) 53 | elif len(args.shape) == 2: 54 | input_shape = (3, ) + tuple(args.shape) 55 | else: 56 | raise ValueError('invalid input shape') 57 | elif args.modality == 'multi': 58 | raise NotImplementedError( 59 | 'FLOPs counter is currently not supported for models with ' 60 | 'multi-modality input') 61 | 62 | cfg = Config.fromfile(args.config) 63 | if args.cfg_options is not None: 64 | cfg.merge_from_dict(args.cfg_options) 65 | # import modules from string list. 66 | if cfg.get('custom_imports', None): 67 | from mmcv.utils import import_modules_from_strings 68 | import_modules_from_strings(**cfg['custom_imports']) 69 | 70 | model = build_model( 71 | cfg.model, 72 | train_cfg=cfg.get('train_cfg'), 73 | test_cfg=cfg.get('test_cfg')) 74 | if torch.cuda.is_available(): 75 | model.cuda() 76 | model.eval() 77 | 78 | if hasattr(model, 'forward_dummy'): 79 | model.forward = model.forward_dummy 80 | else: 81 | raise NotImplementedError( 82 | 'FLOPs counter is currently not supported for {}'.format( 83 | model.__class__.__name__)) 84 | 85 | flops, params = get_model_complexity_info(model, input_shape) 86 | split_line = '=' * 30 87 | print(f'{split_line}\nInput shape: {input_shape}\n' 88 | f'Flops: {flops}\nParams: {params}\n{split_line}') 89 | print('!!!Please be cautious if you use the results in papers. ' 90 | 'You may need to check if all ops are supported and verify that the ' 91 | 'flops computation is correct.') 92 | 93 | 94 | if __name__ == '__main__': 95 | main() 96 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/util.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from .array_converter import array_converter 3 | 4 | 5 | @array_converter(apply_to=('points', 'cam2img')) 6 | def points_img2cam(points, cam2img): 7 | """Project points in image coordinates to camera coordinates. 
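    Each 2.5D point (u, v, depth) is lifted by scaling its homogeneous pixel
    coordinate by the depth and multiplying by the inverse of the (padded)
    camera intrinsic matrix.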
8 | 9 | Args: 10 | points (torch.Tensor): 2.5D points in 2D images, [N, 3], 11 | 3 corresponds with x, y in the image and depth. 12 | cam2img (torch.Tensor): Camera intrinsic matrix. The shape can be 13 | [3, 3], [3, 4] or [4, 4]. 14 | 15 | Returns: 16 | torch.Tensor: points in 3D space. [N, 3], 17 | 3 corresponds with x, y, z in 3D space. 18 | """ 19 | assert cam2img.shape[0] <= 4 20 | assert cam2img.shape[1] <= 4 21 | assert points.shape[1] == 3 22 | 23 | xys = points[:, :2] 24 | depths = points[:, 2].view(-1, 1) 25 | unnormed_xys = torch.cat([xys * depths, depths], dim=1) 26 | 27 | pad_cam2img = torch.eye(4, dtype=xys.dtype, device=xys.device) 28 | pad_cam2img[:cam2img.shape[0], :cam2img.shape[1]] = cam2img 29 | inv_pad_cam2img = torch.inverse(pad_cam2img).transpose(0, 1) 30 | 31 | # Do operation in homogeneous coordinates. 32 | num_points = unnormed_xys.shape[0] 33 | homo_xys = torch.cat([unnormed_xys, xys.new_ones((num_points, 1))], dim=1) 34 | points3D = torch.mm(homo_xys, inv_pad_cam2img)[:, :3] 35 | 36 | return points3D 37 | 38 | 39 | def normalize_bbox(bboxes, pc_range): 40 | include_velocity = (bboxes.shape[-1] % 9 == 0) 41 | num_properties = 9 if include_velocity else 7 42 | num_views = bboxes.shape[-1] // num_properties 43 | 44 | cx = bboxes[..., 0::num_properties] 45 | cy = bboxes[..., 1::num_properties] 46 | cz = bboxes[..., 2::num_properties] 47 | w = bboxes[..., 3::num_properties].log() 48 | l = bboxes[..., 4::num_properties].log() 49 | h = bboxes[..., 5::num_properties].log() 50 | 51 | rot = bboxes[..., 6::num_properties] 52 | if include_velocity: 53 | vx = bboxes[..., 7::num_properties] 54 | vy = bboxes[..., 8::num_properties] 55 | # (..., 10 x V) 56 | normalized_bboxes = torch.cat((cx, cy, w, l, cz, h, rot.sin(), rot.cos(), vx, vy), dim=-1) 57 | else: 58 | # (..., 8 x V) 59 | normalized_bboxes = torch.cat((cx, cy, w, l, cz, h, rot.sin(), rot.cos()), dim=-1) 60 | 61 | normalized_bboxes = normalized_bboxes.reshape(*normalized_bboxes.shape[:-1], num_properties + 1, num_views) 62 | # (..., V x P) 63 | normalized_bboxes = normalized_bboxes.transpose(-1, -2).flatten(-2) 64 | 65 | return normalized_bboxes 66 | 67 | 68 | def denormalize_bbox(normalized_bboxes, pc_range): 69 | include_velocity = (normalized_bboxes.shape[-1] % 10 == 0) 70 | num_properties = 10 if include_velocity else 8 71 | num_views = normalized_bboxes.shape[-1] // num_properties 72 | 73 | # rotation 74 | rot_sin = normalized_bboxes[..., 6::num_properties] 75 | rot_cos = normalized_bboxes[..., 7::num_properties] 76 | rot = torch.atan2(rot_sin, rot_cos) 77 | 78 | # center in the bev 79 | cx = normalized_bboxes[..., 0::num_properties] 80 | cy = normalized_bboxes[..., 1::num_properties] 81 | cz = normalized_bboxes[..., 4::num_properties] 82 | 83 | # size 84 | w = normalized_bboxes[..., 2::num_properties] 85 | l = normalized_bboxes[..., 3::num_properties] 86 | h = normalized_bboxes[..., 5::num_properties] 87 | 88 | w = w.exp() 89 | l = l.exp() 90 | h = h.exp() 91 | if include_velocity: 92 | # velocity 93 | vx = normalized_bboxes[:, 8::num_properties] 94 | vy = normalized_bboxes[:, 9::num_properties] 95 | # (..., 9 x V) 96 | denormalized_bboxes = torch.cat([cx, cy, cz, w, l, h, rot, vx, vy], dim=-1) 97 | else: 98 | # (..., 7 x V) 99 | denormalized_bboxes = torch.cat([cx, cy, cz, w, l, h, rot], dim=-1) 100 | 101 | denormalized_bboxes = denormalized_bboxes.reshape(*denormalized_bboxes.shape[:-1], num_properties - 1, num_views) 102 | # (..., V * P) 103 | denormalized_bboxes = denormalized_bboxes.transpose(-1, 
-2).flatten(-2) 104 | 105 | return denormalized_bboxes -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/grid_mask.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | from PIL import Image 5 | 6 | class Grid(object): 7 | def __init__(self, use_h, use_w, rotate = 1, offset=False, ratio = 0.5, mode=0, prob = 1.): 8 | self.use_h = use_h 9 | self.use_w = use_w 10 | self.rotate = rotate 11 | self.offset = offset 12 | self.ratio = ratio 13 | self.mode=mode 14 | self.st_prob = prob 15 | self.prob = prob 16 | 17 | def set_prob(self, epoch, max_epoch): 18 | self.prob = self.st_prob * epoch / max_epoch 19 | 20 | def __call__(self, img, label): 21 | if np.random.rand() > self.prob: 22 | return img, label 23 | h = img.size(1) 24 | w = img.size(2) 25 | self.d1 = 2 26 | self.d2 = min(h, w) 27 | hh = int(1.5*h) 28 | ww = int(1.5*w) 29 | d = np.random.randint(self.d1, self.d2) 30 | if self.ratio == 1: 31 | self.l = np.random.randint(1, d) 32 | else: 33 | self.l = min(max(int(d*self.ratio+0.5),1),d-1) 34 | mask = np.ones((hh, ww), np.float32) 35 | st_h = np.random.randint(d) 36 | st_w = np.random.randint(d) 37 | if self.use_h: 38 | for i in range(hh//d): 39 | s = d*i + st_h 40 | t = min(s+self.l, hh) 41 | mask[s:t,:] *= 0 42 | if self.use_w: 43 | for i in range(ww//d): 44 | s = d*i + st_w 45 | t = min(s+self.l, ww) 46 | mask[:,s:t] *= 0 47 | 48 | r = np.random.randint(self.rotate) 49 | mask = Image.fromarray(np.uint8(mask)) 50 | mask = mask.rotate(r) 51 | mask = np.asarray(mask) 52 | mask = mask[(hh-h)//2:(hh-h)//2+h, (ww-w)//2:(ww-w)//2+w] 53 | 54 | mask = torch.from_numpy(mask).float() 55 | if self.mode == 1: 56 | mask = 1-mask 57 | 58 | mask = mask.expand_as(img) 59 | if self.offset: 60 | offset = torch.from_numpy(2 * (np.random.rand(h,w) - 0.5)).float() 61 | offset = (1 - mask) * offset 62 | img = img * mask + offset 63 | else: 64 | img = img * mask 65 | 66 | return img, label 67 | 68 | 69 | class GridMask(nn.Module): 70 | def __init__(self, use_h, use_w, rotate = 1, offset=False, ratio = 0.5, mode=0, prob = 1.): 71 | super(GridMask, self).__init__() 72 | self.use_h = use_h 73 | self.use_w = use_w 74 | self.rotate = rotate 75 | self.offset = offset 76 | self.ratio = ratio 77 | self.mode = mode 78 | self.st_prob = prob 79 | self.prob = prob 80 | 81 | def set_prob(self, epoch, max_epoch): 82 | self.prob = self.st_prob * epoch / max_epoch #+ 1.#0.5 83 | 84 | def forward(self, x): 85 | if np.random.rand() > self.prob or not self.training: 86 | return x 87 | n,c,h,w = x.size() 88 | x = x.view(-1,h,w) 89 | hh = int(1.5*h) 90 | ww = int(1.5*w) 91 | d = np.random.randint(2, h) 92 | self.l = min(max(int(d*self.ratio+0.5),1),d-1) 93 | mask = np.ones((hh, ww), np.float32) 94 | st_h = np.random.randint(d) 95 | st_w = np.random.randint(d) 96 | if self.use_h: 97 | for i in range(hh//d): 98 | s = d*i + st_h 99 | t = min(s+self.l, hh) 100 | mask[s:t,:] *= 0 101 | if self.use_w: 102 | for i in range(ww//d): 103 | s = d*i + st_w 104 | t = min(s+self.l, ww) 105 | mask[:,s:t] *= 0 106 | 107 | r = np.random.randint(self.rotate) 108 | mask = Image.fromarray(np.uint8(mask)) 109 | mask = mask.rotate(r) 110 | mask = np.asarray(mask) 111 | mask = mask[(hh-h)//2:(hh-h)//2+h, (ww-w)//2:(ww-w)//2+w] 112 | 113 | mask = torch.from_numpy(mask).float().cuda() 114 | if self.mode == 1: 115 | mask = 1-mask 116 | mask = mask.expand_as(x) 117 | if self.offset: 118 | 
offset = torch.from_numpy(2 * (np.random.rand(h,w) - 0.5)).float().cuda() 119 | x = x * mask + offset * (1 - mask) 120 | else: 121 | x = x * mask 122 | 123 | return x.view(n,c,h,w) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # VEDet: Viewpoint Equivariance for Multi-View 3D Object Detection (CVPR 2023) 3 | 4 | This is the official implementation of CVPR 2023 paper [**Viewpoint Equivariance for Multi-View 3D Object Detection**](https://arxiv.org/abs/2303.14548) authored by [Dian Chen](https://scholar.google.com/citations?user=zdAyna8AAAAJ&hl=en), [Jie Li](https://scholar.google.com/citations?user=_I3COxAAAAAJ&hl=en), [Vitor Guizilini](https://scholar.google.com/citations?user=UH9tP6QAAAAJ&hl=en), [Rares Ambrus](https://scholar.google.com/citations?user=2xjjS3oAAAAJ&hl=en), and [Adrien Gaidon](https://scholar.google.com/citations?user=2StUgf4AAAAJ&hl=en), at [Toyota Research Institute](https://www.tri.global/). We introduce viewpoint equivariance on view-conditioned object queries achieving state-of-the-art 3D object performance. 5 | 6 | ![framework](media/framework.png) 7 | - [May 4, 2023] Our code and models are released! 8 | - [Mar. 27, 2023] ~~Our code and models will be released soon. Please stay tuned!~~ 9 | 10 | 11 | ## Contents 12 | - [Install](#install) 13 | - [Dataset preparation](#dataset-preparation) 14 | - [Training](#training) 15 | - [Inference](#inference) 16 | - [License](#license) 17 | - [Reference](#reference) 18 | 19 | 20 | ## Install 21 | 22 | We provide instructions for using docker environment and pip/conda environment (docker is recommended for portability and reproducibility). Please refer to [INSTALL.md](docs/INSTALL.md) for detailed instructions. 23 | 24 | ## Dataset preparation 25 | Please download the full [NuScenes dataset from the official website](https://www.nuscenes.org/nuscenes#download), and preprocess the meta data following the [instructions from MMDetection3D](https://github.com/open-mmlab/mmdetection3d/blob/master/docs/en/data_preparation.md) to obtain the `.pkl` files with mmdet3d format. For convenience we provide the preprocessed `.pkl` files for nuscenes dataset [here](https://tri-ml-public.s3.amazonaws.com/github/vedet/nuscenes_infos.zip). Put the `.pkl` files under the NuScenes folder. 26 | 27 | ## Training 28 | To train a model with the provided configs, please run the following: 29 | ```bash 30 | # run distributed training with 8 GPUs 31 | # tools/dist_train.sh 8 --work-dir --cfg-options 32 | 33 | # for example: 34 | tools/dist_train.sh projects/configs/vedet_vovnet_p4_1600x640_2vview_2frame.py 8 --work-dir work_dirs/vedet_vovnet_p4_1600x640_2vview_2frame/ 35 | ``` 36 | Before running the training with V2-99 backbone, please download the [DD3D](https://arxiv.org/abs/2108.06417) pre-trained weights from [here](https://tri-ml-public.s3.amazonaws.com/github/vedet/fcos3d_vovnet_imgbackbone-remapped.pth). 37 | 38 | We provide results on the NuScenes `val` set from the paper, as summarized below. 
39 | 40 | | config | mAP | NDS | resolution | backbone | context | download | 41 | |:------:|:---:|:---:|:----------:|:-------:|:-----:|:-----:| 42 | | [vedet_vovnet_p4_1600x640_2vview_2frame](projects/configs/vedet_vovnet_p4_1600x640_2vview_2frame.py) | 0.451 | 0.527 | 1600x640 | V2-99 | current + 1 past frame | [model](https://tri-ml-public.s3.amazonaws.com/github/vedet/vedet_vovnet_p4_1600x640_2vview_2frame/latest.pth) / [log](https://tri-ml-public.s3.amazonaws.com/github/vedet/vedet_vovnet_p4_1600x640_2vview_2frame/20230130_000443.log) | 43 | 44 | 45 | ## Inference 46 | To run inference with a checkpoint, please run the following: 47 | ```bash 48 | # run distributed evaluation with 8 GPUs 49 | # tools/dist_test.sh <config path> <checkpoint path> <num GPUs> --eval bbox 50 | 51 | # for example: 52 | tools/dist_test.sh projects/configs/vedet_vovnet_p4_1600x640_2vview_2frame.py work_dirs/vedet_vovnet_p4_1600x640_2vview_2frame/latest.pth 8 --eval bbox 53 | ``` 54 | 55 | ## License 56 | We release this repo under the [CC BY-NC 4.0](LICENSE.md) license. 57 | 58 | ## Reference 59 | If you have any questions, feel free to open an issue under this repo, or contact us. 60 | If you find this work helpful to your research, please consider citing us: 61 | 62 | ``` 63 | @article{chen2023viewpoint, 64 | title={Viewpoint Equivariance for Multi-View 3D Object Detection}, 65 | author={Chen, Dian and Li, Jie and Guizilini, Vitor and Ambrus, Rares and Gaidon, Adrien}, 66 | journal={arXiv preprint arXiv:2303.14548}, 67 | year={2023} 68 | } 69 | ``` 70 | We also thank the authors of [detr3d](https://github.com/WangYueFt/detr3d) and [petr/petrv2](https://github.com/megvii-research/PETR). 71 | -------------------------------------------------------------------------------- /tools/data_converter/indoor_converter.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import mmcv 3 | import numpy as np 4 | import os 5 | 6 | from tools.data_converter.s3dis_data_utils import S3DISData, S3DISSegData 7 | from tools.data_converter.scannet_data_utils import ScanNetData, ScanNetSegData 8 | from tools.data_converter.sunrgbd_data_utils import SUNRGBDData 9 | 10 | 11 | def create_indoor_info_file(data_path, 12 | pkl_prefix='sunrgbd', 13 | save_path=None, 14 | use_v1=False, 15 | workers=4): 16 | """Create indoor information file. 17 | 18 | Get information of the raw data and save it to the pkl file. 19 | 20 | Args: 21 | data_path (str): Path of the data. 22 | pkl_prefix (str): Prefix of the pkl to be saved. Default: 'sunrgbd'. 23 | save_path (str): Path of the pkl to be saved. Default: None. 24 | use_v1 (bool): Whether to use v1. Default: False. 25 | workers (int): Number of threads to be used. Default: 4.
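        Returns:
            None. The generated info files are dumped under ``save_path``:
            ``{pkl_prefix}_infos_train.pkl`` and ``{pkl_prefix}_infos_val.pkl``
            for SUN RGB-D and ScanNet (plus ``{pkl_prefix}_infos_test.pkl`` for
            ScanNet), and one ``{pkl_prefix}_infos_Area_{i}.pkl`` per area for
            S3DIS.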
26 | """ 27 | assert os.path.exists(data_path) 28 | assert pkl_prefix in ['sunrgbd', 'scannet', 's3dis'], \ 29 | f'unsupported indoor dataset {pkl_prefix}' 30 | save_path = data_path if save_path is None else save_path 31 | assert os.path.exists(save_path) 32 | 33 | # generate infos for both detection and segmentation task 34 | if pkl_prefix in ['sunrgbd', 'scannet']: 35 | train_filename = os.path.join(save_path, 36 | f'{pkl_prefix}_infos_train.pkl') 37 | val_filename = os.path.join(save_path, f'{pkl_prefix}_infos_val.pkl') 38 | if pkl_prefix == 'sunrgbd': 39 | # SUN RGB-D has a train-val split 40 | train_dataset = SUNRGBDData( 41 | root_path=data_path, split='train', use_v1=use_v1) 42 | val_dataset = SUNRGBDData( 43 | root_path=data_path, split='val', use_v1=use_v1) 44 | else: 45 | # ScanNet has a train-val-test split 46 | train_dataset = ScanNetData(root_path=data_path, split='train') 47 | val_dataset = ScanNetData(root_path=data_path, split='val') 48 | test_dataset = ScanNetData(root_path=data_path, split='test') 49 | test_filename = os.path.join(save_path, 50 | f'{pkl_prefix}_infos_test.pkl') 51 | 52 | infos_train = train_dataset.get_infos( 53 | num_workers=workers, has_label=True) 54 | mmcv.dump(infos_train, train_filename, 'pkl') 55 | print(f'{pkl_prefix} info train file is saved to {train_filename}') 56 | 57 | infos_val = val_dataset.get_infos(num_workers=workers, has_label=True) 58 | mmcv.dump(infos_val, val_filename, 'pkl') 59 | print(f'{pkl_prefix} info val file is saved to {val_filename}') 60 | 61 | if pkl_prefix == 'scannet': 62 | infos_test = test_dataset.get_infos( 63 | num_workers=workers, has_label=False) 64 | mmcv.dump(infos_test, test_filename, 'pkl') 65 | print(f'{pkl_prefix} info test file is saved to {test_filename}') 66 | 67 | # generate infos for the semantic segmentation task 68 | # e.g. re-sampled scene indexes and label weights 69 | # scene indexes are used to re-sample rooms with different number of points 70 | # label weights are used to balance classes with different number of points 71 | if pkl_prefix == 'scannet': 72 | # label weight computation function is adopted from 73 | # https://github.com/charlesq34/pointnet2/blob/master/scannet/scannet_dataset.py#L24 74 | train_dataset = ScanNetSegData( 75 | data_root=data_path, 76 | ann_file=train_filename, 77 | split='train', 78 | num_points=8192, 79 | label_weight_func=lambda x: 1.0 / np.log(1.2 + x)) 80 | # TODO: do we need to generate on val set? 
81 | val_dataset = ScanNetSegData( 82 | data_root=data_path, 83 | ann_file=val_filename, 84 | split='val', 85 | num_points=8192, 86 | label_weight_func=lambda x: 1.0 / np.log(1.2 + x)) 87 | # no need to generate for test set 88 | train_dataset.get_seg_infos() 89 | val_dataset.get_seg_infos() 90 | elif pkl_prefix == 's3dis': 91 | # S3DIS doesn't have a fixed train-val split 92 | # it has 6 areas instead, so we generate info file for each of them 93 | # in training, we will use dataset to wrap different areas 94 | splits = [f'Area_{i}' for i in [1, 2, 3, 4, 5, 6]] 95 | for split in splits: 96 | dataset = S3DISData(root_path=data_path, split=split) 97 | info = dataset.get_infos(num_workers=workers, has_label=True) 98 | filename = os.path.join(save_path, 99 | f'{pkl_prefix}_infos_{split}.pkl') 100 | mmcv.dump(info, filename, 'pkl') 101 | print(f'{pkl_prefix} info {split} file is saved to {filename}') 102 | seg_dataset = S3DISSegData( 103 | data_root=data_path, 104 | ann_file=filename, 105 | split=split, 106 | num_points=4096, 107 | label_weight_func=lambda x: 1.0 / np.log(1.2 + x)) 108 | seg_dataset.get_seg_infos() 109 | -------------------------------------------------------------------------------- /tools/model_converters/convert_votenet_checkpoints.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import tempfile 4 | import torch 5 | from mmcv import Config 6 | from mmcv.runner import load_state_dict 7 | 8 | from mmdet3d.models import build_detector 9 | 10 | 11 | def parse_args(): 12 | parser = argparse.ArgumentParser( 13 | description='MMDet3D upgrade model version(before v0.6.0) of VoteNet') 14 | parser.add_argument('checkpoint', help='checkpoint file') 15 | parser.add_argument('--out', help='path of the output checkpoint file') 16 | args = parser.parse_args() 17 | return args 18 | 19 | 20 | def parse_config(config_strings): 21 | """Parse config from strings. 22 | 23 | Args: 24 | config_strings (string): strings of model config. 25 | 26 | Returns: 27 | Config: model config 28 | """ 29 | temp_file = tempfile.NamedTemporaryFile() 30 | config_path = f'{temp_file.name}.py' 31 | with open(config_path, 'w') as f: 32 | f.write(config_strings) 33 | 34 | config = Config.fromfile(config_path) 35 | 36 | # Update backbone config 37 | if 'pool_mod' in config.model.backbone: 38 | config.model.backbone.pop('pool_mod') 39 | 40 | if 'sa_cfg' not in config.model.backbone: 41 | config.model.backbone['sa_cfg'] = dict( 42 | type='PointSAModule', 43 | pool_mod='max', 44 | use_xyz=True, 45 | normalize_xyz=True) 46 | 47 | if 'type' not in config.model.bbox_head.vote_aggregation_cfg: 48 | config.model.bbox_head.vote_aggregation_cfg['type'] = 'PointSAModule' 49 | 50 | # Update bbox_head config 51 | if 'pred_layer_cfg' not in config.model.bbox_head: 52 | config.model.bbox_head['pred_layer_cfg'] = dict( 53 | in_channels=128, shared_conv_channels=(128, 128), bias=True) 54 | 55 | if 'feat_channels' in config.model.bbox_head: 56 | config.model.bbox_head.pop('feat_channels') 57 | 58 | if 'vote_moudule_cfg' in config.model.bbox_head: 59 | config.model.bbox_head['vote_module_cfg'] = config.model.bbox_head.pop( 60 | 'vote_moudule_cfg') 61 | 62 | if config.model.bbox_head.vote_aggregation_cfg.use_xyz: 63 | config.model.bbox_head.vote_aggregation_cfg.mlp_channels[0] -= 3 64 | 65 | temp_file.close() 66 | 67 | return config 68 | 69 | 70 | def main(): 71 | """Convert keys in checkpoints for VoteNet. 
72 | 73 | There can be some breaking changes during the development of mmdetection3d, 74 | and this tool is used for upgrading checkpoints trained with old versions 75 | (before v0.6.0) to the latest one. 76 | """ 77 | args = parse_args() 78 | checkpoint = torch.load(args.checkpoint) 79 | cfg = parse_config(checkpoint['meta']['config']) 80 | # Build the model and load checkpoint 81 | model = build_detector( 82 | cfg.model, 83 | train_cfg=cfg.get('train_cfg'), 84 | test_cfg=cfg.get('test_cfg')) 85 | orig_ckpt = checkpoint['state_dict'] 86 | converted_ckpt = orig_ckpt.copy() 87 | 88 | if cfg['dataset_type'] == 'ScanNetDataset': 89 | NUM_CLASSES = 18 90 | elif cfg['dataset_type'] == 'SUNRGBDDataset': 91 | NUM_CLASSES = 10 92 | else: 93 | raise NotImplementedError 94 | 95 | RENAME_PREFIX = { 96 | 'bbox_head.conv_pred.0': 'bbox_head.conv_pred.shared_convs.layer0', 97 | 'bbox_head.conv_pred.1': 'bbox_head.conv_pred.shared_convs.layer1' 98 | } 99 | 100 | DEL_KEYS = [ 101 | 'bbox_head.conv_pred.0.bn.num_batches_tracked', 102 | 'bbox_head.conv_pred.1.bn.num_batches_tracked' 103 | ] 104 | 105 | EXTRACT_KEYS = { 106 | 'bbox_head.conv_pred.conv_cls.weight': 107 | ('bbox_head.conv_pred.conv_out.weight', [(0, 2), (-NUM_CLASSES, -1)]), 108 | 'bbox_head.conv_pred.conv_cls.bias': 109 | ('bbox_head.conv_pred.conv_out.bias', [(0, 2), (-NUM_CLASSES, -1)]), 110 | 'bbox_head.conv_pred.conv_reg.weight': 111 | ('bbox_head.conv_pred.conv_out.weight', [(2, -NUM_CLASSES)]), 112 | 'bbox_head.conv_pred.conv_reg.bias': 113 | ('bbox_head.conv_pred.conv_out.bias', [(2, -NUM_CLASSES)]) 114 | } 115 | 116 | # Delete some useless keys 117 | for key in DEL_KEYS: 118 | converted_ckpt.pop(key) 119 | 120 | # Rename keys with specific prefix 121 | RENAME_KEYS = dict() 122 | for old_key in converted_ckpt.keys(): 123 | for rename_prefix in RENAME_PREFIX.keys(): 124 | if rename_prefix in old_key: 125 | new_key = old_key.replace(rename_prefix, 126 | RENAME_PREFIX[rename_prefix]) 127 | RENAME_KEYS[new_key] = old_key 128 | for new_key, old_key in RENAME_KEYS.items(): 129 | converted_ckpt[new_key] = converted_ckpt.pop(old_key) 130 | 131 | # Extract weights and rename the keys 132 | for new_key, (old_key, indices) in EXTRACT_KEYS.items(): 133 | cur_layers = orig_ckpt[old_key] 134 | converted_layers = [] 135 | for (start, end) in indices: 136 | if end != -1: 137 | converted_layers.append(cur_layers[start:end]) 138 | else: 139 | converted_layers.append(cur_layers[start:]) 140 | converted_layers = torch.cat(converted_layers, 0) 141 | converted_ckpt[new_key] = converted_layers 142 | if old_key in converted_ckpt.keys(): 143 | converted_ckpt.pop(old_key) 144 | 145 | # Check the converted checkpoint by loading to the model 146 | load_state_dict(model, converted_ckpt, strict=True) 147 | checkpoint['state_dict'] = converted_ckpt 148 | torch.save(checkpoint, args.out) 149 | 150 | 151 | if __name__ == '__main__': 152 | main() 153 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/assigners/hungarian_assigner_3d.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2022 Toyota Research Institute, Dian Chen. All Rights Reserved. 3 | # ------------------------------------------------------------------------ 4 | # Copyright (c) 2021 megvii-model. All Rights Reserved. 
5 | # ------------------------------------------------------------------------ 6 | # Modified from DETR3D (https://github.com/WangYueFt/detr3d) 7 | # Copyright (c) 2021 Wang, Yue 8 | # ------------------------------------------------------------------------ 9 | # Modified from mmdetection (https://github.com/open-mmlab/mmdetection) 10 | # Copyright (c) OpenMMLab. All rights reserved. 11 | # ------------------------------------------------------------------------ 12 | import torch 13 | 14 | from mmdet.core.bbox.builder import BBOX_ASSIGNERS 15 | from mmdet.core.bbox.assigners import AssignResult 16 | from mmdet.core.bbox.assigners import BaseAssigner 17 | from mmdet.core.bbox.match_costs import build_match_cost 18 | from mmdet.models.utils.transformer import inverse_sigmoid 19 | from projects.mmdet3d_plugin.core.bbox.util import normalize_bbox 20 | 21 | try: 22 | from scipy.optimize import linear_sum_assignment 23 | except ImportError: 24 | linear_sum_assignment = None 25 | 26 | 27 | @BBOX_ASSIGNERS.register_module() 28 | class HungarianAssigner3D(BaseAssigner): 29 | """Computes one-to-one matching between predictions and ground truth. 30 | This class computes an assignment between the targets and the predictions 31 | based on the costs. The costs are weighted sum of three components: 32 | classification cost, regression L1 cost and regression iou cost. The 33 | targets don't include the no_object, so generally there are more 34 | predictions than targets. After the one-to-one matching, the un-matched 35 | are treated as backgrounds. Thus each query prediction will be assigned 36 | with `0` or a positive integer indicating the ground truth index: 37 | - 0: negative sample, no assigned gt 38 | - positive integer: positive sample, index (1-based) of assigned gt 39 | Args: 40 | cls_weight (int | float, optional): The scale factor for classification 41 | cost. Default 1.0. 42 | bbox_weight (int | float, optional): The scale factor for regression 43 | L1 cost. Default 1.0. 44 | iou_weight (int | float, optional): The scale factor for regression 45 | iou cost. Default 1.0. 46 | iou_calculator (dict | optional): The config for the iou calculation. 47 | Default type `BboxOverlaps2D`. 48 | iou_mode (str | optional): "iou" (intersection over union), "iof" 49 | (intersection over foreground), or "giou" (generalized 50 | intersection over union). Default "giou". 51 | """ 52 | 53 | def __init__(self, 54 | cls_cost=dict(type='ClassificationCost', weight=1.), 55 | reg_cost=dict(type='BBoxL1Cost', weight=1.0), 56 | iou_cost=dict(type='IoUCost', weight=0.0), 57 | align_with_loss=False, 58 | pc_range=None): 59 | self.cls_cost = build_match_cost(cls_cost) 60 | self.reg_cost = build_match_cost(reg_cost) 61 | self.iou_cost = build_match_cost(iou_cost) 62 | self.align_with_loss = align_with_loss 63 | self.pc_range = pc_range 64 | 65 | def assign(self, bbox_pred, cls_pred, gt_bboxes, gt_labels, gt_bboxes_ignore=None, code_weights=None, eps=1e-7): 66 | """Computes one-to-one matching based on the weighted costs. 67 | This method assign each query prediction to a ground truth or 68 | background. The `assigned_gt_inds` with -1 means don't care, 69 | 0 means negative sample, and positive number is the index (1-based) 70 | of assigned gt. 71 | The assignment is done in the following steps, the order matters. 72 | 1. assign every prediction to -1 73 | 2. compute the weighted costs 74 | 3. do Hungarian matching on CPU based on the costs 75 | 4. 
assign all to 0 (background) first, then for each matched pair 76 | between predictions and gts, treat this prediction as foreground 77 | and assign the corresponding gt index (plus 1) to it. 78 | Args: 79 | bbox_pred (Tensor): Predicted boxes with normalized coordinates 80 | (cx, cy, w, h), which are all in range [0, 1]. Shape 81 | [num_query, 4]. 82 | cls_pred (Tensor): Predicted classification logits, shape 83 | [num_query, num_class]. 84 | gt_bboxes (Tensor): Ground truth boxes with unnormalized 85 | coordinates (x1, y1, x2, y2). Shape [num_gt, 4]. 86 | gt_labels (Tensor): Label of `gt_bboxes`, shape (num_gt,). 87 | gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are 88 | labelled as `ignored`. Default None. 89 | eps (int | float, optional): A value added to the denominator for 90 | numerical stability. Default 1e-7. 91 | Returns: 92 | :obj:`AssignResult`: The assigned result. 93 | """ 94 | assert gt_bboxes_ignore is None, \ 95 | 'Only case when gt_bboxes_ignore is None is supported.' 96 | num_gts, num_bboxes = gt_bboxes.size(0), bbox_pred.size(0) 97 | 98 | # 1. assign -1 by default 99 | assigned_gt_inds = bbox_pred.new_full((num_bboxes, ), -1, dtype=torch.long) 100 | assigned_labels = bbox_pred.new_full((num_bboxes, ), -1, dtype=torch.long) 101 | if num_gts == 0 or num_bboxes == 0: 102 | # No ground truth or boxes, return empty assignment 103 | if num_gts == 0: 104 | # No ground truth, assign all to background 105 | assigned_gt_inds[:] = 0 106 | return AssignResult(num_gts, assigned_gt_inds, None, labels=assigned_labels) 107 | 108 | # 2. compute the weighted costs 109 | # classification and bboxcost. 110 | cls_cost = self.cls_cost(cls_pred, gt_labels) 111 | # regression L1 cost 112 | normalized_gt_bboxes = normalize_bbox(gt_bboxes, self.pc_range) 113 | if self.align_with_loss: 114 | normalized_gt_bboxes = normalized_gt_bboxes * code_weights 115 | bbox_pred = bbox_pred * code_weights 116 | reg_cost = self.reg_cost(bbox_pred, normalized_gt_bboxes) 117 | else: 118 | reg_cost = self.reg_cost(bbox_pred[:, :8], normalized_gt_bboxes[:, :8]) 119 | 120 | # weighted sum of above two costs 121 | cost = cls_cost + reg_cost 122 | 123 | # 3. do Hungarian matching on CPU using linear_sum_assignment 124 | cost = cost.detach().cpu() 125 | if linear_sum_assignment is None: 126 | raise ImportError('Please run "pip install scipy" ' 127 | 'to install scipy first.') 128 | cost = torch.nan_to_num(cost, nan=100.0, posinf=100.0, neginf=-100.0) 129 | matched_row_inds, matched_col_inds = linear_sum_assignment(cost) 130 | matched_row_inds = torch.from_numpy(matched_row_inds).to(bbox_pred.device) 131 | matched_col_inds = torch.from_numpy(matched_col_inds).to(bbox_pred.device) 132 | 133 | # 4. assign backgrounds and foregrounds 134 | # assign all indices to backgrounds first 135 | assigned_gt_inds[:] = 0 136 | # assign foregrounds based on matching results 137 | assigned_gt_inds[matched_row_inds] = matched_col_inds + 1 138 | assigned_labels[matched_row_inds] = gt_labels[matched_col_inds] 139 | return AssignResult(num_gts, assigned_gt_inds, None, labels=assigned_labels) 140 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/vedet_transformer.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2022 Toyota Research Institute, Dian Chen. All Rights Reserved. 
3 | # ------------------------------------------------------------------------ 4 | import torch 5 | from mmcv.cnn.bricks.transformer import build_transformer_layer_sequence 6 | from mmdet.models.utils.builder import TRANSFORMER 7 | from mmdet.models.utils.transformer import inverse_sigmoid 8 | from mmcv.cnn import xavier_init 9 | from mmcv.runner.base_module import BaseModule 10 | 11 | 12 | @TRANSFORMER.register_module() 13 | class VETransformer(BaseModule): 14 | """Implements the DETR transformer. 15 | Following the official DETR implementation, this module copy-paste 16 | from torch.nn.Transformer with modifications: 17 | * positional encodings are passed in MultiheadAttention 18 | * extra LN at the end of encoder is removed 19 | * decoder returns a stack of activations from all decoding layers 20 | See `paper: End-to-End Object Detection with Transformers 21 | `_ for details. 22 | Args: 23 | encoder (`mmcv.ConfigDict` | Dict): Config of 24 | TransformerEncoder. Defaults to None. 25 | decoder ((`mmcv.ConfigDict` | Dict)): Config of 26 | TransformerDecoder. Defaults to None 27 | init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization. 28 | Defaults to None. 29 | """ 30 | 31 | def __init__(self, 32 | det_decoder=None, 33 | seg_decoder=None, 34 | use_iterative_refinement=False, 35 | reduction='ego', 36 | init_cfg=None): 37 | super(VETransformer, self).__init__(init_cfg=init_cfg) 38 | 39 | self.det_decoders = None 40 | if det_decoder is not None: 41 | self.det_decoders = build_transformer_layer_sequence(det_decoder) 42 | 43 | self.seg_decoders = None 44 | if seg_decoder is not None: 45 | self.seg_decoders = build_transformer_layer_sequence(seg_decoder) 46 | 47 | assert reduction in {'ego', 'mean'} 48 | self.reduction = reduction 49 | self.use_iterative_refinement = use_iterative_refinement 50 | 51 | def init_weights(self): 52 | # follow the official DETR to init parameters 53 | for m in self.modules(): 54 | if hasattr(m, 'weight') and m.weight.dim() > 1: 55 | xavier_init(m, distribution='uniform') 56 | self._is_init = True 57 | 58 | def forward(self, 59 | x, 60 | mask, 61 | x_pos, 62 | init_det_points, 63 | init_det_points_mtv, 64 | init_seg_points, 65 | pos_encoder, 66 | pos_seg_encoder, 67 | reg_branch=None, 68 | num_decode_views=2, 69 | **kwargs): 70 | """Forward function for `Transformer`. 71 | Args: 72 | x (Tensor): Input query with shape [bs, c, h, w] where 73 | c = embed_dims. 74 | mask (Tensor): The key_padding_mask used for encoder and decoder, 75 | with shape [bs, h, w]. 76 | query_embed (Tensor): The query embedding for decoder, with shape 77 | [num_query, c]. 78 | pos_embed (Tensor): The positional encoding for encoder and 79 | decoder, with the same shape as `x`. 80 | Returns: 81 | tuple[Tensor]: results of decoder containing the following tensor. 82 | - out_dec: Output from decoder. If return_intermediate_dec \ 83 | is True output has shape [num_dec_layers, bs, 84 | num_query, embed_dims], else has shape [1, bs, \ 85 | num_query, embed_dims]. 86 | - memory: Output results from encoder, with shape \ 87 | [bs, embed_dims, h, w]. 
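                Note: in this adaptation the forward pass actually returns a tuple
                ``(det_outputs, regs, seg_outputs)``: the stacked detection decoder
                outputs, the per-layer box regressions, and the segmentation decoder
                outputs (each an empty list when the corresponding decoder is not
                configured); see the implementation below.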
88 | """ 89 | bs, n, hw, c = x.shape 90 | x = x.reshape(bs, n * hw, c) 91 | x_pos = x_pos.reshape(bs, n * hw, -1) 92 | 93 | mask = mask.view(bs, -1) # [bs, n, h*w] -> [bs, n*h*w] 94 | 95 | # segmentation decoders 96 | seg_outputs = [] 97 | if self.seg_decoders is not None: 98 | query_points = init_seg_points.flatten(1, -2) 99 | # query_embeds = pos_encoder(query_points) 100 | query_embeds = pos_seg_encoder(query_points) 101 | query = torch.zeros_like(query_embeds) 102 | 103 | seg_outputs = self.seg_decoders( 104 | query=query.transpose(0, 1), 105 | key=x.transpose(0, 1), 106 | value=x.transpose(0, 1), 107 | key_pos=None, 108 | query_pos=query_embeds.transpose(0, 1), 109 | key_padding_mask=None, 110 | reg_branch=None) 111 | seg_outputs = seg_outputs.transpose(1, 2) 112 | seg_outputs = torch.nan_to_num(seg_outputs) 113 | 114 | # detection decoders 115 | det_outputs, regs = [], [] 116 | if self.det_decoders is not None: 117 | memory = x.transpose(0, 1) 118 | attn_masks = [None, None] 119 | num_query = init_det_points.shape[-2] 120 | total_num = num_query * (1 + num_decode_views) 121 | self_attn_mask = memory.new_ones((total_num, total_num)) 122 | for i in range(1 + num_decode_views): 123 | self_attn_mask[i * num_query:(i + 1) * num_query, i * num_query:(i + 1) * num_query] = 0 124 | attn_masks[0] = self_attn_mask 125 | det_outputs, regs = self.decode_bboxes(init_det_points, init_det_points_mtv, memory, x_pos.transpose(0, 1), 126 | mask, attn_masks, pos_encoder, reg_branch, num_decode_views) 127 | 128 | return det_outputs, regs, seg_outputs 129 | 130 | def decode_bboxes(self, init_det_points, init_det_points_mtv, memory, key_pos, mask, attn_masks, pos_encoder, 131 | reg_branch, num_decode_views): 132 | if init_det_points_mtv is not None: 133 | # append queries from virtual views 134 | query_points = torch.cat([init_det_points, init_det_points_mtv], dim=1).flatten(1, 2) 135 | else: 136 | query_points = init_det_points.flatten(1, 2) 137 | 138 | query_embeds = pos_encoder(query_points) 139 | query = torch.zeros_like(query_embeds) 140 | 141 | regs = [] 142 | # output from layers' won't update next's layer's ref points 143 | det_outputs = self.det_decoders( 144 | query=query.transpose(0, 1), 145 | key=memory, 146 | value=memory, 147 | key_pos=key_pos, 148 | query_pos=query_embeds.transpose(0, 1), 149 | key_padding_mask=mask, 150 | attn_masks=attn_masks, 151 | reg_branch=reg_branch) 152 | det_outputs = det_outputs.transpose(1, 2) 153 | det_outputs = torch.nan_to_num(det_outputs) 154 | 155 | for reg_brch, output in zip(reg_branch, det_outputs): 156 | 157 | reg = reg_brch(output) 158 | reference = inverse_sigmoid(query_points[..., :3].clone()) 159 | reg[..., 0:2] += reference[..., 0:2] 160 | reg[..., 0:2] = reg[..., 0:2].sigmoid() 161 | reg[..., 4:5] += reference[..., 2:3] 162 | reg[..., 4:5] = reg[..., 4:5].sigmoid() 163 | 164 | regs.append(reg) 165 | 166 | L, B, _, C = det_outputs.shape 167 | # (L, B, V + 1, M, C) 168 | det_outputs = det_outputs.reshape(L, B, num_decode_views + 1, -1, C) 169 | # (L, B, V + 1, M, 10) 170 | regs = torch.stack(regs).reshape(L, B, num_decode_views + 1, init_det_points.shape[-2], -1) 171 | 172 | # ego decode + mtv center decode, (L, B, M, V * 10) 173 | regs = regs.permute(0, 1, 3, 2, 4).flatten(-2) 174 | 175 | return det_outputs, regs 176 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/pipelines/loading.py: -------------------------------------------------------------------------------- 1 | # 
------------------------------------------------------------------------ 2 | # Copyright (c) 2022 Toyota Research Institute, Dian Chen. All Rights Reserved. 3 | # ------------------------------------------------------------------------ 4 | # Copyright (c) 2022 megvii-model. All Rights Reserved. 5 | # ------------------------------------------------------------------------ 6 | import math 7 | 8 | import mmcv 9 | import numpy as np 10 | from mmdet.datasets.builder import PIPELINES 11 | from einops import rearrange 12 | 13 | 14 | @PIPELINES.register_module() 15 | class LoadMapsFromFiles(object): 16 | 17 | def __init__(self, k=None): 18 | self.k = k 19 | 20 | def __call__(self, results): 21 | map_filename = results['map_filename'] 22 | maps = np.load(map_filename) 23 | map_mask = maps['arr_0'].astype(np.float32) 24 | 25 | maps = map_mask.transpose((2, 0, 1)) 26 | results['gt_map'] = maps 27 | maps = rearrange(maps, 'c (h h1) (w w2) -> (h w) c h1 w2 ', h1=16, w2=16) 28 | maps = maps.reshape(256, 3 * 256) 29 | results['map_shape'] = maps.shape 30 | results['maps'] = maps 31 | return results 32 | 33 | 34 | @PIPELINES.register_module() 35 | class LoadMultiViewImageFromMultiSweepsFiles(object): 36 | """Load multi channel images from a list of separate channel files. 37 | Expects results['img_filename'] to be a list of filenames. 38 | Args: 39 | to_float32 (bool): Whether to convert the img to float32. 40 | Defaults to False. 41 | color_type (str): Color type of the file. Defaults to 'unchanged'. 42 | """ 43 | 44 | def __init__( 45 | self, 46 | sweeps_num=5, 47 | to_float32=False, 48 | file_client_args=dict(backend='disk'), 49 | pad_empty_sweeps=False, 50 | sweep_range=[3, 27], 51 | time_range=-1, 52 | sweeps_id=None, 53 | color_type='unchanged', 54 | sensors=['CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_FRONT_LEFT', 'CAM_BACK', 'CAM_BACK_LEFT', 'CAM_BACK_RIGHT'], 55 | test_mode=True, 56 | prob=1.0, 57 | ): 58 | 59 | self.sweeps_num = sweeps_num 60 | self.to_float32 = to_float32 61 | self.color_type = color_type 62 | self.file_client_args = file_client_args.copy() 63 | self.file_client = None 64 | self.pad_empty_sweeps = pad_empty_sweeps 65 | self.sensors = sensors 66 | self.test_mode = test_mode 67 | self.sweeps_id = sweeps_id 68 | self.sweep_range = sweep_range 69 | self.time_range = time_range 70 | self.prob = prob 71 | if self.sweeps_id: 72 | assert len(self.sweeps_id) == self.sweeps_num 73 | 74 | def __call__(self, results): 75 | """Call function to load multi-view image from files. 76 | Args: 77 | results (dict): Result dict containing multi-view image filenames. 78 | Returns: 79 | dict: The result dict containing the multi-view image data. \ 80 | Added keys and values are described below. 81 | - filename (str): Multi-view image filenames. 82 | - img (np.ndarray): Multi-view image arrays. 83 | - img_shape (tuple[int]): Shape of multi-view image arrays. 84 | - ori_shape (tuple[int]): Shape of original image arrays. 85 | - pad_shape (tuple[int]): Shape of padded image arrays. 86 | - scale_factor (float): Scale factor. 87 | - img_norm_cfg (dict): Normalization configuration of images. 
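                Note: this loader also extends ``results['filename']``,
                ``results['lidar2img']``, ``results['intrinsics']`` and
                ``results['extrinsics']`` with the corresponding sweep entries, and
                replaces ``results['timestamp']`` with a list of per-image time
                offsets relative to the lidar timestamp (divided by ``time_range``
                when ``time_range > 0``); see the implementation below.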
88 | """ 89 | sweep_imgs_list = [] 90 | timestamp_imgs_list = [] 91 | imgs = results['img'] 92 | img_timestamp = results['img_timestamp'] 93 | lidar_timestamp = results['timestamp'] 94 | img_timestamp = [lidar_timestamp - timestamp for timestamp in img_timestamp] 95 | sweep_imgs_list.extend(imgs) 96 | timestamp_imgs_list.extend(img_timestamp) 97 | nums = len(imgs) 98 | if self.pad_empty_sweeps and len(results['cam_sweeps']) == 0: 99 | for i in range(self.sweeps_num): 100 | sweep_imgs_list.extend(imgs) 101 | mean_time = (self.sweep_range[0] + self.sweep_range[1]) / 2.0 * 0.083 102 | timestamp_imgs_list.extend([time + mean_time for time in img_timestamp]) 103 | for j in range(nums): 104 | results['filename'].append(results['filename'][j]) 105 | results['lidar2img'].append(np.copy(results['lidar2img'][j])) 106 | results['intrinsics'].append(np.copy(results['intrinsics'][j])) 107 | results['extrinsics'].append(np.copy(results['extrinsics'][j])) 108 | else: 109 | if self.sweeps_id: 110 | choices = self.sweeps_id 111 | elif len(results['cam_sweeps']) <= self.sweeps_num: 112 | choices = np.arange(len(results['cam_sweeps'])) 113 | elif self.test_mode: 114 | # choices = [int((self.sweep_range[0] + self.sweep_range[1]) / 2) - 1] 115 | max_range = min(self.sweep_range[1], len(results['cam_sweeps'])) 116 | if max_range - self.sweep_range[0] < self.sweeps_num: 117 | choices = list(range(self.sweep_range[0], max_range)) 118 | choices = (choices * math.ceil(self.sweeps_num / len(choices)))[:self.sweeps_num] 119 | else: 120 | interval = int((max_range - self.sweep_range[0]) / (self.sweeps_num + 1)) 121 | choices = [self.sweep_range[0] + interval * (i + 1) for i in range(self.sweeps_num)] 122 | else: 123 | if np.random.random() < self.prob: 124 | max_range = min(self.sweep_range[1], len(results['cam_sweeps'])) 125 | sweep_range = list(range(self.sweep_range[0], max_range)) 126 | choices = np.random.choice( 127 | sweep_range, self.sweeps_num, replace=max_range - self.sweep_range[0] < self.sweeps_num) 128 | 129 | else: 130 | choices = [int((self.sweep_range[0] + self.sweep_range[1]) / 2) - 1] 131 | 132 | choices = sorted(choices) 133 | for idx in choices: 134 | sweep_idx = min(idx, len(results['cam_sweeps']) - 1) 135 | sweep = results['cam_sweeps'][sweep_idx] 136 | if len(sweep.keys()) < len(self.sensors): 137 | sweep = results['cam_sweeps'][sweep_idx - 1] 138 | results['filename'].extend([sweep[sensor]['data_path'] for sensor in self.sensors]) 139 | 140 | img = np.stack([mmcv.imread(sweep[sensor]['data_path'], self.color_type) for sensor in self.sensors], 141 | axis=-1) 142 | 143 | if self.to_float32: 144 | img = img.astype(np.float32) 145 | img = [img[..., i] for i in range(img.shape[-1])] 146 | sweep_imgs_list.extend(img) 147 | sweep_ts = [lidar_timestamp - sweep[sensor]['timestamp'] / 1e6 for sensor in self.sensors] 148 | timestamp_imgs_list.extend(sweep_ts) 149 | for sensor in self.sensors: 150 | results['lidar2img'].append(sweep[sensor]['lidar2img']) 151 | results['intrinsics'].append(sweep[sensor]['intrinsics']) 152 | # due to inverse convention in our repo 153 | results['extrinsics'].append(np.linalg.inv(sweep[sensor]['extrinsics']).T) 154 | results['img'] = sweep_imgs_list 155 | if self.time_range > 0: 156 | timestamp_imgs_list = [time / self.time_range for time in timestamp_imgs_list] 157 | results['timestamp'] = timestamp_imgs_list 158 | 159 | return results 160 | 161 | def __repr__(self): 162 | """str: Return a string that describes the module.""" 163 | repr_str = self.__class__.__name__ 164 | 
repr_str += f'(to_float32={self.to_float32}, ' 165 | repr_str += f"color_type='{self.color_type}')" 166 | return repr_str 167 | -------------------------------------------------------------------------------- /tools/analysis_tools/analyze_logs.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import json 4 | import numpy as np 5 | import seaborn as sns 6 | from collections import defaultdict 7 | from matplotlib import pyplot as plt 8 | 9 | 10 | def cal_train_time(log_dicts, args): 11 | for i, log_dict in enumerate(log_dicts): 12 | print(f'{"-" * 5}Analyze train time of {args.json_logs[i]}{"-" * 5}') 13 | all_times = [] 14 | for epoch in log_dict.keys(): 15 | if args.include_outliers: 16 | all_times.append(log_dict[epoch]['time']) 17 | else: 18 | all_times.append(log_dict[epoch]['time'][1:]) 19 | all_times = np.array(all_times) 20 | epoch_ave_time = all_times.mean(-1) 21 | slowest_epoch = epoch_ave_time.argmax() 22 | fastest_epoch = epoch_ave_time.argmin() 23 | std_over_epoch = epoch_ave_time.std() 24 | print(f'slowest epoch {slowest_epoch + 1}, ' 25 | f'average time is {epoch_ave_time[slowest_epoch]:.4f}') 26 | print(f'fastest epoch {fastest_epoch + 1}, ' 27 | f'average time is {epoch_ave_time[fastest_epoch]:.4f}') 28 | print(f'time std over epochs is {std_over_epoch:.4f}') 29 | print(f'average iter time: {np.mean(all_times):.4f} s/iter') 30 | print() 31 | 32 | 33 | def plot_curve(log_dicts, args): 34 | if args.backend is not None: 35 | plt.switch_backend(args.backend) 36 | sns.set_style(args.style) 37 | # if legend is None, use {filename}_{key} as legend 38 | legend = args.legend 39 | if legend is None: 40 | legend = [] 41 | for json_log in args.json_logs: 42 | for metric in args.keys: 43 | legend.append(f'{json_log}_{metric}') 44 | assert len(legend) == (len(args.json_logs) * len(args.keys)) 45 | metrics = args.keys 46 | 47 | num_metrics = len(metrics) 48 | for i, log_dict in enumerate(log_dicts): 49 | epochs = list(log_dict.keys()) 50 | for j, metric in enumerate(metrics): 51 | print(f'plot curve of {args.json_logs[i]}, metric is {metric}') 52 | if metric not in log_dict[epochs[args.interval - 1]]: 53 | raise KeyError( 54 | f'{args.json_logs[i]} does not contain metric {metric}') 55 | 56 | if args.mode == 'eval': 57 | if min(epochs) == args.interval: 58 | x0 = args.interval 59 | else: 60 | # if current training is resumed from previous checkpoint 61 | # we lost information in early epochs 62 | # `xs` should start according to `min(epochs)` 63 | if min(epochs) % args.interval == 0: 64 | x0 = min(epochs) 65 | else: 66 | # find the first epoch that do eval 67 | x0 = min(epochs) + args.interval - \ 68 | min(epochs) % args.interval 69 | xs = np.arange(x0, max(epochs) + 1, args.interval) 70 | ys = [] 71 | for epoch in epochs[args.interval - 1::args.interval]: 72 | ys += log_dict[epoch][metric] 73 | 74 | # if training is aborted before eval of the last epoch 75 | # `xs` and `ys` will have different length and cause an error 76 | # check if `ys[-1]` is empty here 77 | if not log_dict[epoch][metric]: 78 | xs = xs[:-1] 79 | 80 | ax = plt.gca() 81 | ax.set_xticks(xs) 82 | plt.xlabel('epoch') 83 | plt.plot(xs, ys, label=legend[i * num_metrics + j], marker='o') 84 | else: 85 | xs = [] 86 | ys = [] 87 | num_iters_per_epoch = \ 88 | log_dict[epochs[args.interval-1]]['iter'][-1] 89 | for epoch in epochs[args.interval - 1::args.interval]: 90 | iters = log_dict[epoch]['iter'] 91 | if 
log_dict[epoch]['mode'][-1] == 'val': 92 | iters = iters[:-1] 93 | xs.append( 94 | np.array(iters) + (epoch - 1) * num_iters_per_epoch) 95 | ys.append(np.array(log_dict[epoch][metric][:len(iters)])) 96 | xs = np.concatenate(xs) 97 | ys = np.concatenate(ys) 98 | plt.xlabel('iter') 99 | plt.plot( 100 | xs, ys, label=legend[i * num_metrics + j], linewidth=0.5) 101 | plt.legend() 102 | if args.title is not None: 103 | plt.title(args.title) 104 | if args.out is None: 105 | plt.show() 106 | else: 107 | print(f'save curve to: {args.out}') 108 | plt.savefig(args.out) 109 | plt.cla() 110 | 111 | 112 | def add_plot_parser(subparsers): 113 | parser_plt = subparsers.add_parser( 114 | 'plot_curve', help='parser for plotting curves') 115 | parser_plt.add_argument( 116 | 'json_logs', 117 | type=str, 118 | nargs='+', 119 | help='path of train log in json format') 120 | parser_plt.add_argument( 121 | '--keys', 122 | type=str, 123 | nargs='+', 124 | default=['mAP_0.25'], 125 | help='the metric that you want to plot') 126 | parser_plt.add_argument('--title', type=str, help='title of figure') 127 | parser_plt.add_argument( 128 | '--legend', 129 | type=str, 130 | nargs='+', 131 | default=None, 132 | help='legend of each plot') 133 | parser_plt.add_argument( 134 | '--backend', type=str, default=None, help='backend of plt') 135 | parser_plt.add_argument( 136 | '--style', type=str, default='dark', help='style of plt') 137 | parser_plt.add_argument('--out', type=str, default=None) 138 | parser_plt.add_argument('--mode', type=str, default='train') 139 | parser_plt.add_argument('--interval', type=int, default=1) 140 | 141 | 142 | def add_time_parser(subparsers): 143 | parser_time = subparsers.add_parser( 144 | 'cal_train_time', 145 | help='parser for computing the average time per training iteration') 146 | parser_time.add_argument( 147 | 'json_logs', 148 | type=str, 149 | nargs='+', 150 | help='path of train log in json format') 151 | parser_time.add_argument( 152 | '--include-outliers', 153 | action='store_true', 154 | help='include the first value of every epoch when computing ' 155 | 'the average time') 156 | 157 | 158 | def parse_args(): 159 | parser = argparse.ArgumentParser(description='Analyze Json Log') 160 | # currently only support plot curve and calculate average train time 161 | subparsers = parser.add_subparsers(dest='task', help='task parser') 162 | add_plot_parser(subparsers) 163 | add_time_parser(subparsers) 164 | args = parser.parse_args() 165 | return args 166 | 167 | 168 | def load_json_logs(json_logs): 169 | # load and convert json_logs to log_dict, key is epoch, value is a sub dict 170 | # keys of sub dict is different metrics, e.g. 
memory, bbox_mAP 171 | # value of sub dict is a list of corresponding values of all iterations 172 | log_dicts = [dict() for _ in json_logs] 173 | for json_log, log_dict in zip(json_logs, log_dicts): 174 | with open(json_log, 'r') as log_file: 175 | for line in log_file: 176 | log = json.loads(line.strip()) 177 | # skip lines without `epoch` field 178 | if 'epoch' not in log: 179 | continue 180 | epoch = log.pop('epoch') 181 | if epoch not in log_dict: 182 | log_dict[epoch] = defaultdict(list) 183 | for k, v in log.items(): 184 | log_dict[epoch][k].append(v) 185 | return log_dicts 186 | 187 | 188 | def main(): 189 | args = parse_args() 190 | 191 | json_logs = args.json_logs 192 | for json_log in json_logs: 193 | assert json_log.endswith('.json') 194 | 195 | log_dicts = load_json_logs(json_logs) 196 | 197 | eval(args.task)(log_dicts, args) 198 | 199 | 200 | if __name__ == '__main__': 201 | main() 202 | -------------------------------------------------------------------------------- /tools/data_converter/nuimage_converter.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import base64 4 | import mmcv 5 | import numpy as np 6 | from nuimages import NuImages 7 | from nuimages.utils.utils import mask_decode, name_to_index_mapping 8 | from os import path as osp 9 | 10 | nus_categories = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle', 11 | 'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone', 12 | 'barrier') 13 | 14 | NAME_MAPPING = { 15 | 'movable_object.barrier': 'barrier', 16 | 'vehicle.bicycle': 'bicycle', 17 | 'vehicle.bus.bendy': 'bus', 18 | 'vehicle.bus.rigid': 'bus', 19 | 'vehicle.car': 'car', 20 | 'vehicle.construction': 'construction_vehicle', 21 | 'vehicle.motorcycle': 'motorcycle', 22 | 'human.pedestrian.adult': 'pedestrian', 23 | 'human.pedestrian.child': 'pedestrian', 24 | 'human.pedestrian.construction_worker': 'pedestrian', 25 | 'human.pedestrian.police_officer': 'pedestrian', 26 | 'movable_object.trafficcone': 'traffic_cone', 27 | 'vehicle.trailer': 'trailer', 28 | 'vehicle.truck': 'truck', 29 | } 30 | 31 | 32 | def parse_args(): 33 | parser = argparse.ArgumentParser(description='Data converter arg parser') 34 | parser.add_argument( 35 | '--data-root', 36 | type=str, 37 | default='./data/nuimages', 38 | help='specify the root path of dataset') 39 | parser.add_argument( 40 | '--version', 41 | type=str, 42 | nargs='+', 43 | default=['v1.0-mini'], 44 | required=False, 45 | help='specify the dataset version') 46 | parser.add_argument( 47 | '--out-dir', 48 | type=str, 49 | default='./data/nuimages/annotations/', 50 | required=False, 51 | help='path to save the exported json') 52 | parser.add_argument( 53 | '--nproc', 54 | type=int, 55 | default=4, 56 | required=False, 57 | help='workers to process semantic masks') 58 | parser.add_argument('--extra-tag', type=str, default='nuimages') 59 | args = parser.parse_args() 60 | return args 61 | 62 | 63 | def get_img_annos(nuim, img_info, cat2id, out_dir, data_root, seg_root): 64 | """Get semantic segmentation map for an image. 65 | 66 | Args: 67 | nuim (obj:`NuImages`): NuImages dataset object 68 | img_info (dict): Meta information of img 69 | 70 | Returns: 71 | np.ndarray: Semantic segmentation map of the image 72 | """ 73 | sd_token = img_info['token'] 74 | image_id = img_info['id'] 75 | name_to_index = name_to_index_mapping(nuim.category) 76 | 77 | # Get image data. 
78 | width, height = img_info['width'], img_info['height'] 79 | semseg_mask = np.zeros((height, width)).astype('uint8') 80 | 81 | # Load stuff / surface regions. 82 | surface_anns = [ 83 | o for o in nuim.surface_ann if o['sample_data_token'] == sd_token 84 | ] 85 | 86 | # Draw stuff / surface regions. 87 | for ann in surface_anns: 88 | # Get color and mask. 89 | category_token = ann['category_token'] 90 | category_name = nuim.get('category', category_token)['name'] 91 | if ann['mask'] is None: 92 | continue 93 | mask = mask_decode(ann['mask']) 94 | 95 | # Draw mask for semantic segmentation. 96 | semseg_mask[mask == 1] = name_to_index[category_name] 97 | 98 | # Load object instances. 99 | object_anns = [ 100 | o for o in nuim.object_ann if o['sample_data_token'] == sd_token 101 | ] 102 | 103 | # Sort by token to ensure that objects always appear in the 104 | # instance mask in the same order. 105 | object_anns = sorted(object_anns, key=lambda k: k['token']) 106 | 107 | # Draw object instances. 108 | # The 0 index is reserved for background; thus, the instances 109 | # should start from index 1. 110 | annotations = [] 111 | for i, ann in enumerate(object_anns, start=1): 112 | # Get color, box, mask and name. 113 | category_token = ann['category_token'] 114 | category_name = nuim.get('category', category_token)['name'] 115 | if ann['mask'] is None: 116 | continue 117 | mask = mask_decode(ann['mask']) 118 | 119 | # Draw masks for semantic segmentation and instance segmentation. 120 | semseg_mask[mask == 1] = name_to_index[category_name] 121 | 122 | if category_name in NAME_MAPPING: 123 | cat_name = NAME_MAPPING[category_name] 124 | cat_id = cat2id[cat_name] 125 | 126 | x_min, y_min, x_max, y_max = ann['bbox'] 127 | # encode calibrated instance mask 128 | mask_anno = dict() 129 | mask_anno['counts'] = base64.b64decode( 130 | ann['mask']['counts']).decode() 131 | mask_anno['size'] = ann['mask']['size'] 132 | 133 | data_anno = dict( 134 | image_id=image_id, 135 | category_id=cat_id, 136 | bbox=[x_min, y_min, x_max - x_min, y_max - y_min], 137 | area=(x_max - x_min) * (y_max - y_min), 138 | segmentation=mask_anno, 139 | iscrowd=0) 140 | annotations.append(data_anno) 141 | 142 | # after process, save semantic masks 143 | img_filename = img_info['file_name'] 144 | seg_filename = img_filename.replace('jpg', 'png') 145 | seg_filename = osp.join(seg_root, seg_filename) 146 | mmcv.imwrite(semseg_mask, seg_filename) 147 | return annotations, np.max(semseg_mask) 148 | 149 | 150 | def export_nuim_to_coco(nuim, data_root, out_dir, extra_tag, version, nproc): 151 | print('Process category information') 152 | categories = [] 153 | categories = [ 154 | dict(id=nus_categories.index(cat_name), name=cat_name) 155 | for cat_name in nus_categories 156 | ] 157 | cat2id = {k_v['name']: k_v['id'] for k_v in categories} 158 | 159 | images = [] 160 | print('Process image meta information...') 161 | for sample_info in mmcv.track_iter_progress(nuim.sample_data): 162 | if sample_info['is_key_frame']: 163 | img_idx = len(images) 164 | images.append( 165 | dict( 166 | id=img_idx, 167 | token=sample_info['token'], 168 | file_name=sample_info['filename'], 169 | width=sample_info['width'], 170 | height=sample_info['height'])) 171 | 172 | seg_root = f'{out_dir}semantic_masks' 173 | mmcv.mkdir_or_exist(seg_root) 174 | mmcv.mkdir_or_exist(osp.join(data_root, 'calibrated')) 175 | 176 | global process_img_anno 177 | 178 | def process_img_anno(img_info): 179 | single_img_annos, max_cls_id = get_img_annos(nuim, img_info, cat2id, 180 
| out_dir, data_root, 181 | seg_root) 182 | return single_img_annos, max_cls_id 183 | 184 | print('Process img annotations...') 185 | if nproc > 1: 186 | outputs = mmcv.track_parallel_progress( 187 | process_img_anno, images, nproc=nproc) 188 | else: 189 | outputs = [] 190 | for img_info in mmcv.track_iter_progress(images): 191 | outputs.append(process_img_anno(img_info)) 192 | 193 | # Determine the index of object annotation 194 | print('Process annotation information...') 195 | annotations = [] 196 | max_cls_ids = [] 197 | for single_img_annos, max_cls_id in outputs: 198 | max_cls_ids.append(max_cls_id) 199 | for img_anno in single_img_annos: 200 | img_anno.update(id=len(annotations)) 201 | annotations.append(img_anno) 202 | 203 | max_cls_id = max(max_cls_ids) 204 | print(f'Max ID of class in the semantic map: {max_cls_id}') 205 | 206 | coco_format_json = dict( 207 | images=images, annotations=annotations, categories=categories) 208 | 209 | mmcv.mkdir_or_exist(out_dir) 210 | out_file = osp.join(out_dir, f'{extra_tag}_{version}.json') 211 | print(f'Annotation dumped to {out_file}') 212 | mmcv.dump(coco_format_json, out_file) 213 | 214 | 215 | def main(): 216 | args = parse_args() 217 | for version in args.version: 218 | nuim = NuImages( 219 | dataroot=args.data_root, version=version, verbose=True, lazy=True) 220 | export_nuim_to_coco(nuim, args.data_root, args.out_dir, args.extra_tag, 221 | version, args.nproc) 222 | 223 | 224 | if __name__ == '__main__': 225 | main() 226 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/nuscenes_dataset.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2022 Toyota Research Institute, Dian Chen. All Rights Reserved. 3 | # ------------------------------------------------------------------------ 4 | # Copyright (c) 2022 megvii-model. All Rights Reserved. 5 | # ------------------------------------------------------------------------ 6 | # Modified from DETR3D (https://github.com/WangYueFt/detr3d) 7 | # Copyright (c) 2021 Wang, Yue 8 | # ------------------------------------------------------------------------ 9 | # Modified from mmdetection3d (https://github.com/open-mmlab/mmdetection3d) 10 | # Copyright (c) OpenMMLab. All rights reserved. 11 | # ------------------------------------------------------------------------ 12 | import numpy as np 13 | from mmdet.datasets import DATASETS 14 | from mmdet3d.datasets import NuScenesDataset 15 | 16 | 17 | @DATASETS.register_module() 18 | class CustomNuScenesDataset(NuScenesDataset): 19 | r"""NuScenes Dataset. 20 | This dataset only adds camera intrinsics and extrinsics to the results.
21 | """ 22 | 23 | def __init__(self, 24 | ann_file, 25 | pipeline=None, 26 | data_root=None, 27 | classes=None, 28 | load_interval=1, 29 | with_velocity=True, 30 | modality=None, 31 | box_type_3d='LiDAR', 32 | filter_empty_gt=True, 33 | test_mode=False, 34 | eval_version='detection_cvpr_2019', 35 | use_valid_flag=False): 36 | self.load_interval = load_interval 37 | self.use_valid_flag = use_valid_flag 38 | super().__init__( 39 | data_root=data_root, 40 | ann_file=ann_file, 41 | pipeline=pipeline, 42 | classes=classes, 43 | modality=modality, 44 | box_type_3d=box_type_3d, 45 | filter_empty_gt=filter_empty_gt, 46 | test_mode=test_mode) 47 | 48 | self.with_velocity = with_velocity 49 | self.eval_version = eval_version 50 | from nuscenes.eval.detection.config import config_factory 51 | self.eval_detection_configs = config_factory(self.eval_version) 52 | if self.modality is None: 53 | self.modality = dict( 54 | use_camera=False, 55 | use_lidar=True, 56 | use_radar=False, 57 | use_map=False, 58 | use_external=False, 59 | ) 60 | 61 | def _get_scene_mapping(self): 62 | scene_mapping = dict() 63 | for scene in self.nusc.scene: 64 | idx = 0 65 | sample_token = scene['first_sample_token'] 66 | scene_mapping[sample_token] = (scene['name'], idx) 67 | while sample_token is not '': 68 | idx += 1 69 | sample_token = self.nusc.get('sample', sample_token)['next'] 70 | scene_mapping[sample_token] = (scene['name'], idx) 71 | 72 | return scene_mapping 73 | 74 | def get_data_info(self, index): 75 | """Get data info according to the given index. 76 | Args: 77 | index (int): Index of the sample data to get. 78 | Returns: 79 | dict: Data information that will be passed to the data \ 80 | preprocessing pipelines. It includes the following keys: 81 | 82 | - sample_idx (str): Sample index. 83 | - pts_filename (str): Filename of point clouds. 84 | - sweeps (list[dict]): Infos of sweeps. 85 | - timestamp (float): Sample timestamp. 86 | - img_filename (str, optional): Image filename. 87 | - lidar2img (list[np.ndarray], optional): Transformations \ 88 | from lidar to different cameras. 89 | - ann_info (dict): Annotation info. 
90 | """ 91 | info = self.data_infos[index] 92 | # TODO: dirty work-around to use the pre-generated info files 93 | info['lidar_path'] = info['lidar_path'].replace('/data/Dataset/nuScenes', 'data/nuscenes') 94 | for sweep in info.get('cam_sweeps', []): 95 | for _, cam_info in sweep.items(): 96 | if isinstance(cam_info, dict): 97 | cam_info['data_path'] = cam_info['data_path'].replace('/data/Dataset/nuScenes', 'data/nuscenes') 98 | # standard protocal modified from SECOND.Pytorch 99 | input_dict = dict( 100 | sample_idx=info['token'], 101 | pts_filename=info['lidar_path'], 102 | sweeps=info.get('sweeps', []), # lidar sweeps 103 | cam_sweeps=info.get('cam_sweeps', []), # camera sweeps 104 | timestamp=info['timestamp'] / 1e6, 105 | ) 106 | 107 | if self.modality['use_camera']: 108 | image_paths = [] 109 | lidar2img_rts = [] 110 | intrinsics = [] 111 | extrinsics = [] 112 | img_timestamp = [] 113 | for cam_type, cam_info in info['cams'].items(): 114 | img_timestamp.append(cam_info['timestamp'] / 1e6) 115 | # TODO: dirty work-around to use the pre-generated info files 116 | cam_info['data_path'] = cam_info['data_path'].replace('/data/Dataset/nuScenes', 'data/nuscenes') 117 | image_paths.append(cam_info['data_path']) 118 | # obtain lidar to image transformation matrix 119 | lidar2cam_r = np.linalg.inv(cam_info['sensor2lidar_rotation']) 120 | lidar2cam_t = cam_info['sensor2lidar_translation'] @ lidar2cam_r.T 121 | lidar2cam_rt = np.eye(4) 122 | lidar2cam_rt[:3, :3] = lidar2cam_r.T 123 | lidar2cam_rt[3, :3] = -lidar2cam_t 124 | intrinsic = cam_info['cam_intrinsic'] 125 | viewpad = np.eye(4) 126 | viewpad[:intrinsic.shape[0], :intrinsic.shape[1]] = intrinsic 127 | lidar2img_rt = (viewpad @ lidar2cam_rt.T) 128 | # NOTE: transformation & points use column major multiplication, i.e., x' = Tx 129 | intrinsics.append(viewpad) 130 | extrinsics.append(np.linalg.inv(lidar2cam_rt.T)) 131 | lidar2img_rts.append(lidar2img_rt) 132 | 133 | input_dict.update( 134 | dict( 135 | img_timestamp=img_timestamp, 136 | img_filename=image_paths, 137 | lidar2img=lidar2img_rts, 138 | intrinsics=intrinsics, 139 | extrinsics=extrinsics)) 140 | 141 | if not self.test_mode: 142 | annos = self.get_ann_info(index) 143 | input_dict['ann_info'] = annos 144 | return input_dict 145 | 146 | def evaluate(self, 147 | results, 148 | metric=['bbox'], 149 | logger=None, 150 | jsonfile_prefix=None, 151 | result_names=['pts_bbox'], 152 | show=False, 153 | out_dir=None, 154 | pipeline=None): 155 | """Evaluation in nuScenes protocol. 156 | 157 | Args: 158 | results (list[dict]): Testing results of the dataset. 159 | metric (str | list[str]): Metrics to be evaluated. 160 | logger (logging.Logger | str | None): Logger used for printing 161 | related information during evaluation. Default: None. 162 | jsonfile_prefix (str | None): The prefix of json files. It includes 163 | the file path and the prefix of filename, e.g., "a/b/prefix". 164 | If not specified, a temp file will be created. Default: None. 165 | show (bool): Whether to visualize. 166 | Default: False. 167 | out_dir (str): Path to save the visualization results. 168 | Default: None. 169 | pipeline (list[dict], optional): raw data loading for showing. 170 | Default: None. 171 | 172 | Returns: 173 | dict[str, float]: Results of each evaluation metric. 
174 | """ 175 | results_dict = dict() 176 | if 'bbox' in metric: 177 | result_files, tmp_dir = self.format_results(results, jsonfile_prefix) 178 | 179 | if isinstance(result_files, dict): 180 | for name in result_names: 181 | print('Evaluating bboxes of {}'.format(name)) 182 | ret_dict = self._evaluate_single(result_files[name]) 183 | results_dict.update(ret_dict) 184 | elif isinstance(result_files, str): 185 | results_dict = self._evaluate_single(result_files) 186 | 187 | if tmp_dir is not None: 188 | tmp_dir.cleanup() 189 | 190 | if show: 191 | self.show(results, out_dir, pipeline=pipeline) 192 | 193 | return results_dict 194 | -------------------------------------------------------------------------------- /tools/misc/browse_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import numpy as np 4 | import warnings 5 | from mmcv import Config, DictAction, mkdir_or_exist, track_iter_progress 6 | from os import path as osp 7 | 8 | from mmdet3d.core.bbox import (Box3DMode, CameraInstance3DBoxes, Coord3DMode, 9 | DepthInstance3DBoxes, LiDARInstance3DBoxes) 10 | from mmdet3d.core.visualizer import (show_multi_modality_result, show_result, 11 | show_seg_result) 12 | from mmdet3d.datasets import build_dataset 13 | 14 | 15 | def parse_args(): 16 | parser = argparse.ArgumentParser(description='Browse a dataset') 17 | parser.add_argument('config', help='train config file path') 18 | parser.add_argument( 19 | '--skip-type', 20 | type=str, 21 | nargs='+', 22 | default=['Normalize'], 23 | help='skip some useless pipeline') 24 | parser.add_argument( 25 | '--output-dir', 26 | default=None, 27 | type=str, 28 | help='If there is no display interface, you can save it') 29 | parser.add_argument( 30 | '--task', 31 | type=str, 32 | choices=['det', 'seg', 'multi_modality-det', 'mono-det'], 33 | help='Determine the visualization method depending on the task.') 34 | parser.add_argument( 35 | '--online', 36 | action='store_true', 37 | help='Whether to perform online visualization. Note that you often ' 38 | 'need a monitor to do so.') 39 | parser.add_argument( 40 | '--cfg-options', 41 | nargs='+', 42 | action=DictAction, 43 | help='override some settings in the used config, the key-value pair ' 44 | 'in xxx=yyy format will be merged into config file. If the value to ' 45 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 46 | 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' 47 | 'Note that the quotation marks are necessary and that no white space ' 48 | 'is allowed.') 49 | args = parser.parse_args() 50 | return args 51 | 52 | 53 | def build_data_cfg(config_path, skip_type, cfg_options): 54 | """Build data config for loading visualization data.""" 55 | cfg = Config.fromfile(config_path) 56 | if cfg_options is not None: 57 | cfg.merge_from_dict(cfg_options) 58 | # import modules from string list. 
59 | if cfg.get('custom_imports', None): 60 | from mmcv.utils import import_modules_from_strings 61 | import_modules_from_strings(**cfg['custom_imports']) 62 | # extract inner dataset of `RepeatDataset` as `cfg.data.train` 63 | # so we don't need to worry about it later 64 | if cfg.data.train['type'] == 'RepeatDataset': 65 | cfg.data.train = cfg.data.train.dataset 66 | # use only first dataset for `ConcatDataset` 67 | if cfg.data.train['type'] == 'ConcatDataset': 68 | cfg.data.train = cfg.data.train.datasets[0] 69 | train_data_cfg = cfg.data.train 70 | # eval_pipeline purely consists of loading functions 71 | # use eval_pipeline for data loading 72 | train_data_cfg['pipeline'] = [ 73 | x for x in cfg.eval_pipeline if x['type'] not in skip_type 74 | ] 75 | 76 | return cfg 77 | 78 | 79 | def to_depth_mode(points, bboxes): 80 | """Convert points and bboxes to Depth Coord and Depth Box mode.""" 81 | if points is not None: 82 | points = Coord3DMode.convert_point(points.copy(), Coord3DMode.LIDAR, 83 | Coord3DMode.DEPTH) 84 | if bboxes is not None: 85 | bboxes = Box3DMode.convert(bboxes.clone(), Box3DMode.LIDAR, 86 | Box3DMode.DEPTH) 87 | return points, bboxes 88 | 89 | 90 | def show_det_data(idx, dataset, out_dir, filename, show=False): 91 | """Visualize 3D point cloud and 3D bboxes.""" 92 | example = dataset.prepare_train_data(idx) 93 | points = example['points']._data.numpy() 94 | gt_bboxes = dataset.get_ann_info(idx)['gt_bboxes_3d'].tensor 95 | if dataset.box_mode_3d != Box3DMode.DEPTH: 96 | points, gt_bboxes = to_depth_mode(points, gt_bboxes) 97 | show_result( 98 | points, 99 | gt_bboxes.clone(), 100 | None, 101 | out_dir, 102 | filename, 103 | show=show, 104 | snapshot=True) 105 | 106 | 107 | def show_seg_data(idx, dataset, out_dir, filename, show=False): 108 | """Visualize 3D point cloud and segmentation mask.""" 109 | example = dataset.prepare_train_data(idx) 110 | points = example['points']._data.numpy() 111 | gt_seg = example['pts_semantic_mask']._data.numpy() 112 | show_seg_result( 113 | points, 114 | gt_seg.copy(), 115 | None, 116 | out_dir, 117 | filename, 118 | np.array(dataset.PALETTE), 119 | dataset.ignore_index, 120 | show=show, 121 | snapshot=True) 122 | 123 | 124 | def show_proj_bbox_img(idx, 125 | dataset, 126 | out_dir, 127 | filename, 128 | show=False, 129 | is_nus_mono=False): 130 | """Visualize 3D bboxes on 2D image by projection.""" 131 | try: 132 | example = dataset.prepare_train_data(idx) 133 | except AttributeError: # for Mono-3D datasets 134 | example = dataset.prepare_train_img(idx) 135 | gt_bboxes = dataset.get_ann_info(idx)['gt_bboxes_3d'] 136 | img_metas = example['img_metas']._data 137 | img = example['img']._data.numpy() 138 | # need to transpose channel to first dim 139 | img = img.transpose(1, 2, 0) 140 | # no 3D gt bboxes, just show img 141 | if gt_bboxes.tensor.shape[0] == 0: 142 | gt_bboxes = None 143 | if isinstance(gt_bboxes, DepthInstance3DBoxes): 144 | show_multi_modality_result( 145 | img, 146 | gt_bboxes, 147 | None, 148 | None, 149 | out_dir, 150 | filename, 151 | box_mode='depth', 152 | img_metas=img_metas, 153 | show=show) 154 | elif isinstance(gt_bboxes, LiDARInstance3DBoxes): 155 | show_multi_modality_result( 156 | img, 157 | gt_bboxes, 158 | None, 159 | img_metas['lidar2img'], 160 | out_dir, 161 | filename, 162 | box_mode='lidar', 163 | img_metas=img_metas, 164 | show=show) 165 | elif isinstance(gt_bboxes, CameraInstance3DBoxes): 166 | show_multi_modality_result( 167 | img, 168 | gt_bboxes, 169 | None, 170 | img_metas['cam2img'], 171 | out_dir, 
172 | filename, 173 | box_mode='camera', 174 | img_metas=img_metas, 175 | show=show) 176 | else: 177 | # can't project, just show img 178 | warnings.warn( 179 | f'unrecognized gt box type {type(gt_bboxes)}, only show image') 180 | show_multi_modality_result( 181 | img, None, None, None, out_dir, filename, show=show) 182 | 183 | 184 | def main(): 185 | args = parse_args() 186 | 187 | if args.output_dir is not None: 188 | mkdir_or_exist(args.output_dir) 189 | 190 | cfg = build_data_cfg(args.config, args.skip_type, args.cfg_options) 191 | try: 192 | dataset = build_dataset( 193 | cfg.data.train, default_args=dict(filter_empty_gt=False)) 194 | except TypeError: # seg dataset doesn't have `filter_empty_gt` key 195 | dataset = build_dataset(cfg.data.train) 196 | data_infos = dataset.data_infos 197 | dataset_type = cfg.dataset_type 198 | 199 | # configure visualization mode 200 | vis_task = args.task # 'det', 'seg', 'multi_modality-det', 'mono-det' 201 | 202 | for idx, data_info in enumerate(track_iter_progress(data_infos)): 203 | if dataset_type in ['KittiDataset', 'WaymoDataset']: 204 | data_path = data_info['point_cloud']['velodyne_path'] 205 | elif dataset_type in [ 206 | 'ScanNetDataset', 'SUNRGBDDataset', 'ScanNetSegDataset', 207 | 'S3DISSegDataset', 'S3DISDataset' 208 | ]: 209 | data_path = data_info['pts_path'] 210 | elif dataset_type in ['NuScenesDataset', 'LyftDataset']: 211 | data_path = data_info['lidar_path'] 212 | elif dataset_type in ['NuScenesMonoDataset']: 213 | data_path = data_info['file_name'] 214 | else: 215 | raise NotImplementedError( 216 | f'unsupported dataset type {dataset_type}') 217 | 218 | file_name = osp.splitext(osp.basename(data_path))[0] 219 | 220 | if vis_task in ['det', 'multi_modality-det']: 221 | # show 3D bboxes on 3D point clouds 222 | show_det_data( 223 | idx, dataset, args.output_dir, file_name, show=args.online) 224 | if vis_task in ['multi_modality-det', 'mono-det']: 225 | # project 3D bboxes to 2D image 226 | show_proj_bbox_img( 227 | idx, 228 | dataset, 229 | args.output_dir, 230 | file_name, 231 | show=args.online, 232 | is_nus_mono=(dataset_type == 'NuScenesMonoDataset')) 233 | elif vis_task in ['seg']: 234 | # show 3D segmentation mask on 3D point clouds 235 | show_seg_data( 236 | idx, dataset, args.output_dir, file_name, show=args.online) 237 | 238 | 239 | if __name__ == '__main__': 240 | main() 241 | -------------------------------------------------------------------------------- /tools/data_converter/sunrgbd_data_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import mmcv 3 | import numpy as np 4 | from concurrent import futures as futures 5 | from os import path as osp 6 | from scipy import io as sio 7 | 8 | 9 | def random_sampling(points, num_points, replace=None, return_choices=False): 10 | """Random sampling. 11 | 12 | Sampling point cloud to a certain number of points. 13 | 14 | Args: 15 | points (ndarray): Point cloud. 16 | num_points (int): The number of samples. 17 | replace (bool): Whether the sample is with or without replacement. 18 | return_choices (bool): Whether to return choices. 19 | 20 | Returns: 21 | points (ndarray): Point cloud after sampling. 
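# --- Illustrative sketch: up- and down-sampling a toy cloud ---
# When the cloud has fewer points than requested, the body below falls back
# to drawing with replacement (replace is inferred from the sizes).
import numpy as np

cloud = np.random.rand(1000, 6)                    # toy xyz+rgb cloud
up = random_sampling(cloud, 50000)                 # 1000 < 50000 -> replace=True
down, idx = random_sampling(cloud, 256, return_choices=True)
print(up.shape, down.shape, idx.shape)             # (50000, 6) (256, 6) (256,)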
22 | """ 23 | 24 | if replace is None: 25 | replace = (points.shape[0] < num_points) 26 | choices = np.random.choice(points.shape[0], num_points, replace=replace) 27 | if return_choices: 28 | return points[choices], choices 29 | else: 30 | return points[choices] 31 | 32 | 33 | class SUNRGBDInstance(object): 34 | 35 | def __init__(self, line): 36 | data = line.split(' ') 37 | data[1:] = [float(x) for x in data[1:]] 38 | self.classname = data[0] 39 | self.xmin = data[1] 40 | self.ymin = data[2] 41 | self.xmax = data[1] + data[3] 42 | self.ymax = data[2] + data[4] 43 | self.box2d = np.array([self.xmin, self.ymin, self.xmax, self.ymax]) 44 | self.centroid = np.array([data[5], data[6], data[7]]) 45 | self.w = data[8] 46 | self.l = data[9] # noqa: E741 47 | self.h = data[10] 48 | self.orientation = np.zeros((3, )) 49 | self.orientation[0] = data[11] 50 | self.orientation[1] = data[12] 51 | self.heading_angle = -1 * np.arctan2(self.orientation[1], 52 | self.orientation[0]) 53 | self.box3d = np.concatenate([ 54 | self.centroid, 55 | np.array([self.l * 2, self.w * 2, self.h * 2, self.heading_angle]) 56 | ]) 57 | 58 | 59 | class SUNRGBDData(object): 60 | """SUNRGBD data. 61 | 62 | Generate scannet infos for sunrgbd_converter. 63 | 64 | Args: 65 | root_path (str): Root path of the raw data. 66 | split (str): Set split type of the data. Default: 'train'. 67 | use_v1 (bool): Whether to use v1. Default: False. 68 | """ 69 | 70 | def __init__(self, root_path, split='train', use_v1=False): 71 | self.root_dir = root_path 72 | self.split = split 73 | self.split_dir = osp.join(root_path, 'sunrgbd_trainval') 74 | self.classes = [ 75 | 'bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser', 76 | 'night_stand', 'bookshelf', 'bathtub' 77 | ] 78 | self.cat2label = {cat: self.classes.index(cat) for cat in self.classes} 79 | self.label2cat = { 80 | label: self.classes[label] 81 | for label in range(len(self.classes)) 82 | } 83 | assert split in ['train', 'val', 'test'] 84 | split_file = osp.join(self.split_dir, f'{split}_data_idx.txt') 85 | mmcv.check_file_exist(split_file) 86 | self.sample_id_list = map(int, mmcv.list_from_file(split_file)) 87 | self.image_dir = osp.join(self.split_dir, 'image') 88 | self.calib_dir = osp.join(self.split_dir, 'calib') 89 | self.depth_dir = osp.join(self.split_dir, 'depth') 90 | if use_v1: 91 | self.label_dir = osp.join(self.split_dir, 'label_v1') 92 | else: 93 | self.label_dir = osp.join(self.split_dir, 'label') 94 | 95 | def __len__(self): 96 | return len(self.sample_id_list) 97 | 98 | def get_image(self, idx): 99 | img_filename = osp.join(self.image_dir, f'{idx:06d}.jpg') 100 | return mmcv.imread(img_filename) 101 | 102 | def get_image_shape(self, idx): 103 | image = self.get_image(idx) 104 | return np.array(image.shape[:2], dtype=np.int32) 105 | 106 | def get_depth(self, idx): 107 | depth_filename = osp.join(self.depth_dir, f'{idx:06d}.mat') 108 | depth = sio.loadmat(depth_filename)['instance'] 109 | return depth 110 | 111 | def get_calibration(self, idx): 112 | calib_filepath = osp.join(self.calib_dir, f'{idx:06d}.txt') 113 | lines = [line.rstrip() for line in open(calib_filepath)] 114 | Rt = np.array([float(x) for x in lines[0].split(' ')]) 115 | Rt = np.reshape(Rt, (3, 3), order='F').astype(np.float32) 116 | K = np.array([float(x) for x in lines[1].split(' ')]) 117 | K = np.reshape(K, (3, 3), order='F').astype(np.float32) 118 | return K, Rt 119 | 120 | def get_label_objects(self, idx): 121 | label_filename = osp.join(self.label_dir, f'{idx:06d}.txt') 122 | lines = 
[line.rstrip() for line in open(label_filename)] 123 | objects = [SUNRGBDInstance(line) for line in lines] 124 | return objects 125 | 126 | def get_infos(self, num_workers=4, has_label=True, sample_id_list=None): 127 | """Get data infos. 128 | 129 | This method gets information from the raw data. 130 | 131 | Args: 132 | num_workers (int): Number of threads to be used. Default: 4. 133 | has_label (bool): Whether the data has label. Default: True. 134 | sample_id_list (list[int]): Index list of the sample. 135 | Default: None. 136 | 137 | Returns: 138 | infos (list[dict]): Information of the raw data. 139 | """ 140 | 141 | def process_single_scene(sample_idx): 142 | print(f'{self.split} sample_idx: {sample_idx}') 143 | # convert depth to points 144 | SAMPLE_NUM = 50000 145 | # TODO: Check whether can move the point 146 | # sampling process during training. 147 | pc_upright_depth = self.get_depth(sample_idx) 148 | pc_upright_depth_subsampled = random_sampling( 149 | pc_upright_depth, SAMPLE_NUM) 150 | 151 | info = dict() 152 | pc_info = {'num_features': 6, 'lidar_idx': sample_idx} 153 | info['point_cloud'] = pc_info 154 | 155 | mmcv.mkdir_or_exist(osp.join(self.root_dir, 'points')) 156 | pc_upright_depth_subsampled.tofile( 157 | osp.join(self.root_dir, 'points', f'{sample_idx:06d}.bin')) 158 | 159 | info['pts_path'] = osp.join('points', f'{sample_idx:06d}.bin') 160 | img_path = osp.join('image', f'{sample_idx:06d}.jpg') 161 | image_info = { 162 | 'image_idx': sample_idx, 163 | 'image_shape': self.get_image_shape(sample_idx), 164 | 'image_path': img_path 165 | } 166 | info['image'] = image_info 167 | 168 | K, Rt = self.get_calibration(sample_idx) 169 | calib_info = {'K': K, 'Rt': Rt} 170 | info['calib'] = calib_info 171 | 172 | if has_label: 173 | obj_list = self.get_label_objects(sample_idx) 174 | annotations = {} 175 | annotations['gt_num'] = len([ 176 | obj.classname for obj in obj_list 177 | if obj.classname in self.cat2label.keys() 178 | ]) 179 | if annotations['gt_num'] != 0: 180 | annotations['name'] = np.array([ 181 | obj.classname for obj in obj_list 182 | if obj.classname in self.cat2label.keys() 183 | ]) 184 | annotations['bbox'] = np.concatenate([ 185 | obj.box2d.reshape(1, 4) for obj in obj_list 186 | if obj.classname in self.cat2label.keys() 187 | ], 188 | axis=0) 189 | annotations['location'] = np.concatenate([ 190 | obj.centroid.reshape(1, 3) for obj in obj_list 191 | if obj.classname in self.cat2label.keys() 192 | ], 193 | axis=0) 194 | annotations['dimensions'] = 2 * np.array([ 195 | [obj.l, obj.w, obj.h] for obj in obj_list 196 | if obj.classname in self.cat2label.keys() 197 | ]) # lwh (depth) format 198 | annotations['rotation_y'] = np.array([ 199 | obj.heading_angle for obj in obj_list 200 | if obj.classname in self.cat2label.keys() 201 | ]) 202 | annotations['index'] = np.arange( 203 | len(obj_list), dtype=np.int32) 204 | annotations['class'] = np.array([ 205 | self.cat2label[obj.classname] for obj in obj_list 206 | if obj.classname in self.cat2label.keys() 207 | ]) 208 | annotations['gt_boxes_upright_depth'] = np.stack( 209 | [ 210 | obj.box3d for obj in obj_list 211 | if obj.classname in self.cat2label.keys() 212 | ], 213 | axis=0) # (K,8) 214 | info['annos'] = annotations 215 | return info 216 | 217 | sample_id_list = sample_id_list if \ 218 | sample_id_list is not None else self.sample_id_list 219 | with futures.ThreadPoolExecutor(num_workers) as executor: 220 | infos = executor.map(process_single_scene, sample_id_list) 221 | return list(infos) 222 | 
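# --- Illustrative sketch: parsing one (made-up) SUN RGB-D label line ---
# SUNRGBDInstance above expects: classname, 2D box as (xmin, ymin, w, h),
# centroid, half-sizes (w, l, h) and the in-plane orientation vector.
toy_line = 'bed 100.0 80.0 60.0 40.0 1.20 3.40 0.50 0.80 1.00 0.45 0.71 0.70'
toy_inst = SUNRGBDInstance(toy_line)
# toy_inst.box2d         -> [100.  80. 160. 120.]
# toy_inst.heading_angle -> -arctan2(0.70, 0.71) ~ -0.78 rad
# toy_inst.box3d         -> [1.2, 3.4, 0.5, 2.0, 1.6, 0.9, -0.78]  (x, y, z, 2l, 2w, 2h, yaw)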
-------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/coders/nms_free_coder.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2022 Toyota Research Institute, Dian Chen. All Rights Reserved. 3 | # ------------------------------------------------------------------------ 4 | # Copyright (c) 2021 megvii-model. All Rights Reserved. 5 | # ------------------------------------------------------------------------ 6 | # Modified from DETR3D (https://github.com/WangYueFt/detr3d) 7 | # Copyright (c) 2021 Wang, Yue 8 | # ------------------------------------------------------------------------ 9 | # Modified from mmdetection3d (https://github.com/open-mmlab/mmdetection3d) 10 | # Copyright (c) OpenMMLab. All rights reserved. 11 | # ------------------------------------------------------------------------ 12 | import torch 13 | 14 | from mmdet.core.bbox import BaseBBoxCoder 15 | from mmdet.core.bbox.builder import BBOX_CODERS 16 | from projects.mmdet3d_plugin.core.bbox.util import denormalize_bbox 17 | import torch.nn.functional as F 18 | 19 | 20 | @BBOX_CODERS.register_module() 21 | class NMSFreeCoder(BaseBBoxCoder): 22 | """Bbox coder for NMS-free detector. 23 | Args: 24 | pc_range (list[float]): Range of point cloud. 25 | post_center_range (list[float]): Limit of the center. 26 | Default: None. 27 | max_num (int): Max number to be kept. Default: 100. 28 | score_threshold (float): Threshold to filter boxes based on score. 29 | Default: None. 30 | code_size (int): Code size of bboxes. Default: 9 31 | """ 32 | 33 | def __init__(self, 34 | pc_range, 35 | voxel_size=None, 36 | post_center_range=None, 37 | max_num=100, 38 | score_threshold=None, 39 | num_classes=10): 40 | 41 | self.pc_range = pc_range 42 | self.voxel_size = voxel_size 43 | self.post_center_range = post_center_range 44 | self.max_num = max_num 45 | self.score_threshold = score_threshold 46 | self.num_classes = num_classes 47 | 48 | def encode(self): 49 | pass 50 | 51 | def decode_single(self, cls_scores, bbox_preds): 52 | """Decode bboxes. 53 | Args: 54 | cls_scores (Tensor): Outputs from the classification head, \ 55 | shape [num_query, cls_out_channels]. Note \ 56 | cls_out_channels should includes background. 57 | bbox_preds (Tensor): Outputs from the regression \ 58 | head with normalized coordinate format (cx, cy, w, l, cz, h, rot_sine, rot_cosine, vx, vy). \ 59 | Shape [num_query, 9]. 60 | Returns: 61 | list[dict]: Decoded boxes. 
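# --- Illustrative sketch: the flattened top-k used below ---
# Scores are flattened over (query, class); modulo and integer division then
# recover the class id and the query index. Toy run with 4 queries, 3 classes:
import torch

cls_scores = torch.randn(4, 3)                     # [num_query, cls_out_channels]
scores, indexs = cls_scores.sigmoid().view(-1).topk(5)
labels = indexs % 3                                # class id of each pick
bbox_index = indexs // 3                           # query that produced it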
62 | """ 63 | max_num = self.max_num 64 | 65 | cls_scores = cls_scores.sigmoid() 66 | scores, indexs = cls_scores.view(-1).topk(max_num) 67 | labels = indexs % self.num_classes 68 | bbox_index = indexs // self.num_classes 69 | bbox_preds = bbox_preds[bbox_index] 70 | 71 | final_box_preds = denormalize_bbox(bbox_preds, self.pc_range) 72 | final_scores = scores 73 | final_preds = labels 74 | 75 | # use score threshold 76 | if self.score_threshold is not None: 77 | thresh_mask = final_scores > self.score_threshold 78 | if self.post_center_range is not None: 79 | self.post_center_range = torch.tensor(self.post_center_range, device=scores.device) 80 | 81 | mask = (final_box_preds[..., :3] >= self.post_center_range[:3]).all(1) 82 | mask &= (final_box_preds[..., :3] <= self.post_center_range[3:]).all(1) 83 | 84 | if self.score_threshold: 85 | mask &= thresh_mask 86 | 87 | boxes3d = final_box_preds[mask] 88 | scores = final_scores[mask] 89 | labels = final_preds[mask] 90 | predictions_dict = {'bboxes': boxes3d, 'scores': scores, 'labels': labels} 91 | 92 | else: 93 | raise NotImplementedError('Need to reorganize output as a batch, only ' 94 | 'support post_center_range is not None for now!') 95 | return predictions_dict 96 | 97 | def decode(self, preds_dicts): 98 | """Decode bboxes. 99 | Args: 100 | all_cls_scores (Tensor): Outputs from the classification head, \ 101 | shape [nb_dec, bs, num_query, cls_out_channels]. Note \ 102 | cls_out_channels should includes background. 103 | all_bbox_preds (Tensor): Sigmoid outputs from the regression \ 104 | head with normalized coordinate format (cx, cy, w, l, cz, h, rot_sine, rot_cosine, vx, vy). \ 105 | Shape [nb_dec, bs, num_query, 9]. 106 | Returns: 107 | list[dict]: Decoded boxes. 108 | """ 109 | all_cls_scores = preds_dicts['all_cls_scores'][-1] 110 | all_bbox_preds = preds_dicts['all_bbox_preds'][-1] 111 | 112 | batch_size = all_cls_scores.size()[0] 113 | predictions_list = [] 114 | for i in range(batch_size): 115 | predictions_list.append(self.decode_single(all_cls_scores[i], all_bbox_preds[i])) 116 | return predictions_list 117 | 118 | 119 | @BBOX_CODERS.register_module() 120 | class NMSFreeClsCoder(BaseBBoxCoder): 121 | """Bbox coder for NMS-free detector. 122 | Args: 123 | pc_range (list[float]): Range of point cloud. 124 | post_center_range (list[float]): Limit of the center. 125 | Default: None. 126 | max_num (int): Max number to be kept. Default: 100. 127 | score_threshold (float): Threshold to filter boxes based on score. 128 | Default: None. 129 | code_size (int): Code size of bboxes. Default: 9 130 | """ 131 | 132 | def __init__(self, 133 | pc_range, 134 | voxel_size=None, 135 | post_center_range=None, 136 | max_num=100, 137 | score_threshold=None, 138 | num_classes=10): 139 | 140 | self.pc_range = pc_range 141 | self.voxel_size = voxel_size 142 | self.post_center_range = post_center_range 143 | self.max_num = max_num 144 | self.score_threshold = score_threshold 145 | self.num_classes = num_classes 146 | 147 | def encode(self): 148 | pass 149 | 150 | def decode_single(self, cls_scores, bbox_preds): 151 | """Decode bboxes. 152 | Args: 153 | cls_scores (Tensor): Outputs from the classification head, \ 154 | shape [num_query, cls_out_channels]. Note \ 155 | cls_out_channels should includes background. 156 | bbox_preds (Tensor): Outputs from the regression \ 157 | head with normalized coordinate format (cx, cy, w, l, cz, h, rot_sine, rot_cosine, vx, vy). \ 158 | Shape [num_query, 9]. 159 | Returns: 160 | list[dict]: Decoded boxes. 
161 | """ 162 | max_num = self.max_num 163 | 164 | # cls_scores = cls_scores.sigmoid() 165 | # scores, indexs = cls_scores.view(-1).topk(max_num) 166 | # labels = indexs % self.num_classes 167 | # bbox_index = indexs // self.num_classes 168 | # bbox_preds = bbox_preds[bbox_index] 169 | 170 | cls_scores, labels = F.softmax(cls_scores, dim=-1)[..., :-1].max(-1) 171 | scores, indexs = cls_scores.view(-1).topk(max_num) 172 | labels = labels[indexs] 173 | bbox_preds = bbox_preds[indexs] 174 | 175 | final_box_preds = denormalize_bbox(bbox_preds, self.pc_range) 176 | final_scores = scores 177 | final_preds = labels 178 | 179 | # use score threshold 180 | if self.score_threshold is not None: 181 | thresh_mask = final_scores > self.score_threshold 182 | if self.post_center_range is not None: 183 | self.post_center_range = torch.tensor(self.post_center_range, device=scores.device) 184 | 185 | mask = (final_box_preds[..., :3] >= self.post_center_range[:3]).all(1) 186 | mask &= (final_box_preds[..., :3] <= self.post_center_range[3:]).all(1) 187 | 188 | if self.score_threshold: 189 | mask &= thresh_mask 190 | 191 | boxes3d = final_box_preds[mask] 192 | scores = final_scores[mask] 193 | labels = final_preds[mask] 194 | predictions_dict = {'bboxes': boxes3d, 'scores': scores, 'labels': labels} 195 | 196 | else: 197 | raise NotImplementedError('Need to reorganize output as a batch, only ' 198 | 'support post_center_range is not None for now!') 199 | return predictions_dict 200 | 201 | def decode(self, preds_dicts): 202 | """Decode bboxes. 203 | Args: 204 | all_cls_scores (Tensor): Outputs from the classification head, \ 205 | shape [nb_dec, bs, num_query, cls_out_channels]. Note \ 206 | cls_out_channels should includes background. 207 | all_bbox_preds (Tensor): Sigmoid outputs from the regression \ 208 | head with normalized coordinate format (cx, cy, w, l, cz, h, rot_sine, rot_cosine, vx, vy). \ 209 | Shape [nb_dec, bs, num_query, 9]. 210 | Returns: 211 | list[dict]: Decoded boxes. 212 | """ 213 | all_cls_scores = preds_dicts['all_cls_scores'][-1] 214 | all_bbox_preds = preds_dicts['all_bbox_preds'][-1] 215 | 216 | batch_size = all_cls_scores.size()[0] 217 | predictions_list = [] 218 | for i in range(batch_size): 219 | predictions_list.append(self.decode_single(all_cls_scores[i], all_bbox_preds[i])) 220 | return predictions_list 221 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/necks/cp_fpn.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2022 megvii-model. All Rights Reserved. 3 | # ------------------------------------------------------------------------ 4 | # Modified from mmdetection (https://github.com/open-mmlab/mmdetection) 5 | # Copyright (c) OpenMMLab. All rights reserved. 6 | # ------------------------------------------------------------------------ 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | from mmcv.cnn import ConvModule 10 | from mmcv.runner import BaseModule, auto_fp16 11 | 12 | from mmdet.models import NECKS 13 | 14 | ####This FPN remove the unused parameters which can used with checkpoint (with_cp = True in Backbone) 15 | @NECKS.register_module() 16 | class CPFPN(BaseModule): 17 | r"""Feature Pyramid Network. 18 | 19 | This is an implementation of paper `Feature Pyramid Networks for Object 20 | Detection `_. 
21 | 22 | Args: 23 | in_channels (List[int]): Number of input channels per scale. 24 | out_channels (int): Number of output channels (used at each scale) 25 | num_outs (int): Number of output scales. 26 | start_level (int): Index of the start input backbone level used to 27 | build the feature pyramid. Default: 0. 28 | end_level (int): Index of the end input backbone level (exclusive) to 29 | build the feature pyramid. Default: -1, which means the last level. 30 | add_extra_convs (bool | str): If bool, it decides whether to add conv 31 | layers on top of the original feature maps. Default to False. 32 | If True, it is equivalent to `add_extra_convs='on_input'`. 33 | If str, it specifies the source feature map of the extra convs. 34 | Only the following options are allowed 35 | 36 | - 'on_input': Last feat map of neck inputs (i.e. backbone feature). 37 | - 'on_lateral': Last feature map after lateral convs. 38 | - 'on_output': The last output feature map after fpn convs. 39 | relu_before_extra_convs (bool): Whether to apply relu before the extra 40 | conv. Default: False. 41 | no_norm_on_lateral (bool): Whether to apply norm on lateral. 42 | Default: False. 43 | conv_cfg (dict): Config dict for convolution layer. Default: None. 44 | norm_cfg (dict): Config dict for normalization layer. Default: None. 45 | act_cfg (str): Config dict for activation layer in ConvModule. 46 | Default: None. 47 | upsample_cfg (dict): Config dict for interpolate layer. 48 | Default: `dict(mode='nearest')` 49 | init_cfg (dict or list[dict], optional): Initialization config dict. 50 | 51 | Example: 52 | >>> import torch 53 | >>> in_channels = [2, 3, 5, 7] 54 | >>> scales = [340, 170, 84, 43] 55 | >>> inputs = [torch.rand(1, c, s, s) 56 | ... for c, s in zip(in_channels, scales)] 57 | >>> self = FPN(in_channels, 11, len(in_channels)).eval() 58 | >>> outputs = self.forward(inputs) 59 | >>> for i in range(len(outputs)): 60 | ... 
print(f'outputs[{i}].shape = {outputs[i].shape}') 61 | outputs[0].shape = torch.Size([1, 11, 340, 340]) 62 | outputs[1].shape = torch.Size([1, 11, 170, 170]) 63 | outputs[2].shape = torch.Size([1, 11, 84, 84]) 64 | outputs[3].shape = torch.Size([1, 11, 43, 43]) 65 | """ 66 | 67 | def __init__(self, 68 | in_channels, 69 | out_channels, 70 | num_outs, 71 | start_level=0, 72 | end_level=-1, 73 | add_extra_convs=False, 74 | relu_before_extra_convs=False, 75 | no_norm_on_lateral=False, 76 | conv_cfg=None, 77 | norm_cfg=None, 78 | act_cfg=None, 79 | upsample_cfg=dict(mode='nearest'), 80 | init_cfg=dict( 81 | type='Xavier', layer='Conv2d', distribution='uniform')): 82 | super(CPFPN, self).__init__(init_cfg) 83 | assert isinstance(in_channels, list) 84 | self.in_channels = in_channels 85 | self.out_channels = out_channels 86 | self.num_ins = len(in_channels) 87 | self.num_outs = num_outs 88 | self.relu_before_extra_convs = relu_before_extra_convs 89 | self.no_norm_on_lateral = no_norm_on_lateral 90 | self.fp16_enabled = False 91 | self.upsample_cfg = upsample_cfg.copy() 92 | 93 | if end_level == -1: 94 | self.backbone_end_level = self.num_ins 95 | assert num_outs >= self.num_ins - start_level 96 | else: 97 | # if end_level < inputs, no extra level is allowed 98 | self.backbone_end_level = end_level 99 | assert end_level <= len(in_channels) 100 | assert num_outs == end_level - start_level 101 | self.start_level = start_level 102 | self.end_level = end_level 103 | self.add_extra_convs = add_extra_convs 104 | assert isinstance(add_extra_convs, (str, bool)) 105 | if isinstance(add_extra_convs, str): 106 | # Extra_convs_source choices: 'on_input', 'on_lateral', 'on_output' 107 | assert add_extra_convs in ('on_input', 'on_lateral', 'on_output') 108 | elif add_extra_convs: # True 109 | self.add_extra_convs = 'on_input' 110 | 111 | self.lateral_convs = nn.ModuleList() 112 | self.fpn_convs = nn.ModuleList() 113 | 114 | for i in range(self.start_level, self.backbone_end_level): 115 | l_conv = ConvModule( 116 | in_channels[i], 117 | out_channels, 118 | 1, 119 | conv_cfg=conv_cfg, 120 | norm_cfg=norm_cfg if not self.no_norm_on_lateral else None, 121 | act_cfg=act_cfg, 122 | inplace=False) 123 | self.lateral_convs.append(l_conv) 124 | if i == 0 : 125 | fpn_conv = ConvModule( 126 | out_channels, 127 | out_channels, 128 | 3, 129 | padding=1, 130 | conv_cfg=conv_cfg, 131 | norm_cfg=norm_cfg, 132 | act_cfg=act_cfg, 133 | inplace=False) 134 | self.fpn_convs.append(fpn_conv) 135 | 136 | # add extra conv layers (e.g., RetinaNet) 137 | extra_levels = num_outs - self.backbone_end_level + self.start_level 138 | if self.add_extra_convs and extra_levels >= 1: 139 | for i in range(extra_levels): 140 | if i == 0 and self.add_extra_convs == 'on_input': 141 | in_channels = self.in_channels[self.backbone_end_level - 1] 142 | else: 143 | in_channels = out_channels 144 | extra_fpn_conv = ConvModule( 145 | in_channels, 146 | out_channels, 147 | 3, 148 | stride=2, 149 | padding=1, 150 | conv_cfg=conv_cfg, 151 | norm_cfg=norm_cfg, 152 | act_cfg=act_cfg, 153 | inplace=False) 154 | self.fpn_convs.append(extra_fpn_conv) 155 | 156 | @auto_fp16() 157 | def forward(self, inputs): 158 | """Forward function.""" 159 | assert len(inputs) == len(self.in_channels) 160 | 161 | # build laterals 162 | laterals = [ 163 | lateral_conv(inputs[i + self.start_level]) 164 | for i, lateral_conv in enumerate(self.lateral_convs) 165 | ] 166 | 167 | # build top-down path 168 | used_backbone_levels = len(laterals) 169 | for i in range(used_backbone_levels - 
1, 0, -1): 170 | # In some cases, fixing `scale factor` (e.g. 2) is preferred, but 171 | # it cannot co-exist with `size` in `F.interpolate`. 172 | if 'scale_factor' in self.upsample_cfg: 173 | laterals[i - 1] += F.interpolate(laterals[i], 174 | **self.upsample_cfg) 175 | else: 176 | prev_shape = laterals[i - 1].shape[2:] 177 | laterals[i - 1] += F.interpolate( 178 | laterals[i], size=prev_shape, **self.upsample_cfg) 179 | 180 | # build outputs 181 | # part 1: from original levels 182 | outs = [ 183 | self.fpn_convs[i](laterals[i]) if i==0 else laterals[i] for i in range(used_backbone_levels) 184 | ] 185 | # part 2: add extra levels 186 | if self.num_outs > len(outs): 187 | # use max pool to get more levels on top of outputs 188 | # (e.g., Faster R-CNN, Mask R-CNN) 189 | if not self.add_extra_convs: 190 | for i in range(self.num_outs - used_backbone_levels): 191 | outs.append(F.max_pool2d(outs[-1], 1, stride=2)) 192 | # add conv layers on top of original feature maps (RetinaNet) 193 | else: 194 | if self.add_extra_convs == 'on_input': 195 | extra_source = inputs[self.backbone_end_level - 1] 196 | elif self.add_extra_convs == 'on_lateral': 197 | extra_source = laterals[-1] 198 | elif self.add_extra_convs == 'on_output': 199 | extra_source = outs[-1] 200 | else: 201 | raise NotImplementedError 202 | outs.append(self.fpn_convs[used_backbone_levels](extra_source)) 203 | for i in range(used_backbone_levels + 1, self.num_outs): 204 | if self.relu_before_extra_convs: 205 | outs.append(self.fpn_convs[i](F.relu(outs[-1]))) 206 | else: 207 | outs.append(self.fpn_convs[i](outs[-1])) 208 | return tuple(outs) 209 | -------------------------------------------------------------------------------- /tools/test.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import mmcv 4 | import os 5 | import torch 6 | import warnings 7 | from mmcv import Config, DictAction 8 | from mmcv.cnn import fuse_conv_bn 9 | from mmcv.parallel import MMDataParallel, MMDistributedDataParallel 10 | from mmcv.runner import (get_dist_info, init_dist, load_checkpoint, 11 | wrap_fp16_model) 12 | 13 | from mmdet3d.apis import single_gpu_test 14 | from mmdet3d.datasets import build_dataloader, build_dataset 15 | from mmdet3d.models import build_model 16 | from mmdet.apis import multi_gpu_test, set_random_seed 17 | from mmdet.datasets import replace_ImageToTensor 18 | 19 | 20 | def parse_args(): 21 | parser = argparse.ArgumentParser( 22 | description='MMDet test (and eval) a model') 23 | parser.add_argument('config', help='test config file path') 24 | parser.add_argument('checkpoint', help='checkpoint file') 25 | parser.add_argument('--out', help='output result file in pickle format') 26 | parser.add_argument( 27 | '--fuse-conv-bn', 28 | action='store_true', 29 | help='Whether to fuse conv and bn, this will slightly increase' 30 | 'the inference speed') 31 | parser.add_argument( 32 | '--format-only', 33 | action='store_true', 34 | help='Format the output results without perform evaluation. 
It is' 35 | 'useful when you want to format the result to a specific format and ' 36 | 'submit it to the test server') 37 | parser.add_argument( 38 | '--eval', 39 | type=str, 40 | nargs='+', 41 | help='evaluation metrics, which depends on the dataset, e.g., "bbox",' 42 | ' "segm", "proposal" for COCO, and "mAP", "recall" for PASCAL VOC') 43 | parser.add_argument('--show', action='store_true', help='show results') 44 | parser.add_argument( 45 | '--show-dir', help='directory where results will be saved') 46 | parser.add_argument( 47 | '--gpu-collect', 48 | action='store_true', 49 | help='whether to use gpu to collect results.') 50 | parser.add_argument( 51 | '--tmpdir', 52 | help='tmp directory used for collecting results from multiple ' 53 | 'workers, available when gpu-collect is not specified') 54 | parser.add_argument('--seed', type=int, default=0, help='random seed') 55 | parser.add_argument( 56 | '--deterministic', 57 | action='store_true', 58 | help='whether to set deterministic options for CUDNN backend.') 59 | parser.add_argument( 60 | '--cfg-options', 61 | nargs='+', 62 | action=DictAction, 63 | help='override some settings in the used config, the key-value pair ' 64 | 'in xxx=yyy format will be merged into config file. If the value to ' 65 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 66 | 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' 67 | 'Note that the quotation marks are necessary and that no white space ' 68 | 'is allowed.') 69 | parser.add_argument( 70 | '--options', 71 | nargs='+', 72 | action=DictAction, 73 | help='custom options for evaluation, the key-value pair in xxx=yyy ' 74 | 'format will be kwargs for dataset.evaluate() function (deprecate), ' 75 | 'change to --eval-options instead.') 76 | parser.add_argument( 77 | '--eval-options', 78 | nargs='+', 79 | action=DictAction, 80 | help='custom options for evaluation, the key-value pair in xxx=yyy ' 81 | 'format will be kwargs for dataset.evaluate() function') 82 | parser.add_argument( 83 | '--launcher', 84 | choices=['none', 'pytorch', 'slurm', 'mpi'], 85 | default='none', 86 | help='job launcher') 87 | parser.add_argument('--local_rank', type=int, default=0) 88 | args = parser.parse_args() 89 | if 'LOCAL_RANK' not in os.environ: 90 | os.environ['LOCAL_RANK'] = str(args.local_rank) 91 | 92 | if args.options and args.eval_options: 93 | raise ValueError( 94 | '--options and --eval-options cannot be both specified, ' 95 | '--options is deprecated in favor of --eval-options') 96 | if args.options: 97 | warnings.warn('--options is deprecated in favor of --eval-options') 98 | args.eval_options = args.options 99 | return args 100 | 101 | 102 | def main(): 103 | args = parse_args() 104 | 105 | assert args.out or args.eval or args.format_only or args.show \ 106 | or args.show_dir, \ 107 | ('Please specify at least one operation (save/eval/format/show the ' 108 | 'results / save the results) with the argument "--out", "--eval"' 109 | ', "--format-only", "--show" or "--show-dir"') 110 | 111 | if args.eval and args.format_only: 112 | raise ValueError('--eval and --format_only cannot be both specified') 113 | 114 | if args.out is not None and not args.out.endswith(('.pkl', '.pickle')): 115 | raise ValueError('The output file must be a pkl file.') 116 | 117 | cfg = Config.fromfile(args.config) 118 | if args.cfg_options is not None: 119 | cfg.merge_from_dict(args.cfg_options) 120 | # import modules from string list. 
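    # --- Illustrative sketch: what --cfg-options does under the hood ---
    # Dotted keys passed on the command line are merged into the loaded config
    # before the dataloader and model are built; the override values below are
    # only examples:
    #   cfg = Config.fromfile('projects/configs/vedet_vovnet_p4_1600x640_2vview_2frame.py')
    #   cfg.merge_from_dict({'data.workers_per_gpu': 2,
    #                        'data.test.ann_file': 'data/nuscenes/mmdet3d_nuscenes_30f_infos_val.pkl'})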
121 | if cfg.get('custom_imports', None): 122 | from mmcv.utils import import_modules_from_strings 123 | import_modules_from_strings(**cfg['custom_imports']) 124 | 125 | # import modules from plguin/xx, registry will be updated 126 | if hasattr(cfg, 'plugin'): 127 | if cfg.plugin: 128 | import importlib 129 | if hasattr(cfg, 'plugin_dir'): 130 | plugin_dir = cfg.plugin_dir 131 | _module_dir = os.path.dirname(plugin_dir) 132 | _module_dir = _module_dir.split('/') 133 | _module_path = _module_dir[0] 134 | 135 | for m in _module_dir[1:]: 136 | _module_path = _module_path + '.' + m 137 | print(_module_path) 138 | plg_lib = importlib.import_module(_module_path) 139 | else: 140 | # import dir is the dirpath for the config file 141 | _module_dir = os.path.dirname(args.config) 142 | _module_dir = _module_dir.split('/') 143 | _module_path = _module_dir[0] 144 | for m in _module_dir[1:]: 145 | _module_path = _module_path + '.' + m 146 | print(_module_path) 147 | plg_lib = importlib.import_module(_module_path) 148 | 149 | # set cudnn_benchmark 150 | if cfg.get('cudnn_benchmark', False): 151 | torch.backends.cudnn.benchmark = True 152 | 153 | cfg.model.pretrained = None 154 | # in case the test dataset is concatenated 155 | samples_per_gpu = 1 156 | if isinstance(cfg.data.test, dict): 157 | cfg.data.test.test_mode = True 158 | samples_per_gpu = cfg.data.test.pop('samples_per_gpu', 1) 159 | if samples_per_gpu > 1: 160 | # Replace 'ImageToTensor' to 'DefaultFormatBundle' 161 | cfg.data.test.pipeline = replace_ImageToTensor( 162 | cfg.data.test.pipeline) 163 | elif isinstance(cfg.data.test, list): 164 | for ds_cfg in cfg.data.test: 165 | ds_cfg.test_mode = True 166 | samples_per_gpu = max( 167 | [ds_cfg.pop('samples_per_gpu', 1) for ds_cfg in cfg.data.test]) 168 | if samples_per_gpu > 1: 169 | for ds_cfg in cfg.data.test: 170 | ds_cfg.pipeline = replace_ImageToTensor(ds_cfg.pipeline) 171 | 172 | # init distributed env first, since logger depends on the dist info. 
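    # --- Illustrative note: how the plugin block above resolves the import ---
    # For this repo's config (plugin_dir = 'projects/mmdet3d_plugin/'):
    #   os.path.dirname('projects/mmdet3d_plugin/')  -> 'projects/mmdet3d_plugin'
    #   joined with '.'                              -> 'projects.mmdet3d_plugin'
    #   importlib.import_module(...) then runs the package __init__.py, which
    #   registers the custom VEDet detectors, heads and pipelines with the
    #   mmdet/mmdet3d registries.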
173 | if args.launcher == 'none': 174 | distributed = False 175 | else: 176 | distributed = True 177 | init_dist(args.launcher, **cfg.dist_params) 178 | 179 | # set random seeds 180 | if args.seed is not None: 181 | set_random_seed(args.seed, deterministic=args.deterministic) 182 | 183 | # build the dataloader 184 | dataset = build_dataset(cfg.data.test) 185 | data_loader = build_dataloader( 186 | dataset, 187 | samples_per_gpu=samples_per_gpu, 188 | workers_per_gpu=cfg.data.workers_per_gpu, 189 | dist=distributed, 190 | shuffle=False) 191 | 192 | # build the model and load checkpoint 193 | cfg.model.train_cfg = None 194 | model = build_model(cfg.model, test_cfg=cfg.get('test_cfg')) 195 | fp16_cfg = cfg.get('fp16', None) 196 | if fp16_cfg is not None: 197 | wrap_fp16_model(model) 198 | checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu') 199 | if args.fuse_conv_bn: 200 | model = fuse_conv_bn(model) 201 | # old versions did not save class info in checkpoints, this walkaround is 202 | # for backward compatibility 203 | if 'CLASSES' in checkpoint.get('meta', {}): 204 | model.CLASSES = checkpoint['meta']['CLASSES'] 205 | else: 206 | model.CLASSES = dataset.CLASSES 207 | # palette for visualization in segmentation tasks 208 | if 'PALETTE' in checkpoint.get('meta', {}): 209 | model.PALETTE = checkpoint['meta']['PALETTE'] 210 | elif hasattr(dataset, 'PALETTE'): 211 | # segmentation dataset has `PALETTE` attribute 212 | model.PALETTE = dataset.PALETTE 213 | 214 | if not distributed: 215 | model = MMDataParallel(model, device_ids=[0]) 216 | outputs = single_gpu_test(model, data_loader, args.show, args.show_dir) 217 | else: 218 | model = MMDistributedDataParallel( 219 | model.cuda(), 220 | device_ids=[torch.cuda.current_device()], 221 | broadcast_buffers=False) 222 | outputs = multi_gpu_test(model, data_loader, args.tmpdir, 223 | args.gpu_collect) 224 | 225 | rank, _ = get_dist_info() 226 | if rank == 0: 227 | if args.out: 228 | print(f'\nwriting results to {args.out}') 229 | mmcv.dump(outputs, args.out) 230 | kwargs = {} if args.eval_options is None else args.eval_options 231 | if args.format_only: 232 | dataset.format_results(outputs, **kwargs) 233 | if args.eval: 234 | eval_kwargs = cfg.get('evaluation', {}).copy() 235 | # hard-code way to remove EvalHook args 236 | for key in [ 237 | 'interval', 'tmpdir', 'start', 'gpu_collect', 'save_best', 238 | 'rule' 239 | ]: 240 | eval_kwargs.pop(key, None) 241 | eval_kwargs.update(dict(metric=args.eval, **kwargs)) 242 | print(dataset.evaluate(outputs, **eval_kwargs)) 243 | 244 | 245 | if __name__ == '__main__': 246 | main() 247 | -------------------------------------------------------------------------------- /projects/configs/vedet_vovnet_p4_1600x640_2vview_2frame.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '/workspace/mmlab/mmdetection3d/configs/_base_/datasets/nus-3d.py', 3 | '/workspace/mmlab/mmdetection3d/configs/_base_/default_runtime.py' 4 | ] 5 | backbone_norm_cfg = dict(type='LN', requires_grad=True) 6 | plugin = True 7 | plugin_dir = 'projects/mmdet3d_plugin/' 8 | 9 | log_config = dict( 10 | interval=10, 11 | hooks=[ 12 | dict(type='TextLoggerHook'), 13 | dict(type='TensorboardLoggerHook'), 14 | dict( 15 | type='WandbLoggerHook', 16 | init_kwargs={'project': 'mmdet3d'}, 17 | interval=10, 18 | ) 19 | ]) 20 | 21 | # If point cloud range is changed, the models should also change their point 22 | # cloud range accordingly 23 | point_cloud_range = [-51.2, -51.2, -5.0, 
51.2, 51.2, 3.0] 24 | voxel_size = [0.2, 0.2, 8] 25 | img_norm_cfg = dict(mean=[103.530, 116.280, 123.675], std=[57.375, 57.120, 58.395], to_rgb=False) 26 | # For nuScenes we usually do 10-class detection 27 | class_names = [ 28 | 'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier', 'motorcycle', 'bicycle', 'pedestrian', 29 | 'traffic_cone' 30 | ] 31 | input_modality = dict(use_lidar=False, use_camera=True, use_radar=False, use_map=False, use_external=False) 32 | bands, max_freq = 64, 8 33 | num_views = 2 34 | code_weights = [1.0] * 10 + [0.0] * 10 * num_views 35 | code_weights[8] = 0.2 36 | code_weights[9] = 0.2 37 | virtual_weights = 0.2 38 | for i in range(1, num_views + 1): 39 | code_weights[i * 10] = virtual_weights # x 40 | code_weights[i * 10 + 1] = virtual_weights # y 41 | code_weights[i * 10 + 4] = virtual_weights # z 42 | code_weights[i * 10 + 6] = virtual_weights # sin(yaw) 43 | code_weights[i * 10 + 7] = virtual_weights # cos(yaw) 44 | code_weights[i * 10 + 2] = virtual_weights # w 45 | code_weights[i * 10 + 3] = virtual_weights # l 46 | code_weights[i * 10 + 5] = virtual_weights # h 47 | code_weights[i * 10 + 8] = 0.2 * virtual_weights # vx 48 | code_weights[i * 10 + 9] = 0.2 * virtual_weights # vy 49 | model = dict( 50 | type='VEDet', 51 | use_grid_mask=True, 52 | img_backbone=dict( 53 | type='VoVNetCP', 54 | spec_name='V-99-eSE', 55 | norm_eval=True, 56 | frozen_stages=-1, 57 | input_ch=3, 58 | out_features=( 59 | 'stage4', 60 | 'stage5', 61 | )), 62 | img_neck=dict(type='CPFPN', in_channels=[768, 1024], out_channels=256, num_outs=2), 63 | gt_depth_sup=False, # use cache to supervise 64 | pts_bbox_head=dict( 65 | type='VEDetHead', 66 | num_classes=10, 67 | in_channels=256, 68 | num_query=900, 69 | position_range=point_cloud_range, 70 | reg_hidden_dims=[512, 512], 71 | code_size=(num_views + 1) * 10, 72 | code_weights=code_weights, 73 | reg_channels=10, 74 | num_decode_views=num_views, 75 | with_time=True, 76 | det_transformer=dict( 77 | type='VETransformer', 78 | det_decoder=dict( 79 | type='PETRTransformerDecoder', 80 | return_intermediate=True, 81 | num_layers=6, 82 | transformerlayers=dict( 83 | type='PETRTransformerDecoderLayer', 84 | attn_cfgs=[ 85 | dict(type='MultiheadAttention', embed_dims=256, num_heads=8, dropout=0.1), 86 | dict(type='PETRMultiheadAttention', embed_dims=256, num_heads=8, dropout=0.1), 87 | ], 88 | feedforward_channels=2048, 89 | ffn_dropout=0.1, 90 | with_cp=True, 91 | operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 'ffn', 'norm')), 92 | )), 93 | bbox_coder=dict( 94 | type='NMSFreeCoder', 95 | post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], 96 | pc_range=point_cloud_range, 97 | max_num=300, 98 | voxel_size=voxel_size, 99 | num_classes=10), 100 | input_ray_encoding=dict( 101 | type='FourierMLPEncoding', 102 | input_channels=10, 103 | hidden_dims=[int(1.5 * 10 * 2 * bands)], 104 | embed_dim=256, 105 | fourier_type='linear', 106 | fourier_channels=10 * 2 * bands, 107 | max_frequency=max_freq), 108 | output_det_encoding=dict( 109 | type='FourierMLPEncoding', 110 | input_channels=10, 111 | hidden_dims=[int(1.5 * 10 * 2 * bands)], 112 | embed_dim=256, 113 | fourier_type='linear', 114 | fourier_channels=10 * 2 * bands, 115 | max_frequency=max_freq), 116 | loss_cls=dict(type='FocalLoss', use_sigmoid=True, gamma=2.0, alpha=0.25, loss_weight=2.0), 117 | loss_bbox=dict(type='L1Loss', loss_weight=0.25), 118 | loss_iou=dict(type='GIoULoss', loss_weight=0.0), 119 | ), 120 | # model training and testing settings 121 | 
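    # --- Illustrative note: code_size / code_weights layout ---
    # With num_views = 2 the head above regresses code_size = (2 + 1) * 10 = 30
    # values per query: one 10-dim box for the real (ego) frame followed by one
    # 10-dim box per virtual view. The code_weights list built at the top of
    # this config therefore expands to
    #   [1.0]*8 + [0.2]*2                 # real box, velocities down-weighted
    #   + ([0.2]*8 + [0.04]*2) * 2        # virtual views, scaled by virtual_weights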
train_cfg=dict( 122 | pts=dict( 123 | grid_size=[512, 512, 1], 124 | voxel_size=voxel_size, 125 | point_cloud_range=point_cloud_range, 126 | out_size_factor=4, 127 | assigner=dict( 128 | type='HungarianAssigner3D', 129 | cls_cost=dict(type='FocalLossCost', weight=2.0), 130 | reg_cost=dict(type='BBox3DL1Cost', weight=0.25), 131 | iou_cost=dict(type='IoUCost', 132 | weight=0.0), # Fake cost. This is just to make it compatible with DETR head. 133 | align_with_loss=True, 134 | pc_range=point_cloud_range)))) 135 | 136 | dataset_type = 'CustomNuScenesDataset' 137 | data_root = 'data/nuscenes/' 138 | 139 | file_client_args = dict(backend='disk') 140 | ida_aug_conf = { 141 | "resize_lim": (0.94, 1.25), 142 | "final_dim": (640, 1600), 143 | "bot_pct_lim": (0.0, 0.0), 144 | "rot_lim": (0.0, 0.0), 145 | "H": 900, 146 | "W": 1600, 147 | "rand_flip": True, 148 | } 149 | meta_keys = ('filename', 'ori_shape', 'img_shape', 'lidar2img', 'depth2img', 'cam2img', 'pad_shape', 'scale_factor', 150 | 'flip', 'pcd_horizontal_flip', 'pcd_vertical_flip', 'box_mode_3d', 'box_type_3d', 'img_norm_cfg', 151 | 'pcd_trans', 'sample_idx', 'pcd_scale_factor', 'pcd_rotation', 'pts_filename', 'transformation_3d_flow', 152 | 'intrinsics', 'extrinsics', 'scale_ratio', 'dec_extrinsics', 'timestamp') 153 | train_pipeline = [ 154 | dict(type='LoadMultiViewImageFromFiles', to_float32=True), 155 | dict( 156 | type='LoadMultiViewImageFromMultiSweepsFiles', 157 | sweeps_num=1, 158 | to_float32=True, 159 | pad_empty_sweeps=True, 160 | test_mode=False, 161 | time_range=3, 162 | sweep_range=[3, 27]), 163 | dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True, with_attr_label=False), 164 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), 165 | dict(type='ObjectNameFilter', classes=class_names), 166 | dict(type='ResizeCropFlipImageFull3D', data_aug_conf=ida_aug_conf, training=True), 167 | dict( 168 | type='GlobalRotScaleTransImage', 169 | rot_range=[-0.3925, 0.3925], 170 | translation_std=[0, 0, 0], 171 | scale_ratio_range=[0.95, 1.05], 172 | reverse_angle=True, 173 | training=True), 174 | dict(type='ComputeMultiviewTargets', local_frame=True, visible_only=False, use_virtual=True, num_views=num_views), 175 | dict(type='NormalizeMultiviewImage', **img_norm_cfg), 176 | dict(type='PadMultiViewImage', size_divisor=32), 177 | dict(type='DefaultFormatBundle3D', class_names=class_names), 178 | dict(type='Collect3D', keys=['gt_bboxes_3d', 'gt_labels_3d', 'img'], meta_keys=meta_keys) 179 | ] 180 | test_pipeline = [ 181 | dict(type='LoadMultiViewImageFromFiles', to_float32=True), 182 | dict( 183 | type='LoadMultiViewImageFromMultiSweepsFiles', 184 | sweeps_num=1, 185 | to_float32=True, 186 | pad_empty_sweeps=True, 187 | time_range=3, 188 | sweep_range=[3, 27]), 189 | dict(type='ResizeCropFlipImageFull3D', data_aug_conf=ida_aug_conf, training=False), 190 | dict(type='ComputeMultiviewTargets', local_frame=True, visible_only=False), 191 | dict(type='NormalizeMultiviewImage', **img_norm_cfg), 192 | dict(type='PadMultiViewImage', size_divisor=32), 193 | dict( 194 | type='MultiScaleFlipAug3D', 195 | img_scale=(1333, 800), 196 | pts_scale_ratio=1, 197 | flip=False, 198 | transforms=[ 199 | dict(type='DefaultFormatBundle3D', class_names=class_names, with_label=False), 200 | dict(type='Collect3D', keys=['img'], meta_keys=meta_keys) 201 | ]) 202 | ] 203 | 204 | data = dict( 205 | samples_per_gpu=1, 206 | workers_per_gpu=4, 207 | train=dict( 208 | type=dataset_type, 209 | data_root=data_root, 210 | ann_file=data_root + 
'mmdet3d_nuscenes_30f_infos_train.pkl', 211 | pipeline=train_pipeline, 212 | classes=class_names, 213 | modality=input_modality, 214 | test_mode=False, 215 | use_valid_flag=True, 216 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset 217 | # and box_type_3d='Depth' in sunrgbd and scannet dataset. 218 | box_type_3d='LiDAR'), 219 | val=dict( 220 | type=dataset_type, 221 | pipeline=test_pipeline, 222 | ann_file=data_root + 'mmdet3d_nuscenes_30f_infos_val.pkl', 223 | classes=class_names, 224 | modality=input_modality), 225 | test=dict( 226 | type=dataset_type, 227 | pipeline=test_pipeline, 228 | ann_file=data_root + 'mmdet3d_nuscenes_30f_infos_val.pkl', 229 | classes=class_names, 230 | modality=input_modality)) 231 | 232 | optimizer = dict( 233 | type='AdamW', lr=2e-4, paramwise_cfg=dict(custom_keys={ 234 | 'img_backbone': dict(lr_mult=0.1), 235 | }), weight_decay=0.01) 236 | 237 | optimizer_config = dict(type='Fp16OptimizerHook', loss_scale=512., grad_clip=dict(max_norm=35, norm_type=2)) 238 | 239 | # learning policy 240 | lr_config = dict( 241 | policy='CosineAnnealing', 242 | warmup='linear', 243 | warmup_iters=500, 244 | warmup_ratio=1.0 / 3, 245 | min_lr_ratio=1e-3, 246 | ) 247 | total_epochs = 24 248 | evaluation = dict(interval=2, pipeline=test_pipeline, metric=['bbox']) 249 | checkpoint_config = dict(interval=24) 250 | find_unused_parameters = False 251 | 252 | runner = dict(type='EpochBasedRunner', max_epochs=total_epochs) 253 | load_from = 'ckpts/fcos3d_vovnet_imgbackbone-remapped.pth' 254 | resume_from = None 255 | -------------------------------------------------------------------------------- /tools/data_converter/s3dis_data_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import mmcv 3 | import numpy as np 4 | import os 5 | from concurrent import futures as futures 6 | from os import path as osp 7 | 8 | 9 | class S3DISData(object): 10 | """S3DIS data. 11 | 12 | Generate s3dis infos for s3dis_converter. 13 | 14 | Args: 15 | root_path (str): Root path of the raw data. 16 | split (str): Set split type of the data. Default: 'Area_1'. 17 | """ 18 | 19 | def __init__(self, root_path, split='Area_1'): 20 | self.root_dir = root_path 21 | self.split = split 22 | self.data_dir = osp.join(root_path, 23 | 'Stanford3dDataset_v1.2_Aligned_Version') 24 | 25 | # Following `GSDN `_, use 5 furniture 26 | # classes for detection: table, chair, sofa, bookcase, board. 27 | self.cat_ids = np.array([7, 8, 9, 10, 11]) 28 | self.cat_ids2class = { 29 | cat_id: i 30 | for i, cat_id in enumerate(list(self.cat_ids)) 31 | } 32 | 33 | assert split in [ 34 | 'Area_1', 'Area_2', 'Area_3', 'Area_4', 'Area_5', 'Area_6' 35 | ] 36 | self.sample_id_list = os.listdir(osp.join(self.data_dir, 37 | split)) # conferenceRoom_1 38 | for sample_id in self.sample_id_list: 39 | if os.path.isfile(osp.join(self.data_dir, split, sample_id)): 40 | self.sample_id_list.remove(sample_id) 41 | 42 | def __len__(self): 43 | return len(self.sample_id_list) 44 | 45 | def get_infos(self, num_workers=4, has_label=True, sample_id_list=None): 46 | """Get data infos. 47 | 48 | This method gets information from the raw data. 49 | 50 | Args: 51 | num_workers (int): Number of threads to be used. Default: 4. 52 | has_label (bool): Whether the data has label. Default: True. 53 | sample_id_list (list[int]): Index list of the sample. 54 | Default: None. 55 | 56 | Returns: 57 | infos (list[dict]): Information of the raw data. 
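# --- Illustrative sketch: driving get_infos the way the converter scripts do ---
# The root path and output filename below are assumptions for illustration.
import mmcv

s3dis = S3DISData(root_path='./data/s3dis', split='Area_5')
area_infos = s3dis.get_infos(num_workers=4, has_label=True)
mmcv.dump(area_infos, './data/s3dis/s3dis_infos_Area_5.pkl', 'pkl')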
58 | """ 59 | 60 | def process_single_scene(sample_idx): 61 | print(f'{self.split} sample_idx: {sample_idx}') 62 | info = dict() 63 | pc_info = { 64 | 'num_features': 6, 65 | 'lidar_idx': f'{self.split}_{sample_idx}' 66 | } 67 | info['point_cloud'] = pc_info 68 | pts_filename = osp.join(self.root_dir, 's3dis_data', 69 | f'{self.split}_{sample_idx}_point.npy') 70 | pts_instance_mask_path = osp.join( 71 | self.root_dir, 's3dis_data', 72 | f'{self.split}_{sample_idx}_ins_label.npy') 73 | pts_semantic_mask_path = osp.join( 74 | self.root_dir, 's3dis_data', 75 | f'{self.split}_{sample_idx}_sem_label.npy') 76 | 77 | points = np.load(pts_filename).astype(np.float32) 78 | pts_instance_mask = np.load(pts_instance_mask_path).astype(np.int) 79 | pts_semantic_mask = np.load(pts_semantic_mask_path).astype(np.int) 80 | 81 | mmcv.mkdir_or_exist(osp.join(self.root_dir, 'points')) 82 | mmcv.mkdir_or_exist(osp.join(self.root_dir, 'instance_mask')) 83 | mmcv.mkdir_or_exist(osp.join(self.root_dir, 'semantic_mask')) 84 | 85 | points.tofile( 86 | osp.join(self.root_dir, 'points', 87 | f'{self.split}_{sample_idx}.bin')) 88 | pts_instance_mask.tofile( 89 | osp.join(self.root_dir, 'instance_mask', 90 | f'{self.split}_{sample_idx}.bin')) 91 | pts_semantic_mask.tofile( 92 | osp.join(self.root_dir, 'semantic_mask', 93 | f'{self.split}_{sample_idx}.bin')) 94 | 95 | info['pts_path'] = osp.join('points', 96 | f'{self.split}_{sample_idx}.bin') 97 | info['pts_instance_mask_path'] = osp.join( 98 | 'instance_mask', f'{self.split}_{sample_idx}.bin') 99 | info['pts_semantic_mask_path'] = osp.join( 100 | 'semantic_mask', f'{self.split}_{sample_idx}.bin') 101 | info['annos'] = self.get_bboxes(points, pts_instance_mask, 102 | pts_semantic_mask) 103 | 104 | return info 105 | 106 | sample_id_list = sample_id_list if sample_id_list is not None \ 107 | else self.sample_id_list 108 | with futures.ThreadPoolExecutor(num_workers) as executor: 109 | infos = executor.map(process_single_scene, sample_id_list) 110 | return list(infos) 111 | 112 | def get_bboxes(self, points, pts_instance_mask, pts_semantic_mask): 113 | """Convert instance masks to axis-aligned bounding boxes. 114 | 115 | Args: 116 | points (np.array): Scene points of shape (n, 6). 117 | pts_instance_mask (np.ndarray): Instance labels of shape (n,). 118 | pts_semantic_mask (np.ndarray): Semantic labels of shape (n,). 119 | 120 | Returns: 121 | dict: A dict containing detection infos with following keys: 122 | 123 | - gt_boxes_upright_depth (np.ndarray): Bounding boxes 124 | of shape (n, 6) 125 | - class (np.ndarray): Box labels of shape (n,) 126 | - gt_num (int): Number of boxes. 
127 | """ 128 | bboxes, labels = [], [] 129 | for i in range(1, pts_instance_mask.max()): 130 | ids = pts_instance_mask == i 131 | # check if all instance points have same semantic label 132 | assert pts_semantic_mask[ids].min() == pts_semantic_mask[ids].max() 133 | label = pts_semantic_mask[ids][0] 134 | # keep only furniture objects 135 | if label in self.cat_ids2class: 136 | labels.append(self.cat_ids2class[pts_semantic_mask[ids][0]]) 137 | pts = points[:, :3][ids] 138 | min_pts = pts.min(axis=0) 139 | max_pts = pts.max(axis=0) 140 | locations = (min_pts + max_pts) / 2 141 | dimensions = max_pts - min_pts 142 | bboxes.append(np.concatenate((locations, dimensions))) 143 | annotation = dict() 144 | # follow ScanNet and SUN RGB-D keys 145 | annotation['gt_boxes_upright_depth'] = np.array(bboxes) 146 | annotation['class'] = np.array(labels) 147 | annotation['gt_num'] = len(labels) 148 | return annotation 149 | 150 | 151 | class S3DISSegData(object): 152 | """S3DIS dataset used to generate infos for semantic segmentation task. 153 | 154 | Args: 155 | data_root (str): Root path of the raw data. 156 | ann_file (str): The generated scannet infos. 157 | split (str): Set split type of the data. Default: 'train'. 158 | num_points (int): Number of points in each data input. Default: 8192. 159 | label_weight_func (function): Function to compute the label weight. 160 | Default: None. 161 | """ 162 | 163 | def __init__(self, 164 | data_root, 165 | ann_file, 166 | split='Area_1', 167 | num_points=4096, 168 | label_weight_func=None): 169 | self.data_root = data_root 170 | self.data_infos = mmcv.load(ann_file) 171 | self.split = split 172 | self.num_points = num_points 173 | 174 | self.all_ids = np.arange(13) # all possible ids 175 | self.cat_ids = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 176 | 12]) # used for seg task 177 | self.ignore_index = len(self.cat_ids) 178 | 179 | self.cat_id2class = np.ones((self.all_ids.shape[0],), dtype=np.int) * \ 180 | self.ignore_index 181 | for i, cat_id in enumerate(self.cat_ids): 182 | self.cat_id2class[cat_id] = i 183 | 184 | # label weighting function is taken from 185 | # https://github.com/charlesq34/pointnet2/blob/master/scannet/scannet_dataset.py#L24 186 | self.label_weight_func = (lambda x: 1.0 / np.log(1.2 + x)) if \ 187 | label_weight_func is None else label_weight_func 188 | 189 | def get_seg_infos(self): 190 | scene_idxs, label_weight = self.get_scene_idxs_and_label_weight() 191 | save_folder = osp.join(self.data_root, 'seg_info') 192 | mmcv.mkdir_or_exist(save_folder) 193 | np.save( 194 | osp.join(save_folder, f'{self.split}_resampled_scene_idxs.npy'), 195 | scene_idxs) 196 | np.save( 197 | osp.join(save_folder, f'{self.split}_label_weight.npy'), 198 | label_weight) 199 | print(f'{self.split} resampled scene index and label weight saved') 200 | 201 | def _convert_to_label(self, mask): 202 | """Convert class_id in loaded segmentation mask to label.""" 203 | if isinstance(mask, str): 204 | if mask.endswith('npy'): 205 | mask = np.load(mask) 206 | else: 207 | mask = np.fromfile(mask, dtype=np.long) 208 | label = self.cat_id2class[mask] 209 | return label 210 | 211 | def get_scene_idxs_and_label_weight(self): 212 | """Compute scene_idxs for data sampling and label weight for loss \ 213 | calculation. 214 | 215 | We sample more times for scenes with more points. Label_weight is 216 | inversely proportional to number of class points. 
217 | """ 218 | num_classes = len(self.cat_ids) 219 | num_point_all = [] 220 | label_weight = np.zeros((num_classes + 1, )) # ignore_index 221 | for data_info in self.data_infos: 222 | label = self._convert_to_label( 223 | osp.join(self.data_root, data_info['pts_semantic_mask_path'])) 224 | num_point_all.append(label.shape[0]) 225 | class_count, _ = np.histogram(label, range(num_classes + 2)) 226 | label_weight += class_count 227 | 228 | # repeat scene_idx for num_scene_point // num_sample_point times 229 | sample_prob = np.array(num_point_all) / float(np.sum(num_point_all)) 230 | num_iter = int(np.sum(num_point_all) / float(self.num_points)) 231 | scene_idxs = [] 232 | for idx in range(len(self.data_infos)): 233 | scene_idxs.extend([idx] * int(round(sample_prob[idx] * num_iter))) 234 | scene_idxs = np.array(scene_idxs).astype(np.int32) 235 | 236 | # calculate label weight, adopted from PointNet++ 237 | label_weight = label_weight[:-1].astype(np.float32) 238 | label_weight = label_weight / label_weight.sum() 239 | label_weight = self.label_weight_func(label_weight).astype(np.float32) 240 | 241 | return scene_idxs, label_weight 242 | -------------------------------------------------------------------------------- /tools/train.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2022 megvii-model. All Rights Reserved. 3 | # ------------------------------------------------------------------------ 4 | # Modified from DETR3D (https://github.com/WangYueFt/detr3d) 5 | # Copyright (c) 2021 Wang, Yue 6 | # ------------------------------------------------------------------------ 7 | # Modified from mmdetection3d (https://github.com/open-mmlab/mmdetection3d) 8 | # Copyright (c) OpenMMLab. All rights reserved. 
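# (Editor's note) A minimal sketch of how this training entry point is usually
# invoked; the config path and work dir are placeholders, and the flags match
# the argparse options defined below:
#
#     python tools/train.py <path/to/config.py> --work-dir work_dirs/my_run \
#         --seed 0 --deterministic
#
# Multi-GPU jobs launch the same script through torch.distributed.launch with
# --launcher pytorch rather than calling it directly.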
9 | # ------------------------------------------------------------------------ 10 | 11 | from __future__ import division 12 | 13 | import argparse 14 | import copy 15 | import mmcv 16 | import os 17 | import time 18 | import torch 19 | import warnings 20 | from mmcv import Config, DictAction 21 | from mmcv.runner import get_dist_info, init_dist 22 | from os import path as osp 23 | 24 | from mmdet import __version__ as mmdet_version 25 | from mmdet3d import __version__ as mmdet3d_version 26 | from mmdet3d.apis import train_model 27 | from mmdet3d.datasets import build_dataset 28 | from mmdet3d.models import build_model 29 | from mmdet3d.utils import collect_env, get_root_logger 30 | from mmdet.apis import set_random_seed 31 | from mmseg import __version__ as mmseg_version 32 | from mmdet.utils import get_device 33 | 34 | def parse_args(): 35 | parser = argparse.ArgumentParser(description='Train a detector') 36 | parser.add_argument('config', help='train config file path') 37 | parser.add_argument('--work-dir', help='the dir to save logs and models') 38 | parser.add_argument( 39 | '--resume-from', help='the checkpoint file to resume from') 40 | parser.add_argument( 41 | '--no-validate', 42 | action='store_true', 43 | help='whether not to evaluate the checkpoint during training') 44 | group_gpus = parser.add_mutually_exclusive_group() 45 | group_gpus.add_argument( 46 | '--gpus', 47 | type=int, 48 | help='number of gpus to use ' 49 | '(only applicable to non-distributed training)') 50 | group_gpus.add_argument( 51 | '--gpu-ids', 52 | type=int, 53 | nargs='+', 54 | help='ids of gpus to use ' 55 | '(only applicable to non-distributed training)') 56 | parser.add_argument('--seed', type=int, default=0, help='random seed') 57 | parser.add_argument( 58 | '--deterministic', 59 | action='store_true', 60 | help='whether to set deterministic options for CUDNN backend.') 61 | parser.add_argument( 62 | '--options', 63 | nargs='+', 64 | action=DictAction, 65 | help='override some settings in the used config, the key-value pair ' 66 | 'in xxx=yyy format will be merged into config file (deprecate), ' 67 | 'change to --cfg-options instead.') 68 | parser.add_argument( 69 | '--cfg-options', 70 | nargs='+', 71 | action=DictAction, 72 | help='override some settings in the used config, the key-value pair ' 73 | 'in xxx=yyy format will be merged into config file. If the value to ' 74 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 75 | 'It also allows nested list/tuple values, e.g. 
key="[(a,b),(c,d)]" ' 76 | 'Note that the quotation marks are necessary and that no white space ' 77 | 'is allowed.') 78 | parser.add_argument( 79 | '--launcher', 80 | choices=['none', 'pytorch', 'slurm', 'mpi'], 81 | default='none', 82 | help='job launcher') 83 | parser.add_argument('--local_rank', type=int, default=0) 84 | parser.add_argument( 85 | '--autoscale-lr', 86 | action='store_true', 87 | help='automatically scale lr with the number of gpus') 88 | args = parser.parse_args() 89 | if 'LOCAL_RANK' not in os.environ: 90 | os.environ['LOCAL_RANK'] = str(args.local_rank) 91 | 92 | if args.options and args.cfg_options: 93 | raise ValueError( 94 | '--options and --cfg-options cannot be both specified, ' 95 | '--options is deprecated in favor of --cfg-options') 96 | if args.options: 97 | warnings.warn('--options is deprecated in favor of --cfg-options') 98 | args.cfg_options = args.options 99 | 100 | return args 101 | 102 | 103 | def main(): 104 | args = parse_args() 105 | 106 | cfg = Config.fromfile(args.config) 107 | if args.cfg_options is not None: 108 | cfg.merge_from_dict(args.cfg_options) 109 | # import modules from string list. 110 | if cfg.get('custom_imports', None): 111 | from mmcv.utils import import_modules_from_strings 112 | import_modules_from_strings(**cfg['custom_imports']) 113 | 114 | # import modules from plguin/xx, registry will be updated 115 | if hasattr(cfg, 'plugin'): 116 | if cfg.plugin: 117 | import importlib 118 | if hasattr(cfg, 'plugin_dir'): 119 | plugin_dir = cfg.plugin_dir 120 | _module_dir = os.path.dirname(plugin_dir) 121 | _module_dir = _module_dir.split('/') 122 | _module_path = _module_dir[0] 123 | 124 | for m in _module_dir[1:]: 125 | _module_path = _module_path + '.' + m 126 | print(_module_path) 127 | plg_lib = importlib.import_module(_module_path) 128 | else: 129 | # import dir is the dirpath for the config file 130 | _module_dir = os.path.dirname(args.config) 131 | _module_dir = _module_dir.split('/') 132 | _module_path = _module_dir[0] 133 | for m in _module_dir[1:]: 134 | _module_path = _module_path + '.' + m 135 | print(_module_path) 136 | plg_lib = importlib.import_module(_module_path) 137 | 138 | # set cudnn_benchmark 139 | if cfg.get('cudnn_benchmark', False): 140 | torch.backends.cudnn.benchmark = True 141 | 142 | # work_dir is determined in this priority: CLI > segment in file > filename 143 | if args.work_dir is not None: 144 | # update configs according to CLI args if args.work_dir is not None 145 | cfg.work_dir = args.work_dir 146 | elif cfg.get('work_dir', None) is None: 147 | # use config filename as default work_dir if cfg.work_dir is None 148 | cfg.work_dir = osp.join('./work_dirs', 149 | osp.splitext(osp.basename(args.config))[0]) 150 | if args.resume_from is not None: 151 | cfg.resume_from = args.resume_from 152 | if args.gpu_ids is not None: 153 | cfg.gpu_ids = args.gpu_ids 154 | else: 155 | cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus) 156 | 157 | if args.autoscale_lr: 158 | # apply the linear scaling rule (https://arxiv.org/abs/1706.02677) 159 | cfg.optimizer['lr'] = cfg.optimizer['lr'] * len(cfg.gpu_ids) / 8 160 | 161 | # init distributed env first, since logger depends on the dist info. 
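# (Editor's note) Worked example of the linear scaling rule applied just above,
# assuming a hypothetical base lr of 2e-4 in the config: with 8 GPUs the lr is
# unchanged (2e-4 * 8 / 8 = 2e-4), while with 4 GPUs it becomes 1e-4
# (2e-4 * 4 / 8).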
162 | if args.launcher == 'none': 163 | distributed = False 164 | else: 165 | distributed = True 166 | init_dist(args.launcher, **cfg.dist_params) 167 | # re-set gpu_ids with distributed training mode 168 | _, world_size = get_dist_info() 169 | cfg.gpu_ids = range(world_size) 170 | 171 | # create work_dir 172 | mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir)) 173 | # dump config 174 | cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config))) 175 | # init the logger before other steps 176 | timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime()) 177 | log_file = osp.join(cfg.work_dir, f'{timestamp}.log') 178 | # specify logger name, if we still use 'mmdet', the output info will be 179 | # filtered and won't be saved in the log_file 180 | # TODO: ugly workaround to judge whether we are training det or seg model 181 | if cfg.model.type in ['EncoderDecoder3D']: 182 | logger_name = 'mmseg' 183 | else: 184 | logger_name = 'mmdet' 185 | logger = get_root_logger( 186 | log_file=log_file, log_level=cfg.log_level, name=logger_name) 187 | 188 | # init the meta dict to record some important information such as 189 | # environment info and seed, which will be logged 190 | meta = dict() 191 | # log env info 192 | env_info_dict = collect_env() 193 | env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()]) 194 | dash_line = '-' * 60 + '\n' 195 | logger.info('Environment info:\n' + dash_line + env_info + '\n' + 196 | dash_line) 197 | meta['env_info'] = env_info 198 | meta['config'] = cfg.pretty_text 199 | cfg.device = get_device() 200 | # log some basic info 201 | logger.info(f'Distributed training: {distributed}') 202 | logger.info(f'Config:\n{cfg.pretty_text}') 203 | 204 | # set random seeds 205 | if args.seed is not None: 206 | logger.info(f'Set random seed to {args.seed}, ' 207 | f'deterministic: {args.deterministic}') 208 | set_random_seed(args.seed, deterministic=args.deterministic) 209 | cfg.seed = args.seed 210 | meta['seed'] = args.seed 211 | meta['exp_name'] = osp.basename(args.config) 212 | 213 | model = build_model( 214 | cfg.model, 215 | train_cfg=cfg.get('train_cfg'), 216 | test_cfg=cfg.get('test_cfg')) 217 | model.init_weights() 218 | 219 | logger.info(f'Model:\n{model}') 220 | datasets = [build_dataset(cfg.data.train)] 221 | if len(cfg.workflow) == 2: 222 | val_dataset = copy.deepcopy(cfg.data.val) 223 | # in case we use a dataset wrapper 224 | if 'dataset' in cfg.data.train: 225 | val_dataset.pipeline = cfg.data.train.dataset.pipeline 226 | else: 227 | val_dataset.pipeline = cfg.data.train.pipeline 228 | # set test_mode=False here in deep copied config 229 | # which do not affect AP/AR calculation later 230 | # refer to https://mmdetection3d.readthedocs.io/en/latest/tutorials/customize_runtime.html#customize-workflow # noqa 231 | val_dataset.test_mode = False 232 | datasets.append(build_dataset(val_dataset)) 233 | if cfg.checkpoint_config is not None: 234 | # save mmdet version, config file content and class names in 235 | # checkpoints as meta data 236 | cfg.checkpoint_config.meta = dict( 237 | mmdet_version=mmdet_version, 238 | mmseg_version=mmseg_version, 239 | mmdet3d_version=mmdet3d_version, 240 | config=cfg.pretty_text, 241 | CLASSES=datasets[0].CLASSES, 242 | PALETTE=datasets[0].PALETTE # for segmentors 243 | if hasattr(datasets[0], 'PALETTE') else None) 244 | # add an attribute for visualization convenience 245 | model.CLASSES = datasets[0].CLASSES 246 | train_model( 247 | model, 248 | datasets, 249 | cfg, 250 | distributed=distributed, 251 | validate=(not 
args.no_validate), 252 | timestamp=timestamp, 253 | meta=meta) 254 | 255 | 256 | if __name__ == '__main__': 257 | main() 258 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/detectors/vedet.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2022 Toyota Research Institute, Dian Chen. All Rights Reserved. 3 | # ------------------------------------------------------------------------ 4 | # Copyright (c) 2022 megvii-model. All Rights Reserved. 5 | # ------------------------------------------------------------------------ 6 | # Modified from DETR3D (https://github.com/WangYueFt/detr3d) 7 | # Copyright (c) 2021 Wang, Yue 8 | # ------------------------------------------------------------------------ 9 | # Modified from mmdetection3d (https://github.com/open-mmlab/mmdetection3d) 10 | # Copyright (c) OpenMMLab. All rights reserved. 11 | # ------------------------------------------------------------------------ 12 | 13 | import torch 14 | import torch.nn.functional as F 15 | from einops import rearrange 16 | from mmcv.runner import force_fp32, auto_fp16 17 | from mmdet.models import DETECTORS 18 | from mmdet3d.core import bbox3d2result 19 | from mmdet3d.models.detectors.mvx_two_stage import MVXTwoStageDetector 20 | from projects.mmdet3d_plugin.models.utils.grid_mask import GridMask 21 | 22 | 23 | @DETECTORS.register_module() 24 | class VEDet(MVXTwoStageDetector): 25 | """Petr3D.""" 26 | 27 | def __init__(self, 28 | use_grid_mask=False, 29 | use_gt_scale=False, 30 | pts_voxel_layer=None, 31 | pts_voxel_encoder=None, 32 | pts_middle_encoder=None, 33 | pts_fusion_layer=None, 34 | img_backbone=None, 35 | pts_backbone=None, 36 | img_neck=None, 37 | pts_neck=None, 38 | pts_bbox_head=None, 39 | img_roi_head=None, 40 | img_rpn_head=None, 41 | train_cfg=None, 42 | test_cfg=None, 43 | pretrained=None, 44 | gt_depth_sup=True): 45 | super(VEDet, self).__init__(pts_voxel_layer, pts_voxel_encoder, pts_middle_encoder, pts_fusion_layer, 46 | img_backbone, pts_backbone, img_neck, pts_neck, pts_bbox_head, img_roi_head, 47 | img_rpn_head, train_cfg, test_cfg, pretrained) 48 | self.grid_mask = GridMask(True, True, rotate=1, offset=False, ratio=0.5, mode=1, prob=0.7) 49 | self.use_grid_mask = use_grid_mask 50 | self.use_gt_scale = use_gt_scale 51 | self.gt_depth_sup = gt_depth_sup 52 | 53 | def extract_img_feat(self, img, img_metas): 54 | """Extract features of images.""" 55 | # print(img[0].size()) 56 | if isinstance(img, list): 57 | img = torch.stack(img, dim=0) 58 | 59 | B = img.size(0) 60 | if img is not None: 61 | input_shape = img.shape[-2:] 62 | # update real input shape of each single img 63 | for img_meta in img_metas: 64 | img_meta.update(input_shape=input_shape) 65 | if img.dim() == 5: 66 | if img.size(0) == 1 and img.size(1) != 1: 67 | img.squeeze_() 68 | else: 69 | B, N, C, H, W = img.size() 70 | img = img.view(B * N, C, H, W) 71 | if self.use_grid_mask: 72 | img = self.grid_mask(img) 73 | img_feats = self.img_backbone(img) 74 | if isinstance(img_feats, dict): 75 | img_feats = list(img_feats.values()) 76 | else: 77 | return None 78 | if self.with_img_neck: 79 | img_feats = self.img_neck(img_feats) 80 | img_feats_reshaped = [] 81 | for img_feat in img_feats: 82 | BN, C, H, W = img_feat.size() 83 | img_feats_reshaped.append(img_feat.view(B, int(BN / B), C, H, W)) 84 | return img_feats_reshaped 85 | 86 | 
@auto_fp16(apply_to=('img'), out_fp32=True) 87 | def extract_feat(self, img, img_metas): 88 | """Extract features from images and points.""" 89 | img_feats = self.extract_img_feat(img, img_metas) 90 | return img_feats 91 | 92 | def forward_pts_train(self, pts_feats, gt_bboxes_3d, gt_labels_3d, gt_maps, img_metas): 93 | """Forward function for point cloud branch. 94 | Args: 95 | pts_feats (list[torch.Tensor]): Features of point cloud branch 96 | gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth 97 | boxes for each sample. 98 | gt_labels_3d (list[torch.Tensor]): Ground truth labels for 99 | boxes of each sampole 100 | img_metas (list[dict]): Meta information of samples. 101 | gt_bboxes_ignore (list[torch.Tensor], optional): Ground truth 102 | boxes to be ignored. Defaults to None. 103 | Returns: 104 | dict: Losses of each branch. 105 | """ 106 | outs = self.pts_bbox_head(pts_feats, img_metas) 107 | loss_inputs = [gt_bboxes_3d, gt_labels_3d, gt_maps, outs] 108 | losses = self.pts_bbox_head.loss(*loss_inputs) 109 | 110 | return losses 111 | 112 | @force_fp32(apply_to=('img', 'points')) 113 | def forward(self, return_loss=True, **kwargs): 114 | """Calls either forward_train or forward_test depending on whether 115 | return_loss=True. 116 | Note this setting will change the expected inputs. When 117 | `return_loss=True`, img and img_metas are single-nested (i.e. 118 | torch.Tensor and list[dict]), and when `resturn_loss=False`, img and 119 | img_metas should be double nested (i.e. list[torch.Tensor], 120 | list[list[dict]]), with the outer list indicating test time 121 | augmentations. 122 | """ 123 | if return_loss: 124 | return self.forward_train(**kwargs) 125 | else: 126 | return self.forward_test(**kwargs) 127 | 128 | def forward_train(self, img_metas=None, gt_bboxes_3d=None, gt_labels_3d=None, maps=None, img=None): 129 | """Forward training function. 130 | Args: 131 | points (list[torch.Tensor], optional): Points of each sample. 132 | Defaults to None. 133 | img_metas (list[dict], optional): Meta information of each sample. 134 | Defaults to None. 135 | gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`], optional): 136 | Ground truth 3D boxes. Defaults to None. 137 | gt_labels_3d (list[torch.Tensor], optional): Ground truth labels 138 | of 3D boxes. Defaults to None. 139 | gt_labels (list[torch.Tensor], optional): Ground truth labels 140 | of 2D boxes in images. Defaults to None. 141 | gt_bboxes (list[torch.Tensor], optional): Ground truth 2D boxes in 142 | images. Defaults to None. 143 | img (torch.Tensor optional): Images of each sample with shape 144 | (N, C, H, W). Defaults to None. 145 | proposals ([list[torch.Tensor], optional): Predicted proposals 146 | used for training Fast RCNN. Defaults to None. 147 | gt_bboxes_ignore (list[torch.Tensor], optional): Ground truth 148 | 2D boxes in images to be ignored. Defaults to None. 149 | Returns: 150 | dict: Losses of different branches. 
151 |         """
152 | 
153 |         img_feats = self.extract_feat(img=img, img_metas=img_metas)
154 | 
155 |         losses = dict()
156 |         losses_pts = self.forward_pts_train(img_feats, gt_bboxes_3d, gt_labels_3d, maps, img_metas)
157 |         losses.update(losses_pts)
158 |         return losses
159 | 
160 |     def forward_test(self, img_metas, img=None, gt_map=None, **kwargs):
161 |         for var, name in [(img_metas, 'img_metas')]:
162 |             if not isinstance(var, list):
163 |                 raise TypeError('{} must be a list, but got {}'.format(name, type(var)))
164 |         img = [img] if img is None else img
165 |         return self.simple_test(img_metas[0], img[0], gt_map[0] if gt_map is not None else None, **kwargs)
166 | 
167 |     def simple_test_pts(self, x, img_metas, gt_map=None, rescale=False):
168 |         """Test function of point cloud branch."""
169 |         outs = self.pts_bbox_head(x, img_metas)
170 |         results = dict()
171 |         if outs.get('all_cls_scores', None) is not None:
172 |             bbox_list = self.pts_bbox_head.get_bboxes(outs, img_metas, rescale=rescale)
173 |             bbox_results = [bbox3d2result(bboxes, scores, labels) for bboxes, scores, labels in bbox_list]
174 |             results['bbox_results'] = bbox_results
175 | 
176 |         if gt_map is not None:
177 |             seg_results = self.compute_seg_iou(outs, gt_map)
178 |             results['seg_results'] = seg_results
179 | 
180 |         return results
181 | 
182 |     def simple_test(self, img_metas, img=None, gt_map=None, rescale=False):
183 |         """Test function without augmentation."""
184 |         img_feats = self.extract_feat(img=img, img_metas=img_metas)
185 | 
186 |         results_list = [dict() for i in range(len(img_metas))]
187 |         results = self.simple_test_pts(img_feats, img_metas, gt_map, rescale=rescale)
188 |         if 'bbox_results' in results:
189 |             for result_dict, pts_bbox in zip(results_list, results['bbox_results']):
190 |                 result_dict['pts_bbox'] = pts_bbox
191 | 
192 |         return results_list
193 | 
194 |     @torch.no_grad()
195 |     def compute_seg_iou(self, outs, gt_map):
196 |         lane_preds = outs['all_seg_preds'][-1].squeeze(0)  # [B, N, H, W]
197 | 
198 |         pred_maps = lane_preds.view(256, 3, 16, 16)
199 | 
200 |         f_lane = rearrange(pred_maps, '(h w) c h1 w2 -> c (h h1) (w w2)', h=16, w=16)
201 |         f_lane = f_lane.sigmoid()
202 |         f_lane[f_lane >= 0.5] = 1
203 |         f_lane[f_lane < 0.5] = 0
204 | 
205 |         f_lane = f_lane.view(3, -1)
206 |         gt_map = gt_map.view(3, -1)
207 | 
208 |         ret_iou = IOU(f_lane, gt_map).cpu()
209 |         return ret_iou
210 | 
211 |     def aug_test_pts(self, feats, img_metas, rescale=False):
212 |         feats_list = []
213 |         for j in range(len(feats[0])):
214 |             feats_list_level = []
215 |             for i in range(len(feats)):
216 |                 feats_list_level.append(feats[i][j])
217 |             feats_list.append(torch.stack(feats_list_level, -1).mean(-1))
218 |         outs = self.pts_bbox_head(feats_list, img_metas)
219 |         bbox_list = self.pts_bbox_head.get_bboxes(outs, img_metas, rescale=rescale)
220 |         bbox_results = [bbox3d2result(bboxes, scores, labels) for bboxes, scores, labels in bbox_list]
221 |         return bbox_results
222 | 
223 |     def aug_test(self, img_metas, imgs=None, rescale=False):
224 |         """Test function with augmentation."""
225 |         img_feats = self.extract_feats(img_metas, imgs)
226 |         img_metas = img_metas[0]
227 |         bbox_list = [dict() for i in range(len(img_metas))]
228 |         bbox_pts = self.aug_test_pts(img_feats, img_metas, rescale)
229 |         for result_dict, pts_bbox in zip(bbox_list, bbox_pts):
230 |             result_dict['pts_bbox'] = pts_bbox
231 |         return bbox_list
232 | 
233 | 
234 | def IOU(inputs, targets):
235 |     numerator = 2 * (inputs * targets).sum(dim=1)
236 |     denominator = inputs.sum(dim=1) + targets.sum(dim=1)
237 |     loss = (numerator + 0.01) /
(denominator + 0.01) 238 | return loss 239 | -------------------------------------------------------------------------------- /tools/data_converter/lyft_converter.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import mmcv 3 | import numpy as np 4 | import os 5 | from logging import warning 6 | from lyft_dataset_sdk.lyftdataset import LyftDataset as Lyft 7 | from os import path as osp 8 | from pyquaternion import Quaternion 9 | 10 | from mmdet3d.datasets import LyftDataset 11 | from .nuscenes_converter import (get_2d_boxes, get_available_scenes, 12 | obtain_sensor2top) 13 | 14 | lyft_categories = ('car', 'truck', 'bus', 'emergency_vehicle', 'other_vehicle', 15 | 'motorcycle', 'bicycle', 'pedestrian', 'animal') 16 | 17 | 18 | def create_lyft_infos(root_path, 19 | info_prefix, 20 | version='v1.01-train', 21 | max_sweeps=10): 22 | """Create info file of lyft dataset. 23 | 24 | Given the raw data, generate its related info file in pkl format. 25 | 26 | Args: 27 | root_path (str): Path of the data root. 28 | info_prefix (str): Prefix of the info file to be generated. 29 | version (str): Version of the data. 30 | Default: 'v1.01-train' 31 | max_sweeps (int): Max number of sweeps. 32 | Default: 10 33 | """ 34 | lyft = Lyft( 35 | data_path=osp.join(root_path, version), 36 | json_path=osp.join(root_path, version, version), 37 | verbose=True) 38 | available_vers = ['v1.01-train', 'v1.01-test'] 39 | assert version in available_vers 40 | if version == 'v1.01-train': 41 | train_scenes = mmcv.list_from_file('data/lyft/train.txt') 42 | val_scenes = mmcv.list_from_file('data/lyft/val.txt') 43 | elif version == 'v1.01-test': 44 | train_scenes = mmcv.list_from_file('data/lyft/test.txt') 45 | val_scenes = [] 46 | else: 47 | raise ValueError('unknown') 48 | 49 | # filter existing scenes. 
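# (Editor's note) The split files read above are assumed to contain one scene
# name per line; mmcv.list_from_file returns the stripped lines of a text file
# as a Python list, which is then matched against the available scene names
# below.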
50 | available_scenes = get_available_scenes(lyft) 51 | available_scene_names = [s['name'] for s in available_scenes] 52 | train_scenes = list( 53 | filter(lambda x: x in available_scene_names, train_scenes)) 54 | val_scenes = list(filter(lambda x: x in available_scene_names, val_scenes)) 55 | train_scenes = set([ 56 | available_scenes[available_scene_names.index(s)]['token'] 57 | for s in train_scenes 58 | ]) 59 | val_scenes = set([ 60 | available_scenes[available_scene_names.index(s)]['token'] 61 | for s in val_scenes 62 | ]) 63 | 64 | test = 'test' in version 65 | if test: 66 | print(f'test scene: {len(train_scenes)}') 67 | else: 68 | print(f'train scene: {len(train_scenes)}, \ 69 | val scene: {len(val_scenes)}') 70 | train_lyft_infos, val_lyft_infos = _fill_trainval_infos( 71 | lyft, train_scenes, val_scenes, test, max_sweeps=max_sweeps) 72 | 73 | metadata = dict(version=version) 74 | if test: 75 | print(f'test sample: {len(train_lyft_infos)}') 76 | data = dict(infos=train_lyft_infos, metadata=metadata) 77 | info_name = f'{info_prefix}_infos_test' 78 | info_path = osp.join(root_path, f'{info_name}.pkl') 79 | mmcv.dump(data, info_path) 80 | else: 81 | print(f'train sample: {len(train_lyft_infos)}, \ 82 | val sample: {len(val_lyft_infos)}') 83 | data = dict(infos=train_lyft_infos, metadata=metadata) 84 | train_info_name = f'{info_prefix}_infos_train' 85 | info_path = osp.join(root_path, f'{train_info_name}.pkl') 86 | mmcv.dump(data, info_path) 87 | data['infos'] = val_lyft_infos 88 | val_info_name = f'{info_prefix}_infos_val' 89 | info_val_path = osp.join(root_path, f'{val_info_name}.pkl') 90 | mmcv.dump(data, info_val_path) 91 | 92 | 93 | def _fill_trainval_infos(lyft, 94 | train_scenes, 95 | val_scenes, 96 | test=False, 97 | max_sweeps=10): 98 | """Generate the train/val infos from the raw data. 99 | 100 | Args: 101 | lyft (:obj:`LyftDataset`): Dataset class in the Lyft dataset. 102 | train_scenes (list[str]): Basic information of training scenes. 103 | val_scenes (list[str]): Basic information of validation scenes. 104 | test (bool): Whether use the test mode. In the test mode, no 105 | annotations can be accessed. Default: False. 106 | max_sweeps (int): Max number of sweeps. Default: 10. 107 | 108 | Returns: 109 | tuple[list[dict]]: Information of training set and 110 | validation set that will be saved to the info file. 
111 | """ 112 | train_lyft_infos = [] 113 | val_lyft_infos = [] 114 | 115 | for sample in mmcv.track_iter_progress(lyft.sample): 116 | lidar_token = sample['data']['LIDAR_TOP'] 117 | sd_rec = lyft.get('sample_data', sample['data']['LIDAR_TOP']) 118 | cs_record = lyft.get('calibrated_sensor', 119 | sd_rec['calibrated_sensor_token']) 120 | pose_record = lyft.get('ego_pose', sd_rec['ego_pose_token']) 121 | abs_lidar_path, boxes, _ = lyft.get_sample_data(lidar_token) 122 | # nuScenes devkit returns more convenient relative paths while 123 | # lyft devkit returns absolute paths 124 | abs_lidar_path = str(abs_lidar_path) # absolute path 125 | lidar_path = abs_lidar_path.split(f'{os.getcwd()}/')[-1] 126 | # relative path 127 | 128 | mmcv.check_file_exist(lidar_path) 129 | 130 | info = { 131 | 'lidar_path': lidar_path, 132 | 'token': sample['token'], 133 | 'sweeps': [], 134 | 'cams': dict(), 135 | 'lidar2ego_translation': cs_record['translation'], 136 | 'lidar2ego_rotation': cs_record['rotation'], 137 | 'ego2global_translation': pose_record['translation'], 138 | 'ego2global_rotation': pose_record['rotation'], 139 | 'timestamp': sample['timestamp'], 140 | } 141 | 142 | l2e_r = info['lidar2ego_rotation'] 143 | l2e_t = info['lidar2ego_translation'] 144 | e2g_r = info['ego2global_rotation'] 145 | e2g_t = info['ego2global_translation'] 146 | l2e_r_mat = Quaternion(l2e_r).rotation_matrix 147 | e2g_r_mat = Quaternion(e2g_r).rotation_matrix 148 | 149 | # obtain 6 image's information per frame 150 | camera_types = [ 151 | 'CAM_FRONT', 152 | 'CAM_FRONT_RIGHT', 153 | 'CAM_FRONT_LEFT', 154 | 'CAM_BACK', 155 | 'CAM_BACK_LEFT', 156 | 'CAM_BACK_RIGHT', 157 | ] 158 | for cam in camera_types: 159 | cam_token = sample['data'][cam] 160 | cam_path, _, cam_intrinsic = lyft.get_sample_data(cam_token) 161 | cam_info = obtain_sensor2top(lyft, cam_token, l2e_t, l2e_r_mat, 162 | e2g_t, e2g_r_mat, cam) 163 | cam_info.update(cam_intrinsic=cam_intrinsic) 164 | info['cams'].update({cam: cam_info}) 165 | 166 | # obtain sweeps for a single key-frame 167 | sd_rec = lyft.get('sample_data', sample['data']['LIDAR_TOP']) 168 | sweeps = [] 169 | while len(sweeps) < max_sweeps: 170 | if not sd_rec['prev'] == '': 171 | sweep = obtain_sensor2top(lyft, sd_rec['prev'], l2e_t, 172 | l2e_r_mat, e2g_t, e2g_r_mat, 'lidar') 173 | sweeps.append(sweep) 174 | sd_rec = lyft.get('sample_data', sd_rec['prev']) 175 | else: 176 | break 177 | info['sweeps'] = sweeps 178 | # obtain annotation 179 | if not test: 180 | annotations = [ 181 | lyft.get('sample_annotation', token) 182 | for token in sample['anns'] 183 | ] 184 | locs = np.array([b.center for b in boxes]).reshape(-1, 3) 185 | dims = np.array([b.wlh for b in boxes]).reshape(-1, 3) 186 | rots = np.array([b.orientation.yaw_pitch_roll[0] 187 | for b in boxes]).reshape(-1, 1) 188 | 189 | names = [b.name for b in boxes] 190 | for i in range(len(names)): 191 | if names[i] in LyftDataset.NameMapping: 192 | names[i] = LyftDataset.NameMapping[names[i]] 193 | names = np.array(names) 194 | 195 | # we need to convert rot to SECOND format. 
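# (Editor's note) Worked example of the yaw conversion on the next line, using
# the devkit yaw (rotation about the z-axis) as input: yaw = 0 maps to
# -0 - pi/2 = -pi/2, and yaw = pi/2 maps to -pi/2 - pi/2 = -pi.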
196 |             gt_boxes = np.concatenate([locs, dims, -rots - np.pi / 2], axis=1)
197 |             assert len(gt_boxes) == len(
198 |                 annotations), f'{len(gt_boxes)}, {len(annotations)}'
199 |             info['gt_boxes'] = gt_boxes
200 |             info['gt_names'] = names
201 |             info['num_lidar_pts'] = np.array(
202 |                 [a['num_lidar_pts'] for a in annotations])
203 |             info['num_radar_pts'] = np.array(
204 |                 [a['num_radar_pts'] for a in annotations])
205 | 
206 |         if sample['scene_token'] in train_scenes:
207 |             train_lyft_infos.append(info)
208 |         else:
209 |             val_lyft_infos.append(info)
210 | 
211 |     return train_lyft_infos, val_lyft_infos
212 | 
213 | 
214 | def export_2d_annotation(root_path, info_path, version):
215 |     """Export 2d annotation from the info file and raw data.
216 | 
217 |     Args:
218 |         root_path (str): Root path of the raw data.
219 |         info_path (str): Path of the info file.
220 |         version (str): Dataset version.
221 |     """
222 |     warning('DeprecationWarning: 2D annotations are not used on the '
223 |             'Lyft dataset. The function export_2d_annotation will be '
224 |             'deprecated.')
225 |     # get bbox annotations for camera
226 |     camera_types = [
227 |         'CAM_FRONT',
228 |         'CAM_FRONT_RIGHT',
229 |         'CAM_FRONT_LEFT',
230 |         'CAM_BACK',
231 |         'CAM_BACK_LEFT',
232 |         'CAM_BACK_RIGHT',
233 |     ]
234 |     lyft_infos = mmcv.load(info_path)['infos']
235 |     lyft = Lyft(
236 |         data_path=osp.join(root_path, version),
237 |         json_path=osp.join(root_path, version, version),
238 |         verbose=True)
239 |     # info_2d_list = []
240 |     cat2Ids = [
241 |         dict(id=lyft_categories.index(cat_name), name=cat_name)
242 |         for cat_name in lyft_categories
243 |     ]
244 |     coco_ann_id = 0
245 |     coco_2d_dict = dict(annotations=[], images=[], categories=cat2Ids)
246 |     for info in mmcv.track_iter_progress(lyft_infos):
247 |         for cam in camera_types:
248 |             cam_info = info['cams'][cam]
249 |             coco_infos = get_2d_boxes(
250 |                 lyft,
251 |                 cam_info['sample_data_token'],
252 |                 visibilities=['', '1', '2', '3', '4'])
253 |             (height, width, _) = mmcv.imread(cam_info['data_path']).shape
254 |             coco_2d_dict['images'].append(
255 |                 dict(
256 |                     file_name=cam_info['data_path'],
257 |                     id=cam_info['sample_data_token'],
258 |                     width=width,
259 |                     height=height))
260 |             for coco_info in coco_infos:
261 |                 if coco_info is None:
262 |                     continue
263 |                 # add an empty key for coco format
264 |                 coco_info['segmentation'] = []
265 |                 coco_info['id'] = coco_ann_id
266 |                 coco_2d_dict['annotations'].append(coco_info)
267 |                 coco_ann_id += 1
268 |     mmcv.dump(coco_2d_dict, f'{info_path[:-4]}.coco.json')
269 | 
--------------------------------------------------------------------------------
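A minimal usage sketch for the Lyft converter above, added for completeness. The
data root and info prefix are placeholders; it assumes the raw Lyft release has
been extracted under the data root with its v1.01-train JSON tables, that the
data/lyft/train.txt and data/lyft/val.txt split files referenced by
create_lyft_infos exist, and that the repository root is on PYTHONPATH so that
tools.data_converter resolves as a package:

    from tools.data_converter.lyft_converter import create_lyft_infos

    if __name__ == '__main__':
        # Writes <info_prefix>_infos_train.pkl and <info_prefix>_infos_val.pkl
        # under the data root, as implemented above.
        create_lyft_infos(
            root_path='./data/lyft',   # placeholder data root
            info_prefix='lyft',
            version='v1.01-train',
            max_sweeps=10)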