├── media └── framework.png ├── tools ├── data_converter │ ├── __init__.py │ ├── lyft_data_fixer.py │ ├── indoor_converter.py │ ├── nuimage_converter.py │ ├── sunrgbd_data_utils.py │ ├── s3dis_data_utils.py │ └── lyft_converter.py ├── build-dataset.py ├── dist_train.sh ├── dist_test.sh ├── create_data.sh ├── slurm_test.sh ├── slurm_train.sh ├── misc │ ├── print_config.py │ ├── visualize_results.py │ ├── fuse_conv_bn.py │ └── browse_dataset.py ├── model_converters │ ├── publish_model.py │ ├── regnet2mmdet.py │ └── convert_votenet_checkpoints.py ├── rename_depth_cache.py ├── analysis_tools │ ├── benchmark.py │ ├── get_flops.py │ └── analyze_logs.py ├── test.py └── train.py ├── projects ├── mmdet3d_plugin │ ├── models │ │ ├── losses │ │ │ ├── __init__.py │ │ │ └── Sigmoid_ce_loss.py │ │ ├── dense_heads │ │ │ └── __init__.py │ │ ├── necks │ │ │ ├── __init__.py │ │ │ └── cp_fpn.py │ │ ├── detectors │ │ │ ├── __init__.py │ │ │ └── vedet.py │ │ ├── backbones │ │ │ └── __init__.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── grid_mask.py │ │ │ └── vedet_transformer.py │ ├── core │ │ └── bbox │ │ │ ├── assigners │ │ │ ├── __init__.py │ │ │ └── hungarian_assigner_3d.py │ │ │ ├── coders │ │ │ ├── __init__.py │ │ │ └── nms_free_coder.py │ │ │ ├── match_costs │ │ │ ├── __init__.py │ │ │ └── match_cost.py │ │ │ ├── iou_calculators │ │ │ └── __init__.py │ │ │ └── util.py │ ├── datasets │ │ ├── __init__.py │ │ ├── pipelines │ │ │ ├── __init__.py │ │ │ └── loading.py │ │ └── nuscenes_dataset.py │ └── __init__.py └── configs │ └── vedet_vovnet_p4_1600x640_2vview_2frame.py ├── Makefile ├── .gitignore ├── docker └── Dockerfile-mmlab-cu111 ├── docs └── INSTALL.md └── README.md /media/framework.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TRI-ML/VEDet/HEAD/media/framework.png -------------------------------------------------------------------------------- /tools/data_converter/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .Sigmoid_ce_loss import Sigmoid_ce_loss 2 | 3 | __all__ = ['Sigmoid_ce_loss'] 4 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/assigners/__init__.py: -------------------------------------------------------------------------------- 1 | from .hungarian_assigner_3d import HungarianAssigner3D 2 | 3 | __all__ = ['HungarianAssigner3D'] 4 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/coders/__init__.py: -------------------------------------------------------------------------------- 1 | from .nms_free_coder import NMSFreeCoder, NMSFreeClsCoder 2 | __all__ = ['NMSFreeCoder', 'NMSFreeClsCoder'] 3 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/match_costs/__init__.py: -------------------------------------------------------------------------------- 1 | from mmdet.core.bbox.match_costs import build_match_cost 2 | from .match_cost import BBox3DL1Cost 3 | 4 | __all__ = ['build_match_cost', 'BBox3DL1Cost'] 5 | -------------------------------------------------------------------------------- /tools/build-dataset.py: -------------------------------------------------------------------------------- 1 | from data_converter.nuscenes_converter_seg import create_nuscenes_infos 2 | 3 | 4 | 5 | if __name__ == '__main__': 6 | # Training settings 7 | create_nuscenes_infos( '/data/Dataset/nuScenes/','HDmaps-nocovers') 8 | 9 | -------------------------------------------------------------------------------- /tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | PORT=${PORT:-28500} 6 | 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 8 | python3 -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 9 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} 10 | -------------------------------------------------------------------------------- /tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | PORT=${PORT:-29500} 7 | 8 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 9 | python3 -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 10 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} 11 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/dense_heads/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2022 Toyota Research Institute, Dian Chen. All Rights Reserved. 
3 | # ------------------------------------------------------------------------ 4 | from .vedet_head import VEDetHead 5 | 6 | __all__ = ['VEDetHead'] 7 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/necks/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2022 megvii-model. All Rights Reserved. 3 | # ------------------------------------------------------------------------ 4 | # Modified from mmdetection (https://github.com/open-mmlab/mmdetection) 5 | # Copyright (c) OpenMMLab. All rights reserved. 6 | # ------------------------------------------------------------------------ 7 | from .cp_fpn import CPFPN 8 | __all__ = ['CPFPN'] 9 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/iou_calculators/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .iou3d_calculator import (AxisAlignedBboxOverlaps3D, BboxOverlaps3D, 3 | BboxOverlapsNearest3D, 4 | axis_aligned_bbox_overlaps_3d, bbox_overlaps_3d, 5 | bbox_overlaps_nearest_3d) 6 | 7 | __all__ = [ 8 | 'BboxOverlapsNearest3D', 'BboxOverlaps3D', 'bbox_overlaps_nearest_3d', 9 | 'bbox_overlaps_3d', 'AxisAlignedBboxOverlaps3D', 10 | 'axis_aligned_bbox_overlaps_3d' 11 | ] -------------------------------------------------------------------------------- /tools/create_data.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | export PYTHONPATH=`pwd`:$PYTHONPATH 5 | 6 | PARTITION=$1 7 | JOB_NAME=$2 8 | CONFIG=$3 9 | WORK_DIR=$4 10 | GPUS=${GPUS:-1} 11 | GPUS_PER_NODE=${GPUS_PER_NODE:-1} 12 | SRUN_ARGS=${SRUN_ARGS:-""} 13 | JOB_NAME=create_data 14 | 15 | srun -p ${PARTITION} \ 16 | --job-name=${JOB_NAME} \ 17 | --gres=gpu:${GPUS_PER_NODE} \ 18 | --ntasks=${GPUS} \ 19 | --ntasks-per-node=${GPUS_PER_NODE} \ 20 | --kill-on-bad-exit=1 \ 21 | ${SRUN_ARGS} \ 22 | python -u tools/create_data.py kitti \ 23 | --root-path ./data/kitti \ 24 | --out-dir ./data/kitti \ 25 | --extra-tag kitti 26 | -------------------------------------------------------------------------------- /tools/slurm_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | CHECKPOINT=$4 9 | GPUS=${GPUS:-8} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | PY_ARGS=${@:5} 13 | SRUN_ARGS=${SRUN_ARGS:-""} 14 | 15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 16 | srun -p ${PARTITION} \ 17 | --job-name=${JOB_NAME} \ 18 | --gres=gpu:${GPUS_PER_NODE} \ 19 | --ntasks=${GPUS} \ 20 | --ntasks-per-node=${GPUS_PER_NODE} \ 21 | --cpus-per-task=${CPUS_PER_TASK} \ 22 | --kill-on-bad-exit=1 \ 23 | ${SRUN_ARGS} \ 24 | python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS} 25 | -------------------------------------------------------------------------------- /tools/slurm_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | WORK_DIR=$4 9 | GPUS=${GPUS:-8} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | SRUN_ARGS=${SRUN_ARGS:-""} 
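# Note: the variables above are overridable at call time, e.g. (values are illustrative):
#   GPUS=16 GPUS_PER_NODE=8 CPUS_PER_TASK=4 tools/slurm_train.sh <partition> <job_name> <config> <work_dir>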
13 | PY_ARGS=${@:5} 14 | 15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 16 | srun -p ${PARTITION} \ 17 | --job-name=${JOB_NAME} \ 18 | --gres=gpu:${GPUS_PER_NODE} \ 19 | --ntasks=${GPUS} \ 20 | --ntasks-per-node=${GPUS_PER_NODE} \ 21 | --cpus-per-task=${CPUS_PER_TASK} \ 22 | --kill-on-bad-exit=1 \ 23 | ${SRUN_ARGS} \ 24 | python -u tools/train.py ${CONFIG} --work-dir=${WORK_DIR} --launcher="slurm" ${PY_ARGS} 25 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2022 Toyota Research Institute, Dian Chen. All Rights Reserved. 3 | # ------------------------------------------------------------------------ 4 | # Modified from DETR3D (https://github.com/WangYueFt/detr3d) 5 | # Copyright (c) 2021 Wang, Yue 6 | # ------------------------------------------------------------------------ 7 | # Modified from mmdetection3d (https://github.com/open-mmlab/mmdetection3d) 8 | # Copyright (c) OpenMMLab. All rights reserved. 9 | # ------------------------------------------------------------------------ 10 | from .vedet import VEDet 11 | 12 | __all__ = ['VEDet'] 13 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2022 megvii-model. All Rights Reserved. 3 | # ------------------------------------------------------------------------ 4 | # Modified from DETR3D (https://github.com/WangYueFt/detr3d) 5 | # Copyright (c) 2021 Wang, Yue 6 | # ------------------------------------------------------------------------ 7 | # Modified from mmdetection3d (https://github.com/open-mmlab/mmdetection3d) 8 | # Copyright (c) OpenMMLab. All rights reserved. 9 | # ------------------------------------------------------------------------ 10 | from .nuscenes_dataset import CustomNuScenesDataset 11 | 12 | __all__ = ['CustomNuScenesDataset'] 13 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2022 Toyota Research Institute, Dian Chen. All Rights Reserved. 3 | # ------------------------------------------------------------------------ 4 | # Copyright (c) 2022 megvii-model. All Rights Reserved. 5 | # ------------------------------------------------------------------------ 6 | # Modified from DETR3D (https://github.com/WangYueFt/detr3d) 7 | # Copyright (c) 2021 Wang, Yue 8 | # ------------------------------------------------------------------------ 9 | from .vovnet import VoVNet 10 | from .vovnetcp import VoVNetCP 11 | from .convnext import ConvNeXt 12 | 13 | __all__ = ['VoVNet', 'VoVNetCP', 'ConvNeXt'] 14 | -------------------------------------------------------------------------------- /tools/misc/print_config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
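# Example usage (the config path follows this repo's layout; the --options key is illustrative):
#   python tools/misc/print_config.py projects/configs/vedet_vovnet_p4_1600x640_2vview_2frame.py \
#       --options data.samples_per_gpu=1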
2 | import argparse
3 | from mmcv import Config, DictAction
4 | 
5 | 
6 | def parse_args():
7 |     parser = argparse.ArgumentParser(description='Print the whole config')
8 |     parser.add_argument('config', help='config file path')
9 |     parser.add_argument(
10 |         '--options', nargs='+', action=DictAction, help='arguments in dict')
11 |     args = parser.parse_args()
12 | 
13 |     return args
14 | 
15 | 
16 | def main():
17 |     args = parse_args()
18 | 
19 |     cfg = Config.fromfile(args.config)
20 |     if args.options is not None:
21 |         cfg.merge_from_dict(args.options)
22 |     print(f'Config:\n{cfg.pretty_text}')
23 | 
24 | 
25 | if __name__ == '__main__':
26 |     main()
27 | 
--------------------------------------------------------------------------------
/projects/mmdet3d_plugin/models/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------
2 | # Copyright (c) 2022 Toyota Research Institute, Dian Chen. All Rights Reserved.
3 | # ------------------------------------------------------------------------
4 | # Copyright (c) 2022 megvii-model. All Rights Reserved.
5 | # ------------------------------------------------------------------------
6 | # Modified from DETR3D (https://github.com/WangYueFt/detr3d)
7 | # Copyright (c) 2021 Wang, Yue
8 | # ------------------------------------------------------------------------
9 | from .petr_transformer import PETRTransformer, PETRMultiheadAttention, PETRTransformerEncoder, PETRTransformerDecoder
10 | from .vedet_transformer import VETransformer
11 | from .positional_encoding import FourierMLPEncoding
12 | 
13 | __all__ = [
14 |     'PETRTransformer', 'PETRMultiheadAttention', 'PETRTransformerEncoder', 'PETRTransformerDecoder', 'VETransformer',
15 |     'FourierMLPEncoding'
16 | ]
17 | 
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | WORK_DIR=${PWD}
2 | PROJECT=vedet
3 | DOCKER_IMAGE=${PROJECT}:latest
4 | DOCKER_FILE=docker/Dockerfile-mmlab-cu111
5 | DATA_ROOT?=/mnt/fsx-2/datasets
6 | CKPTS_ROOT?=/mnt/fsx-2/ckpts
7 | SAVE_ROOT?=/mnt/fsx-2/experiments
8 | 
9 | DOCKER_OPTS = \
10 | 	-it \
11 | 	--rm \
12 | 	-e DISPLAY=${DISPLAY} \
13 | 	-v /tmp:/tmp \
14 | 	-v /tmp/.X11-unix:/tmp/.X11-unix \
15 | 	-v ~/.ssh:/root/.ssh \
16 | 	-v ~/.aws:/root/.aws \
17 | 	-v ${WORK_DIR}:/workspace/${PROJECT} \
18 | 	-v ${DATA_ROOT}:/workspace/${PROJECT}/data \
19 | 	-v ${CKPTS_ROOT}:/workspace/${PROJECT}/ckpts \
20 | 	-v ${SAVE_ROOT}:/workspace/${PROJECT}/work_dirs \
21 | 	--shm-size=8G \
22 | 	--ipc=host \
23 | 	--network=host \
24 | 	--pid=host \
25 | 	--privileged
26 | 
27 | DOCKER_BUILD_ARGS = \
28 | 	--build-arg WANDB_ENTITY \
29 | 	--build-arg WANDB_API_KEY \
30 | 
31 | docker-build:
32 | 	nvidia-docker image build -f $(DOCKER_FILE) -t $(DOCKER_IMAGE) \
33 | 	$(DOCKER_BUILD_ARGS) .
34 | 
35 | docker-dev:
36 | 	nvidia-docker run --name $(PROJECT) \
37 | 	$(DOCKER_OPTS) \
38 | 	$(DOCKER_IMAGE) bash
39 | 
40 | clean:
41 | 	find . -name '*.pyc' | xargs sudo rm -f && \
42 | 	find . -name '__pycache__' | xargs sudo rm -rf
43 | 
--------------------------------------------------------------------------------
/projects/mmdet3d_plugin/__init__.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------
2 | # Copyright (c) 2022 megvii-model. All Rights Reserved.
3 | # ------------------------------------------------------------------------ 4 | # Modified from DETR3D (https://github.com/WangYueFt/detr3d) 5 | # Copyright (c) 2021 Wang, Yue 6 | # ------------------------------------------------------------------------ 7 | # Modified from mmdetection3d (https://github.com/open-mmlab/mmdetection3d) 8 | # Copyright (c) OpenMMLab. All rights reserved. 9 | # ------------------------------------------------------------------------ 10 | from .core.bbox.assigners.hungarian_assigner_3d import HungarianAssigner3D 11 | from .core.bbox.coders.nms_free_coder import NMSFreeCoder 12 | from .core.bbox.match_costs import BBox3DL1Cost 13 | from .datasets import CustomNuScenesDataset 14 | from .datasets.pipelines import (PhotoMetricDistortionMultiViewImage, PadMultiViewImage, NormalizeMultiviewImage) 15 | from .models.backbones.vovnet import VoVNet 16 | from .models.dense_heads import * 17 | from .models.detectors import * 18 | from .models.necks import * 19 | from .models.losses import * 20 | from .models.utils import * 21 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2022 megvii-model. All Rights Reserved. 3 | # ------------------------------------------------------------------------ 4 | # Modified from DETR3D (https://github.com/WangYueFt/detr3d) 5 | # Copyright (c) 2021 Wang, Yue 6 | # ------------------------------------------------------------------------ 7 | # Modified from mmdetection3d (https://github.com/open-mmlab/mmdetection3d) 8 | # Copyright (c) OpenMMLab. All rights reserved. 9 | # ------------------------------------------------------------------------ 10 | from .transform_3d import (PadMultiViewImage, NormalizeMultiviewImage, PhotoMetricDistortionMultiViewImage, 11 | ResizeMultiview3D, AlbuMultiview3D, ResizeCropFlipImage, GlobalRotScaleTransImage) 12 | from .loading import LoadMultiViewImageFromMultiSweepsFiles, LoadMapsFromFiles 13 | 14 | __all__ = [ 15 | 'PadMultiViewImage', 'NormalizeMultiviewImage', 'PhotoMetricDistortionMultiViewImage', 16 | 'LoadMultiViewImageFromMultiSweepsFiles', 'LoadMapsFromFiles', 'ResizeMultiview3D', 'AlbuMultiview3D', 17 | 'ResizeCropFlipImage', 'GlobalRotScaleTransImage' 18 | ] 19 | -------------------------------------------------------------------------------- /tools/model_converters/publish_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import subprocess 4 | import torch 5 | 6 | 7 | def parse_args(): 8 | parser = argparse.ArgumentParser( 9 | description='Process a checkpoint to be published') 10 | parser.add_argument('in_file', help='input checkpoint filename') 11 | parser.add_argument('out_file', help='output checkpoint filename') 12 | args = parser.parse_args() 13 | return args 14 | 15 | 16 | def process_checkpoint(in_file, out_file): 17 | checkpoint = torch.load(in_file, map_location='cpu') 18 | # remove optimizer for smaller file size 19 | if 'optimizer' in checkpoint: 20 | del checkpoint['optimizer'] 21 | # if it is necessary to remove some sensitive data in checkpoint['meta'], 22 | # add the code here. 
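    # For example, a hedged sketch (which keys are actually present in
    # checkpoint['meta'] depends on how the checkpoint was produced):
    # for key in ('config', 'env_info', 'hook_msgs'):
    #     checkpoint.get('meta', {}).pop(key, None)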
23 | torch.save(checkpoint, out_file) 24 | sha = subprocess.check_output(['sha256sum', out_file]).decode() 25 | final_file = out_file.rstrip('.pth') + '-{}.pth'.format(sha[:8]) 26 | subprocess.Popen(['mv', out_file, final_file]) 27 | 28 | 29 | def main(): 30 | args = parse_args() 31 | process_checkpoint(args.in_file, args.out_file) 32 | 33 | 34 | if __name__ == '__main__': 35 | main() 36 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/match_costs/match_cost.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2022 Toyota Research Institute, Dian Chen. All Rights Reserved. 3 | # ------------------------------------------------------------------------ 4 | import torch 5 | from mmdet.core.bbox.match_costs.builder import MATCH_COST 6 | from mmdet.core.bbox.iou_calculators import bbox_overlaps 7 | 8 | 9 | @MATCH_COST.register_module() 10 | class BBox3DL1Cost(object): 11 | """BBox3DL1Cost. 12 | Args: 13 | weight (int | float, optional): loss_weight 14 | """ 15 | 16 | def __init__(self, weight=1.): 17 | self.weight = weight 18 | 19 | def __call__(self, bbox_pred, gt_bboxes): 20 | """ 21 | Args: 22 | bbox_pred (Tensor): Predicted boxes with normalized coordinates 23 | (cx, cy, w, h), which are all in range [0, 1]. Shape 24 | [num_query, 4]. 25 | gt_bboxes (Tensor): Ground truth boxes with normalized 26 | coordinates (x1, y1, x2, y2). Shape [num_gt, 4]. 27 | Returns: 28 | torch.Tensor: bbox_cost value with weight 29 | """ 30 | bbox_cost = torch.cdist(bbox_pred, gt_bboxes, p=1) 31 | return bbox_cost * self.weight 32 | -------------------------------------------------------------------------------- /tools/rename_depth_cache.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from glob import glob 3 | import os 4 | from tqdm import tqdm 5 | 6 | if __name__ == "__main__": 7 | parser = argparse.ArgumentParser(description='Rename cache from dd3dv2') 8 | parser.add_argument('--source-dir', type=str, required=True, help='source dir of cache') 9 | parser.add_argument('--target-dir', type=str, required=True, help='source dir of cache') 10 | args = parser.parse_args() 11 | 12 | source_dir = args.source_dir 13 | target_dir = args.target_dir 14 | os.makedirs(target_dir, exist_ok=True) 15 | 16 | cache_files = sorted(glob(os.path.join(source_dir, "*.npz")), key=lambda x: int(os.path.basename(x).split('_')[1])) 17 | visited = set() 18 | for cache_file in tqdm(cache_files): 19 | cache_name = os.path.basename(cache_file) 20 | components = cache_name.split('_') 21 | scene_name, global_idx = components[:2] 22 | if scene_name not in visited: 23 | global_start_idx = int(global_idx) 24 | visited.add(scene_name) 25 | 26 | sample_id = int(global_idx) - int(global_start_idx) 27 | components[1] = f"{sample_id:03d}" 28 | cache_name = "_".join(components) 29 | symlink_path = os.path.join(target_dir, cache_name) 30 | os.system(f'ln -s {cache_file} {symlink_path}') 31 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/losses/Sigmoid_ce_loss.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2022 Toyota Research Institute, Dian Chen. All Rights Reserved. 
3 | # ------------------------------------------------------------------------
4 | # Copyright (c) 2022 megvii-model. All Rights Reserved.
5 | # ------------------------------------------------------------------------
6 | import torch.nn as nn
7 | import torch.nn.functional as F
8 | from mmdet.models.builder import LOSSES
9 | 
10 | 
11 | @LOSSES.register_module()
12 | class Sigmoid_ce_loss(nn.Module):
13 | 
14 |     def __init__(self, loss_weight=1.0):
15 |         super(Sigmoid_ce_loss, self).__init__()
16 |         self.loss_weight = loss_weight
17 | 
18 |     def forward(
19 |         self,
20 |         inputs,
21 |         targets,
22 |     ):
23 |         """Compute a class-balanced binary cross-entropy loss.
24 | 
25 |         Args:
26 |             inputs (torch.Tensor): Raw (pre-sigmoid) predictions.
27 |             targets (torch.Tensor): Binary targets with the same shape as ``inputs``.
28 | 
29 |         Returns:
30 |             torch.Tensor: The weighted BCE loss, scaled by ``loss_weight``.
31 |         """
32 |         # inputs=inputs.sigmoid()
33 |         pos_weight = (targets == 0).float().sum(dim=1) / (targets == 1).float().sum(dim=1).clamp(min=1.0)
34 |         pos_weight = pos_weight.unsqueeze(1)
35 |         weight_loss = targets * pos_weight + (1 - targets)
36 |         loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction="mean", weight=weight_loss)
37 |         return self.loss_weight * loss
38 | 
--------------------------------------------------------------------------------
/tools/data_converter/lyft_data_fixer.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import argparse
3 | import numpy as np
4 | import os
5 | 
6 | 
7 | def fix_lyft(root_folder='./data/lyft', version='v1.01'):
8 |     # refer to https://www.kaggle.com/c/3d-object-detection-for-autonomous-vehicles/discussion/110000 # noqa
9 |     lidar_path = 'lidar/host-a011_lidar1_1233090652702363606.bin'
10 |     root_folder = os.path.join(root_folder, f'{version}-train')
11 |     lidar_path = os.path.join(root_folder, lidar_path)
12 |     assert os.path.isfile(lidar_path), f'Please download the complete Lyft ' \
13 |         f'dataset and make sure {lidar_path} is present.'
14 |     points = np.fromfile(lidar_path, dtype=np.float32, count=-1)
15 |     try:
16 |         points.reshape([-1, 5])
17 |         print(f'This fix is not required for version {version}.')
18 |     except ValueError:
19 |         new_points = np.array(list(points) + [100.0, 1.0], dtype='float32')
20 |         new_points.tofile(lidar_path)
21 |         print(f'Appended 100.0 and 1.0 to the end of {lidar_path}.')
22 | 
23 | 
24 | parser = argparse.ArgumentParser(description='Lyft dataset fixer arg parser')
25 | parser.add_argument(
26 |     '--root-folder',
27 |     type=str,
28 |     default='./data/lyft',
29 |     help='specify the root path of Lyft dataset')
30 | parser.add_argument(
31 |     '--version',
32 |     type=str,
33 |     default='v1.01',
34 |     help='specify Lyft dataset version')
35 | args = parser.parse_args()
36 | 
37 | if __name__ == '__main__':
38 |     fix_lyft(root_folder=args.root_folder, version=args.version)
39 | 
--------------------------------------------------------------------------------
/tools/misc/visualize_results.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
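# Example usage (paths are illustrative):
#   python tools/misc/visualize_results.py projects/configs/vedet_vovnet_p4_1600x640_2vview_2frame.py \
#       --result work_dirs/results.pkl --show-dir work_dirs/vis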
2 | import argparse 3 | import mmcv 4 | from mmcv import Config 5 | 6 | from mmdet3d.datasets import build_dataset 7 | 8 | 9 | def parse_args(): 10 | parser = argparse.ArgumentParser( 11 | description='MMDet3D visualize the results') 12 | parser.add_argument('config', help='test config file path') 13 | parser.add_argument('--result', help='results file in pickle format') 14 | parser.add_argument( 15 | '--show-dir', help='directory where visualize results will be saved') 16 | args = parser.parse_args() 17 | 18 | return args 19 | 20 | 21 | def main(): 22 | args = parse_args() 23 | 24 | if args.result is not None and \ 25 | not args.result.endswith(('.pkl', '.pickle')): 26 | raise ValueError('The results file must be a pkl file.') 27 | 28 | cfg = Config.fromfile(args.config) 29 | cfg.data.test.test_mode = True 30 | 31 | # build the dataset 32 | dataset = build_dataset(cfg.data.test) 33 | results = mmcv.load(args.result) 34 | 35 | if getattr(dataset, 'show', None) is not None: 36 | # data loading pipeline for showing 37 | eval_pipeline = cfg.get('eval_pipeline', {}) 38 | if eval_pipeline: 39 | dataset.show(results, args.show_dir, pipeline=eval_pipeline) 40 | else: 41 | dataset.show(results, args.show_dir) # use default pipeline 42 | else: 43 | raise NotImplementedError( 44 | 'Show is not implemented for dataset {}!'.format( 45 | type(dataset).__name__)) 46 | 47 | 48 | if __name__ == '__main__': 49 | main() 50 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.png 2 | *.jpg 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | *.ipynb 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | 63 | # Flask stuff: 64 | instance/ 65 | .webassets-cache 66 | 67 | # Scrapy stuff: 68 | .scrapy 69 | 70 | # Sphinx documentation 71 | docs/en/_build/ 72 | docs/zh_cn/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # pyenv 81 | .python-version 82 | 83 | # celery beat schedule file 84 | celerybeat-schedule 85 | 86 | # SageMath parsed files 87 | *.sage.py 88 | 89 | # Environments 90 | .env 91 | .venv 92 | env/ 93 | venv/ 94 | ENV/ 95 | env.bak/ 96 | venv.bak/ 97 | 98 | # Spyder project settings 99 | .spyderproject 100 | .spyproject 101 | 102 | # Rope project settings 103 | .ropeproject 104 | 105 | # mkdocs documentation 106 | /site 107 | 108 | # mypy 109 | .mypy_cache/ 110 | 111 | # cython generated cpp 112 | data 113 | .vscode 114 | .idea 115 | 116 | # custom 117 | *.pkl 118 | *.pkl.json 119 | *.log.json 120 | work_dirs/ 121 | ckpts/ 122 | data/ 123 | mmdetection3d/ 124 | exps/ 125 | *~ 126 | mmdet3d/.mim 127 | wandb/ 128 | 129 | # Pytorch 130 | *.pth 131 | 132 | # demo 133 | *.jpg 134 | data/s3dis/Stanford3dDataset_v1.2_Aligned_Version/ 135 | data/scannet/scans/ 136 | data/sunrgbd/OFFICIAL_SUNRGBD/ 137 | *.obj 138 | *.ply 139 | 140 | # Waymo evaluation 141 | mmdet3d/core/evaluation/waymo_utils/compute_detection_metrics_main -------------------------------------------------------------------------------- /tools/misc/fuse_conv_bn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import torch 4 | from mmcv.runner import save_checkpoint 5 | from torch import nn as nn 6 | 7 | from mmdet.apis import init_model 8 | 9 | 10 | def fuse_conv_bn(conv, bn): 11 | """During inference, the functionary of batch norm layers is turned off but 12 | only the mean and var alone channels are used, which exposes the chance to 13 | fuse it with the preceding conv layers to save computations and simplify 14 | network structures.""" 15 | conv_w = conv.weight 16 | conv_b = conv.bias if conv.bias is not None else torch.zeros_like( 17 | bn.running_mean) 18 | 19 | factor = bn.weight / torch.sqrt(bn.running_var + bn.eps) 20 | conv.weight = nn.Parameter(conv_w * 21 | factor.reshape([conv.out_channels, 1, 1, 1])) 22 | conv.bias = nn.Parameter((conv_b - bn.running_mean) * factor + bn.bias) 23 | return conv 24 | 25 | 26 | def fuse_module(m): 27 | last_conv = None 28 | last_conv_name = None 29 | 30 | for name, child in m.named_children(): 31 | if isinstance(child, (nn.BatchNorm2d, nn.SyncBatchNorm)): 32 | if last_conv is None: # only fuse BN that is after Conv 33 | continue 34 | fused_conv = fuse_conv_bn(last_conv, child) 35 | m._modules[last_conv_name] = fused_conv 36 | # To reduce changes, set BN as Identity instead of deleting it. 
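            # (After fusion the conv already carries BN's scale and shift, so the
            # BN layer becomes a no-op at inference time.)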
37 | m._modules[name] = nn.Identity() 38 | last_conv = None 39 | elif isinstance(child, nn.Conv2d): 40 | last_conv = child 41 | last_conv_name = name 42 | else: 43 | fuse_module(child) 44 | return m 45 | 46 | 47 | def parse_args(): 48 | parser = argparse.ArgumentParser( 49 | description='fuse Conv and BN layers in a model') 50 | parser.add_argument('config', help='config file path') 51 | parser.add_argument('checkpoint', help='checkpoint file path') 52 | parser.add_argument('out', help='output path of the converted model') 53 | args = parser.parse_args() 54 | return args 55 | 56 | 57 | def main(): 58 | args = parse_args() 59 | # build the model from a config file and a checkpoint file 60 | model = init_model(args.config, args.checkpoint) 61 | # fuse conv and bn layers of the model 62 | fused_model = fuse_module(model) 63 | save_checkpoint(fused_model, args.out) 64 | 65 | 66 | if __name__ == '__main__': 67 | main() 68 | -------------------------------------------------------------------------------- /docker/Dockerfile-mmlab-cu111: -------------------------------------------------------------------------------- 1 | # Base image 2 | ARG PYTORCH="1.9.0" 3 | ARG CUDA="11.1" 4 | ARG CUDNN="8" 5 | 6 | FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel 7 | 8 | ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX" 9 | ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all" 10 | ENV CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" 11 | 12 | # To fix GPG key error when running apt-get update 13 | RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub 14 | RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub 15 | 16 | # Core tools 17 | RUN apt-get update && apt-get install -y \ 18 | cmake \ 19 | curl \ 20 | docker.io \ 21 | ffmpeg \ 22 | git \ 23 | htop \ 24 | libsm6 \ 25 | libxext6 \ 26 | libglib2.0-0 \ 27 | libsm6 \ 28 | libxrender-dev \ 29 | libxext6 \ 30 | ninja-build \ 31 | unzip \ 32 | vim \ 33 | wget \ 34 | sudo \ 35 | && apt-get clean \ 36 | && rm -rf /var/lib/apt/lists/* 37 | 38 | # ------------------------- 39 | # Optional: W&B credentials 40 | # ------------------------- 41 | ARG WANDB_ENTITY 42 | ENV WANDB_ENTITY=${WANDB_ENTITY} 43 | 44 | ARG WANDB_API_KEY 45 | ENV WANDB_API_KEY=${WANDB_API_KEY} 46 | 47 | # Python tools 48 | RUN pip install \ 49 | wandb==0.12.17 \ 50 | einops==0.4.1 \ 51 | pytorch3d==0.3.0 \ 52 | pycocotools==2.0.4 \ 53 | nuscenes-devkit==1.1.7 \ 54 | timm==0.6.11 55 | 56 | # Install OpenMMLab packages 57 | ARG MMCV="1.4.0" 58 | ARG MMDET="v2.25.0" 59 | ARG MMSEG="v0.20.2" 60 | ARG MMDET3D="v0.17.1" 61 | ENV FORCE_CUDA="1" 62 | 63 | RUN pip install mmcv-full==${MMCV} -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html 64 | 65 | WORKDIR /workspace/mmlab 66 | RUN git clone https://github.com/open-mmlab/mmdetection.git && cd mmdetection && \ 67 | git checkout ${MMDET} && \ 68 | pip install -r requirements/build.txt && pip install -e . 69 | ENV PYTHONPATH="/workspace/mmlab/mmdetection:$PYTHONPATH" 70 | 71 | RUN git clone https://github.com/open-mmlab/mmsegmentation.git && cd mmsegmentation && \ 72 | git checkout ${MMSEG} && \ 73 | pip install -e . 74 | ENV PYTHONPATH="/workspace/mmlab/mmsegmentation:$PYTHONPATH" 75 | 76 | RUN git clone https://github.com/open-mmlab/mmdetection3d.git && cd mmdetection3d && \ 77 | git checkout ${MMDET3D} && \ 78 | pip install -e . 
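# Keep the cloned source tree on PYTHONPATH so it is importable from any working directory.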
79 | ENV PYTHONPATH="/workspace/mmlab/mmdetection3d:$PYTHONPATH" 80 | 81 | WORKDIR /workspace/vedet 82 | RUN git config --global --add safe.directory /workspace/vedet 83 | -------------------------------------------------------------------------------- /docs/INSTALL.md: -------------------------------------------------------------------------------- 1 | ## Use Docker Environment 2 | We provide a self-contained dockerfile and recommend preparing the environment using docker. To build the image, run the following command in this directory: 3 | ```bash 4 | make docker-build 5 | ``` 6 | 7 | We use [Weights & Biases](https://wandb.ai/site) to log the training. To optionally build your wandb credentials into the docker, run (if you don't build credentials into the docker, you can still manually log into wandb after entering the docker container): 8 | ```bash 9 | make docker-build WANDB_API_KEY= WANDB_ENTITY= 10 | ``` 11 | 12 | After the image is built, run the following command with your paths on the host machine for data, checkpoints, and logging, to enter the dockerized environment: 13 | ```bash 14 | # DATA_ROOT will be mounted as /workspace/vedet/data 15 | # CKPTS_ROOT will be mounted as /workspace/vedet/ckpts 16 | # SAVE_ROOT will be mounted as /workspace/vedet/work_dirs 17 | make docker-dev DATA_ROOT= CKPTS_ROOT= SAVE_ROOT= 18 | ``` 19 | 20 | Inside the docker the folder structure will look like this, with data, checkpoints, logging paths mounted under `/workspace/vedet/`: 21 | ``` 22 | /workspace/ 23 | |-- mmlab 24 | | |-- mmdetection 25 | | |-- mmdetection3d 26 | | `-- mmsegmentation 27 | `-- vedet 28 | |-- LICENSE.md 29 | |-- Makefile 30 | |-- README.md 31 | |-- ckpts 32 | |-- data 33 | |-- docker 34 | |-- docs 35 | |-- projects 36 | |-- requirements.txt 37 | |-- tools 38 | ``` 39 | 40 | ## Use Pip/Conda Environment 41 | The pytorch version we use in this project is `1.9.0` with CUDA `11.1`, CUDNN `8`. After install the right version in your environment, please install the following dependencies. 42 | 43 | ### Python tools 44 | ```bash 45 | pip install \ 46 | wandb==0.12.17 \ 47 | einops==0.4.1 \ 48 | pytorch3d==0.3.0 \ 49 | pycocotools==2.0.4 \ 50 | nuscenes-devkit==1.1.7 \ 51 | timm==0.6.11 52 | ``` 53 | 54 | ### OpenMMLab packages 55 | ```bash 56 | export MMCV="1.4.0" 57 | export MMDET="v2.25.0" 58 | export MMSEG="v0.20.2" 59 | export MMDET3D="v0.17.1" 60 | export FORCE_CUDA="1" 61 | 62 | # install mmcv 63 | pip install mmcv-full==${MMCV} -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html 64 | 65 | # install mmdetection 66 | git clone https://github.com/open-mmlab/mmdetection.git 67 | cd mmdetection && git checkout ${MMDET} 68 | pip install -r requirements/build.txt && pip install -e . 69 | 70 | # install mmsegmentation 71 | git clone https://github.com/open-mmlab/mmsegmentation.git 72 | cd mmsegmentation && git checkout ${MMSEG} 73 | pip install -e . 74 | 75 | # install mmdetection3d 76 | git clone https://github.com/open-mmlab/mmdetection3d.git 77 | cd mmdetection3d && git checkout ${MMDET3D} 78 | pip install -e . 
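
# (optional) sanity-check the installs above; assumes all four packages built successfully
python -c "import mmcv, mmdet, mmseg, mmdet3d; print(mmcv.__version__, mmdet.__version__, mmseg.__version__, mmdet3d.__version__)"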
79 | ``` 80 | 81 | ### Data, checkpoints, logging paths 82 | ```bash 83 | # enter the project top-level directory 84 | cd vedet 85 | ln -s $DATA_ROOT data/ 86 | ln -s $CKPTS_ROOT ckpts/ 87 | ln -s $SAVE_ROOT work_dirs/ 88 | ``` 89 | -------------------------------------------------------------------------------- /tools/analysis_tools/benchmark.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import time 4 | import torch 5 | from mmcv import Config 6 | from mmcv.parallel import MMDataParallel 7 | from mmcv.runner import load_checkpoint, wrap_fp16_model 8 | 9 | from mmdet3d.datasets import build_dataloader, build_dataset 10 | from mmdet3d.models import build_detector 11 | from tools.misc.fuse_conv_bn import fuse_module 12 | 13 | 14 | def parse_args(): 15 | parser = argparse.ArgumentParser(description='MMDet benchmark a model') 16 | parser.add_argument('config', help='test config file path') 17 | parser.add_argument('checkpoint', help='checkpoint file') 18 | parser.add_argument('--samples', default=2000, help='samples to benchmark') 19 | parser.add_argument( 20 | '--log-interval', default=50, help='interval of logging') 21 | parser.add_argument( 22 | '--fuse-conv-bn', 23 | action='store_true', 24 | help='Whether to fuse conv and bn, this will slightly increase' 25 | 'the inference speed') 26 | args = parser.parse_args() 27 | return args 28 | 29 | 30 | def main(): 31 | args = parse_args() 32 | 33 | cfg = Config.fromfile(args.config) 34 | # set cudnn_benchmark 35 | if cfg.get('cudnn_benchmark', False): 36 | torch.backends.cudnn.benchmark = True 37 | cfg.model.pretrained = None 38 | cfg.data.test.test_mode = True 39 | 40 | # build the dataloader 41 | # TODO: support multiple images per gpu (only minor changes are needed) 42 | dataset = build_dataset(cfg.data.test) 43 | data_loader = build_dataloader( 44 | dataset, 45 | samples_per_gpu=1, 46 | workers_per_gpu=cfg.data.workers_per_gpu, 47 | dist=False, 48 | shuffle=False) 49 | 50 | # build the model and load checkpoint 51 | cfg.model.train_cfg = None 52 | model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg')) 53 | fp16_cfg = cfg.get('fp16', None) 54 | if fp16_cfg is not None: 55 | wrap_fp16_model(model) 56 | load_checkpoint(model, args.checkpoint, map_location='cpu') 57 | if args.fuse_conv_bn: 58 | model = fuse_module(model) 59 | 60 | model = MMDataParallel(model, device_ids=[0]) 61 | 62 | model.eval() 63 | 64 | # the first several iterations may be very slow so skip them 65 | num_warmup = 5 66 | pure_inf_time = 0 67 | 68 | # benchmark with several samples and take the average 69 | for i, data in enumerate(data_loader): 70 | 71 | torch.cuda.synchronize() 72 | start_time = time.perf_counter() 73 | 74 | with torch.no_grad(): 75 | model(return_loss=False, rescale=True, **data) 76 | 77 | torch.cuda.synchronize() 78 | elapsed = time.perf_counter() - start_time 79 | 80 | if i >= num_warmup: 81 | pure_inf_time += elapsed 82 | if (i + 1) % args.log_interval == 0: 83 | fps = (i + 1 - num_warmup) / pure_inf_time 84 | print(f'Done image [{i + 1:<3}/ {args.samples}], ' 85 | f'fps: {fps:.1f} img / s') 86 | 87 | if (i + 1) == args.samples: 88 | pure_inf_time += elapsed 89 | fps = (i + 1 - num_warmup) / pure_inf_time 90 | print(f'Overall fps: {fps:.1f} img / s') 91 | break 92 | 93 | 94 | if __name__ == '__main__': 95 | main() 96 | -------------------------------------------------------------------------------- 
/tools/model_converters/regnet2mmdet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import torch 4 | from collections import OrderedDict 5 | 6 | 7 | def convert_stem(model_key, model_weight, state_dict, converted_names): 8 | new_key = model_key.replace('stem.conv', 'conv1') 9 | new_key = new_key.replace('stem.bn', 'bn1') 10 | state_dict[new_key] = model_weight 11 | converted_names.add(model_key) 12 | print(f'Convert {model_key} to {new_key}') 13 | 14 | 15 | def convert_head(model_key, model_weight, state_dict, converted_names): 16 | new_key = model_key.replace('head.fc', 'fc') 17 | state_dict[new_key] = model_weight 18 | converted_names.add(model_key) 19 | print(f'Convert {model_key} to {new_key}') 20 | 21 | 22 | def convert_reslayer(model_key, model_weight, state_dict, converted_names): 23 | split_keys = model_key.split('.') 24 | layer, block, module = split_keys[:3] 25 | block_id = int(block[1:]) 26 | layer_name = f'layer{int(layer[1:])}' 27 | block_name = f'{block_id - 1}' 28 | 29 | if block_id == 1 and module == 'bn': 30 | new_key = f'{layer_name}.{block_name}.downsample.1.{split_keys[-1]}' 31 | elif block_id == 1 and module == 'proj': 32 | new_key = f'{layer_name}.{block_name}.downsample.0.{split_keys[-1]}' 33 | elif module == 'f': 34 | if split_keys[3] == 'a_bn': 35 | module_name = 'bn1' 36 | elif split_keys[3] == 'b_bn': 37 | module_name = 'bn2' 38 | elif split_keys[3] == 'c_bn': 39 | module_name = 'bn3' 40 | elif split_keys[3] == 'a': 41 | module_name = 'conv1' 42 | elif split_keys[3] == 'b': 43 | module_name = 'conv2' 44 | elif split_keys[3] == 'c': 45 | module_name = 'conv3' 46 | new_key = f'{layer_name}.{block_name}.{module_name}.{split_keys[-1]}' 47 | else: 48 | raise ValueError(f'Unsupported conversion of key {model_key}') 49 | print(f'Convert {model_key} to {new_key}') 50 | state_dict[new_key] = model_weight 51 | converted_names.add(model_key) 52 | 53 | 54 | def convert(src, dst): 55 | """Convert keys in pycls pretrained RegNet models to mmdet style.""" 56 | # load caffe model 57 | regnet_model = torch.load(src) 58 | blobs = regnet_model['model_state'] 59 | # convert to pytorch style 60 | state_dict = OrderedDict() 61 | converted_names = set() 62 | for key, weight in blobs.items(): 63 | if 'stem' in key: 64 | convert_stem(key, weight, state_dict, converted_names) 65 | elif 'head' in key: 66 | convert_head(key, weight, state_dict, converted_names) 67 | elif key.startswith('s'): 68 | convert_reslayer(key, weight, state_dict, converted_names) 69 | 70 | # check if all layers are converted 71 | for key in blobs: 72 | if key not in converted_names: 73 | print(f'not converted: {key}') 74 | # save checkpoint 75 | checkpoint = dict() 76 | checkpoint['state_dict'] = state_dict 77 | torch.save(checkpoint, dst) 78 | 79 | 80 | def main(): 81 | parser = argparse.ArgumentParser(description='Convert model keys') 82 | parser.add_argument('src', help='src detectron model path') 83 | parser.add_argument('dst', help='save path') 84 | args = parser.parse_args() 85 | convert(args.src, args.dst) 86 | 87 | 88 | if __name__ == '__main__': 89 | main() 90 | -------------------------------------------------------------------------------- /tools/analysis_tools/get_flops.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
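# Example usage (arguments are illustrative; for a camera-based model use the image modality):
#   python tools/analysis_tools/get_flops.py <config> --modality image --shape 640 1600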
2 | import argparse 3 | import torch 4 | from mmcv import Config, DictAction 5 | 6 | from mmdet3d.models import build_model 7 | 8 | try: 9 | from mmcv.cnn import get_model_complexity_info 10 | except ImportError: 11 | raise ImportError('Please upgrade mmcv to >0.6.2') 12 | 13 | 14 | def parse_args(): 15 | parser = argparse.ArgumentParser(description='Train a detector') 16 | parser.add_argument('config', help='train config file path') 17 | parser.add_argument( 18 | '--shape', 19 | type=int, 20 | nargs='+', 21 | default=[40000, 4], 22 | help='input point cloud size') 23 | parser.add_argument( 24 | '--modality', 25 | type=str, 26 | default='point', 27 | choices=['point', 'image', 'multi'], 28 | help='input data modality') 29 | parser.add_argument( 30 | '--cfg-options', 31 | nargs='+', 32 | action=DictAction, 33 | help='override some settings in the used config, the key-value pair ' 34 | 'in xxx=yyy format will be merged into config file. If the value to ' 35 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 36 | 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' 37 | 'Note that the quotation marks are necessary and that no white space ' 38 | 'is allowed.') 39 | args = parser.parse_args() 40 | return args 41 | 42 | 43 | def main(): 44 | 45 | args = parse_args() 46 | 47 | if args.modality == 'point': 48 | assert len(args.shape) == 2, 'invalid input shape' 49 | input_shape = tuple(args.shape) 50 | elif args.modality == 'image': 51 | if len(args.shape) == 1: 52 | input_shape = (3, args.shape[0], args.shape[0]) 53 | elif len(args.shape) == 2: 54 | input_shape = (3, ) + tuple(args.shape) 55 | else: 56 | raise ValueError('invalid input shape') 57 | elif args.modality == 'multi': 58 | raise NotImplementedError( 59 | 'FLOPs counter is currently not supported for models with ' 60 | 'multi-modality input') 61 | 62 | cfg = Config.fromfile(args.config) 63 | if args.cfg_options is not None: 64 | cfg.merge_from_dict(args.cfg_options) 65 | # import modules from string list. 66 | if cfg.get('custom_imports', None): 67 | from mmcv.utils import import_modules_from_strings 68 | import_modules_from_strings(**cfg['custom_imports']) 69 | 70 | model = build_model( 71 | cfg.model, 72 | train_cfg=cfg.get('train_cfg'), 73 | test_cfg=cfg.get('test_cfg')) 74 | if torch.cuda.is_available(): 75 | model.cuda() 76 | model.eval() 77 | 78 | if hasattr(model, 'forward_dummy'): 79 | model.forward = model.forward_dummy 80 | else: 81 | raise NotImplementedError( 82 | 'FLOPs counter is currently not supported for {}'.format( 83 | model.__class__.__name__)) 84 | 85 | flops, params = get_model_complexity_info(model, input_shape) 86 | split_line = '=' * 30 87 | print(f'{split_line}\nInput shape: {input_shape}\n' 88 | f'Flops: {flops}\nParams: {params}\n{split_line}') 89 | print('!!!Please be cautious if you use the results in papers. ' 90 | 'You may need to check if all ops are supported and verify that the ' 91 | 'flops computation is correct.') 92 | 93 | 94 | if __name__ == '__main__': 95 | main() 96 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/util.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from .array_converter import array_converter 3 | 4 | 5 | @array_converter(apply_to=('points', 'cam2img')) 6 | def points_img2cam(points, cam2img): 7 | """Project points in image coordinates to camera coordinates. 
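    Each 2.5D point (u, v, depth) is lifted by scaling its homogeneous pixel
    coordinate by the depth and multiplying by the inverse of the (padded)
    camera intrinsic matrix.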
8 | 9 | Args: 10 | points (torch.Tensor): 2.5D points in 2D images, [N, 3], 11 | 3 corresponds with x, y in the image and depth. 12 | cam2img (torch.Tensor): Camera intrinsic matrix. The shape can be 13 | [3, 3], [3, 4] or [4, 4]. 14 | 15 | Returns: 16 | torch.Tensor: points in 3D space. [N, 3], 17 | 3 corresponds with x, y, z in 3D space. 18 | """ 19 | assert cam2img.shape[0] <= 4 20 | assert cam2img.shape[1] <= 4 21 | assert points.shape[1] == 3 22 | 23 | xys = points[:, :2] 24 | depths = points[:, 2].view(-1, 1) 25 | unnormed_xys = torch.cat([xys * depths, depths], dim=1) 26 | 27 | pad_cam2img = torch.eye(4, dtype=xys.dtype, device=xys.device) 28 | pad_cam2img[:cam2img.shape[0], :cam2img.shape[1]] = cam2img 29 | inv_pad_cam2img = torch.inverse(pad_cam2img).transpose(0, 1) 30 | 31 | # Do operation in homogeneous coordinates. 32 | num_points = unnormed_xys.shape[0] 33 | homo_xys = torch.cat([unnormed_xys, xys.new_ones((num_points, 1))], dim=1) 34 | points3D = torch.mm(homo_xys, inv_pad_cam2img)[:, :3] 35 | 36 | return points3D 37 | 38 | 39 | def normalize_bbox(bboxes, pc_range): 40 | include_velocity = (bboxes.shape[-1] % 9 == 0) 41 | num_properties = 9 if include_velocity else 7 42 | num_views = bboxes.shape[-1] // num_properties 43 | 44 | cx = bboxes[..., 0::num_properties] 45 | cy = bboxes[..., 1::num_properties] 46 | cz = bboxes[..., 2::num_properties] 47 | w = bboxes[..., 3::num_properties].log() 48 | l = bboxes[..., 4::num_properties].log() 49 | h = bboxes[..., 5::num_properties].log() 50 | 51 | rot = bboxes[..., 6::num_properties] 52 | if include_velocity: 53 | vx = bboxes[..., 7::num_properties] 54 | vy = bboxes[..., 8::num_properties] 55 | # (..., 10 x V) 56 | normalized_bboxes = torch.cat((cx, cy, w, l, cz, h, rot.sin(), rot.cos(), vx, vy), dim=-1) 57 | else: 58 | # (..., 8 x V) 59 | normalized_bboxes = torch.cat((cx, cy, w, l, cz, h, rot.sin(), rot.cos()), dim=-1) 60 | 61 | normalized_bboxes = normalized_bboxes.reshape(*normalized_bboxes.shape[:-1], num_properties + 1, num_views) 62 | # (..., V x P) 63 | normalized_bboxes = normalized_bboxes.transpose(-1, -2).flatten(-2) 64 | 65 | return normalized_bboxes 66 | 67 | 68 | def denormalize_bbox(normalized_bboxes, pc_range): 69 | include_velocity = (normalized_bboxes.shape[-1] % 10 == 0) 70 | num_properties = 10 if include_velocity else 8 71 | num_views = normalized_bboxes.shape[-1] // num_properties 72 | 73 | # rotation 74 | rot_sin = normalized_bboxes[..., 6::num_properties] 75 | rot_cos = normalized_bboxes[..., 7::num_properties] 76 | rot = torch.atan2(rot_sin, rot_cos) 77 | 78 | # center in the bev 79 | cx = normalized_bboxes[..., 0::num_properties] 80 | cy = normalized_bboxes[..., 1::num_properties] 81 | cz = normalized_bboxes[..., 4::num_properties] 82 | 83 | # size 84 | w = normalized_bboxes[..., 2::num_properties] 85 | l = normalized_bboxes[..., 3::num_properties] 86 | h = normalized_bboxes[..., 5::num_properties] 87 | 88 | w = w.exp() 89 | l = l.exp() 90 | h = h.exp() 91 | if include_velocity: 92 | # velocity 93 | vx = normalized_bboxes[:, 8::num_properties] 94 | vy = normalized_bboxes[:, 9::num_properties] 95 | # (..., 9 x V) 96 | denormalized_bboxes = torch.cat([cx, cy, cz, w, l, h, rot, vx, vy], dim=-1) 97 | else: 98 | # (..., 7 x V) 99 | denormalized_bboxes = torch.cat([cx, cy, cz, w, l, h, rot], dim=-1) 100 | 101 | denormalized_bboxes = denormalized_bboxes.reshape(*denormalized_bboxes.shape[:-1], num_properties - 1, num_views) 102 | # (..., V * P) 103 | denormalized_bboxes = denormalized_bboxes.transpose(-1, 
-2).flatten(-2) 104 | 105 | return denormalized_bboxes -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/grid_mask.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | from PIL import Image 5 | 6 | class Grid(object): 7 | def __init__(self, use_h, use_w, rotate = 1, offset=False, ratio = 0.5, mode=0, prob = 1.): 8 | self.use_h = use_h 9 | self.use_w = use_w 10 | self.rotate = rotate 11 | self.offset = offset 12 | self.ratio = ratio 13 | self.mode=mode 14 | self.st_prob = prob 15 | self.prob = prob 16 | 17 | def set_prob(self, epoch, max_epoch): 18 | self.prob = self.st_prob * epoch / max_epoch 19 | 20 | def __call__(self, img, label): 21 | if np.random.rand() > self.prob: 22 | return img, label 23 | h = img.size(1) 24 | w = img.size(2) 25 | self.d1 = 2 26 | self.d2 = min(h, w) 27 | hh = int(1.5*h) 28 | ww = int(1.5*w) 29 | d = np.random.randint(self.d1, self.d2) 30 | if self.ratio == 1: 31 | self.l = np.random.randint(1, d) 32 | else: 33 | self.l = min(max(int(d*self.ratio+0.5),1),d-1) 34 | mask = np.ones((hh, ww), np.float32) 35 | st_h = np.random.randint(d) 36 | st_w = np.random.randint(d) 37 | if self.use_h: 38 | for i in range(hh//d): 39 | s = d*i + st_h 40 | t = min(s+self.l, hh) 41 | mask[s:t,:] *= 0 42 | if self.use_w: 43 | for i in range(ww//d): 44 | s = d*i + st_w 45 | t = min(s+self.l, ww) 46 | mask[:,s:t] *= 0 47 | 48 | r = np.random.randint(self.rotate) 49 | mask = Image.fromarray(np.uint8(mask)) 50 | mask = mask.rotate(r) 51 | mask = np.asarray(mask) 52 | mask = mask[(hh-h)//2:(hh-h)//2+h, (ww-w)//2:(ww-w)//2+w] 53 | 54 | mask = torch.from_numpy(mask).float() 55 | if self.mode == 1: 56 | mask = 1-mask 57 | 58 | mask = mask.expand_as(img) 59 | if self.offset: 60 | offset = torch.from_numpy(2 * (np.random.rand(h,w) - 0.5)).float() 61 | offset = (1 - mask) * offset 62 | img = img * mask + offset 63 | else: 64 | img = img * mask 65 | 66 | return img, label 67 | 68 | 69 | class GridMask(nn.Module): 70 | def __init__(self, use_h, use_w, rotate = 1, offset=False, ratio = 0.5, mode=0, prob = 1.): 71 | super(GridMask, self).__init__() 72 | self.use_h = use_h 73 | self.use_w = use_w 74 | self.rotate = rotate 75 | self.offset = offset 76 | self.ratio = ratio 77 | self.mode = mode 78 | self.st_prob = prob 79 | self.prob = prob 80 | 81 | def set_prob(self, epoch, max_epoch): 82 | self.prob = self.st_prob * epoch / max_epoch #+ 1.#0.5 83 | 84 | def forward(self, x): 85 | if np.random.rand() > self.prob or not self.training: 86 | return x 87 | n,c,h,w = x.size() 88 | x = x.view(-1,h,w) 89 | hh = int(1.5*h) 90 | ww = int(1.5*w) 91 | d = np.random.randint(2, h) 92 | self.l = min(max(int(d*self.ratio+0.5),1),d-1) 93 | mask = np.ones((hh, ww), np.float32) 94 | st_h = np.random.randint(d) 95 | st_w = np.random.randint(d) 96 | if self.use_h: 97 | for i in range(hh//d): 98 | s = d*i + st_h 99 | t = min(s+self.l, hh) 100 | mask[s:t,:] *= 0 101 | if self.use_w: 102 | for i in range(ww//d): 103 | s = d*i + st_w 104 | t = min(s+self.l, ww) 105 | mask[:,s:t] *= 0 106 | 107 | r = np.random.randint(self.rotate) 108 | mask = Image.fromarray(np.uint8(mask)) 109 | mask = mask.rotate(r) 110 | mask = np.asarray(mask) 111 | mask = mask[(hh-h)//2:(hh-h)//2+h, (ww-w)//2:(ww-w)//2+w] 112 | 113 | mask = torch.from_numpy(mask).float().cuda() 114 | if self.mode == 1: 115 | mask = 1-mask 116 | mask = mask.expand_as(x) 117 | if self.offset: 118 | 
offset = torch.from_numpy(2 * (np.random.rand(h,w) - 0.5)).float().cuda() 119 | x = x * mask + offset * (1 - mask) 120 | else: 121 | x = x * mask 122 | 123 | return x.view(n,c,h,w) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # VEDet: Viewpoint Equivariance for Multi-View 3D Object Detection (CVPR 2023) 3 | 4 | This is the official implementation of CVPR 2023 paper [**Viewpoint Equivariance for Multi-View 3D Object Detection**](https://arxiv.org/abs/2303.14548) authored by [Dian Chen](https://scholar.google.com/citations?user=zdAyna8AAAAJ&hl=en), [Jie Li](https://scholar.google.com/citations?user=_I3COxAAAAAJ&hl=en), [Vitor Guizilini](https://scholar.google.com/citations?user=UH9tP6QAAAAJ&hl=en), [Rares Ambrus](https://scholar.google.com/citations?user=2xjjS3oAAAAJ&hl=en), and [Adrien Gaidon](https://scholar.google.com/citations?user=2StUgf4AAAAJ&hl=en), at [Toyota Research Institute](https://www.tri.global/). We introduce viewpoint equivariance on view-conditioned object queries achieving state-of-the-art 3D object performance. 5 | 6 | ![framework](media/framework.png) 7 | - [May 4, 2023] Our code and models are released! 8 | - [Mar. 27, 2023] ~~Our code and models will be released soon. Please stay tuned!~~ 9 | 10 | 11 | ## Contents 12 | - [Install](#install) 13 | - [Dataset preparation](#dataset-preparation) 14 | - [Training](#training) 15 | - [Inference](#inference) 16 | - [License](#license) 17 | - [Reference](#reference) 18 | 19 | 20 | ## Install 21 | 22 | We provide instructions for using docker environment and pip/conda environment (docker is recommended for portability and reproducibility). Please refer to [INSTALL.md](docs/INSTALL.md) for detailed instructions. 23 | 24 | ## Dataset preparation 25 | Please download the full [NuScenes dataset from the official website](https://www.nuscenes.org/nuscenes#download), and preprocess the meta data following the [instructions from MMDetection3D](https://github.com/open-mmlab/mmdetection3d/blob/master/docs/en/data_preparation.md) to obtain the `.pkl` files with mmdet3d format. For convenience we provide the preprocessed `.pkl` files for nuscenes dataset [here](https://tri-ml-public.s3.amazonaws.com/github/vedet/nuscenes_infos.zip). Put the `.pkl` files under the NuScenes folder. 26 | 27 | ## Training 28 | To train a model with the provided configs, please run the following: 29 | ```bash 30 | # run distributed training with 8 GPUs 31 | # tools/dist_train.sh 8 --work-dir --cfg-options 32 | 33 | # for example: 34 | tools/dist_train.sh projects/configs/vedet_vovnet_p4_1600x640_2vview_2frame.py 8 --work-dir work_dirs/vedet_vovnet_p4_1600x640_2vview_2frame/ 35 | ``` 36 | Before running the training with V2-99 backbone, please download the [DD3D](https://arxiv.org/abs/2108.06417) pre-trained weights from [here](https://tri-ml-public.s3.amazonaws.com/github/vedet/fcos3d_vovnet_imgbackbone-remapped.pth). 37 | 38 | We provide results on the NuScenes `val` set from the paper, as summarized below. 
39 | 40 | | config | mAP | NDS | resolution | backbone | context | download | 41 | |:------:|:---:|:---:|:----------:|:-------:|:-----:|:-----:| 42 | | [vedet_vovnet_p4_1600x640_2vview_2frame](projects/configs/vedet_vovnet_p4_1600x640_2vview_2frame.py) | 0.451 | 0.527 | 1600x640 | V2-99 | current + 1 past frame | [model](https://tri-ml-public.s3.amazonaws.com/github/vedet/vedet_vovnet_p4_1600x640_2vview_2frame/latest.pth) / [log](https://tri-ml-public.s3.amazonaws.com/github/vedet/vedet_vovnet_p4_1600x640_2vview_2frame/20230130_000443.log) | 43 | 44 | 45 | ## Inference 46 | To run inference with a checkpoint, please run the following: 47 | ```bash 48 | # run distributed evaluation with 8 GPUs 49 | # tools/dist_test.sh <config path> <checkpoint path> <num GPUs> --eval bbox 50 | 51 | # for example: 52 | tools/dist_test.sh projects/configs/vedet_vovnet_p4_1600x640_2vview_2frame.py work_dirs/vedet_vovnet_p4_1600x640_2vview_2frame/latest.pth 8 --eval bbox 53 | ``` 54 | 55 | ## License 56 | We release this repo under the [CC BY-NC 4.0](LICENSE.md) license. 57 | 58 | ## Reference 59 | If you have any questions, feel free to open an issue under this repo, or contact us. 60 | If you find this work helpful to your research, please consider citing us: 61 | 62 | ``` 63 | @article{chen2023viewpoint, 64 | title={Viewpoint Equivariance for Multi-View 3D Object Detection}, 65 | author={Chen, Dian and Li, Jie and Guizilini, Vitor and Ambrus, Rares and Gaidon, Adrien}, 66 | journal={arXiv preprint arXiv:2303.14548}, 67 | year={2023} 68 | } 69 | ``` 70 | We also thank the authors of [detr3d](https://github.com/WangYueFt/detr3d) and [petr/petrv2](https://github.com/megvii-research/PETR). 71 | -------------------------------------------------------------------------------- /tools/data_converter/indoor_converter.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import mmcv 3 | import numpy as np 4 | import os 5 | 6 | from tools.data_converter.s3dis_data_utils import S3DISData, S3DISSegData 7 | from tools.data_converter.scannet_data_utils import ScanNetData, ScanNetSegData 8 | from tools.data_converter.sunrgbd_data_utils import SUNRGBDData 9 | 10 | 11 | def create_indoor_info_file(data_path, 12 | pkl_prefix='sunrgbd', 13 | save_path=None, 14 | use_v1=False, 15 | workers=4): 16 | """Create indoor information file. 17 | 18 | Get information of the raw data and save it to the pkl file. 19 | 20 | Args: 21 | data_path (str): Path of the data. 22 | pkl_prefix (str): Prefix of the pkl to be saved. Default: 'sunrgbd'. 23 | save_path (str): Path of the pkl to be saved. Default: None. 24 | use_v1 (bool): Whether to use v1. Default: False. 25 | workers (int): Number of threads to be used. Default: 4.
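        Returns:
            None. The generated info files are dumped under ``save_path``:
            ``{pkl_prefix}_infos_train.pkl`` and ``{pkl_prefix}_infos_val.pkl``
            for SUN RGB-D and ScanNet (plus ``{pkl_prefix}_infos_test.pkl`` for
            ScanNet), and one ``{pkl_prefix}_infos_Area_{i}.pkl`` per area for
            S3DIS.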
26 | """ 27 | assert os.path.exists(data_path) 28 | assert pkl_prefix in ['sunrgbd', 'scannet', 's3dis'], \ 29 | f'unsupported indoor dataset {pkl_prefix}' 30 | save_path = data_path if save_path is None else save_path 31 | assert os.path.exists(save_path) 32 | 33 | # generate infos for both detection and segmentation task 34 | if pkl_prefix in ['sunrgbd', 'scannet']: 35 | train_filename = os.path.join(save_path, 36 | f'{pkl_prefix}_infos_train.pkl') 37 | val_filename = os.path.join(save_path, f'{pkl_prefix}_infos_val.pkl') 38 | if pkl_prefix == 'sunrgbd': 39 | # SUN RGB-D has a train-val split 40 | train_dataset = SUNRGBDData( 41 | root_path=data_path, split='train', use_v1=use_v1) 42 | val_dataset = SUNRGBDData( 43 | root_path=data_path, split='val', use_v1=use_v1) 44 | else: 45 | # ScanNet has a train-val-test split 46 | train_dataset = ScanNetData(root_path=data_path, split='train') 47 | val_dataset = ScanNetData(root_path=data_path, split='val') 48 | test_dataset = ScanNetData(root_path=data_path, split='test') 49 | test_filename = os.path.join(save_path, 50 | f'{pkl_prefix}_infos_test.pkl') 51 | 52 | infos_train = train_dataset.get_infos( 53 | num_workers=workers, has_label=True) 54 | mmcv.dump(infos_train, train_filename, 'pkl') 55 | print(f'{pkl_prefix} info train file is saved to {train_filename}') 56 | 57 | infos_val = val_dataset.get_infos(num_workers=workers, has_label=True) 58 | mmcv.dump(infos_val, val_filename, 'pkl') 59 | print(f'{pkl_prefix} info val file is saved to {val_filename}') 60 | 61 | if pkl_prefix == 'scannet': 62 | infos_test = test_dataset.get_infos( 63 | num_workers=workers, has_label=False) 64 | mmcv.dump(infos_test, test_filename, 'pkl') 65 | print(f'{pkl_prefix} info test file is saved to {test_filename}') 66 | 67 | # generate infos for the semantic segmentation task 68 | # e.g. re-sampled scene indexes and label weights 69 | # scene indexes are used to re-sample rooms with different number of points 70 | # label weights are used to balance classes with different number of points 71 | if pkl_prefix == 'scannet': 72 | # label weight computation function is adopted from 73 | # https://github.com/charlesq34/pointnet2/blob/master/scannet/scannet_dataset.py#L24 74 | train_dataset = ScanNetSegData( 75 | data_root=data_path, 76 | ann_file=train_filename, 77 | split='train', 78 | num_points=8192, 79 | label_weight_func=lambda x: 1.0 / np.log(1.2 + x)) 80 | # TODO: do we need to generate on val set? 
81 | val_dataset = ScanNetSegData( 82 | data_root=data_path, 83 | ann_file=val_filename, 84 | split='val', 85 | num_points=8192, 86 | label_weight_func=lambda x: 1.0 / np.log(1.2 + x)) 87 | # no need to generate for test set 88 | train_dataset.get_seg_infos() 89 | val_dataset.get_seg_infos() 90 | elif pkl_prefix == 's3dis': 91 | # S3DIS doesn't have a fixed train-val split 92 | # it has 6 areas instead, so we generate info file for each of them 93 | # in training, we will use dataset to wrap different areas 94 | splits = [f'Area_{i}' for i in [1, 2, 3, 4, 5, 6]] 95 | for split in splits: 96 | dataset = S3DISData(root_path=data_path, split=split) 97 | info = dataset.get_infos(num_workers=workers, has_label=True) 98 | filename = os.path.join(save_path, 99 | f'{pkl_prefix}_infos_{split}.pkl') 100 | mmcv.dump(info, filename, 'pkl') 101 | print(f'{pkl_prefix} info {split} file is saved to {filename}') 102 | seg_dataset = S3DISSegData( 103 | data_root=data_path, 104 | ann_file=filename, 105 | split=split, 106 | num_points=4096, 107 | label_weight_func=lambda x: 1.0 / np.log(1.2 + x)) 108 | seg_dataset.get_seg_infos() 109 | -------------------------------------------------------------------------------- /tools/model_converters/convert_votenet_checkpoints.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import tempfile 4 | import torch 5 | from mmcv import Config 6 | from mmcv.runner import load_state_dict 7 | 8 | from mmdet3d.models import build_detector 9 | 10 | 11 | def parse_args(): 12 | parser = argparse.ArgumentParser( 13 | description='MMDet3D upgrade model version(before v0.6.0) of VoteNet') 14 | parser.add_argument('checkpoint', help='checkpoint file') 15 | parser.add_argument('--out', help='path of the output checkpoint file') 16 | args = parser.parse_args() 17 | return args 18 | 19 | 20 | def parse_config(config_strings): 21 | """Parse config from strings. 22 | 23 | Args: 24 | config_strings (string): strings of model config. 25 | 26 | Returns: 27 | Config: model config 28 | """ 29 | temp_file = tempfile.NamedTemporaryFile() 30 | config_path = f'{temp_file.name}.py' 31 | with open(config_path, 'w') as f: 32 | f.write(config_strings) 33 | 34 | config = Config.fromfile(config_path) 35 | 36 | # Update backbone config 37 | if 'pool_mod' in config.model.backbone: 38 | config.model.backbone.pop('pool_mod') 39 | 40 | if 'sa_cfg' not in config.model.backbone: 41 | config.model.backbone['sa_cfg'] = dict( 42 | type='PointSAModule', 43 | pool_mod='max', 44 | use_xyz=True, 45 | normalize_xyz=True) 46 | 47 | if 'type' not in config.model.bbox_head.vote_aggregation_cfg: 48 | config.model.bbox_head.vote_aggregation_cfg['type'] = 'PointSAModule' 49 | 50 | # Update bbox_head config 51 | if 'pred_layer_cfg' not in config.model.bbox_head: 52 | config.model.bbox_head['pred_layer_cfg'] = dict( 53 | in_channels=128, shared_conv_channels=(128, 128), bias=True) 54 | 55 | if 'feat_channels' in config.model.bbox_head: 56 | config.model.bbox_head.pop('feat_channels') 57 | 58 | if 'vote_moudule_cfg' in config.model.bbox_head: 59 | config.model.bbox_head['vote_module_cfg'] = config.model.bbox_head.pop( 60 | 'vote_moudule_cfg') 61 | 62 | if config.model.bbox_head.vote_aggregation_cfg.use_xyz: 63 | config.model.bbox_head.vote_aggregation_cfg.mlp_channels[0] -= 3 64 | 65 | temp_file.close() 66 | 67 | return config 68 | 69 | 70 | def main(): 71 | """Convert keys in checkpoints for VoteNet. 
72 | 73 | There can be some breaking changes during the development of mmdetection3d, 74 | and this tool is used for upgrading checkpoints trained with old versions 75 | (before v0.6.0) to the latest one. 76 | """ 77 | args = parse_args() 78 | checkpoint = torch.load(args.checkpoint) 79 | cfg = parse_config(checkpoint['meta']['config']) 80 | # Build the model and load checkpoint 81 | model = build_detector( 82 | cfg.model, 83 | train_cfg=cfg.get('train_cfg'), 84 | test_cfg=cfg.get('test_cfg')) 85 | orig_ckpt = checkpoint['state_dict'] 86 | converted_ckpt = orig_ckpt.copy() 87 | 88 | if cfg['dataset_type'] == 'ScanNetDataset': 89 | NUM_CLASSES = 18 90 | elif cfg['dataset_type'] == 'SUNRGBDDataset': 91 | NUM_CLASSES = 10 92 | else: 93 | raise NotImplementedError 94 | 95 | RENAME_PREFIX = { 96 | 'bbox_head.conv_pred.0': 'bbox_head.conv_pred.shared_convs.layer0', 97 | 'bbox_head.conv_pred.1': 'bbox_head.conv_pred.shared_convs.layer1' 98 | } 99 | 100 | DEL_KEYS = [ 101 | 'bbox_head.conv_pred.0.bn.num_batches_tracked', 102 | 'bbox_head.conv_pred.1.bn.num_batches_tracked' 103 | ] 104 | 105 | EXTRACT_KEYS = { 106 | 'bbox_head.conv_pred.conv_cls.weight': 107 | ('bbox_head.conv_pred.conv_out.weight', [(0, 2), (-NUM_CLASSES, -1)]), 108 | 'bbox_head.conv_pred.conv_cls.bias': 109 | ('bbox_head.conv_pred.conv_out.bias', [(0, 2), (-NUM_CLASSES, -1)]), 110 | 'bbox_head.conv_pred.conv_reg.weight': 111 | ('bbox_head.conv_pred.conv_out.weight', [(2, -NUM_CLASSES)]), 112 | 'bbox_head.conv_pred.conv_reg.bias': 113 | ('bbox_head.conv_pred.conv_out.bias', [(2, -NUM_CLASSES)]) 114 | } 115 | 116 | # Delete some useless keys 117 | for key in DEL_KEYS: 118 | converted_ckpt.pop(key) 119 | 120 | # Rename keys with specific prefix 121 | RENAME_KEYS = dict() 122 | for old_key in converted_ckpt.keys(): 123 | for rename_prefix in RENAME_PREFIX.keys(): 124 | if rename_prefix in old_key: 125 | new_key = old_key.replace(rename_prefix, 126 | RENAME_PREFIX[rename_prefix]) 127 | RENAME_KEYS[new_key] = old_key 128 | for new_key, old_key in RENAME_KEYS.items(): 129 | converted_ckpt[new_key] = converted_ckpt.pop(old_key) 130 | 131 | # Extract weights and rename the keys 132 | for new_key, (old_key, indices) in EXTRACT_KEYS.items(): 133 | cur_layers = orig_ckpt[old_key] 134 | converted_layers = [] 135 | for (start, end) in indices: 136 | if end != -1: 137 | converted_layers.append(cur_layers[start:end]) 138 | else: 139 | converted_layers.append(cur_layers[start:]) 140 | converted_layers = torch.cat(converted_layers, 0) 141 | converted_ckpt[new_key] = converted_layers 142 | if old_key in converted_ckpt.keys(): 143 | converted_ckpt.pop(old_key) 144 | 145 | # Check the converted checkpoint by loading to the model 146 | load_state_dict(model, converted_ckpt, strict=True) 147 | checkpoint['state_dict'] = converted_ckpt 148 | torch.save(checkpoint, args.out) 149 | 150 | 151 | if __name__ == '__main__': 152 | main() 153 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/assigners/hungarian_assigner_3d.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2022 Toyota Research Institute, Dian Chen. All Rights Reserved. 3 | # ------------------------------------------------------------------------ 4 | # Copyright (c) 2021 megvii-model. All Rights Reserved. 
5 | # ------------------------------------------------------------------------ 6 | # Modified from DETR3D (https://github.com/WangYueFt/detr3d) 7 | # Copyright (c) 2021 Wang, Yue 8 | # ------------------------------------------------------------------------ 9 | # Modified from mmdetection (https://github.com/open-mmlab/mmdetection) 10 | # Copyright (c) OpenMMLab. All rights reserved. 11 | # ------------------------------------------------------------------------ 12 | import torch 13 | 14 | from mmdet.core.bbox.builder import BBOX_ASSIGNERS 15 | from mmdet.core.bbox.assigners import AssignResult 16 | from mmdet.core.bbox.assigners import BaseAssigner 17 | from mmdet.core.bbox.match_costs import build_match_cost 18 | from mmdet.models.utils.transformer import inverse_sigmoid 19 | from projects.mmdet3d_plugin.core.bbox.util import normalize_bbox 20 | 21 | try: 22 | from scipy.optimize import linear_sum_assignment 23 | except ImportError: 24 | linear_sum_assignment = None 25 | 26 | 27 | @BBOX_ASSIGNERS.register_module() 28 | class HungarianAssigner3D(BaseAssigner): 29 | """Computes one-to-one matching between predictions and ground truth. 30 | This class computes an assignment between the targets and the predictions 31 | based on the costs. The costs are weighted sum of three components: 32 | classification cost, regression L1 cost and regression iou cost. The 33 | targets don't include the no_object, so generally there are more 34 | predictions than targets. After the one-to-one matching, the un-matched 35 | are treated as backgrounds. Thus each query prediction will be assigned 36 | with `0` or a positive integer indicating the ground truth index: 37 | - 0: negative sample, no assigned gt 38 | - positive integer: positive sample, index (1-based) of assigned gt 39 | Args: 40 | cls_weight (int | float, optional): The scale factor for classification 41 | cost. Default 1.0. 42 | bbox_weight (int | float, optional): The scale factor for regression 43 | L1 cost. Default 1.0. 44 | iou_weight (int | float, optional): The scale factor for regression 45 | iou cost. Default 1.0. 46 | iou_calculator (dict | optional): The config for the iou calculation. 47 | Default type `BboxOverlaps2D`. 48 | iou_mode (str | optional): "iou" (intersection over union), "iof" 49 | (intersection over foreground), or "giou" (generalized 50 | intersection over union). Default "giou". 51 | """ 52 | 53 | def __init__(self, 54 | cls_cost=dict(type='ClassificationCost', weight=1.), 55 | reg_cost=dict(type='BBoxL1Cost', weight=1.0), 56 | iou_cost=dict(type='IoUCost', weight=0.0), 57 | align_with_loss=False, 58 | pc_range=None): 59 | self.cls_cost = build_match_cost(cls_cost) 60 | self.reg_cost = build_match_cost(reg_cost) 61 | self.iou_cost = build_match_cost(iou_cost) 62 | self.align_with_loss = align_with_loss 63 | self.pc_range = pc_range 64 | 65 | def assign(self, bbox_pred, cls_pred, gt_bboxes, gt_labels, gt_bboxes_ignore=None, code_weights=None, eps=1e-7): 66 | """Computes one-to-one matching based on the weighted costs. 67 | This method assign each query prediction to a ground truth or 68 | background. The `assigned_gt_inds` with -1 means don't care, 69 | 0 means negative sample, and positive number is the index (1-based) 70 | of assigned gt. 71 | The assignment is done in the following steps, the order matters. 72 | 1. assign every prediction to -1 73 | 2. compute the weighted costs 74 | 3. do Hungarian matching on CPU based on the costs 75 | 4. 
assign all to 0 (background) first, then for each matched pair 76 | between predictions and gts, treat this prediction as foreground 77 | and assign the corresponding gt index (plus 1) to it. 78 | Args: 79 | bbox_pred (Tensor): Predicted boxes with normalized coordinates 80 | (cx, cy, w, h), which are all in range [0, 1]. Shape 81 | [num_query, 4]. 82 | cls_pred (Tensor): Predicted classification logits, shape 83 | [num_query, num_class]. 84 | gt_bboxes (Tensor): Ground truth boxes with unnormalized 85 | coordinates (x1, y1, x2, y2). Shape [num_gt, 4]. 86 | gt_labels (Tensor): Label of `gt_bboxes`, shape (num_gt,). 87 | gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are 88 | labelled as `ignored`. Default None. 89 | eps (int | float, optional): A value added to the denominator for 90 | numerical stability. Default 1e-7. 91 | Returns: 92 | :obj:`AssignResult`: The assigned result. 93 | """ 94 | assert gt_bboxes_ignore is None, \ 95 | 'Only case when gt_bboxes_ignore is None is supported.' 96 | num_gts, num_bboxes = gt_bboxes.size(0), bbox_pred.size(0) 97 | 98 | # 1. assign -1 by default 99 | assigned_gt_inds = bbox_pred.new_full((num_bboxes, ), -1, dtype=torch.long) 100 | assigned_labels = bbox_pred.new_full((num_bboxes, ), -1, dtype=torch.long) 101 | if num_gts == 0 or num_bboxes == 0: 102 | # No ground truth or boxes, return empty assignment 103 | if num_gts == 0: 104 | # No ground truth, assign all to background 105 | assigned_gt_inds[:] = 0 106 | return AssignResult(num_gts, assigned_gt_inds, None, labels=assigned_labels) 107 | 108 | # 2. compute the weighted costs 109 | # classification and bboxcost. 110 | cls_cost = self.cls_cost(cls_pred, gt_labels) 111 | # regression L1 cost 112 | normalized_gt_bboxes = normalize_bbox(gt_bboxes, self.pc_range) 113 | if self.align_with_loss: 114 | normalized_gt_bboxes = normalized_gt_bboxes * code_weights 115 | bbox_pred = bbox_pred * code_weights 116 | reg_cost = self.reg_cost(bbox_pred, normalized_gt_bboxes) 117 | else: 118 | reg_cost = self.reg_cost(bbox_pred[:, :8], normalized_gt_bboxes[:, :8]) 119 | 120 | # weighted sum of above two costs 121 | cost = cls_cost + reg_cost 122 | 123 | # 3. do Hungarian matching on CPU using linear_sum_assignment 124 | cost = cost.detach().cpu() 125 | if linear_sum_assignment is None: 126 | raise ImportError('Please run "pip install scipy" ' 127 | 'to install scipy first.') 128 | cost = torch.nan_to_num(cost, nan=100.0, posinf=100.0, neginf=-100.0) 129 | matched_row_inds, matched_col_inds = linear_sum_assignment(cost) 130 | matched_row_inds = torch.from_numpy(matched_row_inds).to(bbox_pred.device) 131 | matched_col_inds = torch.from_numpy(matched_col_inds).to(bbox_pred.device) 132 | 133 | # 4. assign backgrounds and foregrounds 134 | # assign all indices to backgrounds first 135 | assigned_gt_inds[:] = 0 136 | # assign foregrounds based on matching results 137 | assigned_gt_inds[matched_row_inds] = matched_col_inds + 1 138 | assigned_labels[matched_row_inds] = gt_labels[matched_col_inds] 139 | return AssignResult(num_gts, assigned_gt_inds, None, labels=assigned_labels) 140 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/vedet_transformer.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2022 Toyota Research Institute, Dian Chen. All Rights Reserved. 
3 | # ------------------------------------------------------------------------ 4 | import torch 5 | from mmcv.cnn.bricks.transformer import build_transformer_layer_sequence 6 | from mmdet.models.utils.builder import TRANSFORMER 7 | from mmdet.models.utils.transformer import inverse_sigmoid 8 | from mmcv.cnn import xavier_init 9 | from mmcv.runner.base_module import BaseModule 10 | 11 | 12 | @TRANSFORMER.register_module() 13 | class VETransformer(BaseModule): 14 | """Implements the DETR transformer. 15 | Following the official DETR implementation, this module copy-paste 16 | from torch.nn.Transformer with modifications: 17 | * positional encodings are passed in MultiheadAttention 18 | * extra LN at the end of encoder is removed 19 | * decoder returns a stack of activations from all decoding layers 20 | See `paper: End-to-End Object Detection with Transformers 21 | `_ for details. 22 | Args: 23 | encoder (`mmcv.ConfigDict` | Dict): Config of 24 | TransformerEncoder. Defaults to None. 25 | decoder ((`mmcv.ConfigDict` | Dict)): Config of 26 | TransformerDecoder. Defaults to None 27 | init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization. 28 | Defaults to None. 29 | """ 30 | 31 | def __init__(self, 32 | det_decoder=None, 33 | seg_decoder=None, 34 | use_iterative_refinement=False, 35 | reduction='ego', 36 | init_cfg=None): 37 | super(VETransformer, self).__init__(init_cfg=init_cfg) 38 | 39 | self.det_decoders = None 40 | if det_decoder is not None: 41 | self.det_decoders = build_transformer_layer_sequence(det_decoder) 42 | 43 | self.seg_decoders = None 44 | if seg_decoder is not None: 45 | self.seg_decoders = build_transformer_layer_sequence(seg_decoder) 46 | 47 | assert reduction in {'ego', 'mean'} 48 | self.reduction = reduction 49 | self.use_iterative_refinement = use_iterative_refinement 50 | 51 | def init_weights(self): 52 | # follow the official DETR to init parameters 53 | for m in self.modules(): 54 | if hasattr(m, 'weight') and m.weight.dim() > 1: 55 | xavier_init(m, distribution='uniform') 56 | self._is_init = True 57 | 58 | def forward(self, 59 | x, 60 | mask, 61 | x_pos, 62 | init_det_points, 63 | init_det_points_mtv, 64 | init_seg_points, 65 | pos_encoder, 66 | pos_seg_encoder, 67 | reg_branch=None, 68 | num_decode_views=2, 69 | **kwargs): 70 | """Forward function for `Transformer`. 71 | Args: 72 | x (Tensor): Input query with shape [bs, c, h, w] where 73 | c = embed_dims. 74 | mask (Tensor): The key_padding_mask used for encoder and decoder, 75 | with shape [bs, h, w]. 76 | query_embed (Tensor): The query embedding for decoder, with shape 77 | [num_query, c]. 78 | pos_embed (Tensor): The positional encoding for encoder and 79 | decoder, with the same shape as `x`. 80 | Returns: 81 | tuple[Tensor]: results of decoder containing the following tensor. 82 | - out_dec: Output from decoder. If return_intermediate_dec \ 83 | is True output has shape [num_dec_layers, bs, 84 | num_query, embed_dims], else has shape [1, bs, \ 85 | num_query, embed_dims]. 86 | - memory: Output results from encoder, with shape \ 87 | [bs, embed_dims, h, w]. 
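                Note: in this adaptation the forward pass actually returns a tuple
                ``(det_outputs, regs, seg_outputs)``: the stacked detection decoder
                outputs, the per-layer box regressions, and the segmentation decoder
                outputs (each an empty list when the corresponding decoder is not
                configured); see the implementation below.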
88 | """ 89 | bs, n, hw, c = x.shape 90 | x = x.reshape(bs, n * hw, c) 91 | x_pos = x_pos.reshape(bs, n * hw, -1) 92 | 93 | mask = mask.view(bs, -1) # [bs, n, h*w] -> [bs, n*h*w] 94 | 95 | # segmentation decoders 96 | seg_outputs = [] 97 | if self.seg_decoders is not None: 98 | query_points = init_seg_points.flatten(1, -2) 99 | # query_embeds = pos_encoder(query_points) 100 | query_embeds = pos_seg_encoder(query_points) 101 | query = torch.zeros_like(query_embeds) 102 | 103 | seg_outputs = self.seg_decoders( 104 | query=query.transpose(0, 1), 105 | key=x.transpose(0, 1), 106 | value=x.transpose(0, 1), 107 | key_pos=None, 108 | query_pos=query_embeds.transpose(0, 1), 109 | key_padding_mask=None, 110 | reg_branch=None) 111 | seg_outputs = seg_outputs.transpose(1, 2) 112 | seg_outputs = torch.nan_to_num(seg_outputs) 113 | 114 | # detection decoders 115 | det_outputs, regs = [], [] 116 | if self.det_decoders is not None: 117 | memory = x.transpose(0, 1) 118 | attn_masks = [None, None] 119 | num_query = init_det_points.shape[-2] 120 | total_num = num_query * (1 + num_decode_views) 121 | self_attn_mask = memory.new_ones((total_num, total_num)) 122 | for i in range(1 + num_decode_views): 123 | self_attn_mask[i * num_query:(i + 1) * num_query, i * num_query:(i + 1) * num_query] = 0 124 | attn_masks[0] = self_attn_mask 125 | det_outputs, regs = self.decode_bboxes(init_det_points, init_det_points_mtv, memory, x_pos.transpose(0, 1), 126 | mask, attn_masks, pos_encoder, reg_branch, num_decode_views) 127 | 128 | return det_outputs, regs, seg_outputs 129 | 130 | def decode_bboxes(self, init_det_points, init_det_points_mtv, memory, key_pos, mask, attn_masks, pos_encoder, 131 | reg_branch, num_decode_views): 132 | if init_det_points_mtv is not None: 133 | # append queries from virtual views 134 | query_points = torch.cat([init_det_points, init_det_points_mtv], dim=1).flatten(1, 2) 135 | else: 136 | query_points = init_det_points.flatten(1, 2) 137 | 138 | query_embeds = pos_encoder(query_points) 139 | query = torch.zeros_like(query_embeds) 140 | 141 | regs = [] 142 | # output from layers' won't update next's layer's ref points 143 | det_outputs = self.det_decoders( 144 | query=query.transpose(0, 1), 145 | key=memory, 146 | value=memory, 147 | key_pos=key_pos, 148 | query_pos=query_embeds.transpose(0, 1), 149 | key_padding_mask=mask, 150 | attn_masks=attn_masks, 151 | reg_branch=reg_branch) 152 | det_outputs = det_outputs.transpose(1, 2) 153 | det_outputs = torch.nan_to_num(det_outputs) 154 | 155 | for reg_brch, output in zip(reg_branch, det_outputs): 156 | 157 | reg = reg_brch(output) 158 | reference = inverse_sigmoid(query_points[..., :3].clone()) 159 | reg[..., 0:2] += reference[..., 0:2] 160 | reg[..., 0:2] = reg[..., 0:2].sigmoid() 161 | reg[..., 4:5] += reference[..., 2:3] 162 | reg[..., 4:5] = reg[..., 4:5].sigmoid() 163 | 164 | regs.append(reg) 165 | 166 | L, B, _, C = det_outputs.shape 167 | # (L, B, V + 1, M, C) 168 | det_outputs = det_outputs.reshape(L, B, num_decode_views + 1, -1, C) 169 | # (L, B, V + 1, M, 10) 170 | regs = torch.stack(regs).reshape(L, B, num_decode_views + 1, init_det_points.shape[-2], -1) 171 | 172 | # ego decode + mtv center decode, (L, B, M, V * 10) 173 | regs = regs.permute(0, 1, 3, 2, 4).flatten(-2) 174 | 175 | return det_outputs, regs 176 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/pipelines/loading.py: -------------------------------------------------------------------------------- 1 | # 
------------------------------------------------------------------------ 2 | # Copyright (c) 2022 Toyota Research Institute, Dian Chen. All Rights Reserved. 3 | # ------------------------------------------------------------------------ 4 | # Copyright (c) 2022 megvii-model. All Rights Reserved. 5 | # ------------------------------------------------------------------------ 6 | import math 7 | 8 | import mmcv 9 | import numpy as np 10 | from mmdet.datasets.builder import PIPELINES 11 | from einops import rearrange 12 | 13 | 14 | @PIPELINES.register_module() 15 | class LoadMapsFromFiles(object): 16 | 17 | def __init__(self, k=None): 18 | self.k = k 19 | 20 | def __call__(self, results): 21 | map_filename = results['map_filename'] 22 | maps = np.load(map_filename) 23 | map_mask = maps['arr_0'].astype(np.float32) 24 | 25 | maps = map_mask.transpose((2, 0, 1)) 26 | results['gt_map'] = maps 27 | maps = rearrange(maps, 'c (h h1) (w w2) -> (h w) c h1 w2 ', h1=16, w2=16) 28 | maps = maps.reshape(256, 3 * 256) 29 | results['map_shape'] = maps.shape 30 | results['maps'] = maps 31 | return results 32 | 33 | 34 | @PIPELINES.register_module() 35 | class LoadMultiViewImageFromMultiSweepsFiles(object): 36 | """Load multi channel images from a list of separate channel files. 37 | Expects results['img_filename'] to be a list of filenames. 38 | Args: 39 | to_float32 (bool): Whether to convert the img to float32. 40 | Defaults to False. 41 | color_type (str): Color type of the file. Defaults to 'unchanged'. 42 | """ 43 | 44 | def __init__( 45 | self, 46 | sweeps_num=5, 47 | to_float32=False, 48 | file_client_args=dict(backend='disk'), 49 | pad_empty_sweeps=False, 50 | sweep_range=[3, 27], 51 | time_range=-1, 52 | sweeps_id=None, 53 | color_type='unchanged', 54 | sensors=['CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_FRONT_LEFT', 'CAM_BACK', 'CAM_BACK_LEFT', 'CAM_BACK_RIGHT'], 55 | test_mode=True, 56 | prob=1.0, 57 | ): 58 | 59 | self.sweeps_num = sweeps_num 60 | self.to_float32 = to_float32 61 | self.color_type = color_type 62 | self.file_client_args = file_client_args.copy() 63 | self.file_client = None 64 | self.pad_empty_sweeps = pad_empty_sweeps 65 | self.sensors = sensors 66 | self.test_mode = test_mode 67 | self.sweeps_id = sweeps_id 68 | self.sweep_range = sweep_range 69 | self.time_range = time_range 70 | self.prob = prob 71 | if self.sweeps_id: 72 | assert len(self.sweeps_id) == self.sweeps_num 73 | 74 | def __call__(self, results): 75 | """Call function to load multi-view image from files. 76 | Args: 77 | results (dict): Result dict containing multi-view image filenames. 78 | Returns: 79 | dict: The result dict containing the multi-view image data. \ 80 | Added keys and values are described below. 81 | - filename (str): Multi-view image filenames. 82 | - img (np.ndarray): Multi-view image arrays. 83 | - img_shape (tuple[int]): Shape of multi-view image arrays. 84 | - ori_shape (tuple[int]): Shape of original image arrays. 85 | - pad_shape (tuple[int]): Shape of padded image arrays. 86 | - scale_factor (float): Scale factor. 87 | - img_norm_cfg (dict): Normalization configuration of images. 
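                Note: this loader also extends ``results['filename']``,
                ``results['lidar2img']``, ``results['intrinsics']`` and
                ``results['extrinsics']`` with the corresponding sweep entries, and
                replaces ``results['timestamp']`` with a list of per-image time
                offsets relative to the lidar timestamp (divided by ``time_range``
                when ``time_range > 0``); see the implementation below.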
88 | """ 89 | sweep_imgs_list = [] 90 | timestamp_imgs_list = [] 91 | imgs = results['img'] 92 | img_timestamp = results['img_timestamp'] 93 | lidar_timestamp = results['timestamp'] 94 | img_timestamp = [lidar_timestamp - timestamp for timestamp in img_timestamp] 95 | sweep_imgs_list.extend(imgs) 96 | timestamp_imgs_list.extend(img_timestamp) 97 | nums = len(imgs) 98 | if self.pad_empty_sweeps and len(results['cam_sweeps']) == 0: 99 | for i in range(self.sweeps_num): 100 | sweep_imgs_list.extend(imgs) 101 | mean_time = (self.sweep_range[0] + self.sweep_range[1]) / 2.0 * 0.083 102 | timestamp_imgs_list.extend([time + mean_time for time in img_timestamp]) 103 | for j in range(nums): 104 | results['filename'].append(results['filename'][j]) 105 | results['lidar2img'].append(np.copy(results['lidar2img'][j])) 106 | results['intrinsics'].append(np.copy(results['intrinsics'][j])) 107 | results['extrinsics'].append(np.copy(results['extrinsics'][j])) 108 | else: 109 | if self.sweeps_id: 110 | choices = self.sweeps_id 111 | elif len(results['cam_sweeps']) <= self.sweeps_num: 112 | choices = np.arange(len(results['cam_sweeps'])) 113 | elif self.test_mode: 114 | # choices = [int((self.sweep_range[0] + self.sweep_range[1]) / 2) - 1] 115 | max_range = min(self.sweep_range[1], len(results['cam_sweeps'])) 116 | if max_range - self.sweep_range[0] < self.sweeps_num: 117 | choices = list(range(self.sweep_range[0], max_range)) 118 | choices = (choices * math.ceil(self.sweeps_num / len(choices)))[:self.sweeps_num] 119 | else: 120 | interval = int((max_range - self.sweep_range[0]) / (self.sweeps_num + 1)) 121 | choices = [self.sweep_range[0] + interval * (i + 1) for i in range(self.sweeps_num)] 122 | else: 123 | if np.random.random() < self.prob: 124 | max_range = min(self.sweep_range[1], len(results['cam_sweeps'])) 125 | sweep_range = list(range(self.sweep_range[0], max_range)) 126 | choices = np.random.choice( 127 | sweep_range, self.sweeps_num, replace=max_range - self.sweep_range[0] < self.sweeps_num) 128 | 129 | else: 130 | choices = [int((self.sweep_range[0] + self.sweep_range[1]) / 2) - 1] 131 | 132 | choices = sorted(choices) 133 | for idx in choices: 134 | sweep_idx = min(idx, len(results['cam_sweeps']) - 1) 135 | sweep = results['cam_sweeps'][sweep_idx] 136 | if len(sweep.keys()) < len(self.sensors): 137 | sweep = results['cam_sweeps'][sweep_idx - 1] 138 | results['filename'].extend([sweep[sensor]['data_path'] for sensor in self.sensors]) 139 | 140 | img = np.stack([mmcv.imread(sweep[sensor]['data_path'], self.color_type) for sensor in self.sensors], 141 | axis=-1) 142 | 143 | if self.to_float32: 144 | img = img.astype(np.float32) 145 | img = [img[..., i] for i in range(img.shape[-1])] 146 | sweep_imgs_list.extend(img) 147 | sweep_ts = [lidar_timestamp - sweep[sensor]['timestamp'] / 1e6 for sensor in self.sensors] 148 | timestamp_imgs_list.extend(sweep_ts) 149 | for sensor in self.sensors: 150 | results['lidar2img'].append(sweep[sensor]['lidar2img']) 151 | results['intrinsics'].append(sweep[sensor]['intrinsics']) 152 | # due to inverse convention in our repo 153 | results['extrinsics'].append(np.linalg.inv(sweep[sensor]['extrinsics']).T) 154 | results['img'] = sweep_imgs_list 155 | if self.time_range > 0: 156 | timestamp_imgs_list = [time / self.time_range for time in timestamp_imgs_list] 157 | results['timestamp'] = timestamp_imgs_list 158 | 159 | return results 160 | 161 | def __repr__(self): 162 | """str: Return a string that describes the module.""" 163 | repr_str = self.__class__.__name__ 164 | 
repr_str += f'(to_float32={self.to_float32}, ' 165 | repr_str += f"color_type='{self.color_type}')" 166 | return repr_str 167 | -------------------------------------------------------------------------------- /tools/analysis_tools/analyze_logs.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import json 4 | import numpy as np 5 | import seaborn as sns 6 | from collections import defaultdict 7 | from matplotlib import pyplot as plt 8 | 9 | 10 | def cal_train_time(log_dicts, args): 11 | for i, log_dict in enumerate(log_dicts): 12 | print(f'{"-" * 5}Analyze train time of {args.json_logs[i]}{"-" * 5}') 13 | all_times = [] 14 | for epoch in log_dict.keys(): 15 | if args.include_outliers: 16 | all_times.append(log_dict[epoch]['time']) 17 | else: 18 | all_times.append(log_dict[epoch]['time'][1:]) 19 | all_times = np.array(all_times) 20 | epoch_ave_time = all_times.mean(-1) 21 | slowest_epoch = epoch_ave_time.argmax() 22 | fastest_epoch = epoch_ave_time.argmin() 23 | std_over_epoch = epoch_ave_time.std() 24 | print(f'slowest epoch {slowest_epoch + 1}, ' 25 | f'average time is {epoch_ave_time[slowest_epoch]:.4f}') 26 | print(f'fastest epoch {fastest_epoch + 1}, ' 27 | f'average time is {epoch_ave_time[fastest_epoch]:.4f}') 28 | print(f'time std over epochs is {std_over_epoch:.4f}') 29 | print(f'average iter time: {np.mean(all_times):.4f} s/iter') 30 | print() 31 | 32 | 33 | def plot_curve(log_dicts, args): 34 | if args.backend is not None: 35 | plt.switch_backend(args.backend) 36 | sns.set_style(args.style) 37 | # if legend is None, use {filename}_{key} as legend 38 | legend = args.legend 39 | if legend is None: 40 | legend = [] 41 | for json_log in args.json_logs: 42 | for metric in args.keys: 43 | legend.append(f'{json_log}_{metric}') 44 | assert len(legend) == (len(args.json_logs) * len(args.keys)) 45 | metrics = args.keys 46 | 47 | num_metrics = len(metrics) 48 | for i, log_dict in enumerate(log_dicts): 49 | epochs = list(log_dict.keys()) 50 | for j, metric in enumerate(metrics): 51 | print(f'plot curve of {args.json_logs[i]}, metric is {metric}') 52 | if metric not in log_dict[epochs[args.interval - 1]]: 53 | raise KeyError( 54 | f'{args.json_logs[i]} does not contain metric {metric}') 55 | 56 | if args.mode == 'eval': 57 | if min(epochs) == args.interval: 58 | x0 = args.interval 59 | else: 60 | # if current training is resumed from previous checkpoint 61 | # we lost information in early epochs 62 | # `xs` should start according to `min(epochs)` 63 | if min(epochs) % args.interval == 0: 64 | x0 = min(epochs) 65 | else: 66 | # find the first epoch that do eval 67 | x0 = min(epochs) + args.interval - \ 68 | min(epochs) % args.interval 69 | xs = np.arange(x0, max(epochs) + 1, args.interval) 70 | ys = [] 71 | for epoch in epochs[args.interval - 1::args.interval]: 72 | ys += log_dict[epoch][metric] 73 | 74 | # if training is aborted before eval of the last epoch 75 | # `xs` and `ys` will have different length and cause an error 76 | # check if `ys[-1]` is empty here 77 | if not log_dict[epoch][metric]: 78 | xs = xs[:-1] 79 | 80 | ax = plt.gca() 81 | ax.set_xticks(xs) 82 | plt.xlabel('epoch') 83 | plt.plot(xs, ys, label=legend[i * num_metrics + j], marker='o') 84 | else: 85 | xs = [] 86 | ys = [] 87 | num_iters_per_epoch = \ 88 | log_dict[epochs[args.interval-1]]['iter'][-1] 89 | for epoch in epochs[args.interval - 1::args.interval]: 90 | iters = log_dict[epoch]['iter'] 91 | if 
log_dict[epoch]['mode'][-1] == 'val': 92 | iters = iters[:-1] 93 | xs.append( 94 | np.array(iters) + (epoch - 1) * num_iters_per_epoch) 95 | ys.append(np.array(log_dict[epoch][metric][:len(iters)])) 96 | xs = np.concatenate(xs) 97 | ys = np.concatenate(ys) 98 | plt.xlabel('iter') 99 | plt.plot( 100 | xs, ys, label=legend[i * num_metrics + j], linewidth=0.5) 101 | plt.legend() 102 | if args.title is not None: 103 | plt.title(args.title) 104 | if args.out is None: 105 | plt.show() 106 | else: 107 | print(f'save curve to: {args.out}') 108 | plt.savefig(args.out) 109 | plt.cla() 110 | 111 | 112 | def add_plot_parser(subparsers): 113 | parser_plt = subparsers.add_parser( 114 | 'plot_curve', help='parser for plotting curves') 115 | parser_plt.add_argument( 116 | 'json_logs', 117 | type=str, 118 | nargs='+', 119 | help='path of train log in json format') 120 | parser_plt.add_argument( 121 | '--keys', 122 | type=str, 123 | nargs='+', 124 | default=['mAP_0.25'], 125 | help='the metric that you want to plot') 126 | parser_plt.add_argument('--title', type=str, help='title of figure') 127 | parser_plt.add_argument( 128 | '--legend', 129 | type=str, 130 | nargs='+', 131 | default=None, 132 | help='legend of each plot') 133 | parser_plt.add_argument( 134 | '--backend', type=str, default=None, help='backend of plt') 135 | parser_plt.add_argument( 136 | '--style', type=str, default='dark', help='style of plt') 137 | parser_plt.add_argument('--out', type=str, default=None) 138 | parser_plt.add_argument('--mode', type=str, default='train') 139 | parser_plt.add_argument('--interval', type=int, default=1) 140 | 141 | 142 | def add_time_parser(subparsers): 143 | parser_time = subparsers.add_parser( 144 | 'cal_train_time', 145 | help='parser for computing the average time per training iteration') 146 | parser_time.add_argument( 147 | 'json_logs', 148 | type=str, 149 | nargs='+', 150 | help='path of train log in json format') 151 | parser_time.add_argument( 152 | '--include-outliers', 153 | action='store_true', 154 | help='include the first value of every epoch when computing ' 155 | 'the average time') 156 | 157 | 158 | def parse_args(): 159 | parser = argparse.ArgumentParser(description='Analyze Json Log') 160 | # currently only support plot curve and calculate average train time 161 | subparsers = parser.add_subparsers(dest='task', help='task parser') 162 | add_plot_parser(subparsers) 163 | add_time_parser(subparsers) 164 | args = parser.parse_args() 165 | return args 166 | 167 | 168 | def load_json_logs(json_logs): 169 | # load and convert json_logs to log_dict, key is epoch, value is a sub dict 170 | # keys of sub dict is different metrics, e.g. 
memory, bbox_mAP 171 | # value of sub dict is a list of corresponding values of all iterations 172 | log_dicts = [dict() for _ in json_logs] 173 | for json_log, log_dict in zip(json_logs, log_dicts): 174 | with open(json_log, 'r') as log_file: 175 | for line in log_file: 176 | log = json.loads(line.strip()) 177 | # skip lines without `epoch` field 178 | if 'epoch' not in log: 179 | continue 180 | epoch = log.pop('epoch') 181 | if epoch not in log_dict: 182 | log_dict[epoch] = defaultdict(list) 183 | for k, v in log.items(): 184 | log_dict[epoch][k].append(v) 185 | return log_dicts 186 | 187 | 188 | def main(): 189 | args = parse_args() 190 | 191 | json_logs = args.json_logs 192 | for json_log in json_logs: 193 | assert json_log.endswith('.json') 194 | 195 | log_dicts = load_json_logs(json_logs) 196 | 197 | eval(args.task)(log_dicts, args) 198 | 199 | 200 | if __name__ == '__main__': 201 | main() 202 | -------------------------------------------------------------------------------- /tools/data_converter/nuimage_converter.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import base64 4 | import mmcv 5 | import numpy as np 6 | from nuimages import NuImages 7 | from nuimages.utils.utils import mask_decode, name_to_index_mapping 8 | from os import path as osp 9 | 10 | nus_categories = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle', 11 | 'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone', 12 | 'barrier') 13 | 14 | NAME_MAPPING = { 15 | 'movable_object.barrier': 'barrier', 16 | 'vehicle.bicycle': 'bicycle', 17 | 'vehicle.bus.bendy': 'bus', 18 | 'vehicle.bus.rigid': 'bus', 19 | 'vehicle.car': 'car', 20 | 'vehicle.construction': 'construction_vehicle', 21 | 'vehicle.motorcycle': 'motorcycle', 22 | 'human.pedestrian.adult': 'pedestrian', 23 | 'human.pedestrian.child': 'pedestrian', 24 | 'human.pedestrian.construction_worker': 'pedestrian', 25 | 'human.pedestrian.police_officer': 'pedestrian', 26 | 'movable_object.trafficcone': 'traffic_cone', 27 | 'vehicle.trailer': 'trailer', 28 | 'vehicle.truck': 'truck', 29 | } 30 | 31 | 32 | def parse_args(): 33 | parser = argparse.ArgumentParser(description='Data converter arg parser') 34 | parser.add_argument( 35 | '--data-root', 36 | type=str, 37 | default='./data/nuimages', 38 | help='specify the root path of dataset') 39 | parser.add_argument( 40 | '--version', 41 | type=str, 42 | nargs='+', 43 | default=['v1.0-mini'], 44 | required=False, 45 | help='specify the dataset version') 46 | parser.add_argument( 47 | '--out-dir', 48 | type=str, 49 | default='./data/nuimages/annotations/', 50 | required=False, 51 | help='path to save the exported json') 52 | parser.add_argument( 53 | '--nproc', 54 | type=int, 55 | default=4, 56 | required=False, 57 | help='workers to process semantic masks') 58 | parser.add_argument('--extra-tag', type=str, default='nuimages') 59 | args = parser.parse_args() 60 | return args 61 | 62 | 63 | def get_img_annos(nuim, img_info, cat2id, out_dir, data_root, seg_root): 64 | """Get semantic segmentation map for an image. 65 | 66 | Args: 67 | nuim (obj:`NuImages`): NuImages dataset object 68 | img_info (dict): Meta information of img 69 | 70 | Returns: 71 | np.ndarray: Semantic segmentation map of the image 72 | """ 73 | sd_token = img_info['token'] 74 | image_id = img_info['id'] 75 | name_to_index = name_to_index_mapping(nuim.category) 76 | 77 | # Get image data. 
78 | width, height = img_info['width'], img_info['height'] 79 | semseg_mask = np.zeros((height, width)).astype('uint8') 80 | 81 | # Load stuff / surface regions. 82 | surface_anns = [ 83 | o for o in nuim.surface_ann if o['sample_data_token'] == sd_token 84 | ] 85 | 86 | # Draw stuff / surface regions. 87 | for ann in surface_anns: 88 | # Get color and mask. 89 | category_token = ann['category_token'] 90 | category_name = nuim.get('category', category_token)['name'] 91 | if ann['mask'] is None: 92 | continue 93 | mask = mask_decode(ann['mask']) 94 | 95 | # Draw mask for semantic segmentation. 96 | semseg_mask[mask == 1] = name_to_index[category_name] 97 | 98 | # Load object instances. 99 | object_anns = [ 100 | o for o in nuim.object_ann if o['sample_data_token'] == sd_token 101 | ] 102 | 103 | # Sort by token to ensure that objects always appear in the 104 | # instance mask in the same order. 105 | object_anns = sorted(object_anns, key=lambda k: k['token']) 106 | 107 | # Draw object instances. 108 | # The 0 index is reserved for background; thus, the instances 109 | # should start from index 1. 110 | annotations = [] 111 | for i, ann in enumerate(object_anns, start=1): 112 | # Get color, box, mask and name. 113 | category_token = ann['category_token'] 114 | category_name = nuim.get('category', category_token)['name'] 115 | if ann['mask'] is None: 116 | continue 117 | mask = mask_decode(ann['mask']) 118 | 119 | # Draw masks for semantic segmentation and instance segmentation. 120 | semseg_mask[mask == 1] = name_to_index[category_name] 121 | 122 | if category_name in NAME_MAPPING: 123 | cat_name = NAME_MAPPING[category_name] 124 | cat_id = cat2id[cat_name] 125 | 126 | x_min, y_min, x_max, y_max = ann['bbox'] 127 | # encode calibrated instance mask 128 | mask_anno = dict() 129 | mask_anno['counts'] = base64.b64decode( 130 | ann['mask']['counts']).decode() 131 | mask_anno['size'] = ann['mask']['size'] 132 | 133 | data_anno = dict( 134 | image_id=image_id, 135 | category_id=cat_id, 136 | bbox=[x_min, y_min, x_max - x_min, y_max - y_min], 137 | area=(x_max - x_min) * (y_max - y_min), 138 | segmentation=mask_anno, 139 | iscrowd=0) 140 | annotations.append(data_anno) 141 | 142 | # after process, save semantic masks 143 | img_filename = img_info['file_name'] 144 | seg_filename = img_filename.replace('jpg', 'png') 145 | seg_filename = osp.join(seg_root, seg_filename) 146 | mmcv.imwrite(semseg_mask, seg_filename) 147 | return annotations, np.max(semseg_mask) 148 | 149 | 150 | def export_nuim_to_coco(nuim, data_root, out_dir, extra_tag, version, nproc): 151 | print('Process category information') 152 | categories = [] 153 | categories = [ 154 | dict(id=nus_categories.index(cat_name), name=cat_name) 155 | for cat_name in nus_categories 156 | ] 157 | cat2id = {k_v['name']: k_v['id'] for k_v in categories} 158 | 159 | images = [] 160 | print('Process image meta information...') 161 | for sample_info in mmcv.track_iter_progress(nuim.sample_data): 162 | if sample_info['is_key_frame']: 163 | img_idx = len(images) 164 | images.append( 165 | dict( 166 | id=img_idx, 167 | token=sample_info['token'], 168 | file_name=sample_info['filename'], 169 | width=sample_info['width'], 170 | height=sample_info['height'])) 171 | 172 | seg_root = f'{out_dir}semantic_masks' 173 | mmcv.mkdir_or_exist(seg_root) 174 | mmcv.mkdir_or_exist(osp.join(data_root, 'calibrated')) 175 | 176 | global process_img_anno 177 | 178 | def process_img_anno(img_info): 179 | single_img_annos, max_cls_id = get_img_annos(nuim, img_info, cat2id, 180 
| out_dir, data_root, 181 | seg_root) 182 | return single_img_annos, max_cls_id 183 | 184 | print('Process img annotations...') 185 | if nproc > 1: 186 | outputs = mmcv.track_parallel_progress( 187 | process_img_anno, images, nproc=nproc) 188 | else: 189 | outputs = [] 190 | for img_info in mmcv.track_iter_progress(images): 191 | outputs.append(process_img_anno(img_info)) 192 | 193 | # Determine the index of object annotation 194 | print('Process annotation information...') 195 | annotations = [] 196 | max_cls_ids = [] 197 | for single_img_annos, max_cls_id in outputs: 198 | max_cls_ids.append(max_cls_id) 199 | for img_anno in single_img_annos: 200 | img_anno.update(id=len(annotations)) 201 | annotations.append(img_anno) 202 | 203 | max_cls_id = max(max_cls_ids) 204 | print(f'Max ID of class in the semantic map: {max_cls_id}') 205 | 206 | coco_format_json = dict( 207 | images=images, annotations=annotations, categories=categories) 208 | 209 | mmcv.mkdir_or_exist(out_dir) 210 | out_file = osp.join(out_dir, f'{extra_tag}_{version}.json') 211 | print(f'Annotation dumped to {out_file}') 212 | mmcv.dump(coco_format_json, out_file) 213 | 214 | 215 | def main(): 216 | args = parse_args() 217 | for version in args.version: 218 | nuim = NuImages( 219 | dataroot=args.data_root, version=version, verbose=True, lazy=True) 220 | export_nuim_to_coco(nuim, args.data_root, args.out_dir, args.extra_tag, 221 | version, args.nproc) 222 | 223 | 224 | if __name__ == '__main__': 225 | main() 226 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/nuscenes_dataset.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2022 Toyota Research Institute, Dian Chen. All Rights Reserved. 3 | # ------------------------------------------------------------------------ 4 | # Copyright (c) 2022 megvii-model. All Rights Reserved. 5 | # ------------------------------------------------------------------------ 6 | # Modified from DETR3D (https://github.com/WangYueFt/detr3d) 7 | # Copyright (c) 2021 Wang, Yue 8 | # ------------------------------------------------------------------------ 9 | # Modified from mmdetection3d (https://github.com/open-mmlab/mmdetection3d) 10 | # Copyright (c) OpenMMLab. All rights reserved. 11 | # ------------------------------------------------------------------------ 12 | import numpy as np 13 | from mmdet.datasets import DATASETS 14 | from mmdet3d.datasets import NuScenesDataset 15 | 16 | 17 | @DATASETS.register_module() 18 | class CustomNuScenesDataset(NuScenesDataset): 19 | r"""NuScenes Dataset. 20 | This dataset only adds camera intrinsics and extrinsics to the results.
21 | """ 22 | 23 | def __init__(self, 24 | ann_file, 25 | pipeline=None, 26 | data_root=None, 27 | classes=None, 28 | load_interval=1, 29 | with_velocity=True, 30 | modality=None, 31 | box_type_3d='LiDAR', 32 | filter_empty_gt=True, 33 | test_mode=False, 34 | eval_version='detection_cvpr_2019', 35 | use_valid_flag=False): 36 | self.load_interval = load_interval 37 | self.use_valid_flag = use_valid_flag 38 | super().__init__( 39 | data_root=data_root, 40 | ann_file=ann_file, 41 | pipeline=pipeline, 42 | classes=classes, 43 | modality=modality, 44 | box_type_3d=box_type_3d, 45 | filter_empty_gt=filter_empty_gt, 46 | test_mode=test_mode) 47 | 48 | self.with_velocity = with_velocity 49 | self.eval_version = eval_version 50 | from nuscenes.eval.detection.config import config_factory 51 | self.eval_detection_configs = config_factory(self.eval_version) 52 | if self.modality is None: 53 | self.modality = dict( 54 | use_camera=False, 55 | use_lidar=True, 56 | use_radar=False, 57 | use_map=False, 58 | use_external=False, 59 | ) 60 | 61 | def _get_scene_mapping(self): 62 | scene_mapping = dict() 63 | for scene in self.nusc.scene: 64 | idx = 0 65 | sample_token = scene['first_sample_token'] 66 | scene_mapping[sample_token] = (scene['name'], idx) 67 | while sample_token is not '': 68 | idx += 1 69 | sample_token = self.nusc.get('sample', sample_token)['next'] 70 | scene_mapping[sample_token] = (scene['name'], idx) 71 | 72 | return scene_mapping 73 | 74 | def get_data_info(self, index): 75 | """Get data info according to the given index. 76 | Args: 77 | index (int): Index of the sample data to get. 78 | Returns: 79 | dict: Data information that will be passed to the data \ 80 | preprocessing pipelines. It includes the following keys: 81 | 82 | - sample_idx (str): Sample index. 83 | - pts_filename (str): Filename of point clouds. 84 | - sweeps (list[dict]): Infos of sweeps. 85 | - timestamp (float): Sample timestamp. 86 | - img_filename (str, optional): Image filename. 87 | - lidar2img (list[np.ndarray], optional): Transformations \ 88 | from lidar to different cameras. 89 | - ann_info (dict): Annotation info. 
90 | """ 91 | info = self.data_infos[index] 92 | # TODO: dirty work-around to use the pre-generated info files 93 | info['lidar_path'] = info['lidar_path'].replace('/data/Dataset/nuScenes', 'data/nuscenes') 94 | for sweep in info.get('cam_sweeps', []): 95 | for _, cam_info in sweep.items(): 96 | if isinstance(cam_info, dict): 97 | cam_info['data_path'] = cam_info['data_path'].replace('/data/Dataset/nuScenes', 'data/nuscenes') 98 | # standard protocal modified from SECOND.Pytorch 99 | input_dict = dict( 100 | sample_idx=info['token'], 101 | pts_filename=info['lidar_path'], 102 | sweeps=info.get('sweeps', []), # lidar sweeps 103 | cam_sweeps=info.get('cam_sweeps', []), # camera sweeps 104 | timestamp=info['timestamp'] / 1e6, 105 | ) 106 | 107 | if self.modality['use_camera']: 108 | image_paths = [] 109 | lidar2img_rts = [] 110 | intrinsics = [] 111 | extrinsics = [] 112 | img_timestamp = [] 113 | for cam_type, cam_info in info['cams'].items(): 114 | img_timestamp.append(cam_info['timestamp'] / 1e6) 115 | # TODO: dirty work-around to use the pre-generated info files 116 | cam_info['data_path'] = cam_info['data_path'].replace('/data/Dataset/nuScenes', 'data/nuscenes') 117 | image_paths.append(cam_info['data_path']) 118 | # obtain lidar to image transformation matrix 119 | lidar2cam_r = np.linalg.inv(cam_info['sensor2lidar_rotation']) 120 | lidar2cam_t = cam_info['sensor2lidar_translation'] @ lidar2cam_r.T 121 | lidar2cam_rt = np.eye(4) 122 | lidar2cam_rt[:3, :3] = lidar2cam_r.T 123 | lidar2cam_rt[3, :3] = -lidar2cam_t 124 | intrinsic = cam_info['cam_intrinsic'] 125 | viewpad = np.eye(4) 126 | viewpad[:intrinsic.shape[0], :intrinsic.shape[1]] = intrinsic 127 | lidar2img_rt = (viewpad @ lidar2cam_rt.T) 128 | # NOTE: transformation & points use column major multiplication, i.e., x' = Tx 129 | intrinsics.append(viewpad) 130 | extrinsics.append(np.linalg.inv(lidar2cam_rt.T)) 131 | lidar2img_rts.append(lidar2img_rt) 132 | 133 | input_dict.update( 134 | dict( 135 | img_timestamp=img_timestamp, 136 | img_filename=image_paths, 137 | lidar2img=lidar2img_rts, 138 | intrinsics=intrinsics, 139 | extrinsics=extrinsics)) 140 | 141 | if not self.test_mode: 142 | annos = self.get_ann_info(index) 143 | input_dict['ann_info'] = annos 144 | return input_dict 145 | 146 | def evaluate(self, 147 | results, 148 | metric=['bbox'], 149 | logger=None, 150 | jsonfile_prefix=None, 151 | result_names=['pts_bbox'], 152 | show=False, 153 | out_dir=None, 154 | pipeline=None): 155 | """Evaluation in nuScenes protocol. 156 | 157 | Args: 158 | results (list[dict]): Testing results of the dataset. 159 | metric (str | list[str]): Metrics to be evaluated. 160 | logger (logging.Logger | str | None): Logger used for printing 161 | related information during evaluation. Default: None. 162 | jsonfile_prefix (str | None): The prefix of json files. It includes 163 | the file path and the prefix of filename, e.g., "a/b/prefix". 164 | If not specified, a temp file will be created. Default: None. 165 | show (bool): Whether to visualize. 166 | Default: False. 167 | out_dir (str): Path to save the visualization results. 168 | Default: None. 169 | pipeline (list[dict], optional): raw data loading for showing. 170 | Default: None. 171 | 172 | Returns: 173 | dict[str, float]: Results of each evaluation metric. 
174 | """ 175 | results_dict = dict() 176 | if 'bbox' in metric: 177 | result_files, tmp_dir = self.format_results(results, jsonfile_prefix) 178 | 179 | if isinstance(result_files, dict): 180 | for name in result_names: 181 | print('Evaluating bboxes of {}'.format(name)) 182 | ret_dict = self._evaluate_single(result_files[name]) 183 | results_dict.update(ret_dict) 184 | elif isinstance(result_files, str): 185 | results_dict = self._evaluate_single(result_files) 186 | 187 | if tmp_dir is not None: 188 | tmp_dir.cleanup() 189 | 190 | if show: 191 | self.show(results, out_dir, pipeline=pipeline) 192 | 193 | return results_dict 194 | -------------------------------------------------------------------------------- /tools/misc/browse_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import numpy as np 4 | import warnings 5 | from mmcv import Config, DictAction, mkdir_or_exist, track_iter_progress 6 | from os import path as osp 7 | 8 | from mmdet3d.core.bbox import (Box3DMode, CameraInstance3DBoxes, Coord3DMode, 9 | DepthInstance3DBoxes, LiDARInstance3DBoxes) 10 | from mmdet3d.core.visualizer import (show_multi_modality_result, show_result, 11 | show_seg_result) 12 | from mmdet3d.datasets import build_dataset 13 | 14 | 15 | def parse_args(): 16 | parser = argparse.ArgumentParser(description='Browse a dataset') 17 | parser.add_argument('config', help='train config file path') 18 | parser.add_argument( 19 | '--skip-type', 20 | type=str, 21 | nargs='+', 22 | default=['Normalize'], 23 | help='skip some useless pipeline') 24 | parser.add_argument( 25 | '--output-dir', 26 | default=None, 27 | type=str, 28 | help='If there is no display interface, you can save it') 29 | parser.add_argument( 30 | '--task', 31 | type=str, 32 | choices=['det', 'seg', 'multi_modality-det', 'mono-det'], 33 | help='Determine the visualization method depending on the task.') 34 | parser.add_argument( 35 | '--online', 36 | action='store_true', 37 | help='Whether to perform online visualization. Note that you often ' 38 | 'need a monitor to do so.') 39 | parser.add_argument( 40 | '--cfg-options', 41 | nargs='+', 42 | action=DictAction, 43 | help='override some settings in the used config, the key-value pair ' 44 | 'in xxx=yyy format will be merged into config file. If the value to ' 45 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 46 | 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' 47 | 'Note that the quotation marks are necessary and that no white space ' 48 | 'is allowed.') 49 | args = parser.parse_args() 50 | return args 51 | 52 | 53 | def build_data_cfg(config_path, skip_type, cfg_options): 54 | """Build data config for loading visualization data.""" 55 | cfg = Config.fromfile(config_path) 56 | if cfg_options is not None: 57 | cfg.merge_from_dict(cfg_options) 58 | # import modules from string list. 
59 | if cfg.get('custom_imports', None): 60 | from mmcv.utils import import_modules_from_strings 61 | import_modules_from_strings(**cfg['custom_imports']) 62 | # extract inner dataset of `RepeatDataset` as `cfg.data.train` 63 | # so we don't need to worry about it later 64 | if cfg.data.train['type'] == 'RepeatDataset': 65 | cfg.data.train = cfg.data.train.dataset 66 | # use only first dataset for `ConcatDataset` 67 | if cfg.data.train['type'] == 'ConcatDataset': 68 | cfg.data.train = cfg.data.train.datasets[0] 69 | train_data_cfg = cfg.data.train 70 | # eval_pipeline purely consists of loading functions 71 | # use eval_pipeline for data loading 72 | train_data_cfg['pipeline'] = [ 73 | x for x in cfg.eval_pipeline if x['type'] not in skip_type 74 | ] 75 | 76 | return cfg 77 | 78 | 79 | def to_depth_mode(points, bboxes): 80 | """Convert points and bboxes to Depth Coord and Depth Box mode.""" 81 | if points is not None: 82 | points = Coord3DMode.convert_point(points.copy(), Coord3DMode.LIDAR, 83 | Coord3DMode.DEPTH) 84 | if bboxes is not None: 85 | bboxes = Box3DMode.convert(bboxes.clone(), Box3DMode.LIDAR, 86 | Box3DMode.DEPTH) 87 | return points, bboxes 88 | 89 | 90 | def show_det_data(idx, dataset, out_dir, filename, show=False): 91 | """Visualize 3D point cloud and 3D bboxes.""" 92 | example = dataset.prepare_train_data(idx) 93 | points = example['points']._data.numpy() 94 | gt_bboxes = dataset.get_ann_info(idx)['gt_bboxes_3d'].tensor 95 | if dataset.box_mode_3d != Box3DMode.DEPTH: 96 | points, gt_bboxes = to_depth_mode(points, gt_bboxes) 97 | show_result( 98 | points, 99 | gt_bboxes.clone(), 100 | None, 101 | out_dir, 102 | filename, 103 | show=show, 104 | snapshot=True) 105 | 106 | 107 | def show_seg_data(idx, dataset, out_dir, filename, show=False): 108 | """Visualize 3D point cloud and segmentation mask.""" 109 | example = dataset.prepare_train_data(idx) 110 | points = example['points']._data.numpy() 111 | gt_seg = example['pts_semantic_mask']._data.numpy() 112 | show_seg_result( 113 | points, 114 | gt_seg.copy(), 115 | None, 116 | out_dir, 117 | filename, 118 | np.array(dataset.PALETTE), 119 | dataset.ignore_index, 120 | show=show, 121 | snapshot=True) 122 | 123 | 124 | def show_proj_bbox_img(idx, 125 | dataset, 126 | out_dir, 127 | filename, 128 | show=False, 129 | is_nus_mono=False): 130 | """Visualize 3D bboxes on 2D image by projection.""" 131 | try: 132 | example = dataset.prepare_train_data(idx) 133 | except AttributeError: # for Mono-3D datasets 134 | example = dataset.prepare_train_img(idx) 135 | gt_bboxes = dataset.get_ann_info(idx)['gt_bboxes_3d'] 136 | img_metas = example['img_metas']._data 137 | img = example['img']._data.numpy() 138 | # need to transpose channel to first dim 139 | img = img.transpose(1, 2, 0) 140 | # no 3D gt bboxes, just show img 141 | if gt_bboxes.tensor.shape[0] == 0: 142 | gt_bboxes = None 143 | if isinstance(gt_bboxes, DepthInstance3DBoxes): 144 | show_multi_modality_result( 145 | img, 146 | gt_bboxes, 147 | None, 148 | None, 149 | out_dir, 150 | filename, 151 | box_mode='depth', 152 | img_metas=img_metas, 153 | show=show) 154 | elif isinstance(gt_bboxes, LiDARInstance3DBoxes): 155 | show_multi_modality_result( 156 | img, 157 | gt_bboxes, 158 | None, 159 | img_metas['lidar2img'], 160 | out_dir, 161 | filename, 162 | box_mode='lidar', 163 | img_metas=img_metas, 164 | show=show) 165 | elif isinstance(gt_bboxes, CameraInstance3DBoxes): 166 | show_multi_modality_result( 167 | img, 168 | gt_bboxes, 169 | None, 170 | img_metas['cam2img'], 171 | out_dir, 
172 | filename, 173 | box_mode='camera', 174 | img_metas=img_metas, 175 | show=show) 176 | else: 177 | # can't project, just show img 178 | warnings.warn( 179 | f'unrecognized gt box type {type(gt_bboxes)}, only show image') 180 | show_multi_modality_result( 181 | img, None, None, None, out_dir, filename, show=show) 182 | 183 | 184 | def main(): 185 | args = parse_args() 186 | 187 | if args.output_dir is not None: 188 | mkdir_or_exist(args.output_dir) 189 | 190 | cfg = build_data_cfg(args.config, args.skip_type, args.cfg_options) 191 | try: 192 | dataset = build_dataset( 193 | cfg.data.train, default_args=dict(filter_empty_gt=False)) 194 | except TypeError: # seg dataset doesn't have `filter_empty_gt` key 195 | dataset = build_dataset(cfg.data.train) 196 | data_infos = dataset.data_infos 197 | dataset_type = cfg.dataset_type 198 | 199 | # configure visualization mode 200 | vis_task = args.task # 'det', 'seg', 'multi_modality-det', 'mono-det' 201 | 202 | for idx, data_info in enumerate(track_iter_progress(data_infos)): 203 | if dataset_type in ['KittiDataset', 'WaymoDataset']: 204 | data_path = data_info['point_cloud']['velodyne_path'] 205 | elif dataset_type in [ 206 | 'ScanNetDataset', 'SUNRGBDDataset', 'ScanNetSegDataset', 207 | 'S3DISSegDataset', 'S3DISDataset' 208 | ]: 209 | data_path = data_info['pts_path'] 210 | elif dataset_type in ['NuScenesDataset', 'LyftDataset']: 211 | data_path = data_info['lidar_path'] 212 | elif dataset_type in ['NuScenesMonoDataset']: 213 | data_path = data_info['file_name'] 214 | else: 215 | raise NotImplementedError( 216 | f'unsupported dataset type {dataset_type}') 217 | 218 | file_name = osp.splitext(osp.basename(data_path))[0] 219 | 220 | if vis_task in ['det', 'multi_modality-det']: 221 | # show 3D bboxes on 3D point clouds 222 | show_det_data( 223 | idx, dataset, args.output_dir, file_name, show=args.online) 224 | if vis_task in ['multi_modality-det', 'mono-det']: 225 | # project 3D bboxes to 2D image 226 | show_proj_bbox_img( 227 | idx, 228 | dataset, 229 | args.output_dir, 230 | file_name, 231 | show=args.online, 232 | is_nus_mono=(dataset_type == 'NuScenesMonoDataset')) 233 | elif vis_task in ['seg']: 234 | # show 3D segmentation mask on 3D point clouds 235 | show_seg_data( 236 | idx, dataset, args.output_dir, file_name, show=args.online) 237 | 238 | 239 | if __name__ == '__main__': 240 | main() 241 | -------------------------------------------------------------------------------- /tools/data_converter/sunrgbd_data_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import mmcv 3 | import numpy as np 4 | from concurrent import futures as futures 5 | from os import path as osp 6 | from scipy import io as sio 7 | 8 | 9 | def random_sampling(points, num_points, replace=None, return_choices=False): 10 | """Random sampling. 11 | 12 | Sampling point cloud to a certain number of points. 13 | 14 | Args: 15 | points (ndarray): Point cloud. 16 | num_points (int): The number of samples. 17 | replace (bool): Whether the sample is with or without replacement. 18 | return_choices (bool): Whether to return choices. 19 | 20 | Returns: 21 | points (ndarray): Point cloud after sampling. 
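# --- Illustrative sketch: up- and down-sampling a toy cloud ---
# When the cloud has fewer points than requested, the body below falls back
# to drawing with replacement (replace is inferred from the sizes).
import numpy as np

cloud = np.random.rand(1000, 6)                    # toy xyz+rgb cloud
up = random_sampling(cloud, 50000)                 # 1000 < 50000 -> replace=True
down, idx = random_sampling(cloud, 256, return_choices=True)
print(up.shape, down.shape, idx.shape)             # (50000, 6) (256, 6) (256,)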
22 | """ 23 | 24 | if replace is None: 25 | replace = (points.shape[0] < num_points) 26 | choices = np.random.choice(points.shape[0], num_points, replace=replace) 27 | if return_choices: 28 | return points[choices], choices 29 | else: 30 | return points[choices] 31 | 32 | 33 | class SUNRGBDInstance(object): 34 | 35 | def __init__(self, line): 36 | data = line.split(' ') 37 | data[1:] = [float(x) for x in data[1:]] 38 | self.classname = data[0] 39 | self.xmin = data[1] 40 | self.ymin = data[2] 41 | self.xmax = data[1] + data[3] 42 | self.ymax = data[2] + data[4] 43 | self.box2d = np.array([self.xmin, self.ymin, self.xmax, self.ymax]) 44 | self.centroid = np.array([data[5], data[6], data[7]]) 45 | self.w = data[8] 46 | self.l = data[9] # noqa: E741 47 | self.h = data[10] 48 | self.orientation = np.zeros((3, )) 49 | self.orientation[0] = data[11] 50 | self.orientation[1] = data[12] 51 | self.heading_angle = -1 * np.arctan2(self.orientation[1], 52 | self.orientation[0]) 53 | self.box3d = np.concatenate([ 54 | self.centroid, 55 | np.array([self.l * 2, self.w * 2, self.h * 2, self.heading_angle]) 56 | ]) 57 | 58 | 59 | class SUNRGBDData(object): 60 | """SUNRGBD data. 61 | 62 | Generate scannet infos for sunrgbd_converter. 63 | 64 | Args: 65 | root_path (str): Root path of the raw data. 66 | split (str): Set split type of the data. Default: 'train'. 67 | use_v1 (bool): Whether to use v1. Default: False. 68 | """ 69 | 70 | def __init__(self, root_path, split='train', use_v1=False): 71 | self.root_dir = root_path 72 | self.split = split 73 | self.split_dir = osp.join(root_path, 'sunrgbd_trainval') 74 | self.classes = [ 75 | 'bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser', 76 | 'night_stand', 'bookshelf', 'bathtub' 77 | ] 78 | self.cat2label = {cat: self.classes.index(cat) for cat in self.classes} 79 | self.label2cat = { 80 | label: self.classes[label] 81 | for label in range(len(self.classes)) 82 | } 83 | assert split in ['train', 'val', 'test'] 84 | split_file = osp.join(self.split_dir, f'{split}_data_idx.txt') 85 | mmcv.check_file_exist(split_file) 86 | self.sample_id_list = map(int, mmcv.list_from_file(split_file)) 87 | self.image_dir = osp.join(self.split_dir, 'image') 88 | self.calib_dir = osp.join(self.split_dir, 'calib') 89 | self.depth_dir = osp.join(self.split_dir, 'depth') 90 | if use_v1: 91 | self.label_dir = osp.join(self.split_dir, 'label_v1') 92 | else: 93 | self.label_dir = osp.join(self.split_dir, 'label') 94 | 95 | def __len__(self): 96 | return len(self.sample_id_list) 97 | 98 | def get_image(self, idx): 99 | img_filename = osp.join(self.image_dir, f'{idx:06d}.jpg') 100 | return mmcv.imread(img_filename) 101 | 102 | def get_image_shape(self, idx): 103 | image = self.get_image(idx) 104 | return np.array(image.shape[:2], dtype=np.int32) 105 | 106 | def get_depth(self, idx): 107 | depth_filename = osp.join(self.depth_dir, f'{idx:06d}.mat') 108 | depth = sio.loadmat(depth_filename)['instance'] 109 | return depth 110 | 111 | def get_calibration(self, idx): 112 | calib_filepath = osp.join(self.calib_dir, f'{idx:06d}.txt') 113 | lines = [line.rstrip() for line in open(calib_filepath)] 114 | Rt = np.array([float(x) for x in lines[0].split(' ')]) 115 | Rt = np.reshape(Rt, (3, 3), order='F').astype(np.float32) 116 | K = np.array([float(x) for x in lines[1].split(' ')]) 117 | K = np.reshape(K, (3, 3), order='F').astype(np.float32) 118 | return K, Rt 119 | 120 | def get_label_objects(self, idx): 121 | label_filename = osp.join(self.label_dir, f'{idx:06d}.txt') 122 | lines = 
[line.rstrip() for line in open(label_filename)] 123 | objects = [SUNRGBDInstance(line) for line in lines] 124 | return objects 125 | 126 | def get_infos(self, num_workers=4, has_label=True, sample_id_list=None): 127 | """Get data infos. 128 | 129 | This method gets information from the raw data. 130 | 131 | Args: 132 | num_workers (int): Number of threads to be used. Default: 4. 133 | has_label (bool): Whether the data has label. Default: True. 134 | sample_id_list (list[int]): Index list of the sample. 135 | Default: None. 136 | 137 | Returns: 138 | infos (list[dict]): Information of the raw data. 139 | """ 140 | 141 | def process_single_scene(sample_idx): 142 | print(f'{self.split} sample_idx: {sample_idx}') 143 | # convert depth to points 144 | SAMPLE_NUM = 50000 145 | # TODO: Check whether can move the point 146 | # sampling process during training. 147 | pc_upright_depth = self.get_depth(sample_idx) 148 | pc_upright_depth_subsampled = random_sampling( 149 | pc_upright_depth, SAMPLE_NUM) 150 | 151 | info = dict() 152 | pc_info = {'num_features': 6, 'lidar_idx': sample_idx} 153 | info['point_cloud'] = pc_info 154 | 155 | mmcv.mkdir_or_exist(osp.join(self.root_dir, 'points')) 156 | pc_upright_depth_subsampled.tofile( 157 | osp.join(self.root_dir, 'points', f'{sample_idx:06d}.bin')) 158 | 159 | info['pts_path'] = osp.join('points', f'{sample_idx:06d}.bin') 160 | img_path = osp.join('image', f'{sample_idx:06d}.jpg') 161 | image_info = { 162 | 'image_idx': sample_idx, 163 | 'image_shape': self.get_image_shape(sample_idx), 164 | 'image_path': img_path 165 | } 166 | info['image'] = image_info 167 | 168 | K, Rt = self.get_calibration(sample_idx) 169 | calib_info = {'K': K, 'Rt': Rt} 170 | info['calib'] = calib_info 171 | 172 | if has_label: 173 | obj_list = self.get_label_objects(sample_idx) 174 | annotations = {} 175 | annotations['gt_num'] = len([ 176 | obj.classname for obj in obj_list 177 | if obj.classname in self.cat2label.keys() 178 | ]) 179 | if annotations['gt_num'] != 0: 180 | annotations['name'] = np.array([ 181 | obj.classname for obj in obj_list 182 | if obj.classname in self.cat2label.keys() 183 | ]) 184 | annotations['bbox'] = np.concatenate([ 185 | obj.box2d.reshape(1, 4) for obj in obj_list 186 | if obj.classname in self.cat2label.keys() 187 | ], 188 | axis=0) 189 | annotations['location'] = np.concatenate([ 190 | obj.centroid.reshape(1, 3) for obj in obj_list 191 | if obj.classname in self.cat2label.keys() 192 | ], 193 | axis=0) 194 | annotations['dimensions'] = 2 * np.array([ 195 | [obj.l, obj.w, obj.h] for obj in obj_list 196 | if obj.classname in self.cat2label.keys() 197 | ]) # lwh (depth) format 198 | annotations['rotation_y'] = np.array([ 199 | obj.heading_angle for obj in obj_list 200 | if obj.classname in self.cat2label.keys() 201 | ]) 202 | annotations['index'] = np.arange( 203 | len(obj_list), dtype=np.int32) 204 | annotations['class'] = np.array([ 205 | self.cat2label[obj.classname] for obj in obj_list 206 | if obj.classname in self.cat2label.keys() 207 | ]) 208 | annotations['gt_boxes_upright_depth'] = np.stack( 209 | [ 210 | obj.box3d for obj in obj_list 211 | if obj.classname in self.cat2label.keys() 212 | ], 213 | axis=0) # (K,8) 214 | info['annos'] = annotations 215 | return info 216 | 217 | sample_id_list = sample_id_list if \ 218 | sample_id_list is not None else self.sample_id_list 219 | with futures.ThreadPoolExecutor(num_workers) as executor: 220 | infos = executor.map(process_single_scene, sample_id_list) 221 | return list(infos) 222 | 
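# --- Illustrative sketch: parsing one (made-up) SUN RGB-D label line ---
# SUNRGBDInstance above expects: classname, 2D box as (xmin, ymin, w, h),
# centroid, half-sizes (w, l, h) and the in-plane orientation vector.
toy_line = 'bed 100.0 80.0 60.0 40.0 1.20 3.40 0.50 0.80 1.00 0.45 0.71 0.70'
toy_inst = SUNRGBDInstance(toy_line)
# toy_inst.box2d         -> [100.  80. 160. 120.]
# toy_inst.heading_angle -> -arctan2(0.70, 0.71) ~ -0.78 rad
# toy_inst.box3d         -> [1.2, 3.4, 0.5, 2.0, 1.6, 0.9, -0.78]  (x, y, z, 2l, 2w, 2h, yaw)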
-------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/coders/nms_free_coder.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2022 Toyota Research Institute, Dian Chen. All Rights Reserved. 3 | # ------------------------------------------------------------------------ 4 | # Copyright (c) 2021 megvii-model. All Rights Reserved. 5 | # ------------------------------------------------------------------------ 6 | # Modified from DETR3D (https://github.com/WangYueFt/detr3d) 7 | # Copyright (c) 2021 Wang, Yue 8 | # ------------------------------------------------------------------------ 9 | # Modified from mmdetection3d (https://github.com/open-mmlab/mmdetection3d) 10 | # Copyright (c) OpenMMLab. All rights reserved. 11 | # ------------------------------------------------------------------------ 12 | import torch 13 | 14 | from mmdet.core.bbox import BaseBBoxCoder 15 | from mmdet.core.bbox.builder import BBOX_CODERS 16 | from projects.mmdet3d_plugin.core.bbox.util import denormalize_bbox 17 | import torch.nn.functional as F 18 | 19 | 20 | @BBOX_CODERS.register_module() 21 | class NMSFreeCoder(BaseBBoxCoder): 22 | """Bbox coder for NMS-free detector. 23 | Args: 24 | pc_range (list[float]): Range of point cloud. 25 | post_center_range (list[float]): Limit of the center. 26 | Default: None. 27 | max_num (int): Max number to be kept. Default: 100. 28 | score_threshold (float): Threshold to filter boxes based on score. 29 | Default: None. 30 | code_size (int): Code size of bboxes. Default: 9 31 | """ 32 | 33 | def __init__(self, 34 | pc_range, 35 | voxel_size=None, 36 | post_center_range=None, 37 | max_num=100, 38 | score_threshold=None, 39 | num_classes=10): 40 | 41 | self.pc_range = pc_range 42 | self.voxel_size = voxel_size 43 | self.post_center_range = post_center_range 44 | self.max_num = max_num 45 | self.score_threshold = score_threshold 46 | self.num_classes = num_classes 47 | 48 | def encode(self): 49 | pass 50 | 51 | def decode_single(self, cls_scores, bbox_preds): 52 | """Decode bboxes. 53 | Args: 54 | cls_scores (Tensor): Outputs from the classification head, \ 55 | shape [num_query, cls_out_channels]. Note \ 56 | cls_out_channels should includes background. 57 | bbox_preds (Tensor): Outputs from the regression \ 58 | head with normalized coordinate format (cx, cy, w, l, cz, h, rot_sine, rot_cosine, vx, vy). \ 59 | Shape [num_query, 9]. 60 | Returns: 61 | list[dict]: Decoded boxes. 
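# --- Illustrative sketch: the flattened top-k used below ---
# Scores are flattened over (query, class); modulo and integer division then
# recover the class id and the query index. Toy run with 4 queries, 3 classes:
import torch

cls_scores = torch.randn(4, 3)                     # [num_query, cls_out_channels]
scores, indexs = cls_scores.sigmoid().view(-1).topk(5)
labels = indexs % 3                                # class id of each pick
bbox_index = indexs // 3                           # query that produced it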
62 | """ 63 | max_num = self.max_num 64 | 65 | cls_scores = cls_scores.sigmoid() 66 | scores, indexs = cls_scores.view(-1).topk(max_num) 67 | labels = indexs % self.num_classes 68 | bbox_index = indexs // self.num_classes 69 | bbox_preds = bbox_preds[bbox_index] 70 | 71 | final_box_preds = denormalize_bbox(bbox_preds, self.pc_range) 72 | final_scores = scores 73 | final_preds = labels 74 | 75 | # use score threshold 76 | if self.score_threshold is not None: 77 | thresh_mask = final_scores > self.score_threshold 78 | if self.post_center_range is not None: 79 | self.post_center_range = torch.tensor(self.post_center_range, device=scores.device) 80 | 81 | mask = (final_box_preds[..., :3] >= self.post_center_range[:3]).all(1) 82 | mask &= (final_box_preds[..., :3] <= self.post_center_range[3:]).all(1) 83 | 84 | if self.score_threshold: 85 | mask &= thresh_mask 86 | 87 | boxes3d = final_box_preds[mask] 88 | scores = final_scores[mask] 89 | labels = final_preds[mask] 90 | predictions_dict = {'bboxes': boxes3d, 'scores': scores, 'labels': labels} 91 | 92 | else: 93 | raise NotImplementedError('Need to reorganize output as a batch, only ' 94 | 'support post_center_range is not None for now!') 95 | return predictions_dict 96 | 97 | def decode(self, preds_dicts): 98 | """Decode bboxes. 99 | Args: 100 | all_cls_scores (Tensor): Outputs from the classification head, \ 101 | shape [nb_dec, bs, num_query, cls_out_channels]. Note \ 102 | cls_out_channels should includes background. 103 | all_bbox_preds (Tensor): Sigmoid outputs from the regression \ 104 | head with normalized coordinate format (cx, cy, w, l, cz, h, rot_sine, rot_cosine, vx, vy). \ 105 | Shape [nb_dec, bs, num_query, 9]. 106 | Returns: 107 | list[dict]: Decoded boxes. 108 | """ 109 | all_cls_scores = preds_dicts['all_cls_scores'][-1] 110 | all_bbox_preds = preds_dicts['all_bbox_preds'][-1] 111 | 112 | batch_size = all_cls_scores.size()[0] 113 | predictions_list = [] 114 | for i in range(batch_size): 115 | predictions_list.append(self.decode_single(all_cls_scores[i], all_bbox_preds[i])) 116 | return predictions_list 117 | 118 | 119 | @BBOX_CODERS.register_module() 120 | class NMSFreeClsCoder(BaseBBoxCoder): 121 | """Bbox coder for NMS-free detector. 122 | Args: 123 | pc_range (list[float]): Range of point cloud. 124 | post_center_range (list[float]): Limit of the center. 125 | Default: None. 126 | max_num (int): Max number to be kept. Default: 100. 127 | score_threshold (float): Threshold to filter boxes based on score. 128 | Default: None. 129 | code_size (int): Code size of bboxes. Default: 9 130 | """ 131 | 132 | def __init__(self, 133 | pc_range, 134 | voxel_size=None, 135 | post_center_range=None, 136 | max_num=100, 137 | score_threshold=None, 138 | num_classes=10): 139 | 140 | self.pc_range = pc_range 141 | self.voxel_size = voxel_size 142 | self.post_center_range = post_center_range 143 | self.max_num = max_num 144 | self.score_threshold = score_threshold 145 | self.num_classes = num_classes 146 | 147 | def encode(self): 148 | pass 149 | 150 | def decode_single(self, cls_scores, bbox_preds): 151 | """Decode bboxes. 152 | Args: 153 | cls_scores (Tensor): Outputs from the classification head, \ 154 | shape [num_query, cls_out_channels]. Note \ 155 | cls_out_channels should includes background. 156 | bbox_preds (Tensor): Outputs from the regression \ 157 | head with normalized coordinate format (cx, cy, w, l, cz, h, rot_sine, rot_cosine, vx, vy). \ 158 | Shape [num_query, 9]. 159 | Returns: 160 | list[dict]: Decoded boxes. 
161 | """ 162 | max_num = self.max_num 163 | 164 | # cls_scores = cls_scores.sigmoid() 165 | # scores, indexs = cls_scores.view(-1).topk(max_num) 166 | # labels = indexs % self.num_classes 167 | # bbox_index = indexs // self.num_classes 168 | # bbox_preds = bbox_preds[bbox_index] 169 | 170 | cls_scores, labels = F.softmax(cls_scores, dim=-1)[..., :-1].max(-1) 171 | scores, indexs = cls_scores.view(-1).topk(max_num) 172 | labels = labels[indexs] 173 | bbox_preds = bbox_preds[indexs] 174 | 175 | final_box_preds = denormalize_bbox(bbox_preds, self.pc_range) 176 | final_scores = scores 177 | final_preds = labels 178 | 179 | # use score threshold 180 | if self.score_threshold is not None: 181 | thresh_mask = final_scores > self.score_threshold 182 | if self.post_center_range is not None: 183 | self.post_center_range = torch.tensor(self.post_center_range, device=scores.device) 184 | 185 | mask = (final_box_preds[..., :3] >= self.post_center_range[:3]).all(1) 186 | mask &= (final_box_preds[..., :3] <= self.post_center_range[3:]).all(1) 187 | 188 | if self.score_threshold: 189 | mask &= thresh_mask 190 | 191 | boxes3d = final_box_preds[mask] 192 | scores = final_scores[mask] 193 | labels = final_preds[mask] 194 | predictions_dict = {'bboxes': boxes3d, 'scores': scores, 'labels': labels} 195 | 196 | else: 197 | raise NotImplementedError('Need to reorganize output as a batch, only ' 198 | 'support post_center_range is not None for now!') 199 | return predictions_dict 200 | 201 | def decode(self, preds_dicts): 202 | """Decode bboxes. 203 | Args: 204 | all_cls_scores (Tensor): Outputs from the classification head, \ 205 | shape [nb_dec, bs, num_query, cls_out_channels]. Note \ 206 | cls_out_channels should includes background. 207 | all_bbox_preds (Tensor): Sigmoid outputs from the regression \ 208 | head with normalized coordinate format (cx, cy, w, l, cz, h, rot_sine, rot_cosine, vx, vy). \ 209 | Shape [nb_dec, bs, num_query, 9]. 210 | Returns: 211 | list[dict]: Decoded boxes. 212 | """ 213 | all_cls_scores = preds_dicts['all_cls_scores'][-1] 214 | all_bbox_preds = preds_dicts['all_bbox_preds'][-1] 215 | 216 | batch_size = all_cls_scores.size()[0] 217 | predictions_list = [] 218 | for i in range(batch_size): 219 | predictions_list.append(self.decode_single(all_cls_scores[i], all_bbox_preds[i])) 220 | return predictions_list 221 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/necks/cp_fpn.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2022 megvii-model. All Rights Reserved. 3 | # ------------------------------------------------------------------------ 4 | # Modified from mmdetection (https://github.com/open-mmlab/mmdetection) 5 | # Copyright (c) OpenMMLab. All rights reserved. 6 | # ------------------------------------------------------------------------ 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | from mmcv.cnn import ConvModule 10 | from mmcv.runner import BaseModule, auto_fp16 11 | 12 | from mmdet.models import NECKS 13 | 14 | ####This FPN remove the unused parameters which can used with checkpoint (with_cp = True in Backbone) 15 | @NECKS.register_module() 16 | class CPFPN(BaseModule): 17 | r"""Feature Pyramid Network. 18 | 19 | This is an implementation of paper `Feature Pyramid Networks for Object 20 | Detection `_. 
21 | 22 | Args: 23 | in_channels (List[int]): Number of input channels per scale. 24 | out_channels (int): Number of output channels (used at each scale) 25 | num_outs (int): Number of output scales. 26 | start_level (int): Index of the start input backbone level used to 27 | build the feature pyramid. Default: 0. 28 | end_level (int): Index of the end input backbone level (exclusive) to 29 | build the feature pyramid. Default: -1, which means the last level. 30 | add_extra_convs (bool | str): If bool, it decides whether to add conv 31 | layers on top of the original feature maps. Default to False. 32 | If True, it is equivalent to `add_extra_convs='on_input'`. 33 | If str, it specifies the source feature map of the extra convs. 34 | Only the following options are allowed 35 | 36 | - 'on_input': Last feat map of neck inputs (i.e. backbone feature). 37 | - 'on_lateral': Last feature map after lateral convs. 38 | - 'on_output': The last output feature map after fpn convs. 39 | relu_before_extra_convs (bool): Whether to apply relu before the extra 40 | conv. Default: False. 41 | no_norm_on_lateral (bool): Whether to apply norm on lateral. 42 | Default: False. 43 | conv_cfg (dict): Config dict for convolution layer. Default: None. 44 | norm_cfg (dict): Config dict for normalization layer. Default: None. 45 | act_cfg (str): Config dict for activation layer in ConvModule. 46 | Default: None. 47 | upsample_cfg (dict): Config dict for interpolate layer. 48 | Default: `dict(mode='nearest')` 49 | init_cfg (dict or list[dict], optional): Initialization config dict. 50 | 51 | Example: 52 | >>> import torch 53 | >>> in_channels = [2, 3, 5, 7] 54 | >>> scales = [340, 170, 84, 43] 55 | >>> inputs = [torch.rand(1, c, s, s) 56 | ... for c, s in zip(in_channels, scales)] 57 | >>> self = FPN(in_channels, 11, len(in_channels)).eval() 58 | >>> outputs = self.forward(inputs) 59 | >>> for i in range(len(outputs)): 60 | ... 
print(f'outputs[{i}].shape = {outputs[i].shape}') 61 | outputs[0].shape = torch.Size([1, 11, 340, 340]) 62 | outputs[1].shape = torch.Size([1, 11, 170, 170]) 63 | outputs[2].shape = torch.Size([1, 11, 84, 84]) 64 | outputs[3].shape = torch.Size([1, 11, 43, 43]) 65 | """ 66 | 67 | def __init__(self, 68 | in_channels, 69 | out_channels, 70 | num_outs, 71 | start_level=0, 72 | end_level=-1, 73 | add_extra_convs=False, 74 | relu_before_extra_convs=False, 75 | no_norm_on_lateral=False, 76 | conv_cfg=None, 77 | norm_cfg=None, 78 | act_cfg=None, 79 | upsample_cfg=dict(mode='nearest'), 80 | init_cfg=dict( 81 | type='Xavier', layer='Conv2d', distribution='uniform')): 82 | super(CPFPN, self).__init__(init_cfg) 83 | assert isinstance(in_channels, list) 84 | self.in_channels = in_channels 85 | self.out_channels = out_channels 86 | self.num_ins = len(in_channels) 87 | self.num_outs = num_outs 88 | self.relu_before_extra_convs = relu_before_extra_convs 89 | self.no_norm_on_lateral = no_norm_on_lateral 90 | self.fp16_enabled = False 91 | self.upsample_cfg = upsample_cfg.copy() 92 | 93 | if end_level == -1: 94 | self.backbone_end_level = self.num_ins 95 | assert num_outs >= self.num_ins - start_level 96 | else: 97 | # if end_level < inputs, no extra level is allowed 98 | self.backbone_end_level = end_level 99 | assert end_level <= len(in_channels) 100 | assert num_outs == end_level - start_level 101 | self.start_level = start_level 102 | self.end_level = end_level 103 | self.add_extra_convs = add_extra_convs 104 | assert isinstance(add_extra_convs, (str, bool)) 105 | if isinstance(add_extra_convs, str): 106 | # Extra_convs_source choices: 'on_input', 'on_lateral', 'on_output' 107 | assert add_extra_convs in ('on_input', 'on_lateral', 'on_output') 108 | elif add_extra_convs: # True 109 | self.add_extra_convs = 'on_input' 110 | 111 | self.lateral_convs = nn.ModuleList() 112 | self.fpn_convs = nn.ModuleList() 113 | 114 | for i in range(self.start_level, self.backbone_end_level): 115 | l_conv = ConvModule( 116 | in_channels[i], 117 | out_channels, 118 | 1, 119 | conv_cfg=conv_cfg, 120 | norm_cfg=norm_cfg if not self.no_norm_on_lateral else None, 121 | act_cfg=act_cfg, 122 | inplace=False) 123 | self.lateral_convs.append(l_conv) 124 | if i == 0 : 125 | fpn_conv = ConvModule( 126 | out_channels, 127 | out_channels, 128 | 3, 129 | padding=1, 130 | conv_cfg=conv_cfg, 131 | norm_cfg=norm_cfg, 132 | act_cfg=act_cfg, 133 | inplace=False) 134 | self.fpn_convs.append(fpn_conv) 135 | 136 | # add extra conv layers (e.g., RetinaNet) 137 | extra_levels = num_outs - self.backbone_end_level + self.start_level 138 | if self.add_extra_convs and extra_levels >= 1: 139 | for i in range(extra_levels): 140 | if i == 0 and self.add_extra_convs == 'on_input': 141 | in_channels = self.in_channels[self.backbone_end_level - 1] 142 | else: 143 | in_channels = out_channels 144 | extra_fpn_conv = ConvModule( 145 | in_channels, 146 | out_channels, 147 | 3, 148 | stride=2, 149 | padding=1, 150 | conv_cfg=conv_cfg, 151 | norm_cfg=norm_cfg, 152 | act_cfg=act_cfg, 153 | inplace=False) 154 | self.fpn_convs.append(extra_fpn_conv) 155 | 156 | @auto_fp16() 157 | def forward(self, inputs): 158 | """Forward function.""" 159 | assert len(inputs) == len(self.in_channels) 160 | 161 | # build laterals 162 | laterals = [ 163 | lateral_conv(inputs[i + self.start_level]) 164 | for i, lateral_conv in enumerate(self.lateral_convs) 165 | ] 166 | 167 | # build top-down path 168 | used_backbone_levels = len(laterals) 169 | for i in range(used_backbone_levels - 
1, 0, -1): 170 | # In some cases, fixing `scale factor` (e.g. 2) is preferred, but 171 | # it cannot co-exist with `size` in `F.interpolate`. 172 | if 'scale_factor' in self.upsample_cfg: 173 | laterals[i - 1] += F.interpolate(laterals[i], 174 | **self.upsample_cfg) 175 | else: 176 | prev_shape = laterals[i - 1].shape[2:] 177 | laterals[i - 1] += F.interpolate( 178 | laterals[i], size=prev_shape, **self.upsample_cfg) 179 | 180 | # build outputs 181 | # part 1: from original levels 182 | outs = [ 183 | self.fpn_convs[i](laterals[i]) if i==0 else laterals[i] for i in range(used_backbone_levels) 184 | ] 185 | # part 2: add extra levels 186 | if self.num_outs > len(outs): 187 | # use max pool to get more levels on top of outputs 188 | # (e.g., Faster R-CNN, Mask R-CNN) 189 | if not self.add_extra_convs: 190 | for i in range(self.num_outs - used_backbone_levels): 191 | outs.append(F.max_pool2d(outs[-1], 1, stride=2)) 192 | # add conv layers on top of original feature maps (RetinaNet) 193 | else: 194 | if self.add_extra_convs == 'on_input': 195 | extra_source = inputs[self.backbone_end_level - 1] 196 | elif self.add_extra_convs == 'on_lateral': 197 | extra_source = laterals[-1] 198 | elif self.add_extra_convs == 'on_output': 199 | extra_source = outs[-1] 200 | else: 201 | raise NotImplementedError 202 | outs.append(self.fpn_convs[used_backbone_levels](extra_source)) 203 | for i in range(used_backbone_levels + 1, self.num_outs): 204 | if self.relu_before_extra_convs: 205 | outs.append(self.fpn_convs[i](F.relu(outs[-1]))) 206 | else: 207 | outs.append(self.fpn_convs[i](outs[-1])) 208 | return tuple(outs) 209 | -------------------------------------------------------------------------------- /tools/test.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import mmcv 4 | import os 5 | import torch 6 | import warnings 7 | from mmcv import Config, DictAction 8 | from mmcv.cnn import fuse_conv_bn 9 | from mmcv.parallel import MMDataParallel, MMDistributedDataParallel 10 | from mmcv.runner import (get_dist_info, init_dist, load_checkpoint, 11 | wrap_fp16_model) 12 | 13 | from mmdet3d.apis import single_gpu_test 14 | from mmdet3d.datasets import build_dataloader, build_dataset 15 | from mmdet3d.models import build_model 16 | from mmdet.apis import multi_gpu_test, set_random_seed 17 | from mmdet.datasets import replace_ImageToTensor 18 | 19 | 20 | def parse_args(): 21 | parser = argparse.ArgumentParser( 22 | description='MMDet test (and eval) a model') 23 | parser.add_argument('config', help='test config file path') 24 | parser.add_argument('checkpoint', help='checkpoint file') 25 | parser.add_argument('--out', help='output result file in pickle format') 26 | parser.add_argument( 27 | '--fuse-conv-bn', 28 | action='store_true', 29 | help='Whether to fuse conv and bn, this will slightly increase' 30 | 'the inference speed') 31 | parser.add_argument( 32 | '--format-only', 33 | action='store_true', 34 | help='Format the output results without perform evaluation. 
It is' 35 | 'useful when you want to format the result to a specific format and ' 36 | 'submit it to the test server') 37 | parser.add_argument( 38 | '--eval', 39 | type=str, 40 | nargs='+', 41 | help='evaluation metrics, which depends on the dataset, e.g., "bbox",' 42 | ' "segm", "proposal" for COCO, and "mAP", "recall" for PASCAL VOC') 43 | parser.add_argument('--show', action='store_true', help='show results') 44 | parser.add_argument( 45 | '--show-dir', help='directory where results will be saved') 46 | parser.add_argument( 47 | '--gpu-collect', 48 | action='store_true', 49 | help='whether to use gpu to collect results.') 50 | parser.add_argument( 51 | '--tmpdir', 52 | help='tmp directory used for collecting results from multiple ' 53 | 'workers, available when gpu-collect is not specified') 54 | parser.add_argument('--seed', type=int, default=0, help='random seed') 55 | parser.add_argument( 56 | '--deterministic', 57 | action='store_true', 58 | help='whether to set deterministic options for CUDNN backend.') 59 | parser.add_argument( 60 | '--cfg-options', 61 | nargs='+', 62 | action=DictAction, 63 | help='override some settings in the used config, the key-value pair ' 64 | 'in xxx=yyy format will be merged into config file. If the value to ' 65 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 66 | 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' 67 | 'Note that the quotation marks are necessary and that no white space ' 68 | 'is allowed.') 69 | parser.add_argument( 70 | '--options', 71 | nargs='+', 72 | action=DictAction, 73 | help='custom options for evaluation, the key-value pair in xxx=yyy ' 74 | 'format will be kwargs for dataset.evaluate() function (deprecate), ' 75 | 'change to --eval-options instead.') 76 | parser.add_argument( 77 | '--eval-options', 78 | nargs='+', 79 | action=DictAction, 80 | help='custom options for evaluation, the key-value pair in xxx=yyy ' 81 | 'format will be kwargs for dataset.evaluate() function') 82 | parser.add_argument( 83 | '--launcher', 84 | choices=['none', 'pytorch', 'slurm', 'mpi'], 85 | default='none', 86 | help='job launcher') 87 | parser.add_argument('--local_rank', type=int, default=0) 88 | args = parser.parse_args() 89 | if 'LOCAL_RANK' not in os.environ: 90 | os.environ['LOCAL_RANK'] = str(args.local_rank) 91 | 92 | if args.options and args.eval_options: 93 | raise ValueError( 94 | '--options and --eval-options cannot be both specified, ' 95 | '--options is deprecated in favor of --eval-options') 96 | if args.options: 97 | warnings.warn('--options is deprecated in favor of --eval-options') 98 | args.eval_options = args.options 99 | return args 100 | 101 | 102 | def main(): 103 | args = parse_args() 104 | 105 | assert args.out or args.eval or args.format_only or args.show \ 106 | or args.show_dir, \ 107 | ('Please specify at least one operation (save/eval/format/show the ' 108 | 'results / save the results) with the argument "--out", "--eval"' 109 | ', "--format-only", "--show" or "--show-dir"') 110 | 111 | if args.eval and args.format_only: 112 | raise ValueError('--eval and --format_only cannot be both specified') 113 | 114 | if args.out is not None and not args.out.endswith(('.pkl', '.pickle')): 115 | raise ValueError('The output file must be a pkl file.') 116 | 117 | cfg = Config.fromfile(args.config) 118 | if args.cfg_options is not None: 119 | cfg.merge_from_dict(args.cfg_options) 120 | # import modules from string list. 
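    # --- Illustrative sketch: what --cfg-options does under the hood ---
    # Dotted keys passed on the command line are merged into the loaded config
    # before the dataloader and model are built; the override values below are
    # only examples:
    #   cfg = Config.fromfile('projects/configs/vedet_vovnet_p4_1600x640_2vview_2frame.py')
    #   cfg.merge_from_dict({'data.workers_per_gpu': 2,
    #                        'data.test.ann_file': 'data/nuscenes/mmdet3d_nuscenes_30f_infos_val.pkl'})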
121 | if cfg.get('custom_imports', None): 122 | from mmcv.utils import import_modules_from_strings 123 | import_modules_from_strings(**cfg['custom_imports']) 124 | 125 | # import modules from plguin/xx, registry will be updated 126 | if hasattr(cfg, 'plugin'): 127 | if cfg.plugin: 128 | import importlib 129 | if hasattr(cfg, 'plugin_dir'): 130 | plugin_dir = cfg.plugin_dir 131 | _module_dir = os.path.dirname(plugin_dir) 132 | _module_dir = _module_dir.split('/') 133 | _module_path = _module_dir[0] 134 | 135 | for m in _module_dir[1:]: 136 | _module_path = _module_path + '.' + m 137 | print(_module_path) 138 | plg_lib = importlib.import_module(_module_path) 139 | else: 140 | # import dir is the dirpath for the config file 141 | _module_dir = os.path.dirname(args.config) 142 | _module_dir = _module_dir.split('/') 143 | _module_path = _module_dir[0] 144 | for m in _module_dir[1:]: 145 | _module_path = _module_path + '.' + m 146 | print(_module_path) 147 | plg_lib = importlib.import_module(_module_path) 148 | 149 | # set cudnn_benchmark 150 | if cfg.get('cudnn_benchmark', False): 151 | torch.backends.cudnn.benchmark = True 152 | 153 | cfg.model.pretrained = None 154 | # in case the test dataset is concatenated 155 | samples_per_gpu = 1 156 | if isinstance(cfg.data.test, dict): 157 | cfg.data.test.test_mode = True 158 | samples_per_gpu = cfg.data.test.pop('samples_per_gpu', 1) 159 | if samples_per_gpu > 1: 160 | # Replace 'ImageToTensor' to 'DefaultFormatBundle' 161 | cfg.data.test.pipeline = replace_ImageToTensor( 162 | cfg.data.test.pipeline) 163 | elif isinstance(cfg.data.test, list): 164 | for ds_cfg in cfg.data.test: 165 | ds_cfg.test_mode = True 166 | samples_per_gpu = max( 167 | [ds_cfg.pop('samples_per_gpu', 1) for ds_cfg in cfg.data.test]) 168 | if samples_per_gpu > 1: 169 | for ds_cfg in cfg.data.test: 170 | ds_cfg.pipeline = replace_ImageToTensor(ds_cfg.pipeline) 171 | 172 | # init distributed env first, since logger depends on the dist info. 
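    # --- Illustrative note: how the plugin block above resolves the import ---
    # For this repo's config (plugin_dir = 'projects/mmdet3d_plugin/'):
    #   os.path.dirname('projects/mmdet3d_plugin/')  -> 'projects/mmdet3d_plugin'
    #   joined with '.'                              -> 'projects.mmdet3d_plugin'
    #   importlib.import_module(...) then runs the package __init__.py, which
    #   registers the custom VEDet detectors, heads and pipelines with the
    #   mmdet/mmdet3d registries.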
173 | if args.launcher == 'none': 174 | distributed = False 175 | else: 176 | distributed = True 177 | init_dist(args.launcher, **cfg.dist_params) 178 | 179 | # set random seeds 180 | if args.seed is not None: 181 | set_random_seed(args.seed, deterministic=args.deterministic) 182 | 183 | # build the dataloader 184 | dataset = build_dataset(cfg.data.test) 185 | data_loader = build_dataloader( 186 | dataset, 187 | samples_per_gpu=samples_per_gpu, 188 | workers_per_gpu=cfg.data.workers_per_gpu, 189 | dist=distributed, 190 | shuffle=False) 191 | 192 | # build the model and load checkpoint 193 | cfg.model.train_cfg = None 194 | model = build_model(cfg.model, test_cfg=cfg.get('test_cfg')) 195 | fp16_cfg = cfg.get('fp16', None) 196 | if fp16_cfg is not None: 197 | wrap_fp16_model(model) 198 | checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu') 199 | if args.fuse_conv_bn: 200 | model = fuse_conv_bn(model) 201 | # old versions did not save class info in checkpoints, this walkaround is 202 | # for backward compatibility 203 | if 'CLASSES' in checkpoint.get('meta', {}): 204 | model.CLASSES = checkpoint['meta']['CLASSES'] 205 | else: 206 | model.CLASSES = dataset.CLASSES 207 | # palette for visualization in segmentation tasks 208 | if 'PALETTE' in checkpoint.get('meta', {}): 209 | model.PALETTE = checkpoint['meta']['PALETTE'] 210 | elif hasattr(dataset, 'PALETTE'): 211 | # segmentation dataset has `PALETTE` attribute 212 | model.PALETTE = dataset.PALETTE 213 | 214 | if not distributed: 215 | model = MMDataParallel(model, device_ids=[0]) 216 | outputs = single_gpu_test(model, data_loader, args.show, args.show_dir) 217 | else: 218 | model = MMDistributedDataParallel( 219 | model.cuda(), 220 | device_ids=[torch.cuda.current_device()], 221 | broadcast_buffers=False) 222 | outputs = multi_gpu_test(model, data_loader, args.tmpdir, 223 | args.gpu_collect) 224 | 225 | rank, _ = get_dist_info() 226 | if rank == 0: 227 | if args.out: 228 | print(f'\nwriting results to {args.out}') 229 | mmcv.dump(outputs, args.out) 230 | kwargs = {} if args.eval_options is None else args.eval_options 231 | if args.format_only: 232 | dataset.format_results(outputs, **kwargs) 233 | if args.eval: 234 | eval_kwargs = cfg.get('evaluation', {}).copy() 235 | # hard-code way to remove EvalHook args 236 | for key in [ 237 | 'interval', 'tmpdir', 'start', 'gpu_collect', 'save_best', 238 | 'rule' 239 | ]: 240 | eval_kwargs.pop(key, None) 241 | eval_kwargs.update(dict(metric=args.eval, **kwargs)) 242 | print(dataset.evaluate(outputs, **eval_kwargs)) 243 | 244 | 245 | if __name__ == '__main__': 246 | main() 247 | -------------------------------------------------------------------------------- /projects/configs/vedet_vovnet_p4_1600x640_2vview_2frame.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '/workspace/mmlab/mmdetection3d/configs/_base_/datasets/nus-3d.py', 3 | '/workspace/mmlab/mmdetection3d/configs/_base_/default_runtime.py' 4 | ] 5 | backbone_norm_cfg = dict(type='LN', requires_grad=True) 6 | plugin = True 7 | plugin_dir = 'projects/mmdet3d_plugin/' 8 | 9 | log_config = dict( 10 | interval=10, 11 | hooks=[ 12 | dict(type='TextLoggerHook'), 13 | dict(type='TensorboardLoggerHook'), 14 | dict( 15 | type='WandbLoggerHook', 16 | init_kwargs={'project': 'mmdet3d'}, 17 | interval=10, 18 | ) 19 | ]) 20 | 21 | # If point cloud range is changed, the models should also change their point 22 | # cloud range accordingly 23 | point_cloud_range = [-51.2, -51.2, -5.0, 
51.2, 51.2, 3.0] 24 | voxel_size = [0.2, 0.2, 8] 25 | img_norm_cfg = dict(mean=[103.530, 116.280, 123.675], std=[57.375, 57.120, 58.395], to_rgb=False) 26 | # For nuScenes we usually do 10-class detection 27 | class_names = [ 28 | 'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier', 'motorcycle', 'bicycle', 'pedestrian', 29 | 'traffic_cone' 30 | ] 31 | input_modality = dict(use_lidar=False, use_camera=True, use_radar=False, use_map=False, use_external=False) 32 | bands, max_freq = 64, 8 33 | num_views = 2 34 | code_weights = [1.0] * 10 + [0.0] * 10 * num_views 35 | code_weights[8] = 0.2 36 | code_weights[9] = 0.2 37 | virtual_weights = 0.2 38 | for i in range(1, num_views + 1): 39 | code_weights[i * 10] = virtual_weights # x 40 | code_weights[i * 10 + 1] = virtual_weights # y 41 | code_weights[i * 10 + 4] = virtual_weights # z 42 | code_weights[i * 10 + 6] = virtual_weights # sin(yaw) 43 | code_weights[i * 10 + 7] = virtual_weights # cos(yaw) 44 | code_weights[i * 10 + 2] = virtual_weights # w 45 | code_weights[i * 10 + 3] = virtual_weights # l 46 | code_weights[i * 10 + 5] = virtual_weights # h 47 | code_weights[i * 10 + 8] = 0.2 * virtual_weights # vx 48 | code_weights[i * 10 + 9] = 0.2 * virtual_weights # vy 49 | model = dict( 50 | type='VEDet', 51 | use_grid_mask=True, 52 | img_backbone=dict( 53 | type='VoVNetCP', 54 | spec_name='V-99-eSE', 55 | norm_eval=True, 56 | frozen_stages=-1, 57 | input_ch=3, 58 | out_features=( 59 | 'stage4', 60 | 'stage5', 61 | )), 62 | img_neck=dict(type='CPFPN', in_channels=[768, 1024], out_channels=256, num_outs=2), 63 | gt_depth_sup=False, # use cache to supervise 64 | pts_bbox_head=dict( 65 | type='VEDetHead', 66 | num_classes=10, 67 | in_channels=256, 68 | num_query=900, 69 | position_range=point_cloud_range, 70 | reg_hidden_dims=[512, 512], 71 | code_size=(num_views + 1) * 10, 72 | code_weights=code_weights, 73 | reg_channels=10, 74 | num_decode_views=num_views, 75 | with_time=True, 76 | det_transformer=dict( 77 | type='VETransformer', 78 | det_decoder=dict( 79 | type='PETRTransformerDecoder', 80 | return_intermediate=True, 81 | num_layers=6, 82 | transformerlayers=dict( 83 | type='PETRTransformerDecoderLayer', 84 | attn_cfgs=[ 85 | dict(type='MultiheadAttention', embed_dims=256, num_heads=8, dropout=0.1), 86 | dict(type='PETRMultiheadAttention', embed_dims=256, num_heads=8, dropout=0.1), 87 | ], 88 | feedforward_channels=2048, 89 | ffn_dropout=0.1, 90 | with_cp=True, 91 | operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 'ffn', 'norm')), 92 | )), 93 | bbox_coder=dict( 94 | type='NMSFreeCoder', 95 | post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], 96 | pc_range=point_cloud_range, 97 | max_num=300, 98 | voxel_size=voxel_size, 99 | num_classes=10), 100 | input_ray_encoding=dict( 101 | type='FourierMLPEncoding', 102 | input_channels=10, 103 | hidden_dims=[int(1.5 * 10 * 2 * bands)], 104 | embed_dim=256, 105 | fourier_type='linear', 106 | fourier_channels=10 * 2 * bands, 107 | max_frequency=max_freq), 108 | output_det_encoding=dict( 109 | type='FourierMLPEncoding', 110 | input_channels=10, 111 | hidden_dims=[int(1.5 * 10 * 2 * bands)], 112 | embed_dim=256, 113 | fourier_type='linear', 114 | fourier_channels=10 * 2 * bands, 115 | max_frequency=max_freq), 116 | loss_cls=dict(type='FocalLoss', use_sigmoid=True, gamma=2.0, alpha=0.25, loss_weight=2.0), 117 | loss_bbox=dict(type='L1Loss', loss_weight=0.25), 118 | loss_iou=dict(type='GIoULoss', loss_weight=0.0), 119 | ), 120 | # model training and testing settings 121 | 
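    # --- Illustrative note: code_size / code_weights layout ---
    # With num_views = 2 the head above regresses code_size = (2 + 1) * 10 = 30
    # values per query: one 10-dim box for the real (ego) frame followed by one
    # 10-dim box per virtual view. The code_weights list built at the top of
    # this config therefore expands to
    #   [1.0]*8 + [0.2]*2                 # real box, velocities down-weighted
    #   + ([0.2]*8 + [0.04]*2) * 2        # virtual views, scaled by virtual_weights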
train_cfg=dict( 122 | pts=dict( 123 | grid_size=[512, 512, 1], 124 | voxel_size=voxel_size, 125 | point_cloud_range=point_cloud_range, 126 | out_size_factor=4, 127 | assigner=dict( 128 | type='HungarianAssigner3D', 129 | cls_cost=dict(type='FocalLossCost', weight=2.0), 130 | reg_cost=dict(type='BBox3DL1Cost', weight=0.25), 131 | iou_cost=dict(type='IoUCost', 132 | weight=0.0), # Fake cost. This is just to make it compatible with DETR head. 133 | align_with_loss=True, 134 | pc_range=point_cloud_range)))) 135 | 136 | dataset_type = 'CustomNuScenesDataset' 137 | data_root = 'data/nuscenes/' 138 | 139 | file_client_args = dict(backend='disk') 140 | ida_aug_conf = { 141 | "resize_lim": (0.94, 1.25), 142 | "final_dim": (640, 1600), 143 | "bot_pct_lim": (0.0, 0.0), 144 | "rot_lim": (0.0, 0.0), 145 | "H": 900, 146 | "W": 1600, 147 | "rand_flip": True, 148 | } 149 | meta_keys = ('filename', 'ori_shape', 'img_shape', 'lidar2img', 'depth2img', 'cam2img', 'pad_shape', 'scale_factor', 150 | 'flip', 'pcd_horizontal_flip', 'pcd_vertical_flip', 'box_mode_3d', 'box_type_3d', 'img_norm_cfg', 151 | 'pcd_trans', 'sample_idx', 'pcd_scale_factor', 'pcd_rotation', 'pts_filename', 'transformation_3d_flow', 152 | 'intrinsics', 'extrinsics', 'scale_ratio', 'dec_extrinsics', 'timestamp') 153 | train_pipeline = [ 154 | dict(type='LoadMultiViewImageFromFiles', to_float32=True), 155 | dict( 156 | type='LoadMultiViewImageFromMultiSweepsFiles', 157 | sweeps_num=1, 158 | to_float32=True, 159 | pad_empty_sweeps=True, 160 | test_mode=False, 161 | time_range=3, 162 | sweep_range=[3, 27]), 163 | dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True, with_attr_label=False), 164 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), 165 | dict(type='ObjectNameFilter', classes=class_names), 166 | dict(type='ResizeCropFlipImageFull3D', data_aug_conf=ida_aug_conf, training=True), 167 | dict( 168 | type='GlobalRotScaleTransImage', 169 | rot_range=[-0.3925, 0.3925], 170 | translation_std=[0, 0, 0], 171 | scale_ratio_range=[0.95, 1.05], 172 | reverse_angle=True, 173 | training=True), 174 | dict(type='ComputeMultiviewTargets', local_frame=True, visible_only=False, use_virtual=True, num_views=num_views), 175 | dict(type='NormalizeMultiviewImage', **img_norm_cfg), 176 | dict(type='PadMultiViewImage', size_divisor=32), 177 | dict(type='DefaultFormatBundle3D', class_names=class_names), 178 | dict(type='Collect3D', keys=['gt_bboxes_3d', 'gt_labels_3d', 'img'], meta_keys=meta_keys) 179 | ] 180 | test_pipeline = [ 181 | dict(type='LoadMultiViewImageFromFiles', to_float32=True), 182 | dict( 183 | type='LoadMultiViewImageFromMultiSweepsFiles', 184 | sweeps_num=1, 185 | to_float32=True, 186 | pad_empty_sweeps=True, 187 | time_range=3, 188 | sweep_range=[3, 27]), 189 | dict(type='ResizeCropFlipImageFull3D', data_aug_conf=ida_aug_conf, training=False), 190 | dict(type='ComputeMultiviewTargets', local_frame=True, visible_only=False), 191 | dict(type='NormalizeMultiviewImage', **img_norm_cfg), 192 | dict(type='PadMultiViewImage', size_divisor=32), 193 | dict( 194 | type='MultiScaleFlipAug3D', 195 | img_scale=(1333, 800), 196 | pts_scale_ratio=1, 197 | flip=False, 198 | transforms=[ 199 | dict(type='DefaultFormatBundle3D', class_names=class_names, with_label=False), 200 | dict(type='Collect3D', keys=['img'], meta_keys=meta_keys) 201 | ]) 202 | ] 203 | 204 | data = dict( 205 | samples_per_gpu=1, 206 | workers_per_gpu=4, 207 | train=dict( 208 | type=dataset_type, 209 | data_root=data_root, 210 | ann_file=data_root + 
'mmdet3d_nuscenes_30f_infos_train.pkl', 211 | pipeline=train_pipeline, 212 | classes=class_names, 213 | modality=input_modality, 214 | test_mode=False, 215 | use_valid_flag=True, 216 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset 217 | # and box_type_3d='Depth' in sunrgbd and scannet dataset. 218 | box_type_3d='LiDAR'), 219 | val=dict( 220 | type=dataset_type, 221 | pipeline=test_pipeline, 222 | ann_file=data_root + 'mmdet3d_nuscenes_30f_infos_val.pkl', 223 | classes=class_names, 224 | modality=input_modality), 225 | test=dict( 226 | type=dataset_type, 227 | pipeline=test_pipeline, 228 | ann_file=data_root + 'mmdet3d_nuscenes_30f_infos_val.pkl', 229 | classes=class_names, 230 | modality=input_modality)) 231 | 232 | optimizer = dict( 233 | type='AdamW', lr=2e-4, paramwise_cfg=dict(custom_keys={ 234 | 'img_backbone': dict(lr_mult=0.1), 235 | }), weight_decay=0.01) 236 | 237 | optimizer_config = dict(type='Fp16OptimizerHook', loss_scale=512., grad_clip=dict(max_norm=35, norm_type=2)) 238 | 239 | # learning policy 240 | lr_config = dict( 241 | policy='CosineAnnealing', 242 | warmup='linear', 243 | warmup_iters=500, 244 | warmup_ratio=1.0 / 3, 245 | min_lr_ratio=1e-3, 246 | ) 247 | total_epochs = 24 248 | evaluation = dict(interval=2, pipeline=test_pipeline, metric=['bbox']) 249 | checkpoint_config = dict(interval=24) 250 | find_unused_parameters = False 251 | 252 | runner = dict(type='EpochBasedRunner', max_epochs=total_epochs) 253 | load_from = 'ckpts/fcos3d_vovnet_imgbackbone-remapped.pth' 254 | resume_from = None 255 | -------------------------------------------------------------------------------- /tools/data_converter/s3dis_data_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import mmcv 3 | import numpy as np 4 | import os 5 | from concurrent import futures as futures 6 | from os import path as osp 7 | 8 | 9 | class S3DISData(object): 10 | """S3DIS data. 11 | 12 | Generate s3dis infos for s3dis_converter. 13 | 14 | Args: 15 | root_path (str): Root path of the raw data. 16 | split (str): Set split type of the data. Default: 'Area_1'. 17 | """ 18 | 19 | def __init__(self, root_path, split='Area_1'): 20 | self.root_dir = root_path 21 | self.split = split 22 | self.data_dir = osp.join(root_path, 23 | 'Stanford3dDataset_v1.2_Aligned_Version') 24 | 25 | # Following `GSDN `_, use 5 furniture 26 | # classes for detection: table, chair, sofa, bookcase, board. 27 | self.cat_ids = np.array([7, 8, 9, 10, 11]) 28 | self.cat_ids2class = { 29 | cat_id: i 30 | for i, cat_id in enumerate(list(self.cat_ids)) 31 | } 32 | 33 | assert split in [ 34 | 'Area_1', 'Area_2', 'Area_3', 'Area_4', 'Area_5', 'Area_6' 35 | ] 36 | self.sample_id_list = os.listdir(osp.join(self.data_dir, 37 | split)) # conferenceRoom_1 38 | for sample_id in self.sample_id_list: 39 | if os.path.isfile(osp.join(self.data_dir, split, sample_id)): 40 | self.sample_id_list.remove(sample_id) 41 | 42 | def __len__(self): 43 | return len(self.sample_id_list) 44 | 45 | def get_infos(self, num_workers=4, has_label=True, sample_id_list=None): 46 | """Get data infos. 47 | 48 | This method gets information from the raw data. 49 | 50 | Args: 51 | num_workers (int): Number of threads to be used. Default: 4. 52 | has_label (bool): Whether the data has label. Default: True. 53 | sample_id_list (list[int]): Index list of the sample. 54 | Default: None. 55 | 56 | Returns: 57 | infos (list[dict]): Information of the raw data. 
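# --- Illustrative sketch: driving get_infos the way the converter scripts do ---
# The root path and output filename below are assumptions for illustration.
import mmcv

s3dis = S3DISData(root_path='./data/s3dis', split='Area_5')
area_infos = s3dis.get_infos(num_workers=4, has_label=True)
mmcv.dump(area_infos, './data/s3dis/s3dis_infos_Area_5.pkl', 'pkl')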
58 | """ 59 | 60 | def process_single_scene(sample_idx): 61 | print(f'{self.split} sample_idx: {sample_idx}') 62 | info = dict() 63 | pc_info = { 64 | 'num_features': 6, 65 | 'lidar_idx': f'{self.split}_{sample_idx}' 66 | } 67 | info['point_cloud'] = pc_info 68 | pts_filename = osp.join(self.root_dir, 's3dis_data', 69 | f'{self.split}_{sample_idx}_point.npy') 70 | pts_instance_mask_path = osp.join( 71 | self.root_dir, 's3dis_data', 72 | f'{self.split}_{sample_idx}_ins_label.npy') 73 | pts_semantic_mask_path = osp.join( 74 | self.root_dir, 's3dis_data', 75 | f'{self.split}_{sample_idx}_sem_label.npy') 76 | 77 | points = np.load(pts_filename).astype(np.float32) 78 | pts_instance_mask = np.load(pts_instance_mask_path).astype(np.int) 79 | pts_semantic_mask = np.load(pts_semantic_mask_path).astype(np.int) 80 | 81 | mmcv.mkdir_or_exist(osp.join(self.root_dir, 'points')) 82 | mmcv.mkdir_or_exist(osp.join(self.root_dir, 'instance_mask')) 83 | mmcv.mkdir_or_exist(osp.join(self.root_dir, 'semantic_mask')) 84 | 85 | points.tofile( 86 | osp.join(self.root_dir, 'points', 87 | f'{self.split}_{sample_idx}.bin')) 88 | pts_instance_mask.tofile( 89 | osp.join(self.root_dir, 'instance_mask', 90 | f'{self.split}_{sample_idx}.bin')) 91 | pts_semantic_mask.tofile( 92 | osp.join(self.root_dir, 'semantic_mask', 93 | f'{self.split}_{sample_idx}.bin')) 94 | 95 | info['pts_path'] = osp.join('points', 96 | f'{self.split}_{sample_idx}.bin') 97 | info['pts_instance_mask_path'] = osp.join( 98 | 'instance_mask', f'{self.split}_{sample_idx}.bin') 99 | info['pts_semantic_mask_path'] = osp.join( 100 | 'semantic_mask', f'{self.split}_{sample_idx}.bin') 101 | info['annos'] = self.get_bboxes(points, pts_instance_mask, 102 | pts_semantic_mask) 103 | 104 | return info 105 | 106 | sample_id_list = sample_id_list if sample_id_list is not None \ 107 | else self.sample_id_list 108 | with futures.ThreadPoolExecutor(num_workers) as executor: 109 | infos = executor.map(process_single_scene, sample_id_list) 110 | return list(infos) 111 | 112 | def get_bboxes(self, points, pts_instance_mask, pts_semantic_mask): 113 | """Convert instance masks to axis-aligned bounding boxes. 114 | 115 | Args: 116 | points (np.array): Scene points of shape (n, 6). 117 | pts_instance_mask (np.ndarray): Instance labels of shape (n,). 118 | pts_semantic_mask (np.ndarray): Semantic labels of shape (n,). 119 | 120 | Returns: 121 | dict: A dict containing detection infos with following keys: 122 | 123 | - gt_boxes_upright_depth (np.ndarray): Bounding boxes 124 | of shape (n, 6) 125 | - class (np.ndarray): Box labels of shape (n,) 126 | - gt_num (int): Number of boxes. 
127 | """ 128 | bboxes, labels = [], [] 129 | for i in range(1, pts_instance_mask.max()): 130 | ids = pts_instance_mask == i 131 | # check if all instance points have same semantic label 132 | assert pts_semantic_mask[ids].min() == pts_semantic_mask[ids].max() 133 | label = pts_semantic_mask[ids][0] 134 | # keep only furniture objects 135 | if label in self.cat_ids2class: 136 | labels.append(self.cat_ids2class[pts_semantic_mask[ids][0]]) 137 | pts = points[:, :3][ids] 138 | min_pts = pts.min(axis=0) 139 | max_pts = pts.max(axis=0) 140 | locations = (min_pts + max_pts) / 2 141 | dimensions = max_pts - min_pts 142 | bboxes.append(np.concatenate((locations, dimensions))) 143 | annotation = dict() 144 | # follow ScanNet and SUN RGB-D keys 145 | annotation['gt_boxes_upright_depth'] = np.array(bboxes) 146 | annotation['class'] = np.array(labels) 147 | annotation['gt_num'] = len(labels) 148 | return annotation 149 | 150 | 151 | class S3DISSegData(object): 152 | """S3DIS dataset used to generate infos for semantic segmentation task. 153 | 154 | Args: 155 | data_root (str): Root path of the raw data. 156 | ann_file (str): The generated scannet infos. 157 | split (str): Set split type of the data. Default: 'train'. 158 | num_points (int): Number of points in each data input. Default: 8192. 159 | label_weight_func (function): Function to compute the label weight. 160 | Default: None. 161 | """ 162 | 163 | def __init__(self, 164 | data_root, 165 | ann_file, 166 | split='Area_1', 167 | num_points=4096, 168 | label_weight_func=None): 169 | self.data_root = data_root 170 | self.data_infos = mmcv.load(ann_file) 171 | self.split = split 172 | self.num_points = num_points 173 | 174 | self.all_ids = np.arange(13) # all possible ids 175 | self.cat_ids = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 176 | 12]) # used for seg task 177 | self.ignore_index = len(self.cat_ids) 178 | 179 | self.cat_id2class = np.ones((self.all_ids.shape[0],), dtype=np.int) * \ 180 | self.ignore_index 181 | for i, cat_id in enumerate(self.cat_ids): 182 | self.cat_id2class[cat_id] = i 183 | 184 | # label weighting function is taken from 185 | # https://github.com/charlesq34/pointnet2/blob/master/scannet/scannet_dataset.py#L24 186 | self.label_weight_func = (lambda x: 1.0 / np.log(1.2 + x)) if \ 187 | label_weight_func is None else label_weight_func 188 | 189 | def get_seg_infos(self): 190 | scene_idxs, label_weight = self.get_scene_idxs_and_label_weight() 191 | save_folder = osp.join(self.data_root, 'seg_info') 192 | mmcv.mkdir_or_exist(save_folder) 193 | np.save( 194 | osp.join(save_folder, f'{self.split}_resampled_scene_idxs.npy'), 195 | scene_idxs) 196 | np.save( 197 | osp.join(save_folder, f'{self.split}_label_weight.npy'), 198 | label_weight) 199 | print(f'{self.split} resampled scene index and label weight saved') 200 | 201 | def _convert_to_label(self, mask): 202 | """Convert class_id in loaded segmentation mask to label.""" 203 | if isinstance(mask, str): 204 | if mask.endswith('npy'): 205 | mask = np.load(mask) 206 | else: 207 | mask = np.fromfile(mask, dtype=np.long) 208 | label = self.cat_id2class[mask] 209 | return label 210 | 211 | def get_scene_idxs_and_label_weight(self): 212 | """Compute scene_idxs for data sampling and label weight for loss \ 213 | calculation. 214 | 215 | We sample more times for scenes with more points. Label_weight is 216 | inversely proportional to number of class points. 
217 | """ 218 | num_classes = len(self.cat_ids) 219 | num_point_all = [] 220 | label_weight = np.zeros((num_classes + 1, )) # ignore_index 221 | for data_info in self.data_infos: 222 | label = self._convert_to_label( 223 | osp.join(self.data_root, data_info['pts_semantic_mask_path'])) 224 | num_point_all.append(label.shape[0]) 225 | class_count, _ = np.histogram(label, range(num_classes + 2)) 226 | label_weight += class_count 227 | 228 | # repeat scene_idx for num_scene_point // num_sample_point times 229 | sample_prob = np.array(num_point_all) / float(np.sum(num_point_all)) 230 | num_iter = int(np.sum(num_point_all) / float(self.num_points)) 231 | scene_idxs = [] 232 | for idx in range(len(self.data_infos)): 233 | scene_idxs.extend([idx] * int(round(sample_prob[idx] * num_iter))) 234 | scene_idxs = np.array(scene_idxs).astype(np.int32) 235 | 236 | # calculate label weight, adopted from PointNet++ 237 | label_weight = label_weight[:-1].astype(np.float32) 238 | label_weight = label_weight / label_weight.sum() 239 | label_weight = self.label_weight_func(label_weight).astype(np.float32) 240 | 241 | return scene_idxs, label_weight 242 | -------------------------------------------------------------------------------- /tools/train.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2022 megvii-model. All Rights Reserved. 3 | # ------------------------------------------------------------------------ 4 | # Modified from DETR3D (https://github.com/WangYueFt/detr3d) 5 | # Copyright (c) 2021 Wang, Yue 6 | # ------------------------------------------------------------------------ 7 | # Modified from mmdetection3d (https://github.com/open-mmlab/mmdetection3d) 8 | # Copyright (c) OpenMMLab. All rights reserved. 
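# (Editor's note) A minimal sketch of how this training entry point is usually
# invoked; the config path and work dir are placeholders, and the flags match
# the argparse options defined below:
#
#     python tools/train.py <path/to/config.py> --work-dir work_dirs/my_run \
#         --seed 0 --deterministic
#
# Multi-GPU jobs launch the same script through torch.distributed.launch with
# --launcher pytorch rather than calling it directly.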
9 | # ------------------------------------------------------------------------ 10 | 11 | from __future__ import division 12 | 13 | import argparse 14 | import copy 15 | import mmcv 16 | import os 17 | import time 18 | import torch 19 | import warnings 20 | from mmcv import Config, DictAction 21 | from mmcv.runner import get_dist_info, init_dist 22 | from os import path as osp 23 | 24 | from mmdet import __version__ as mmdet_version 25 | from mmdet3d import __version__ as mmdet3d_version 26 | from mmdet3d.apis import train_model 27 | from mmdet3d.datasets import build_dataset 28 | from mmdet3d.models import build_model 29 | from mmdet3d.utils import collect_env, get_root_logger 30 | from mmdet.apis import set_random_seed 31 | from mmseg import __version__ as mmseg_version 32 | from mmdet.utils import get_device 33 | 34 | def parse_args(): 35 | parser = argparse.ArgumentParser(description='Train a detector') 36 | parser.add_argument('config', help='train config file path') 37 | parser.add_argument('--work-dir', help='the dir to save logs and models') 38 | parser.add_argument( 39 | '--resume-from', help='the checkpoint file to resume from') 40 | parser.add_argument( 41 | '--no-validate', 42 | action='store_true', 43 | help='whether not to evaluate the checkpoint during training') 44 | group_gpus = parser.add_mutually_exclusive_group() 45 | group_gpus.add_argument( 46 | '--gpus', 47 | type=int, 48 | help='number of gpus to use ' 49 | '(only applicable to non-distributed training)') 50 | group_gpus.add_argument( 51 | '--gpu-ids', 52 | type=int, 53 | nargs='+', 54 | help='ids of gpus to use ' 55 | '(only applicable to non-distributed training)') 56 | parser.add_argument('--seed', type=int, default=0, help='random seed') 57 | parser.add_argument( 58 | '--deterministic', 59 | action='store_true', 60 | help='whether to set deterministic options for CUDNN backend.') 61 | parser.add_argument( 62 | '--options', 63 | nargs='+', 64 | action=DictAction, 65 | help='override some settings in the used config, the key-value pair ' 66 | 'in xxx=yyy format will be merged into config file (deprecate), ' 67 | 'change to --cfg-options instead.') 68 | parser.add_argument( 69 | '--cfg-options', 70 | nargs='+', 71 | action=DictAction, 72 | help='override some settings in the used config, the key-value pair ' 73 | 'in xxx=yyy format will be merged into config file. If the value to ' 74 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 75 | 'It also allows nested list/tuple values, e.g. 
key="[(a,b),(c,d)]" ' 76 | 'Note that the quotation marks are necessary and that no white space ' 77 | 'is allowed.') 78 | parser.add_argument( 79 | '--launcher', 80 | choices=['none', 'pytorch', 'slurm', 'mpi'], 81 | default='none', 82 | help='job launcher') 83 | parser.add_argument('--local_rank', type=int, default=0) 84 | parser.add_argument( 85 | '--autoscale-lr', 86 | action='store_true', 87 | help='automatically scale lr with the number of gpus') 88 | args = parser.parse_args() 89 | if 'LOCAL_RANK' not in os.environ: 90 | os.environ['LOCAL_RANK'] = str(args.local_rank) 91 | 92 | if args.options and args.cfg_options: 93 | raise ValueError( 94 | '--options and --cfg-options cannot be both specified, ' 95 | '--options is deprecated in favor of --cfg-options') 96 | if args.options: 97 | warnings.warn('--options is deprecated in favor of --cfg-options') 98 | args.cfg_options = args.options 99 | 100 | return args 101 | 102 | 103 | def main(): 104 | args = parse_args() 105 | 106 | cfg = Config.fromfile(args.config) 107 | if args.cfg_options is not None: 108 | cfg.merge_from_dict(args.cfg_options) 109 | # import modules from string list. 110 | if cfg.get('custom_imports', None): 111 | from mmcv.utils import import_modules_from_strings 112 | import_modules_from_strings(**cfg['custom_imports']) 113 | 114 | # import modules from plguin/xx, registry will be updated 115 | if hasattr(cfg, 'plugin'): 116 | if cfg.plugin: 117 | import importlib 118 | if hasattr(cfg, 'plugin_dir'): 119 | plugin_dir = cfg.plugin_dir 120 | _module_dir = os.path.dirname(plugin_dir) 121 | _module_dir = _module_dir.split('/') 122 | _module_path = _module_dir[0] 123 | 124 | for m in _module_dir[1:]: 125 | _module_path = _module_path + '.' + m 126 | print(_module_path) 127 | plg_lib = importlib.import_module(_module_path) 128 | else: 129 | # import dir is the dirpath for the config file 130 | _module_dir = os.path.dirname(args.config) 131 | _module_dir = _module_dir.split('/') 132 | _module_path = _module_dir[0] 133 | for m in _module_dir[1:]: 134 | _module_path = _module_path + '.' + m 135 | print(_module_path) 136 | plg_lib = importlib.import_module(_module_path) 137 | 138 | # set cudnn_benchmark 139 | if cfg.get('cudnn_benchmark', False): 140 | torch.backends.cudnn.benchmark = True 141 | 142 | # work_dir is determined in this priority: CLI > segment in file > filename 143 | if args.work_dir is not None: 144 | # update configs according to CLI args if args.work_dir is not None 145 | cfg.work_dir = args.work_dir 146 | elif cfg.get('work_dir', None) is None: 147 | # use config filename as default work_dir if cfg.work_dir is None 148 | cfg.work_dir = osp.join('./work_dirs', 149 | osp.splitext(osp.basename(args.config))[0]) 150 | if args.resume_from is not None: 151 | cfg.resume_from = args.resume_from 152 | if args.gpu_ids is not None: 153 | cfg.gpu_ids = args.gpu_ids 154 | else: 155 | cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus) 156 | 157 | if args.autoscale_lr: 158 | # apply the linear scaling rule (https://arxiv.org/abs/1706.02677) 159 | cfg.optimizer['lr'] = cfg.optimizer['lr'] * len(cfg.gpu_ids) / 8 160 | 161 | # init distributed env first, since logger depends on the dist info. 
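# (Editor's note) Worked example of the linear scaling rule applied just above,
# assuming a hypothetical base lr of 2e-4 in the config: with 8 GPUs the lr is
# unchanged (2e-4 * 8 / 8 = 2e-4), while with 4 GPUs it becomes 1e-4
# (2e-4 * 4 / 8).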
162 | if args.launcher == 'none': 163 | distributed = False 164 | else: 165 | distributed = True 166 | init_dist(args.launcher, **cfg.dist_params) 167 | # re-set gpu_ids with distributed training mode 168 | _, world_size = get_dist_info() 169 | cfg.gpu_ids = range(world_size) 170 | 171 | # create work_dir 172 | mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir)) 173 | # dump config 174 | cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config))) 175 | # init the logger before other steps 176 | timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime()) 177 | log_file = osp.join(cfg.work_dir, f'{timestamp}.log') 178 | # specify logger name, if we still use 'mmdet', the output info will be 179 | # filtered and won't be saved in the log_file 180 | # TODO: ugly workaround to judge whether we are training det or seg model 181 | if cfg.model.type in ['EncoderDecoder3D']: 182 | logger_name = 'mmseg' 183 | else: 184 | logger_name = 'mmdet' 185 | logger = get_root_logger( 186 | log_file=log_file, log_level=cfg.log_level, name=logger_name) 187 | 188 | # init the meta dict to record some important information such as 189 | # environment info and seed, which will be logged 190 | meta = dict() 191 | # log env info 192 | env_info_dict = collect_env() 193 | env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()]) 194 | dash_line = '-' * 60 + '\n' 195 | logger.info('Environment info:\n' + dash_line + env_info + '\n' + 196 | dash_line) 197 | meta['env_info'] = env_info 198 | meta['config'] = cfg.pretty_text 199 | cfg.device = get_device() 200 | # log some basic info 201 | logger.info(f'Distributed training: {distributed}') 202 | logger.info(f'Config:\n{cfg.pretty_text}') 203 | 204 | # set random seeds 205 | if args.seed is not None: 206 | logger.info(f'Set random seed to {args.seed}, ' 207 | f'deterministic: {args.deterministic}') 208 | set_random_seed(args.seed, deterministic=args.deterministic) 209 | cfg.seed = args.seed 210 | meta['seed'] = args.seed 211 | meta['exp_name'] = osp.basename(args.config) 212 | 213 | model = build_model( 214 | cfg.model, 215 | train_cfg=cfg.get('train_cfg'), 216 | test_cfg=cfg.get('test_cfg')) 217 | model.init_weights() 218 | 219 | logger.info(f'Model:\n{model}') 220 | datasets = [build_dataset(cfg.data.train)] 221 | if len(cfg.workflow) == 2: 222 | val_dataset = copy.deepcopy(cfg.data.val) 223 | # in case we use a dataset wrapper 224 | if 'dataset' in cfg.data.train: 225 | val_dataset.pipeline = cfg.data.train.dataset.pipeline 226 | else: 227 | val_dataset.pipeline = cfg.data.train.pipeline 228 | # set test_mode=False here in deep copied config 229 | # which do not affect AP/AR calculation later 230 | # refer to https://mmdetection3d.readthedocs.io/en/latest/tutorials/customize_runtime.html#customize-workflow # noqa 231 | val_dataset.test_mode = False 232 | datasets.append(build_dataset(val_dataset)) 233 | if cfg.checkpoint_config is not None: 234 | # save mmdet version, config file content and class names in 235 | # checkpoints as meta data 236 | cfg.checkpoint_config.meta = dict( 237 | mmdet_version=mmdet_version, 238 | mmseg_version=mmseg_version, 239 | mmdet3d_version=mmdet3d_version, 240 | config=cfg.pretty_text, 241 | CLASSES=datasets[0].CLASSES, 242 | PALETTE=datasets[0].PALETTE # for segmentors 243 | if hasattr(datasets[0], 'PALETTE') else None) 244 | # add an attribute for visualization convenience 245 | model.CLASSES = datasets[0].CLASSES 246 | train_model( 247 | model, 248 | datasets, 249 | cfg, 250 | distributed=distributed, 251 | validate=(not 
args.no_validate), 252 | timestamp=timestamp, 253 | meta=meta) 254 | 255 | 256 | if __name__ == '__main__': 257 | main() 258 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/detectors/vedet.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------ 2 | # Copyright (c) 2022 Toyota Research Institute, Dian Chen. All Rights Reserved. 3 | # ------------------------------------------------------------------------ 4 | # Copyright (c) 2022 megvii-model. All Rights Reserved. 5 | # ------------------------------------------------------------------------ 6 | # Modified from DETR3D (https://github.com/WangYueFt/detr3d) 7 | # Copyright (c) 2021 Wang, Yue 8 | # ------------------------------------------------------------------------ 9 | # Modified from mmdetection3d (https://github.com/open-mmlab/mmdetection3d) 10 | # Copyright (c) OpenMMLab. All rights reserved. 11 | # ------------------------------------------------------------------------ 12 | 13 | import torch 14 | import torch.nn.functional as F 15 | from einops import rearrange 16 | from mmcv.runner import force_fp32, auto_fp16 17 | from mmdet.models import DETECTORS 18 | from mmdet3d.core import bbox3d2result 19 | from mmdet3d.models.detectors.mvx_two_stage import MVXTwoStageDetector 20 | from projects.mmdet3d_plugin.models.utils.grid_mask import GridMask 21 | 22 | 23 | @DETECTORS.register_module() 24 | class VEDet(MVXTwoStageDetector): 25 | """Petr3D.""" 26 | 27 | def __init__(self, 28 | use_grid_mask=False, 29 | use_gt_scale=False, 30 | pts_voxel_layer=None, 31 | pts_voxel_encoder=None, 32 | pts_middle_encoder=None, 33 | pts_fusion_layer=None, 34 | img_backbone=None, 35 | pts_backbone=None, 36 | img_neck=None, 37 | pts_neck=None, 38 | pts_bbox_head=None, 39 | img_roi_head=None, 40 | img_rpn_head=None, 41 | train_cfg=None, 42 | test_cfg=None, 43 | pretrained=None, 44 | gt_depth_sup=True): 45 | super(VEDet, self).__init__(pts_voxel_layer, pts_voxel_encoder, pts_middle_encoder, pts_fusion_layer, 46 | img_backbone, pts_backbone, img_neck, pts_neck, pts_bbox_head, img_roi_head, 47 | img_rpn_head, train_cfg, test_cfg, pretrained) 48 | self.grid_mask = GridMask(True, True, rotate=1, offset=False, ratio=0.5, mode=1, prob=0.7) 49 | self.use_grid_mask = use_grid_mask 50 | self.use_gt_scale = use_gt_scale 51 | self.gt_depth_sup = gt_depth_sup 52 | 53 | def extract_img_feat(self, img, img_metas): 54 | """Extract features of images.""" 55 | # print(img[0].size()) 56 | if isinstance(img, list): 57 | img = torch.stack(img, dim=0) 58 | 59 | B = img.size(0) 60 | if img is not None: 61 | input_shape = img.shape[-2:] 62 | # update real input shape of each single img 63 | for img_meta in img_metas: 64 | img_meta.update(input_shape=input_shape) 65 | if img.dim() == 5: 66 | if img.size(0) == 1 and img.size(1) != 1: 67 | img.squeeze_() 68 | else: 69 | B, N, C, H, W = img.size() 70 | img = img.view(B * N, C, H, W) 71 | if self.use_grid_mask: 72 | img = self.grid_mask(img) 73 | img_feats = self.img_backbone(img) 74 | if isinstance(img_feats, dict): 75 | img_feats = list(img_feats.values()) 76 | else: 77 | return None 78 | if self.with_img_neck: 79 | img_feats = self.img_neck(img_feats) 80 | img_feats_reshaped = [] 81 | for img_feat in img_feats: 82 | BN, C, H, W = img_feat.size() 83 | img_feats_reshaped.append(img_feat.view(B, int(BN / B), C, H, W)) 84 | return img_feats_reshaped 85 | 86 | 
@auto_fp16(apply_to=('img'), out_fp32=True) 87 | def extract_feat(self, img, img_metas): 88 | """Extract features from images and points.""" 89 | img_feats = self.extract_img_feat(img, img_metas) 90 | return img_feats 91 | 92 | def forward_pts_train(self, pts_feats, gt_bboxes_3d, gt_labels_3d, gt_maps, img_metas): 93 | """Forward function for point cloud branch. 94 | Args: 95 | pts_feats (list[torch.Tensor]): Features of point cloud branch 96 | gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth 97 | boxes for each sample. 98 | gt_labels_3d (list[torch.Tensor]): Ground truth labels for 99 | boxes of each sampole 100 | img_metas (list[dict]): Meta information of samples. 101 | gt_bboxes_ignore (list[torch.Tensor], optional): Ground truth 102 | boxes to be ignored. Defaults to None. 103 | Returns: 104 | dict: Losses of each branch. 105 | """ 106 | outs = self.pts_bbox_head(pts_feats, img_metas) 107 | loss_inputs = [gt_bboxes_3d, gt_labels_3d, gt_maps, outs] 108 | losses = self.pts_bbox_head.loss(*loss_inputs) 109 | 110 | return losses 111 | 112 | @force_fp32(apply_to=('img', 'points')) 113 | def forward(self, return_loss=True, **kwargs): 114 | """Calls either forward_train or forward_test depending on whether 115 | return_loss=True. 116 | Note this setting will change the expected inputs. When 117 | `return_loss=True`, img and img_metas are single-nested (i.e. 118 | torch.Tensor and list[dict]), and when `resturn_loss=False`, img and 119 | img_metas should be double nested (i.e. list[torch.Tensor], 120 | list[list[dict]]), with the outer list indicating test time 121 | augmentations. 122 | """ 123 | if return_loss: 124 | return self.forward_train(**kwargs) 125 | else: 126 | return self.forward_test(**kwargs) 127 | 128 | def forward_train(self, img_metas=None, gt_bboxes_3d=None, gt_labels_3d=None, maps=None, img=None): 129 | """Forward training function. 130 | Args: 131 | points (list[torch.Tensor], optional): Points of each sample. 132 | Defaults to None. 133 | img_metas (list[dict], optional): Meta information of each sample. 134 | Defaults to None. 135 | gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`], optional): 136 | Ground truth 3D boxes. Defaults to None. 137 | gt_labels_3d (list[torch.Tensor], optional): Ground truth labels 138 | of 3D boxes. Defaults to None. 139 | gt_labels (list[torch.Tensor], optional): Ground truth labels 140 | of 2D boxes in images. Defaults to None. 141 | gt_bboxes (list[torch.Tensor], optional): Ground truth 2D boxes in 142 | images. Defaults to None. 143 | img (torch.Tensor optional): Images of each sample with shape 144 | (N, C, H, W). Defaults to None. 145 | proposals ([list[torch.Tensor], optional): Predicted proposals 146 | used for training Fast RCNN. Defaults to None. 147 | gt_bboxes_ignore (list[torch.Tensor], optional): Ground truth 148 | 2D boxes in images to be ignored. Defaults to None. 149 | Returns: 150 | dict: Losses of different branches. 
151 |         """
152 | 
153 |         img_feats = self.extract_feat(img=img, img_metas=img_metas)
154 | 
155 |         losses = dict()
156 |         losses_pts = self.forward_pts_train(img_feats, gt_bboxes_3d, gt_labels_3d, maps, img_metas)
157 |         losses.update(losses_pts)
158 |         return losses
159 | 
160 |     def forward_test(self, img_metas, img=None, gt_map=None, **kwargs):
161 |         for var, name in [(img_metas, 'img_metas')]:
162 |             if not isinstance(var, list):
163 |                 raise TypeError('{} must be a list, but got {}'.format(name, type(var)))
164 |         img = [img] if img is None else img
165 |         return self.simple_test(img_metas[0], img[0], gt_map[0] if gt_map is not None else None, **kwargs)
166 | 
167 |     def simple_test_pts(self, x, img_metas, gt_map=None, rescale=False):
168 |         """Test function of point cloud branch."""
169 |         outs = self.pts_bbox_head(x, img_metas)
170 |         results = dict()
171 |         if outs.get('all_cls_scores', None) is not None:
172 |             bbox_list = self.pts_bbox_head.get_bboxes(outs, img_metas, rescale=rescale)
173 |             bbox_results = [bbox3d2result(bboxes, scores, labels) for bboxes, scores, labels in bbox_list]
174 |             results['bbox_results'] = bbox_results
175 | 
176 |         if gt_map is not None:
177 |             seg_results = self.compute_seg_iou(outs, gt_map)
178 |             results['seg_results'] = seg_results
179 | 
180 |         return results
181 | 
182 |     def simple_test(self, img_metas, img=None, gt_map=None, rescale=False):
183 |         """Test function without augmentation."""
184 |         img_feats = self.extract_feat(img=img, img_metas=img_metas)
185 | 
186 |         results_list = [dict() for i in range(len(img_metas))]
187 |         results = self.simple_test_pts(img_feats, img_metas, gt_map, rescale=rescale)
188 |         if 'bbox_results' in results:
189 |             for result_dict, pts_bbox in zip(results_list, results['bbox_results']):
190 |                 result_dict['pts_bbox'] = pts_bbox
191 | 
192 |         return results_list
193 | 
194 |     @torch.no_grad()
195 |     def compute_seg_iou(self, outs, gt_map):
196 |         lane_preds = outs['all_seg_preds'][-1].squeeze(0)  # [B, N, H, W]
197 | 
198 |         pred_maps = lane_preds.view(256, 3, 16, 16)
199 | 
200 |         f_lane = rearrange(pred_maps, '(h w) c h1 w2 -> c (h h1) (w w2)', h=16, w=16)
201 |         f_lane = f_lane.sigmoid()
202 |         f_lane[f_lane >= 0.5] = 1
203 |         f_lane[f_lane < 0.5] = 0
204 | 
205 |         f_lane = f_lane.view(3, -1)
206 |         gt_map = gt_map.view(3, -1)
207 | 
208 |         ret_iou = IOU(f_lane, gt_map).cpu()
209 |         return ret_iou
210 | 
211 |     def aug_test_pts(self, feats, img_metas, rescale=False):
212 |         feats_list = []
213 |         for j in range(len(feats[0])):
214 |             feats_list_level = []
215 |             for i in range(len(feats)):
216 |                 feats_list_level.append(feats[i][j])
217 |             feats_list.append(torch.stack(feats_list_level, -1).mean(-1))
218 |         outs = self.pts_bbox_head(feats_list, img_metas)
219 |         bbox_list = self.pts_bbox_head.get_bboxes(outs, img_metas, rescale=rescale)
220 |         bbox_results = [bbox3d2result(bboxes, scores, labels) for bboxes, scores, labels in bbox_list]
221 |         return bbox_results
222 | 
223 |     def aug_test(self, img_metas, imgs=None, rescale=False):
224 |         """Test function with augmentation."""
225 |         img_feats = self.extract_feats(img_metas, imgs)
226 |         img_metas = img_metas[0]
227 |         bbox_list = [dict() for i in range(len(img_metas))]
228 |         bbox_pts = self.aug_test_pts(img_feats, img_metas, rescale)
229 |         for result_dict, pts_bbox in zip(bbox_list, bbox_pts):
230 |             result_dict['pts_bbox'] = pts_bbox
231 |         return bbox_list
232 | 
233 | 
234 | def IOU(inputs, targets):
235 |     numerator = 2 * (inputs * targets).sum(dim=1)
236 |     denominator = inputs.sum(dim=1) + targets.sum(dim=1)
237 |     loss = (numerator + 0.01) /
(denominator + 0.01) 238 | return loss 239 | -------------------------------------------------------------------------------- /tools/data_converter/lyft_converter.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import mmcv 3 | import numpy as np 4 | import os 5 | from logging import warning 6 | from lyft_dataset_sdk.lyftdataset import LyftDataset as Lyft 7 | from os import path as osp 8 | from pyquaternion import Quaternion 9 | 10 | from mmdet3d.datasets import LyftDataset 11 | from .nuscenes_converter import (get_2d_boxes, get_available_scenes, 12 | obtain_sensor2top) 13 | 14 | lyft_categories = ('car', 'truck', 'bus', 'emergency_vehicle', 'other_vehicle', 15 | 'motorcycle', 'bicycle', 'pedestrian', 'animal') 16 | 17 | 18 | def create_lyft_infos(root_path, 19 | info_prefix, 20 | version='v1.01-train', 21 | max_sweeps=10): 22 | """Create info file of lyft dataset. 23 | 24 | Given the raw data, generate its related info file in pkl format. 25 | 26 | Args: 27 | root_path (str): Path of the data root. 28 | info_prefix (str): Prefix of the info file to be generated. 29 | version (str): Version of the data. 30 | Default: 'v1.01-train' 31 | max_sweeps (int): Max number of sweeps. 32 | Default: 10 33 | """ 34 | lyft = Lyft( 35 | data_path=osp.join(root_path, version), 36 | json_path=osp.join(root_path, version, version), 37 | verbose=True) 38 | available_vers = ['v1.01-train', 'v1.01-test'] 39 | assert version in available_vers 40 | if version == 'v1.01-train': 41 | train_scenes = mmcv.list_from_file('data/lyft/train.txt') 42 | val_scenes = mmcv.list_from_file('data/lyft/val.txt') 43 | elif version == 'v1.01-test': 44 | train_scenes = mmcv.list_from_file('data/lyft/test.txt') 45 | val_scenes = [] 46 | else: 47 | raise ValueError('unknown') 48 | 49 | # filter existing scenes. 
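# (Editor's note) The split files read above are assumed to contain one scene
# name per line; mmcv.list_from_file returns the stripped lines of a text file
# as a Python list, which is then matched against the available scene names
# below.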
50 | available_scenes = get_available_scenes(lyft) 51 | available_scene_names = [s['name'] for s in available_scenes] 52 | train_scenes = list( 53 | filter(lambda x: x in available_scene_names, train_scenes)) 54 | val_scenes = list(filter(lambda x: x in available_scene_names, val_scenes)) 55 | train_scenes = set([ 56 | available_scenes[available_scene_names.index(s)]['token'] 57 | for s in train_scenes 58 | ]) 59 | val_scenes = set([ 60 | available_scenes[available_scene_names.index(s)]['token'] 61 | for s in val_scenes 62 | ]) 63 | 64 | test = 'test' in version 65 | if test: 66 | print(f'test scene: {len(train_scenes)}') 67 | else: 68 | print(f'train scene: {len(train_scenes)}, \ 69 | val scene: {len(val_scenes)}') 70 | train_lyft_infos, val_lyft_infos = _fill_trainval_infos( 71 | lyft, train_scenes, val_scenes, test, max_sweeps=max_sweeps) 72 | 73 | metadata = dict(version=version) 74 | if test: 75 | print(f'test sample: {len(train_lyft_infos)}') 76 | data = dict(infos=train_lyft_infos, metadata=metadata) 77 | info_name = f'{info_prefix}_infos_test' 78 | info_path = osp.join(root_path, f'{info_name}.pkl') 79 | mmcv.dump(data, info_path) 80 | else: 81 | print(f'train sample: {len(train_lyft_infos)}, \ 82 | val sample: {len(val_lyft_infos)}') 83 | data = dict(infos=train_lyft_infos, metadata=metadata) 84 | train_info_name = f'{info_prefix}_infos_train' 85 | info_path = osp.join(root_path, f'{train_info_name}.pkl') 86 | mmcv.dump(data, info_path) 87 | data['infos'] = val_lyft_infos 88 | val_info_name = f'{info_prefix}_infos_val' 89 | info_val_path = osp.join(root_path, f'{val_info_name}.pkl') 90 | mmcv.dump(data, info_val_path) 91 | 92 | 93 | def _fill_trainval_infos(lyft, 94 | train_scenes, 95 | val_scenes, 96 | test=False, 97 | max_sweeps=10): 98 | """Generate the train/val infos from the raw data. 99 | 100 | Args: 101 | lyft (:obj:`LyftDataset`): Dataset class in the Lyft dataset. 102 | train_scenes (list[str]): Basic information of training scenes. 103 | val_scenes (list[str]): Basic information of validation scenes. 104 | test (bool): Whether use the test mode. In the test mode, no 105 | annotations can be accessed. Default: False. 106 | max_sweeps (int): Max number of sweeps. Default: 10. 107 | 108 | Returns: 109 | tuple[list[dict]]: Information of training set and 110 | validation set that will be saved to the info file. 
111 | """ 112 | train_lyft_infos = [] 113 | val_lyft_infos = [] 114 | 115 | for sample in mmcv.track_iter_progress(lyft.sample): 116 | lidar_token = sample['data']['LIDAR_TOP'] 117 | sd_rec = lyft.get('sample_data', sample['data']['LIDAR_TOP']) 118 | cs_record = lyft.get('calibrated_sensor', 119 | sd_rec['calibrated_sensor_token']) 120 | pose_record = lyft.get('ego_pose', sd_rec['ego_pose_token']) 121 | abs_lidar_path, boxes, _ = lyft.get_sample_data(lidar_token) 122 | # nuScenes devkit returns more convenient relative paths while 123 | # lyft devkit returns absolute paths 124 | abs_lidar_path = str(abs_lidar_path) # absolute path 125 | lidar_path = abs_lidar_path.split(f'{os.getcwd()}/')[-1] 126 | # relative path 127 | 128 | mmcv.check_file_exist(lidar_path) 129 | 130 | info = { 131 | 'lidar_path': lidar_path, 132 | 'token': sample['token'], 133 | 'sweeps': [], 134 | 'cams': dict(), 135 | 'lidar2ego_translation': cs_record['translation'], 136 | 'lidar2ego_rotation': cs_record['rotation'], 137 | 'ego2global_translation': pose_record['translation'], 138 | 'ego2global_rotation': pose_record['rotation'], 139 | 'timestamp': sample['timestamp'], 140 | } 141 | 142 | l2e_r = info['lidar2ego_rotation'] 143 | l2e_t = info['lidar2ego_translation'] 144 | e2g_r = info['ego2global_rotation'] 145 | e2g_t = info['ego2global_translation'] 146 | l2e_r_mat = Quaternion(l2e_r).rotation_matrix 147 | e2g_r_mat = Quaternion(e2g_r).rotation_matrix 148 | 149 | # obtain 6 image's information per frame 150 | camera_types = [ 151 | 'CAM_FRONT', 152 | 'CAM_FRONT_RIGHT', 153 | 'CAM_FRONT_LEFT', 154 | 'CAM_BACK', 155 | 'CAM_BACK_LEFT', 156 | 'CAM_BACK_RIGHT', 157 | ] 158 | for cam in camera_types: 159 | cam_token = sample['data'][cam] 160 | cam_path, _, cam_intrinsic = lyft.get_sample_data(cam_token) 161 | cam_info = obtain_sensor2top(lyft, cam_token, l2e_t, l2e_r_mat, 162 | e2g_t, e2g_r_mat, cam) 163 | cam_info.update(cam_intrinsic=cam_intrinsic) 164 | info['cams'].update({cam: cam_info}) 165 | 166 | # obtain sweeps for a single key-frame 167 | sd_rec = lyft.get('sample_data', sample['data']['LIDAR_TOP']) 168 | sweeps = [] 169 | while len(sweeps) < max_sweeps: 170 | if not sd_rec['prev'] == '': 171 | sweep = obtain_sensor2top(lyft, sd_rec['prev'], l2e_t, 172 | l2e_r_mat, e2g_t, e2g_r_mat, 'lidar') 173 | sweeps.append(sweep) 174 | sd_rec = lyft.get('sample_data', sd_rec['prev']) 175 | else: 176 | break 177 | info['sweeps'] = sweeps 178 | # obtain annotation 179 | if not test: 180 | annotations = [ 181 | lyft.get('sample_annotation', token) 182 | for token in sample['anns'] 183 | ] 184 | locs = np.array([b.center for b in boxes]).reshape(-1, 3) 185 | dims = np.array([b.wlh for b in boxes]).reshape(-1, 3) 186 | rots = np.array([b.orientation.yaw_pitch_roll[0] 187 | for b in boxes]).reshape(-1, 1) 188 | 189 | names = [b.name for b in boxes] 190 | for i in range(len(names)): 191 | if names[i] in LyftDataset.NameMapping: 192 | names[i] = LyftDataset.NameMapping[names[i]] 193 | names = np.array(names) 194 | 195 | # we need to convert rot to SECOND format. 
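# (Editor's note) Worked example of the yaw conversion on the next line, using
# the devkit yaw (rotation about the z-axis) as input: yaw = 0 maps to
# -0 - pi/2 = -pi/2, and yaw = pi/2 maps to -pi/2 - pi/2 = -pi.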
196 |             gt_boxes = np.concatenate([locs, dims, -rots - np.pi / 2], axis=1)
197 |             assert len(gt_boxes) == len(
198 |                 annotations), f'{len(gt_boxes)}, {len(annotations)}'
199 |             info['gt_boxes'] = gt_boxes
200 |             info['gt_names'] = names
201 |             info['num_lidar_pts'] = np.array(
202 |                 [a['num_lidar_pts'] for a in annotations])
203 |             info['num_radar_pts'] = np.array(
204 |                 [a['num_radar_pts'] for a in annotations])
205 | 
206 |         if sample['scene_token'] in train_scenes:
207 |             train_lyft_infos.append(info)
208 |         else:
209 |             val_lyft_infos.append(info)
210 | 
211 |     return train_lyft_infos, val_lyft_infos
212 | 
213 | 
214 | def export_2d_annotation(root_path, info_path, version):
215 |     """Export 2d annotation from the info file and raw data.
216 | 
217 |     Args:
218 |         root_path (str): Root path of the raw data.
219 |         info_path (str): Path of the info file.
220 |         version (str): Dataset version.
221 |     """
222 |     warning('DeprecationWarning: 2D annotations are not used on the '
223 |             'Lyft dataset. The function export_2d_annotation will be '
224 |             'deprecated.')
225 |     # get bbox annotations for camera
226 |     camera_types = [
227 |         'CAM_FRONT',
228 |         'CAM_FRONT_RIGHT',
229 |         'CAM_FRONT_LEFT',
230 |         'CAM_BACK',
231 |         'CAM_BACK_LEFT',
232 |         'CAM_BACK_RIGHT',
233 |     ]
234 |     lyft_infos = mmcv.load(info_path)['infos']
235 |     lyft = Lyft(
236 |         data_path=osp.join(root_path, version),
237 |         json_path=osp.join(root_path, version, version),
238 |         verbose=True)
239 |     # info_2d_list = []
240 |     cat2Ids = [
241 |         dict(id=lyft_categories.index(cat_name), name=cat_name)
242 |         for cat_name in lyft_categories
243 |     ]
244 |     coco_ann_id = 0
245 |     coco_2d_dict = dict(annotations=[], images=[], categories=cat2Ids)
246 |     for info in mmcv.track_iter_progress(lyft_infos):
247 |         for cam in camera_types:
248 |             cam_info = info['cams'][cam]
249 |             coco_infos = get_2d_boxes(
250 |                 lyft,
251 |                 cam_info['sample_data_token'],
252 |                 visibilities=['', '1', '2', '3', '4'])
253 |             (height, width, _) = mmcv.imread(cam_info['data_path']).shape
254 |             coco_2d_dict['images'].append(
255 |                 dict(
256 |                     file_name=cam_info['data_path'],
257 |                     id=cam_info['sample_data_token'],
258 |                     width=width,
259 |                     height=height))
260 |             for coco_info in coco_infos:
261 |                 if coco_info is None:
262 |                     continue
263 |                 # add an empty key for coco format
264 |                 coco_info['segmentation'] = []
265 |                 coco_info['id'] = coco_ann_id
266 |                 coco_2d_dict['annotations'].append(coco_info)
267 |                 coco_ann_id += 1
268 |     mmcv.dump(coco_2d_dict, f'{info_path[:-4]}.coco.json')
269 | 
--------------------------------------------------------------------------------
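A minimal usage sketch for the Lyft converter above, added for completeness. The
data root and info prefix are placeholders; it assumes the raw Lyft release has
been extracted under the data root with its v1.01-train JSON tables, that the
data/lyft/train.txt and data/lyft/val.txt split files referenced by
create_lyft_infos exist, and that the repository root is on PYTHONPATH so that
tools.data_converter resolves as a package:

    from tools.data_converter.lyft_converter import create_lyft_infos

    if __name__ == '__main__':
        # Writes <info_prefix>_infos_train.pkl and <info_prefix>_infos_val.pkl
        # under the data root, as implemented above.
        create_lyft_infos(
            root_path='./data/lyft',   # placeholder data root
            info_prefix='lyft',
            version='v1.01-train',
            max_sweeps=10)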