├── tools ├── data_converter │ ├── __init__.py │ ├── argoverse_converter.py │ └── nuscenes_converter.py ├── dist_train.sh ├── dist_test.sh ├── create_data.sh ├── slurm_test.sh ├── slurm_train.sh ├── misc │ ├── print_config.py │ ├── visualize_results.py │ ├── fuse_conv_bn.py │ └── browse_dataset.py ├── model_converters │ ├── publish_model.py │ ├── regnet2mmdet.py │ └── convert_votenet_checkpoints.py ├── analysis_tools │ ├── get_flops.py │ ├── benchmark.py │ └── analyze_logs.py ├── builder.py ├── visualization │ ├── visualize_dataset.py │ └── renderer.py ├── mmdet_train.py ├── mmdet_test.py └── train.py ├── plugin ├── datasets │ ├── evaluation │ │ ├── __init__.py │ │ └── precision_recall │ │ │ ├── __init__.py │ │ │ └── tgfg.py │ ├── nuscences_utils │ │ └── __init__.py │ ├── __init__.py │ ├── pipelines │ │ ├── __init__.py │ │ ├── vectorize.py │ │ ├── rasterize.py │ │ ├── formating.py │ │ ├── transform.py │ │ └── loading.py │ ├── base_dataset.py │ └── nusc_dataset.py ├── models │ ├── augmentation │ │ ├── __init__.py │ │ └── sythesis_det.py │ ├── heads │ │ ├── detgen_utils │ │ │ ├── __init__.py │ │ │ └── utils.py │ │ ├── __init__.py │ │ ├── base_map_head.py │ │ └── map_element_detector.py │ ├── backbones │ │ ├── __init__.py │ │ ├── pointpillar_utils │ │ │ └── voxel.py │ │ └── point_pillar.py │ ├── mapers │ │ ├── __init__.py │ │ ├── base_mapper.py │ │ └── vectormapnet.py │ ├── losses │ │ ├── __init__.py │ │ └── detr_loss.py │ ├── transformer_utils │ │ ├── __init__.py │ │ └── base_transformer.py │ ├── __init__.py │ └── assigner │ │ ├── __init__.py │ │ └── assigner.py └── __init__.py ├── requirements.txt ├── configs └── _base_ │ └── default_runtime.py ├── env.md ├── .gitignore └── README.md /tools/data_converter/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plugin/datasets/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plugin/models/augmentation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plugin/datasets/nuscences_utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plugin/models/heads/detgen_utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plugin/datasets/evaluation/precision_recall/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plugin/__init__.py: -------------------------------------------------------------------------------- 1 | from .models import * 2 | from .datasets import * -------------------------------------------------------------------------------- /plugin/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .ipm_backbone import IPMEncoder -------------------------------------------------------------------------------- /plugin/models/mapers/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .vectormapnet import VectorMapNet 2 | -------------------------------------------------------------------------------- /plugin/models/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .detr_loss import LinesLoss, MasksLoss, LenLoss 2 | -------------------------------------------------------------------------------- /plugin/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipelines import * 2 | from .nusc_dataset import NuscDataset 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | efficientnet_pytorch 2 | # torch_scatter==2.0.6 3 | similaritymeasures==0.5.0 4 | shapely==1.8 5 | av2 6 | ipython 7 | -------------------------------------------------------------------------------- /plugin/models/heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .dghead import DGHead 2 | from .map_element_detector import MapElementDetector 3 | from .polylinegenerator import PolylineGenerator -------------------------------------------------------------------------------- /plugin/models/transformer_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .deformable_transformer import DeformableDetrTransformer_, DeformableDetrTransformerDecoder_ 2 | from .base_transformer import PlaceHolderEncoder -------------------------------------------------------------------------------- /plugin/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .backbones import * 2 | from .heads import * 3 | from .losses import * 4 | from .mapers import * 5 | from .transformer_utils import * 6 | from .assigner import * 7 | -------------------------------------------------------------------------------- /plugin/models/assigner/__init__.py: -------------------------------------------------------------------------------- 1 | from .assigner import HungarianLinesAssigner 2 | from .match_cost import MapQueriesCost, BBoxLogitsCost, DynamicLinesCost, IoUCostC, BBoxCostC, LinesCost, LinesFixNumChamferCost, ClsSigmoidCost 3 | -------------------------------------------------------------------------------- /tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | PORT=${PORT:-29500} 6 | 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 8 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 9 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} 10 | -------------------------------------------------------------------------------- /tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | PORT=${PORT:-29500} 7 | 8 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 9 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 10 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} 11 | -------------------------------------------------------------------------------- /configs/_base_/default_runtime.py: 
-------------------------------------------------------------------------------- 1 | checkpoint_config = dict(interval=1) 2 | # yapf:disable push 3 | # By default we use textlogger hook and tensorboard 4 | # For more loggers see 5 | # https://mmcv.readthedocs.io/en/latest/api.html#mmcv.runner.LoggerHook 6 | log_config = dict( 7 | interval=50, 8 | hooks=[ 9 | dict(type='TextLoggerHook'), 10 | dict(type='TensorboardLoggerHook') 11 | ]) 12 | # yapf:enable 13 | dist_params = dict(backend='nccl') 14 | log_level = 'INFO' 15 | work_dir = None 16 | load_from = None 17 | resume_from = None 18 | workflow = [('train', 1)] 19 | -------------------------------------------------------------------------------- /plugin/datasets/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | from .loading import LoadMultiViewImagesFromFiles 2 | from .formating import FormatBundleMap 3 | from .transform import ResizeMultiViewImages, PadMultiViewImages, Normalize3D 4 | from .rasterize import RasterizeMap 5 | from .vectorize import VectorizeMap 6 | from .poly_bbox import PolygonizeLocalMapBbox 7 | from .map_transform import VectorizeLocalMap 8 | # for argoverse 9 | 10 | __all__ = [ 11 | 'LoadMultiViewImagesFromFiles', 12 | 'FormatBundleMap', 'Normalize3D', 'ResizeMultiViewImages', 'PadMultiViewImages', 13 | 'RasterizeMap', 'VectorizeMap', 'PolygonizeLocalMapBbox' 14 | ] -------------------------------------------------------------------------------- /tools/create_data.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | export PYTHONPATH=`pwd`:$PYTHONPATH 5 | 6 | PARTITION=$1 7 | JOB_NAME=$2 8 | CONFIG=$3 9 | WORK_DIR=$4 10 | GPUS=${GPUS:-1} 11 | GPUS_PER_NODE=${GPUS_PER_NODE:-1} 12 | SRUN_ARGS=${SRUN_ARGS:-""} 13 | JOB_NAME=create_data 14 | 15 | srun -p ${PARTITION} \ 16 | --job-name=${JOB_NAME} \ 17 | --gres=gpu:${GPUS_PER_NODE} \ 18 | --ntasks=${GPUS} \ 19 | --ntasks-per-node=${GPUS_PER_NODE} \ 20 | --kill-on-bad-exit=1 \ 21 | ${SRUN_ARGS} \ 22 | python -u tools/create_data.py kitti \ 23 | --root-path ./data/kitti \ 24 | --out-dir ./data/kitti \ 25 | --extra-tag kitti 26 | -------------------------------------------------------------------------------- /tools/slurm_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | CHECKPOINT=$4 9 | GPUS=${GPUS:-8} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | PY_ARGS=${@:5} 13 | SRUN_ARGS=${SRUN_ARGS:-""} 14 | 15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 16 | srun -p ${PARTITION} \ 17 | --job-name=${JOB_NAME} \ 18 | --gres=gpu:${GPUS_PER_NODE} \ 19 | --ntasks=${GPUS} \ 20 | --ntasks-per-node=${GPUS_PER_NODE} \ 21 | --cpus-per-task=${CPUS_PER_TASK} \ 22 | --kill-on-bad-exit=1 \ 23 | ${SRUN_ARGS} \ 24 | python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS} 25 | -------------------------------------------------------------------------------- /tools/slurm_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | PARTITION=$1 6 | JOB_NAME=$2 7 | CONFIG=$3 8 | WORK_DIR=$4 9 | GPUS=${GPUS:-8} 10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8} 11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5} 12 | SRUN_ARGS=${SRUN_ARGS:-""} 13 | PY_ARGS=${@:5} 14 | 15 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 16 | srun -p 
${PARTITION} \ 17 | --job-name=${JOB_NAME} \ 18 | --gres=gpu:${GPUS_PER_NODE} \ 19 | --ntasks=${GPUS} \ 20 | --ntasks-per-node=${GPUS_PER_NODE} \ 21 | --cpus-per-task=${CPUS_PER_TASK} \ 22 | --kill-on-bad-exit=1 \ 23 | ${SRUN_ARGS} \ 24 | python -u tools/train.py ${CONFIG} --work-dir=${WORK_DIR} --launcher="slurm" ${PY_ARGS} 25 | -------------------------------------------------------------------------------- /tools/misc/print_config.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from mmcv import Config, DictAction 3 | 4 | 5 | def parse_args(): 6 | parser = argparse.ArgumentParser(description='Print the whole config') 7 | parser.add_argument('config', help='config file path') 8 | parser.add_argument( 9 | '--options', nargs='+', action=DictAction, help='arguments in dict') 10 | args = parser.parse_args() 11 | 12 | return args 13 | 14 | 15 | def main(): 16 | args = parse_args() 17 | 18 | cfg = Config.fromfile(args.config) 19 | if args.options is not None: 20 | cfg.merge_from_dict(args.options) 21 | print(f'Config:\n{cfg.pretty_text}') 22 | 23 | 24 | if __name__ == '__main__': 25 | main() 26 | -------------------------------------------------------------------------------- /plugin/models/transformer_utils/base_transformer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | from mmcv.cnn import xavier_init, constant_init 7 | from mmcv.cnn.bricks.registry import (ATTENTION, 8 | TRANSFORMER_LAYER_SEQUENCE) 9 | from mmcv.cnn.bricks.transformer import (MultiScaleDeformableAttention, 10 | TransformerLayerSequence, 11 | build_transformer_layer_sequence) 12 | from mmcv.runner.base_module import BaseModule 13 | 14 | from mmdet.models.utils.builder import TRANSFORMER 15 | 16 | @TRANSFORMER_LAYER_SEQUENCE.register_module() 17 | class PlaceHolderEncoder(nn.Module): 18 | 19 | def __init__(self, *args, embed_dims=None, **kwargs): 20 | super(PlaceHolderEncoder, self).__init__() 21 | self.embed_dims = embed_dims 22 | 23 | def forward(self, *args, query=None, **kwargs): 24 | 25 | return query 26 | 27 | -------------------------------------------------------------------------------- /tools/model_converters/publish_model.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import subprocess 3 | import torch 4 | 5 | 6 | def parse_args(): 7 | parser = argparse.ArgumentParser( 8 | description='Process a checkpoint to be published') 9 | parser.add_argument('in_file', help='input checkpoint filename') 10 | parser.add_argument('out_file', help='output checkpoint filename') 11 | args = parser.parse_args() 12 | return args 13 | 14 | 15 | def process_checkpoint(in_file, out_file): 16 | checkpoint = torch.load(in_file, map_location='cpu') 17 | # remove optimizer for smaller file size 18 | if 'optimizer' in checkpoint: 19 | del checkpoint['optimizer'] 20 | # if it is necessary to remove some sensitive data in checkpoint['meta'], 21 | # add the code here. 
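# For example (hypothetical keys -- inspect your own checkpoint['meta'] before
# relying on these names):
#   checkpoint.get('meta', {}).pop('env_info', None)
#   checkpoint.get('meta', {}).pop('config', None)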
22 | torch.save(checkpoint, out_file) 23 | sha = subprocess.check_output(['sha256sum', out_file]).decode() 24 | final_file = out_file.rstrip('.pth') + '-{}.pth'.format(sha[:8]) 25 | subprocess.Popen(['mv', out_file, final_file]) 26 | 27 | 28 | def main(): 29 | args = parse_args() 30 | process_checkpoint(args.in_file, args.out_file) 31 | 32 | 33 | if __name__ == '__main__': 34 | main() 35 | -------------------------------------------------------------------------------- /plugin/models/heads/base_map_head.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | import torch.nn as nn 4 | from mmcv.runner import auto_fp16 5 | from mmcv.utils import print_log 6 | 7 | from mmdet.utils import get_root_logger 8 | 9 | 10 | class BaseMapHead(nn.Module, metaclass=ABCMeta): 11 | """Base class for mappers.""" 12 | 13 | def __init__(self): 14 | super(BaseMapHead, self).__init__() 15 | self.fp16_enabled = False 16 | 17 | def init_weights(self, pretrained=None): 18 | """Initialize the weights in detector. 19 | 20 | Args: 21 | pretrained (str, optional): Path to pre-trained weights. 22 | Defaults to None. 23 | """ 24 | if pretrained is not None: 25 | logger = get_root_logger() 26 | print_log(f'load model from: {pretrained}', logger=logger) 27 | 28 | @auto_fp16(apply_to=('img', )) 29 | def forward(self, *args, **kwargs): 30 | pass 31 | 32 | @abstractmethod 33 | def loss(self, pred, gt): 34 | ''' 35 | Compute loss 36 | Output: 37 | dict( 38 | loss: torch.Tensor 39 | log_vars: dict( 40 | str: float, 41 | ) 42 | num_samples: int 43 | ) 44 | ''' 45 | return 46 | 47 | @abstractmethod 48 | def post_process(self, pred): 49 | ''' 50 | convert model predictions to vectorized outputs 51 | the output format should be consistent with the evaluation function 52 | ''' 53 | return 54 | -------------------------------------------------------------------------------- /tools/misc/visualize_results.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import mmcv 3 | from mmcv import Config 4 | 5 | from mmdet3d.datasets import build_dataset 6 | 7 | 8 | def parse_args(): 9 | parser = argparse.ArgumentParser( 10 | description='MMDet3D visualize the results') 11 | parser.add_argument('config', help='test config file path') 12 | parser.add_argument('--result', help='results file in pickle format') 13 | parser.add_argument( 14 | '--show-dir', help='directory where visualize results will be saved') 15 | args = parser.parse_args() 16 | 17 | return args 18 | 19 | 20 | def main(): 21 | args = parse_args() 22 | 23 | if args.result is not None and \ 24 | not args.result.endswith(('.pkl', '.pickle')): 25 | raise ValueError('The results file must be a pkl file.') 26 | 27 | cfg = Config.fromfile(args.config) 28 | cfg.data.test.test_mode = True 29 | 30 | # build the dataset 31 | dataset = build_dataset(cfg.data.test) 32 | results = mmcv.load(args.result) 33 | 34 | if getattr(dataset, 'show', None) is not None: 35 | # data loading pipeline for showing 36 | eval_pipeline = cfg.get('eval_pipeline', {}) 37 | if eval_pipeline: 38 | dataset.show(results, args.show_dir, pipeline=eval_pipeline) 39 | else: 40 | dataset.show(results, args.show_dir) # use default pipeline 41 | else: 42 | raise NotImplementedError( 43 | 'Show is not implemented for dataset {}!'.format( 44 | type(dataset).__name__)) 45 | 46 | 47 | if __name__ == '__main__': 48 | main() 49 | 
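# Example invocation (the result/output paths below are placeholders, not files
# shipped with this repo; only the flags defined in parse_args above are used):
#   python tools/misc/visualize_results.py configs/vectormapnet.py \
#       --result work_dirs/results.pkl --show-dir work_dirs/vis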
--------------------------------------------------------------------------------
/env.md:
--------------------------------------------------------------------------------
1 | # Set up Environment
2 | 
3 | ### Note
4 | 
5 | If you have trouble with `pip install`, try adding `-i https://pypi.tuna.tsinghua.edu.cn/simple` to the command.
6 | 
7 | ### Create conda environment
8 | 
9 | ```
10 | conda create --name hdmap-opensource python==3.8
11 | conda activate hdmap-opensource
12 | ```
13 | 
14 | ### Install PyTorch
15 | 
16 | ```
17 | pip install torch==1.9.0+cu111 torchvision==0.10.0+cu111 -f https://download.pytorch.org/whl/torch_stable.html
18 | ```
19 | 
20 | ### Install MMCV-series
21 | 
22 | We build our code on open-mmlab, so the whole mmcv series is required.
23 | 
24 | ```
25 | # Install mmcv-series
26 | pip install mmcv-full==1.3.9 -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html
27 | pip install mmdet==2.14.0
28 | pip install mmsegmentation==0.14.1
29 | ```
30 | 
31 | ### Install mmdetection3d
32 | 
33 | Currently we are using mmdetection3d version 0.17.x. To install mmdet3d, please first download a 0.17.x release from the mmdetection3d GitHub releases, unzip the code, and rename the folder to `mmdetection3d`. Then run
34 | 
35 | ```
36 | wget https://github.com/open-mmlab/mmdetection3d/archive/refs/tags/v0.17.3.zip
37 | unzip v0.17.3.zip
38 | cd mmdetection3d-0.17.3
39 | ```
40 | 
41 | ```
42 | cd mmdetection3d
43 | pip install -v -e .
44 | ```
45 | 
46 | to install mmdetection3d. Note that some of the installations above require a CUDA environment; make sure to add `export CUDA_HOME=/usr/local/cuda` to your bash source file.
47 | 
48 | For more details about installation, please refer to the open-mmlab documentation.
49 | 
50 | ### Other requirements
51 | 
52 | Run
53 | 
54 | ```
55 | pip install -r requirements.txt
56 | ```
57 | 
58 | to install all requirements.
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | 
6 | # C extensions
7 | *.so
8 | 
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 | 
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /tools/misc/fuse_conv_bn.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import torch 3 | from mmcv.runner import save_checkpoint 4 | from torch import nn as nn 5 | 6 | # from mmdet.apis import init_model 7 | from mmdet3d.apis import init_model 8 | # from mmcv.runner import init_model 9 | 10 | 11 | def fuse_conv_bn(conv, bn): 12 | """During inference, the functionary of batch norm layers is turned off but 13 | only the mean and var alone channels are used, which exposes the chance to 14 | fuse it with the preceding conv layers to save computations and simplify 15 | network structures.""" 16 | conv_w = conv.weight 17 | conv_b = conv.bias if conv.bias is not None else torch.zeros_like( 18 | bn.running_mean) 19 | 20 | factor = bn.weight / torch.sqrt(bn.running_var + bn.eps) 21 | conv.weight = nn.Parameter(conv_w * 22 | factor.reshape([conv.out_channels, 1, 1, 1])) 23 | conv.bias = nn.Parameter((conv_b - bn.running_mean) * factor + bn.bias) 24 | return conv 25 | 26 | 27 | def fuse_module(m): 28 | last_conv = None 29 | last_conv_name = None 30 | 31 | for name, child in m.named_children(): 32 | if isinstance(child, (nn.BatchNorm2d, nn.SyncBatchNorm)): 33 | if last_conv is None: # only fuse BN that is after Conv 34 | continue 35 | fused_conv = fuse_conv_bn(last_conv, child) 36 | m._modules[last_conv_name] = fused_conv 37 | # To reduce changes, set BN as Identity instead of deleting it. 
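# fuse_conv_bn above has already folded the BN affine into the conv, i.e.
#   w' = w * gamma / sqrt(running_var + eps)
#   b' = (b - running_mean) * gamma / sqrt(running_var + eps) + beta
# so the BN module itself can safely be reduced to a pass-through here.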
38 | m._modules[name] = nn.Identity() 39 | last_conv = None 40 | elif isinstance(child, nn.Conv2d): 41 | last_conv = child 42 | last_conv_name = name 43 | else: 44 | fuse_module(child) 45 | return m 46 | 47 | 48 | def parse_args(): 49 | parser = argparse.ArgumentParser( 50 | description='fuse Conv and BN layers in a model') 51 | parser.add_argument('config', help='config file path') 52 | parser.add_argument('checkpoint', help='checkpoint file path') 53 | parser.add_argument('out', help='output path of the converted model') 54 | args = parser.parse_args() 55 | return args 56 | 57 | 58 | def main(): 59 | args = parse_args() 60 | # build the model from a config file and a checkpoint file 61 | model = init_model(args.config, args.checkpoint) 62 | # fuse conv and bn layers of the model 63 | fused_model = fuse_module(model) 64 | save_checkpoint(fused_model, args.out) 65 | 66 | 67 | if __name__ == '__main__': 68 | main() 69 | -------------------------------------------------------------------------------- /plugin/datasets/pipelines/vectorize.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from mmdet.datasets.builder import PIPELINES 3 | from shapely.geometry import LineString, box, Polygon 4 | import av2.geometry.interpolate as interp_utils 5 | 6 | 7 | @PIPELINES.register_module(force=True) 8 | class VectorizeMap(object): 9 | def __init__(self, coords_dim=3, sample_num=20, sample_dist=-1): 10 | self.coords_dim = coords_dim 11 | self.sample_num = sample_num 12 | self.sample_dist = sample_dist 13 | assert (sample_dist > 0 and sample_num < 0) or (sample_dist < 0 and sample_num > 0) 14 | if sample_dist > 0: 15 | self.sample_fn = self.interp_fixed_dist 16 | else: 17 | self.sample_fn = self.interp_fixed_num 18 | 19 | def interp_fixed_num(self, line: LineString, backend='shapely'): 20 | # TODO: compare two solutions 21 | # solution 1: 22 | if backend == 'shapely': 23 | distances = np.linspace(0, line.length, self.sample_num) 24 | sampled_points = np.array([list(line.interpolate(distance).coords) 25 | for distance in distances]).squeeze() 26 | 27 | # solution 2: 28 | elif backend == 'argo': 29 | sampled_points = interp_utils.interp_arc(self.sample_num, np.array(list(line.coords))) 30 | 31 | return sampled_points 32 | 33 | def interp_fixed_dist(self, line: LineString): 34 | distances = list(np.arange(self.sample_dist, line.length, self.sample_dist)) 35 | # make sure at least two sample points when sample_dist > line.length 36 | distances = [0,] + distances + [line.length,] 37 | 38 | sampled_points = np.array([list(line.interpolate(distance).coords) 39 | for distance in distances]).squeeze() 40 | 41 | return sampled_points 42 | 43 | def get_vectorized_lines(self, map_geoms): 44 | vectors = {} 45 | for label, geom_list in map_geoms.items(): 46 | vectors[label] = [] 47 | for geom in geom_list: 48 | if geom.geom_type == 'LineString': 49 | line = self.sample_fn(geom) 50 | line = line[:, :self.coords_dim] 51 | vectors[label].append(line) 52 | 53 | elif geom.geom_type == 'Polygon': 54 | # polygon objects will not be vectorized 55 | continue 56 | 57 | else: 58 | raise ValueError('map geoms must be either LineString or Polygon!') 59 | return vectors 60 | 61 | def __call__(self, input_dict): 62 | map_geoms = input_dict['map_geoms'] # {0: List[ped_crossing: LineString], 1: ...} 63 | 64 | ''' 65 | Dict: {label: vector_list(np Array), 66 | e.g. 67 | 0: [array([[x1, y1], [x2, y2]]), array([[x3, y3], [x4, y4], [x5, y5]])], 68 | 1: ... 
69 | } 70 | ''' 71 | input_dict['vectors'] = self.get_vectorized_lines(map_geoms) 72 | return input_dict -------------------------------------------------------------------------------- /plugin/models/heads/detgen_utils/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch import Tensor 4 | 5 | def generate_square_subsequent_mask(sz: int, condition_len: int = 1, bool_out=False, device: str = "cpu") -> torch.Tensor: 6 | """ Generate the attention mask for causal decoding """ 7 | mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1) 8 | 9 | if condition_len > 1: 10 | mask[:condition_len,:condition_len] = 1 11 | 12 | if not bool_out: 13 | mask = ( 14 | mask.float() 15 | .masked_fill(mask == 0, float("-inf")) 16 | .masked_fill(mask == 1, float(0.0))) 17 | return mask.to(device=device) 18 | 19 | 20 | def dequantize_verts(verts, canvas_size: Tensor, add_noise=False): 21 | """Quantizes vertices and outputs integers with specified n_bits.""" 22 | min_range = -1 23 | max_range = 1 24 | range_quantize = canvas_size 25 | 26 | verts = verts.type(torch.float32) 27 | verts = verts * (max_range - min_range) / range_quantize + min_range 28 | if add_noise: 29 | verts += torch.rand_like(verts) * range_quantize 30 | return verts 31 | 32 | 33 | def quantize_verts( 34 | verts, 35 | canvas_size: Tensor): 36 | """Convert vertices from its original range ([-1,1]) to discrete values in [0, n_bits**2 - 1]. 37 | Args: 38 | verts: seqlen, 2 39 | """ 40 | min_range = -1 41 | max_range = 1 42 | range_quantize = canvas_size-1 43 | 44 | verts_ratio = (verts - min_range) / ( 45 | max_range - min_range) 46 | verts_quantize = verts_ratio * range_quantize 47 | 48 | return verts_quantize.type(torch.int32) 49 | 50 | 51 | def top_k_logits(logits, k): 52 | """Masks logits such that logits not in top-k are small.""" 53 | if k == 0: 54 | return logits 55 | else: 56 | values, _ = torch.topk(logits, k=k) 57 | k_largest = torch.min(values) 58 | logits = torch.where(logits < k_largest, 59 | torch.ones_like(logits)*-1e9, logits) 60 | return logits 61 | 62 | 63 | def top_p_logits(logits, p): 64 | """Masks logits using nucleus (top-p) sampling.""" 65 | if p == 1: 66 | return logits 67 | else: 68 | 69 | seq, dim = logits.shape[1:] 70 | logits = logits.view(-1, dim) 71 | sort_indices = torch.argsort(logits, dim=-1, descending=True) 72 | probs = F.softmax(logits, dim=-1).gather(-1, sort_indices) 73 | cumprobs = torch.cumsum(probs, dim=-1) - probs 74 | 75 | # The top 1 candidate always will not be masked. 76 | # This way ensures at least 1 indices will be selected. 
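# Worked example (illustrative numbers): for sorted probs [0.5, 0.3, 0.2] and
# p = 0.6, cumprobs = [0.0, 0.5, 0.8]; only the 0.8 entry exceeds p, so the two
# most likely candidates survive and the tail is pushed towards -1e9 below.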
77 | sort_mask = (cumprobs > p).type(logits.dtype) 78 | batch_indices = torch.repeat_interleave( 79 | torch.arange(logits.shape[0]).unsqueeze(-1), dim, dim=-1) 80 | 81 | top_p_mask = torch.zeros_like(logits) 82 | top_p_mask = top_p_mask.scatter_add(-1, sort_indices, sort_mask) 83 | 84 | logits -= top_p_mask * 1e9 85 | return logits.view(-1, seq, dim) 86 | -------------------------------------------------------------------------------- /tools/model_converters/regnet2mmdet.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import torch 3 | from collections import OrderedDict 4 | 5 | 6 | def convert_stem(model_key, model_weight, state_dict, converted_names): 7 | new_key = model_key.replace('stem.conv', 'conv1') 8 | new_key = new_key.replace('stem.bn', 'bn1') 9 | state_dict[new_key] = model_weight 10 | converted_names.add(model_key) 11 | print(f'Convert {model_key} to {new_key}') 12 | 13 | 14 | def convert_head(model_key, model_weight, state_dict, converted_names): 15 | new_key = model_key.replace('head.fc', 'fc') 16 | state_dict[new_key] = model_weight 17 | converted_names.add(model_key) 18 | print(f'Convert {model_key} to {new_key}') 19 | 20 | 21 | def convert_reslayer(model_key, model_weight, state_dict, converted_names): 22 | split_keys = model_key.split('.') 23 | layer, block, module = split_keys[:3] 24 | block_id = int(block[1:]) 25 | layer_name = f'layer{int(layer[1:])}' 26 | block_name = f'{block_id - 1}' 27 | 28 | if block_id == 1 and module == 'bn': 29 | new_key = f'{layer_name}.{block_name}.downsample.1.{split_keys[-1]}' 30 | elif block_id == 1 and module == 'proj': 31 | new_key = f'{layer_name}.{block_name}.downsample.0.{split_keys[-1]}' 32 | elif module == 'f': 33 | if split_keys[3] == 'a_bn': 34 | module_name = 'bn1' 35 | elif split_keys[3] == 'b_bn': 36 | module_name = 'bn2' 37 | elif split_keys[3] == 'c_bn': 38 | module_name = 'bn3' 39 | elif split_keys[3] == 'a': 40 | module_name = 'conv1' 41 | elif split_keys[3] == 'b': 42 | module_name = 'conv2' 43 | elif split_keys[3] == 'c': 44 | module_name = 'conv3' 45 | new_key = f'{layer_name}.{block_name}.{module_name}.{split_keys[-1]}' 46 | else: 47 | raise ValueError(f'Unsupported conversion of key {model_key}') 48 | print(f'Convert {model_key} to {new_key}') 49 | state_dict[new_key] = model_weight 50 | converted_names.add(model_key) 51 | 52 | 53 | def convert(src, dst): 54 | """Convert keys in pycls pretrained RegNet models to mmdet style.""" 55 | # load caffe model 56 | regnet_model = torch.load(src) 57 | blobs = regnet_model['model_state'] 58 | # convert to pytorch style 59 | state_dict = OrderedDict() 60 | converted_names = set() 61 | for key, weight in blobs.items(): 62 | if 'stem' in key: 63 | convert_stem(key, weight, state_dict, converted_names) 64 | elif 'head' in key: 65 | convert_head(key, weight, state_dict, converted_names) 66 | elif key.startswith('s'): 67 | convert_reslayer(key, weight, state_dict, converted_names) 68 | 69 | # check if all layers are converted 70 | for key in blobs: 71 | if key not in converted_names: 72 | print(f'not converted: {key}') 73 | # save checkpoint 74 | checkpoint = dict() 75 | checkpoint['state_dict'] = state_dict 76 | torch.save(checkpoint, dst) 77 | 78 | 79 | def main(): 80 | parser = argparse.ArgumentParser(description='Convert model keys') 81 | parser.add_argument('src', help='src detectron model path') 82 | parser.add_argument('dst', help='save path') 83 | args = parser.parse_args() 84 | convert(args.src, args.dst) 85 | 86 | 87 
| if __name__ == '__main__': 88 | main() 89 | -------------------------------------------------------------------------------- /tools/analysis_tools/get_flops.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import torch 3 | from mmcv import Config, DictAction 4 | 5 | from mmdet3d.models import build_model 6 | 7 | try: 8 | from mmcv.cnn import get_model_complexity_info 9 | except ImportError: 10 | raise ImportError('Please upgrade mmcv to >0.6.2') 11 | 12 | 13 | def parse_args(): 14 | parser = argparse.ArgumentParser(description='Train a detector') 15 | parser.add_argument('config', help='train config file path') 16 | parser.add_argument( 17 | '--shape', 18 | type=int, 19 | nargs='+', 20 | default=[40000, 4], 21 | help='input point cloud size') 22 | parser.add_argument( 23 | '--modality', 24 | type=str, 25 | default='point', 26 | choices=['point', 'image', 'multi'], 27 | help='input data modality') 28 | parser.add_argument( 29 | '--cfg-options', 30 | nargs='+', 31 | action=DictAction, 32 | help='override some settings in the used config, the key-value pair ' 33 | 'in xxx=yyy format will be merged into config file. If the value to ' 34 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 35 | 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' 36 | 'Note that the quotation marks are necessary and that no white space ' 37 | 'is allowed.') 38 | args = parser.parse_args() 39 | return args 40 | 41 | 42 | def main(): 43 | 44 | args = parse_args() 45 | 46 | if args.modality == 'point': 47 | assert len(args.shape) == 2, 'invalid input shape' 48 | input_shape = tuple(args.shape) 49 | elif args.modality == 'image': 50 | if len(args.shape) == 1: 51 | input_shape = (3, args.shape[0], args.shape[0]) 52 | elif len(args.shape) == 2: 53 | input_shape = (3, ) + tuple(args.shape) 54 | else: 55 | raise ValueError('invalid input shape') 56 | elif args.modality == 'multi': 57 | raise NotImplementedError( 58 | 'FLOPs counter is currently not supported for models with ' 59 | 'multi-modality input') 60 | 61 | cfg = Config.fromfile(args.config) 62 | if args.cfg_options is not None: 63 | cfg.merge_from_dict(args.cfg_options) 64 | # import modules from string list. 65 | if cfg.get('custom_imports', None): 66 | from mmcv.utils import import_modules_from_strings 67 | import_modules_from_strings(**cfg['custom_imports']) 68 | 69 | model = build_model( 70 | cfg.model, 71 | train_cfg=cfg.get('train_cfg'), 72 | test_cfg=cfg.get('test_cfg')) 73 | if torch.cuda.is_available(): 74 | model.cuda() 75 | model.eval() 76 | 77 | if hasattr(model, 'forward_dummy'): 78 | model.forward = model.forward_dummy 79 | else: 80 | raise NotImplementedError( 81 | 'FLOPs counter is currently not supported for {}'.format( 82 | model.__class__.__name__)) 83 | 84 | flops, params = get_model_complexity_info(model, input_shape) 85 | split_line = '=' * 30 86 | print(f'{split_line}\nInput shape: {input_shape}\n' 87 | f'Flops: {flops}\nParams: {params}\n{split_line}') 88 | print('!!!Please be cautious if you use the results in papers. 
' 89 | 'You may need to check if all ops are supported and verify that the ' 90 | 'flops computation is correct.') 91 | 92 | 93 | if __name__ == '__main__': 94 | main() 95 | -------------------------------------------------------------------------------- /plugin/datasets/pipelines/rasterize.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from mmdet.datasets.builder import PIPELINES 3 | from shapely.geometry import LineString, box, Polygon 4 | from shapely import affinity, ops 5 | import cv2 6 | from PIL import Image, ImageDraw 7 | 8 | 9 | @PIPELINES.register_module(force=True) 10 | class RasterizeMap(object): 11 | ''' 12 | Generate rasterized map for map geoms 13 | ''' 14 | def __init__(self, roi_size=(60, 30), canvas_size=(400, 200), thickness=5, 15 | coords_dim=3): 16 | self.roi_size = roi_size 17 | self.canvas_size = canvas_size 18 | self.scale_x = self.canvas_size[0] / self.roi_size[0] 19 | self.scale_y = self.canvas_size[1] / self.roi_size[1] 20 | self.thickness = thickness 21 | self.coords_dim = coords_dim 22 | 23 | def line_ego_to_mask(self, line_ego, mask, color, thickness): 24 | trans_x = self.canvas_size[0] / 2 25 | trans_y = self.canvas_size[1] / 2 26 | line_ego = affinity.scale(line_ego, self.scale_x, self.scale_y, origin=(0, 0)) 27 | line_ego = affinity.affine_transform(line_ego, [1.0, 0.0, 0.0, 1.0, trans_x, trans_y]) 28 | 29 | coords = np.array(list(line_ego.coords), dtype=np.int32)[:, :2] 30 | coords = coords.reshape((-1, 2)) 31 | assert len(coords) >= 2 32 | 33 | cv2.polylines(mask, np.int32([coords]), False, color=color, thickness=thickness) 34 | 35 | def polygons_ego_to_mask(self, polygons, color): 36 | mask = Image.new("L", size=(self.canvas_size[0], self.canvas_size[1]), color=0) 37 | # Image expect size as (w, h) 38 | trans_x = self.canvas_size[0] / 2 39 | trans_y = self.canvas_size[1] / 2 40 | for polygon in polygons: 41 | polygon = affinity.scale(polygon, self.scale_x, self.scale_y, origin=(0, 0)) 42 | polygon = affinity.affine_transform(polygon, [1.0, 0.0, 0.0, 1.0, trans_x, trans_y]) 43 | ext = np.array(polygon.exterior.coords)[:, :self.coords_dim] 44 | vert_list = [(x, y) for x, y in ext] 45 | 46 | ImageDraw.Draw(mask).polygon(vert_list, outline=1, fill=color) 47 | 48 | return np.array(mask, np.uint8) 49 | 50 | def get_semantic_mask(self, map_geoms): 51 | num_classes = len(map_geoms) 52 | semantic_mask = np.zeros((num_classes, self.canvas_size[1], self.canvas_size[0]), dtype=np.uint8) 53 | 54 | for label, geom_list in map_geoms.items(): 55 | if len(geom_list) == 0: 56 | continue 57 | if geom_list[0].geom_type == 'LineString': 58 | for line in geom_list: 59 | self.line_ego_to_mask(line, semantic_mask[label], color=1, thickness=self.thickness) 60 | elif geom_list[0].geom_type == 'Polygon': 61 | polygons = [] 62 | for polygon in geom_list: 63 | polygons.append(polygon) 64 | semantic_mask[label] = self.polygons_ego_to_mask(polygons, color=1) 65 | else: 66 | raise ValueError('map geoms must be either LineString or Polygon!') 67 | return semantic_mask 68 | 69 | def get_instance_mask(self, map_geoms): 70 | raise NotImplementedError 71 | 72 | def __call__(self, input_dict): 73 | map_geoms = input_dict['map_geoms'] # {0: List[ped_crossing: LineString], 1: ...} 74 | 75 | # np Array (num_class, canvas_size[1], canvas_size[0]) 76 | semantic_mask = self.get_semantic_mask(map_geoms) 77 | input_dict['semantic_mask'] = semantic_mask 78 | return input_dict 
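# Coordinate convention used by RasterizeMap above (with the default
# roi_size=(60, 30) and canvas_size=(400, 200)): an ego-frame point (x, y) is
# scaled by (400/60, 200/30) and then shifted by (200, 100), so ego (0, 0) maps
# to pixel (200, 100) and the ROI corner (30, 15) maps to (400, 200).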
-------------------------------------------------------------------------------- /plugin/datasets/pipelines/formating.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import mmcv 3 | from mmcv.parallel import DataContainer as DC 4 | 5 | from mmdet3d.core.points import BasePoints 6 | from mmdet.datasets.builder import PIPELINES 7 | from mmdet.datasets.pipelines import to_tensor 8 | from mmdet.datasets.pipelines.transforms import Resize 9 | import torch 10 | 11 | @PIPELINES.register_module() 12 | class FormatBundleMap(object): 13 | """Format data for map tasks and then collect data for mdeol input. 14 | 15 | These fields are formatted as follows. 16 | 17 | - img: (1)transpose, (2)to tensor, (3)to DataContainer (stack=True) 18 | - semantic_mask (in exists): (1)to tensor, (2)to DataContainer (stack=True) 19 | - vectors: (1)to DataContainer (cpu_only=True) 20 | - img_metas: (1)to DataContainer (cpu_only=True) 21 | """ 22 | 23 | def __init__(self, collect=True, process_img=True, 24 | keys=['img', 'semantic_mask', 'vectors'], 25 | meta_keys=['intrinsics', 'extrinsics']): 26 | 27 | self.process_img = process_img 28 | self.keys = keys 29 | self.meta_keys = meta_keys 30 | self.collect = collect 31 | 32 | def __call__(self, results): 33 | """Call function to transform and format common fields in results. 34 | 35 | Args: 36 | results (dict): Result dict contains the data to convert. 37 | 38 | Returns: 39 | dict: The result dict contains the data that is formatted with 40 | default bundle. 41 | """ 42 | # Format 3D data 43 | if 'points' in results: 44 | assert isinstance(results['points'], BasePoints) 45 | results['points'] = DC(results['points'].tensor) 46 | 47 | for key in ['voxels', 'coors', 'voxel_centers', 'num_points']: 48 | if key not in results: 49 | continue 50 | results[key] = DC(to_tensor(results[key]), stack=False) 51 | 52 | if 'img' in results and self.process_img: 53 | if isinstance(results['img'], list): 54 | # process multiple imgs in single frame 55 | imgs = [img.transpose(2, 0, 1) for img in results['img']] 56 | imgs = np.ascontiguousarray(np.stack(imgs, axis=0)) 57 | results['img'] = DC(to_tensor(imgs), stack=True) 58 | else: 59 | img = np.ascontiguousarray(results['img'].transpose(2, 0, 1)) 60 | results['img'] = DC(to_tensor(img), stack=True) 61 | 62 | if 'semantic_mask' in results: 63 | results['semantic_mask'] = DC(to_tensor(results['semantic_mask']), stack=True) 64 | 65 | if 'vectors' in results: 66 | # vectors may have different sizes 67 | vectors = results['vectors'] 68 | results['vectors'] = DC(vectors, stack=False, cpu_only=True) 69 | 70 | # same with above 71 | if 'polys' in results: 72 | results['polys'] = DC(results['polys'], stack=False, cpu_only=True) 73 | 74 | if self.collect: 75 | # final collect 76 | data = {} 77 | img_metas = {} 78 | for k in self.keys: 79 | data[k] = results[k] 80 | 81 | for k in self.meta_keys: 82 | img_metas[k] = results[k] 83 | data['img_metas'] = DC(img_metas, cpu_only=True) 84 | 85 | return data 86 | 87 | return results 88 | 89 | def __repr__(self): 90 | """str: Return a string that describes the module.""" 91 | repr_str = self.__class__.__name__ 92 | repr_str += f'(process_img={self.process_img}, ' 93 | return repr_str 94 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # VectorMapNet_code 2 | **VectorMapNet: End-to-end Vectorized HD Map Learning** ICML 
2023
3 | 
4 | This is the official codebase of VectorMapNet.
5 | 
6 | 
7 | [Yicheng Liu](https://scholar.google.com/citations?user=vRmsgQUAAAAJ&hl=zh-CN), Yuantian Yuan, [Yue Wang](https://people.csail.mit.edu/yuewang/), [Yilun Wang](https://scholar.google.com.hk/citations?user=nUyTDosAAAAJ&hl=en/), [Hang Zhao](http://people.csail.mit.edu/hangzhao/)
8 | 
9 | 
10 | **[[Paper](https://arxiv.org/pdf/2206.08920.pdf)] [[Project Page](https://tsinghua-mars-lab.github.io/vectormapnet/)]**
11 | 
12 | **Abstract:**
13 | Autonomous driving systems require High-Definition (HD) semantic maps to navigate around urban roads. Existing solutions approach the semantic mapping problem by offline manual annotation, which suffers from serious scalability issues. Recent learning-based methods produce dense rasterized segmentation predictions to construct maps. However, these predictions do not include instance information of individual map elements and require heuristic post-processing to obtain vectorized maps. To tackle these challenges, we introduce an end-to-end vectorized HD map learning pipeline, termed VectorMapNet. VectorMapNet takes onboard sensor observations and predicts a sparse set of polylines in the bird's-eye view. This pipeline can explicitly model the spatial relation between map elements and generate vectorized maps that are friendly to downstream autonomous driving tasks. Extensive experiments show that VectorMapNet achieves strong map learning performance on both the nuScenes and Argoverse2 datasets, surpassing previous state-of-the-art methods by 14.2 mAP and 14.6 mAP. Qualitatively, VectorMapNet is capable of generating comprehensive maps and capturing fine-grained details of road geometry. To the best of our knowledge, VectorMapNet is the first work designed towards end-to-end vectorized map learning from onboard observations.
14 | 
15 | **Questions/Requests:**
16 | Please file an [issue](https://github.com/Tsinghua-MARS-Lab/vecmapnet/issues) or send an email to [Yicheng](moooooore66@gmail.com).
17 | 
18 | 
19 | ## Bibtex
20 | If you find this paper or codebase useful, please cite our paper:
21 | ```
22 | @inproceedings{liu2022vectormapnet,
23 | title={VectorMapNet: End-to-end Vectorized HD Map Learning},
24 | author={Liu, Yicheng and Yuantian, Yuan and Wang, Yue and Wang, Yilun and Zhao, Hang},
25 | booktitle={International conference on machine learning},
26 | year={2023},
27 | organization={PMLR}
28 | }
29 | ```
30 | 
31 | 
32 | # Run VectorMapNet
33 | 
34 | ## Note
35 | 
36 | 
37 | ## 0. Environment
38 | 
39 | Set up the environment by following this [script](env.md).
40 | 
41 | ## 1. Prepare your dataset
42 | 
43 | Store your data with the following structure:
44 | 
45 | ```
46 | root
47 | |--datasets
48 | |--nuScenes
49 | |--Argoverse2 (optional)
50 | 
51 | ```
52 | 
53 | ### 1.1 Generate annotation files
54 | 
55 | #### Preprocess nuScenes
56 | 
57 | ```
58 | python tools/data_converter/nuscenes_converter.py --data-root your/dataset/nuScenes/
59 | ```
60 | 
61 | ## 2.
Evaluate VectorMapNet 62 | 63 | ### Download Checkpoint 64 | | Method | Modality | Config | Checkpoint | 65 | |--------------|-------------|--------|------------| 66 | | VectorMapNet | Camera only | [config](configs/vectormapnet.py) | [model link](https://drive.google.com/file/d/1ccrlZ2HrFfpBB27kC9DkwCYWlTUpgmin/view?usp=sharing) | 67 | 68 | 69 | ### Train VectorMapNet 70 | 71 | In single GPU 72 | ``` 73 | python tools/train.py configs/vectormapnet.py 74 | ``` 75 | 76 | For multi GPUs 77 | ``` 78 | bash tools/dist_train.sh configs/vectormapnet.py $num_gpu 79 | ``` 80 | 81 | 82 | ### Do Evaluation 83 | 84 | In single GPU 85 | ``` 86 | python tools/test.py configs/vectormapnet.py /path/to/ckpt --eval name 87 | ``` 88 | 89 | For multi GPUs 90 | ``` 91 | bash tools/dist_test.sh configs/vectormapnet.py /path/to/ckpt $num_gpu --eval name 92 | ``` 93 | 94 | 95 | ### Expected Results 96 | 97 | | $AP_{ped}$ | $AP_{divider}$ | $AP_{boundary}$ | mAP | 98 | |--------------|----------------|-----------------|-------| 99 | | 39.8 | 47.7 | 38.8 | 42.1 | 100 | 101 | 102 | -------------------------------------------------------------------------------- /tools/builder.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This file is modifiled 3 | Add: MAPPERs, build_mapper 4 | Change: build_model (can build_mapper if cfg.type in ['Mapper']) 5 | ''' 6 | import warnings 7 | from mmcv.cnn import MODELS as MMCV_MODELS 8 | from mmcv.utils import Registry 9 | 10 | from mmdet.models.builder import (BACKBONES, DETECTORS, HEADS, LOSSES, NECKS, 11 | ROI_EXTRACTORS, SHARED_HEADS) 12 | from mmseg.models.builder import SEGMENTORS 13 | 14 | MODELS = Registry('models', parent=MMCV_MODELS) 15 | #MAPPERS = Registry('mappers', parent=MMCV_MODELS) 16 | MAPPERS = Registry('mappers') 17 | 18 | VOXEL_ENCODERS = MODELS 19 | MIDDLE_ENCODERS = MODELS 20 | FUSION_LAYERS = MODELS 21 | 22 | 23 | def build_backbone(cfg): 24 | """Build backbone.""" 25 | return BACKBONES.build(cfg) 26 | 27 | 28 | def build_neck(cfg): 29 | """Build neck.""" 30 | return NECKS.build(cfg) 31 | 32 | 33 | def build_roi_extractor(cfg): 34 | """Build RoI feature extractor.""" 35 | return ROI_EXTRACTORS.build(cfg) 36 | 37 | 38 | def build_shared_head(cfg): 39 | """Build shared head of detector.""" 40 | return SHARED_HEADS.build(cfg) 41 | 42 | 43 | def build_head(cfg): 44 | """Build head.""" 45 | return HEADS.build(cfg) 46 | 47 | 48 | def build_loss(cfg): 49 | """Build loss function.""" 50 | return LOSSES.build(cfg) 51 | 52 | 53 | def build_detector(cfg, train_cfg=None, test_cfg=None): 54 | """Build detector.""" 55 | if train_cfg is not None or test_cfg is not None: 56 | warnings.warn( 57 | 'train_cfg and test_cfg is deprecated, ' 58 | 'please specify them in model', UserWarning) 59 | assert cfg.get('train_cfg') is None or train_cfg is None, \ 60 | 'train_cfg specified in both outer field and model field ' 61 | assert cfg.get('test_cfg') is None or test_cfg is None, \ 62 | 'test_cfg specified in both outer field and model field ' 63 | return DETECTORS.build( 64 | cfg, default_args=dict(train_cfg=train_cfg, test_cfg=test_cfg)) 65 | 66 | 67 | def build_mapper(cfg, train_cfg=None, test_cfg=None): 68 | if train_cfg is not None or test_cfg is not None: 69 | warnings.warn( 70 | 'train_cfg and test_cfg is deprecated, ' 71 | 'please specify them in model', UserWarning) 72 | 73 | return MAPPERS.build(cfg, default_args=dict(train_cfg=train_cfg, 74 | test_cfg=test_cfg)) 75 | 76 | 77 | def build_segmentor(cfg, train_cfg=None, 
test_cfg=None): 78 | """Build segmentor.""" 79 | if train_cfg is not None or test_cfg is not None: 80 | warnings.warn( 81 | 'train_cfg and test_cfg is deprecated, ' 82 | 'please specify them in model', UserWarning) 83 | assert cfg.get('train_cfg') is None or train_cfg is None, \ 84 | 'train_cfg specified in both outer field and model field ' 85 | assert cfg.get('test_cfg') is None or test_cfg is None, \ 86 | 'test_cfg specified in both outer field and model field ' 87 | return SEGMENTORS.build( 88 | cfg, default_args=dict(train_cfg=train_cfg, test_cfg=test_cfg)) 89 | 90 | 91 | def build_model(cfg, train_cfg=None, test_cfg=None): 92 | """A function warpper for building 3D detector or segmentor according to 93 | cfg. 94 | 95 | Should be deprecated in the future. 96 | """ 97 | # if cfg.type in ['EncoderDecoder3D']: 98 | # return build_segmentor(cfg, train_cfg=train_cfg, test_cfg=test_cfg) 99 | # elif cfg.type in ['Mapper', 'VecHDMapNet', 'PolyHDMapNet', 'DGHdmapNet']: 100 | # return build_mapper(cfg, train_cfg=train_cfg, test_cfg=test_cfg) 101 | # else: 102 | # return build_detector(cfg, train_cfg=train_cfg, test_cfg=test_cfg) 103 | return build_mapper(cfg, train_cfg=train_cfg, test_cfg=test_cfg) 104 | 105 | 106 | def build_voxel_encoder(cfg): 107 | """Build voxel encoder.""" 108 | return VOXEL_ENCODERS.build(cfg) 109 | 110 | 111 | def build_middle_encoder(cfg): 112 | """Build middle level encoder.""" 113 | return MIDDLE_ENCODERS.build(cfg) 114 | 115 | 116 | def build_fusion_layer(cfg): 117 | """Build fusion layer.""" 118 | return FUSION_LAYERS.build(cfg) 119 | -------------------------------------------------------------------------------- /plugin/models/backbones/pointpillar_utils/voxel.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | # import torch_scatter 4 | 5 | 6 | def pad_or_trim_to_np(x, shape, pad_val=0): 7 | shape = np.asarray(shape) 8 | pad = shape - np.minimum(np.shape(x), shape) 9 | zeros = np.zeros_like(pad) 10 | x = np.pad(x, np.stack([zeros, pad], axis=1), constant_values=pad_val) 11 | return x[:shape[0], :shape[1]] 12 | 13 | 14 | def raval_index(coords, dims): 15 | dims = torch.cat((dims, torch.ones(1, device=dims.device)), dim=0)[1:] 16 | dims = torch.flip(dims, dims=[0]) 17 | dims = torch.cumprod(dims, dim=0) / dims[0] 18 | multiplier = torch.flip(dims, dims=[0]) 19 | indices = torch.sum(coords * multiplier, dim=1) 20 | return indices 21 | 22 | 23 | def points_to_voxels( 24 | points_xyz, 25 | points_mask, 26 | grid_range_x, 27 | grid_range_y, 28 | grid_range_z 29 | ): 30 | batch_size, num_points, _ = points_xyz.shape 31 | voxel_size_x = grid_range_x[2] 32 | voxel_size_y = grid_range_y[2] 33 | voxel_size_z = grid_range_z[2] 34 | grid_size = np.asarray([ 35 | (grid_range_x[1]-grid_range_x[0]) / voxel_size_x, 36 | (grid_range_y[1]-grid_range_y[0]) / voxel_size_y, 37 | (grid_range_z[1]-grid_range_z[0]) / voxel_size_z 38 | ]).astype('int32') 39 | voxel_size = np.asarray([voxel_size_x, voxel_size_y, voxel_size_z]) 40 | voxel_size = torch.Tensor(voxel_size).to(points_xyz.device) 41 | num_voxels = grid_size[0] * grid_size[1] * grid_size[2] 42 | grid_offset = torch.Tensor([grid_range_x[0], grid_range_y[0], grid_range_z[0]]).to(points_xyz.device) 43 | shifted_points_xyz = points_xyz - grid_offset 44 | voxel_xyz = shifted_points_xyz / voxel_size 45 | voxel_coords = voxel_xyz.int() 46 | grid_size = torch.from_numpy(grid_size).to(points_xyz.device) 47 | grid_size = grid_size.int() 48 | zeros = 
torch.zeros_like(grid_size) 49 | voxel_paddings = ((points_mask < 1.0) | 50 | torch.any((voxel_coords >= grid_size) | 51 | (voxel_coords < zeros), dim=-1)) 52 | voxel_indices = raval_index( 53 | torch.reshape(voxel_coords, [batch_size * num_points, 3]), grid_size) 54 | voxel_indices = torch.reshape(voxel_indices, [batch_size, num_points]) 55 | voxel_indices = torch.where(voxel_paddings, 56 | torch.zeros_like(voxel_indices), 57 | voxel_indices) 58 | voxel_centers = ((0.5 + voxel_coords.float()) * voxel_size + grid_offset) 59 | voxel_coords = torch.where(torch.unsqueeze(voxel_paddings, dim=-1), 60 | torch.zeros_like(voxel_coords), 61 | voxel_coords) 62 | voxel_xyz = torch.where(torch.unsqueeze(voxel_paddings, dim=-1), 63 | torch.zeros_like(voxel_xyz), 64 | voxel_xyz) 65 | voxel_paddings = voxel_paddings.float() 66 | 67 | voxel_indices = voxel_indices.long() 68 | points_per_voxel = torch_scatter.scatter_sum( 69 | torch.ones((batch_size, num_points), dtype=voxel_coords.dtype, device=voxel_coords.device) * (1-voxel_paddings), 70 | voxel_indices, 71 | dim=1, 72 | dim_size=num_voxels 73 | ) 74 | 75 | voxel_point_count = torch.gather(points_per_voxel, 76 | dim=1, 77 | index=voxel_indices) 78 | 79 | 80 | voxel_centroids = torch_scatter.scatter_mean( 81 | points_xyz, 82 | voxel_indices, 83 | dim=1, 84 | dim_size=num_voxels) 85 | point_centroids = torch.gather(voxel_centroids, dim=1, index=torch.unsqueeze(voxel_indices, dim=-1).repeat(1, 1, 3)) 86 | local_points_xyz = points_xyz - point_centroids 87 | 88 | result = { 89 | 'local_points_xyz': local_points_xyz, 90 | 'shifted_points_xyz': shifted_points_xyz, 91 | 'point_centroids': point_centroids, 92 | 'points_xyz': points_xyz, 93 | 'grid_offset': grid_offset, 94 | 'voxel_coords': voxel_coords, 95 | 'voxel_centers': voxel_centers, 96 | 'voxel_indices': voxel_indices, 97 | 'voxel_paddings': voxel_paddings, 98 | 'points_mask': 1 - voxel_paddings, 99 | 'num_voxels': num_voxels, 100 | 'grid_size': grid_size, 101 | 'voxel_xyz': voxel_xyz, 102 | 'voxel_size': voxel_size, 103 | 'voxel_point_count': voxel_point_count, 104 | 'points_per_voxel': points_per_voxel 105 | } 106 | 107 | 108 | return result 109 | -------------------------------------------------------------------------------- /tools/analysis_tools/benchmark.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import time 3 | import torch 4 | from mmcv import Config 5 | from mmcv.parallel import MMDataParallel 6 | from mmcv.runner import load_checkpoint 7 | 8 | from mmdet3d.datasets import build_dataloader, build_dataset 9 | from mmdet3d.models import build_model 10 | # from mmdet.core import wrap_fp16_model 11 | from mmcv.runner import wrap_fp16_model 12 | from tools.misc.fuse_conv_bn import fuse_module 13 | 14 | import os 15 | 16 | def parse_args(): 17 | parser = argparse.ArgumentParser(description='MMDet benchmark a model') 18 | parser.add_argument('config', help='test config file path') 19 | parser.add_argument('checkpoint', help='checkpoint file') 20 | parser.add_argument('--samples', default=2000, help='samples to benchmark') 21 | parser.add_argument( 22 | '--log-interval', default=50, help='interval of logging') 23 | parser.add_argument( 24 | '--fuse-conv-bn', 25 | action='store_true', 26 | help='Whether to fuse conv and bn, this will slightly increase' 27 | 'the inference speed') 28 | args = parser.parse_args() 29 | return args 30 | 31 | 32 | def main(): 33 | args = parse_args() 34 | 35 | cfg = Config.fromfile(args.config) 36 | # set 
cudnn_benchmark 37 | if cfg.get('cudnn_benchmark', False): 38 | torch.backends.cudnn.benchmark = True 39 | cfg.model.pretrained = None 40 | cfg.data.test.test_mode = True 41 | 42 | # import modules from plguin/xx, registry will be updated 43 | if hasattr(cfg, 'plugin'): 44 | if cfg.plugin: 45 | import importlib 46 | if hasattr(cfg, 'plugin_dir'): 47 | plugin_dir = cfg.plugin_dir 48 | _module_dir = os.path.dirname(plugin_dir) 49 | _module_dir = _module_dir.split('/') 50 | _module_path = _module_dir[0] 51 | 52 | for m in _module_dir[1:]: 53 | _module_path = _module_path + '.' + m 54 | print(_module_path) 55 | plg_lib = importlib.import_module(_module_path) 56 | else: 57 | # import dir is the dirpath for the config file 58 | _module_dir = os.path.dirname(args.config) 59 | _module_dir = _module_dir.split('/') 60 | _module_path = _module_dir[0] 61 | for m in _module_dir[1:]: 62 | _module_path = _module_path + '.' + m 63 | print(_module_path) 64 | plg_lib = importlib.import_module(_module_path) 65 | 66 | # build the dataloader 67 | # TODO: support multiple images per gpu (only minor changes are needed) 68 | dataset = build_dataset(cfg.data.test) 69 | data_loader = build_dataloader( 70 | dataset, 71 | samples_per_gpu=1, 72 | workers_per_gpu=cfg.data.workers_per_gpu, 73 | dist=False, 74 | shuffle=False) 75 | 76 | # build the model and load checkpoint 77 | cfg.model.train_cfg = None 78 | model = build_model(cfg.model, test_cfg=cfg.get('test_cfg')) 79 | # model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg')) 80 | fp16_cfg = cfg.get('fp16', None) 81 | if fp16_cfg is not None: 82 | wrap_fp16_model(model) 83 | load_checkpoint(model, args.checkpoint, map_location='cpu') 84 | if args.fuse_conv_bn: 85 | model = fuse_module(model) 86 | 87 | model = MMDataParallel(model, device_ids=[0]) 88 | 89 | model.eval() 90 | 91 | # the first several iterations may be very slow so skip them 92 | num_warmup = 20 93 | pure_inf_time = 0 94 | 95 | # benchmark with several samples and take the average 96 | for i, data in enumerate(data_loader): 97 | 98 | torch.cuda.synchronize() 99 | start_time = time.perf_counter() 100 | 101 | with torch.no_grad(): 102 | model(return_loss=False, rescale=True, **data) 103 | 104 | torch.cuda.synchronize() 105 | elapsed = time.perf_counter() - start_time 106 | 107 | if i >= num_warmup: 108 | pure_inf_time += elapsed 109 | if (i + 1) % args.log_interval == 0: 110 | fps = (i + 1 - num_warmup) / pure_inf_time 111 | print(f'Done image [{i + 1:<3}/ {args.samples}], ' 112 | f'fps: {fps:.1f} img / s') 113 | 114 | if (i + 1) == args.samples: 115 | pure_inf_time += elapsed 116 | fps = (i + 1 - num_warmup) / pure_inf_time 117 | print(f'Overall fps: {fps:.1f} img / s') 118 | break 119 | 120 | 121 | if __name__ == '__main__': 122 | main() 123 | -------------------------------------------------------------------------------- /plugin/datasets/base_dataset.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | import numpy as np 3 | import tempfile 4 | import warnings 5 | from os import path as osp 6 | from torch.utils.data import Dataset 7 | from mmdet3d.datasets.utils import extract_result_dict, get_loading_pipeline 8 | 9 | from mmdet.datasets import DATASETS 10 | 11 | from mmdet3d.datasets.pipelines import Compose 12 | import time 13 | 14 | 15 | @DATASETS.register_module() 16 | class BaseMapDataset(Dataset): 17 | """BaseClass for Map Dataset 18 | 19 | This is the base dataset of nuScenes and argoverse 2dataset. 
20 | 21 | Args: 22 | data_root (str): Path of dataset root. 23 | ann_file (str): Path of annotation file. 24 | pipeline (list[dict], optional): Pipeline used for data processing. 25 | Defaults to None. 26 | classes (tuple[str], optional): Classes used in the dataset. 27 | Defaults to None. 28 | test_mode (bool, optional): Whether the dataset is in test mode. 29 | Defaults to False. 30 | """ 31 | 32 | def __init__(self, 33 | ann_file, 34 | modality=dict( 35 | use_camera=True, 36 | use_lidar=False, 37 | use_radar=False, 38 | use_map=True, 39 | use_external=False, 40 | ), 41 | pipeline=None, 42 | cat2id=None, 43 | interval=1,): 44 | super().__init__() 45 | self.ann_file = ann_file 46 | self.modality = modality 47 | 48 | self.classes = list(cat2id.keys()) 49 | self.num_classes = len(self.classes) 50 | self.cat2id = cat2id 51 | self.interval = interval 52 | 53 | # 54 | self.load_annotations(self.ann_file) 55 | 56 | if pipeline is not None: 57 | self.pipeline = Compose(pipeline) 58 | else: 59 | self.pipeline = None 60 | 61 | self.flag = np.zeros(len(self), dtype=np.uint8) 62 | 63 | def load_annotations(self, ann_file): 64 | """Load annotations from ann_file. 65 | 66 | Args: 67 | ann_file (str): Path of the annotation file. 68 | 69 | Returns: 70 | list[dict]: List of annotations. 71 | """ 72 | print('collecting samples...') 73 | start_time = time.time() 74 | samples = mmcv.load(ann_file)[::self.interval] 75 | print(f'collected {len(samples)} samples in {(time.time() - start_time):.2f}s') 76 | self.samples = samples 77 | 78 | def get_sample(self, index): 79 | info = self.samples[index] 80 | 81 | input_dict = dict( 82 | info=info, 83 | ) 84 | 85 | return input_dict 86 | 87 | def prepare_data(self, index): 88 | """Prepare data for testing. 89 | 90 | Args: 91 | index (int): Index for accessing the target data. 92 | 93 | Returns: 94 | dict: Testing data dict of the corresponding index. 95 | """ 96 | input_dict = self.get_sample(index) 97 | example = self.pipeline(input_dict) 98 | return example 99 | 100 | def format_results(self, 101 | outputs, 102 | pklfile_prefix=None, 103 | submission_prefix=None): 104 | """Format the results to pkl file. 105 | 106 | Args: 107 | outputs (list[dict]): Testing results of the dataset. 108 | pklfile_prefix (str | None): The prefix of pkl files. It includes 109 | the file path and the prefix of filename, e.g., "a/b/prefix". 110 | If not specified, a temp file will be created. Default: None. 111 | 112 | Returns: 113 | tuple: (outputs, tmp_dir), outputs is the detection results, \ 114 | tmp_dir is the temporal directory created for saving json \ 115 | files when ``jsonfile_prefix`` is not specified. 116 | """ 117 | if pklfile_prefix is None: 118 | tmp_dir = tempfile.TemporaryDirectory() 119 | pklfile_prefix = osp.join(tmp_dir.name, 'results') 120 | out = f'{pklfile_prefix}.pkl' 121 | mmcv.dump(outputs, out) 122 | return outputs, tmp_dir 123 | 124 | def evaluate(self, 125 | results, 126 | logger=None, 127 | show=True, 128 | **kwargs): 129 | """Evaluate. 130 | 131 | Evaluation in indoor protocol. 132 | 133 | Args: 134 | results (list[dict]): List of results. 135 | 136 | Returns: 137 | dict: Evaluation results. 138 | """ 139 | if show: 140 | print('show results') # or call another function 141 | 142 | ret_dict = {} 143 | return ret_dict 144 | 145 | def __len__(self): 146 | """Return the length of data infos. 147 | 148 | Returns: 149 | int: Length of data infos. 
150 | """ 151 | return len(self.samples) 152 | 153 | def _rand_another(self, idx): 154 | """Randomly get another item. 155 | 156 | Returns: 157 | int: Another index of item. 158 | """ 159 | return np.random.choice(len(self)) 160 | 161 | def __getitem__(self, idx): 162 | """Get item from infos according to the given index. 163 | 164 | Returns: 165 | dict: Data dictionary of the corresponding index. 166 | """ 167 | data = self.prepare_data(idx) 168 | return data 169 | -------------------------------------------------------------------------------- /plugin/models/mapers/base_mapper.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | import torch.nn as nn 4 | from mmcv.runner import auto_fp16 5 | from mmcv.utils import print_log 6 | 7 | from mmdet.utils import get_root_logger 8 | 9 | 10 | class BaseMapper(nn.Module, metaclass=ABCMeta): 11 | """Base class for mappers.""" 12 | 13 | def __init__(self): 14 | super(BaseMapper, self).__init__() 15 | self.fp16_enabled = False 16 | 17 | @property 18 | def with_neck(self): 19 | """bool: whether the detector has a neck""" 20 | return hasattr(self, 'neck') and self.neck is not None 21 | 22 | # TODO: these properties need to be carefully handled 23 | # for both single stage & two stage detectors 24 | @property 25 | def with_shared_head(self): 26 | """bool: whether the detector has a shared head in the RoI Head""" 27 | return hasattr(self, 'roi_head') and self.roi_head.with_shared_head 28 | 29 | @property 30 | def with_bbox(self): 31 | """bool: whether the detector has a bbox head""" 32 | return ((hasattr(self, 'roi_head') and self.roi_head.with_bbox) 33 | or (hasattr(self, 'bbox_head') and self.bbox_head is not None)) 34 | 35 | @property 36 | def with_mask(self): 37 | """bool: whether the detector has a mask head""" 38 | return ((hasattr(self, 'roi_head') and self.roi_head.with_mask) 39 | or (hasattr(self, 'mask_head') and self.mask_head is not None)) 40 | 41 | #@abstractmethod 42 | def extract_feat(self, imgs): 43 | """Extract features from images.""" 44 | pass 45 | 46 | def forward_train(self, *args, **kwargs): 47 | pass 48 | 49 | #@abstractmethod 50 | def simple_test(self, img, img_metas, **kwargs): 51 | pass 52 | 53 | #@abstractmethod 54 | def aug_test(self, imgs, img_metas, **kwargs): 55 | """Test function with test time augmentation.""" 56 | pass 57 | 58 | def init_weights(self, pretrained=None): 59 | """Initialize the weights in detector. 60 | 61 | Args: 62 | pretrained (str, optional): Path to pre-trained weights. 63 | Defaults to None. 64 | """ 65 | if pretrained is not None: 66 | logger = get_root_logger() 67 | print_log(f'load model from: {pretrained}', logger=logger) 68 | 69 | def forward_test(self, *args, **kwargs): 70 | """Forward pass at test time. 71 | 72 | Test-time augmentation is not supported yet, so this simply falls 73 | back to :func:`simple_test`. 74 | """ 75 | return self.simple_test(*args, **kwargs) 76 | 77 | 78 | # @auto_fp16(apply_to=('img', )) 79 | def forward(self, *args, return_loss=True, **kwargs): 80 | """Calls either :func:`forward_train` or :func:`forward_test` depending 81 | on whether ``return_loss`` is ``True``. 82 | 83 | Note this setting will change the expected inputs. When 84 | ``return_loss=True``, img and img_meta are single-nested (i.e. Tensor 85 | and List[dict]), and when ``return_loss=False``, img and img_meta 86 | should be double nested (i.e. List[Tensor], List[List[dict]]), with 87 | the outer list indicating test time augmentations.
88 | """ 89 | 90 | if return_loss: 91 | return self.forward_train(*args, **kwargs) 92 | else: 93 | kwargs.pop('rescale') 94 | return self.forward_test(*args, **kwargs) 95 | 96 | def train_step(self, data_dict, optimizer): 97 | """The iteration step during training. 98 | 99 | This method defines an iteration step during training, except for the 100 | back propagation and optimizer updating, which are done in an optimizer 101 | hook. Note that in some complicated cases or models, the whole process 102 | including back propagation and optimizer updating is also defined in 103 | this method, such as GAN. 104 | 105 | Args: 106 | data_dict (dict): The output of dataloader. 107 | optimizer (:obj:`torch.optim.Optimizer` | dict): The optimizer of 108 | runner is passed to ``train_step()``. This argument is unused 109 | and reserved. 110 | 111 | Returns: 112 | dict: It should contain at least 3 keys: ``loss``, ``log_vars``, \ 113 | ``num_samples``. 114 | 115 | - ``loss`` is a tensor for back propagation, which can be a \ 116 | weighted sum of multiple losses. 117 | - ``log_vars`` contains all the variables to be sent to the 118 | logger. 119 | - ``num_samples`` indicates the batch size (when the model is \ 120 | DDP, it means the batch size on each GPU), which is used for \ 121 | averaging the logs. 122 | """ 123 | loss, log_vars, num_samples = self(**data_dict) 124 | 125 | outputs = dict( 126 | loss=loss, log_vars=log_vars, num_samples=num_samples) 127 | 128 | return outputs 129 | 130 | def val_step(self, data, optimizer): 131 | """The iteration step during validation. 132 | 133 | This method shares the same signature as :func:`train_step`, but used 134 | during val epochs. Note that the evaluation after training epochs is 135 | not implemented with this method, but an evaluation hook. 136 | """ 137 | loss, log_vars, num_samples = self(**data) 138 | 139 | outputs = dict( 140 | loss=loss, log_vars=log_vars, num_samples=num_samples) 141 | 142 | return outputs 143 | 144 | def show_result(self, 145 | **kwargs): 146 | img = None 147 | return img -------------------------------------------------------------------------------- /tools/model_converters/convert_votenet_checkpoints.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import tempfile 3 | import torch 4 | from mmcv import Config 5 | from mmcv.runner import load_state_dict 6 | 7 | from mmdet3d.models import build_detector 8 | 9 | 10 | def parse_args(): 11 | parser = argparse.ArgumentParser( 12 | description='MMDet3D upgrade model version(before v0.6.0) of VoteNet') 13 | parser.add_argument('checkpoint', help='checkpoint file') 14 | parser.add_argument('--out', help='path of the output checkpoint file') 15 | args = parser.parse_args() 16 | return args 17 | 18 | 19 | def parse_config(config_strings): 20 | """Parse config from strings. 21 | 22 | Args: 23 | config_strings (string): strings of model config. 
24 | 25 | Returns: 26 | Config: model config 27 | """ 28 | temp_file = tempfile.NamedTemporaryFile() 29 | config_path = f'{temp_file.name}.py' 30 | with open(config_path, 'w') as f: 31 | f.write(config_strings) 32 | 33 | config = Config.fromfile(config_path) 34 | 35 | # Update backbone config 36 | if 'pool_mod' in config.model.backbone: 37 | config.model.backbone.pop('pool_mod') 38 | 39 | if 'sa_cfg' not in config.model.backbone: 40 | config.model.backbone['sa_cfg'] = dict( 41 | type='PointSAModule', 42 | pool_mod='max', 43 | use_xyz=True, 44 | normalize_xyz=True) 45 | 46 | if 'type' not in config.model.bbox_head.vote_aggregation_cfg: 47 | config.model.bbox_head.vote_aggregation_cfg['type'] = 'PointSAModule' 48 | 49 | # Update bbox_head config 50 | if 'pred_layer_cfg' not in config.model.bbox_head: 51 | config.model.bbox_head['pred_layer_cfg'] = dict( 52 | in_channels=128, shared_conv_channels=(128, 128), bias=True) 53 | 54 | if 'feat_channels' in config.model.bbox_head: 55 | config.model.bbox_head.pop('feat_channels') 56 | 57 | if 'vote_moudule_cfg' in config.model.bbox_head: 58 | config.model.bbox_head['vote_module_cfg'] = config.model.bbox_head.pop( 59 | 'vote_moudule_cfg') 60 | 61 | if config.model.bbox_head.vote_aggregation_cfg.use_xyz: 62 | config.model.bbox_head.vote_aggregation_cfg.mlp_channels[0] -= 3 63 | 64 | temp_file.close() 65 | 66 | return config 67 | 68 | 69 | def main(): 70 | """Convert keys in checkpoints for VoteNet. 71 | 72 | There can be some breaking changes during the development of mmdetection3d, 73 | and this tool is used for upgrading checkpoints trained with old versions 74 | (before v0.6.0) to the latest one. 75 | """ 76 | args = parse_args() 77 | checkpoint = torch.load(args.checkpoint) 78 | cfg = parse_config(checkpoint['meta']['config']) 79 | # Build the model and load checkpoint 80 | model = build_detector( 81 | cfg.model, 82 | train_cfg=cfg.get('train_cfg'), 83 | test_cfg=cfg.get('test_cfg')) 84 | orig_ckpt = checkpoint['state_dict'] 85 | converted_ckpt = orig_ckpt.copy() 86 | 87 | if cfg['dataset_type'] == 'ScanNetDataset': 88 | NUM_CLASSES = 18 89 | elif cfg['dataset_type'] == 'SUNRGBDDataset': 90 | NUM_CLASSES = 10 91 | else: 92 | raise NotImplementedError 93 | 94 | RENAME_PREFIX = { 95 | 'bbox_head.conv_pred.0': 'bbox_head.conv_pred.shared_convs.layer0', 96 | 'bbox_head.conv_pred.1': 'bbox_head.conv_pred.shared_convs.layer1' 97 | } 98 | 99 | DEL_KEYS = [ 100 | 'bbox_head.conv_pred.0.bn.num_batches_tracked', 101 | 'bbox_head.conv_pred.1.bn.num_batches_tracked' 102 | ] 103 | 104 | EXTRACT_KEYS = { 105 | 'bbox_head.conv_pred.conv_cls.weight': 106 | ('bbox_head.conv_pred.conv_out.weight', [(0, 2), (-NUM_CLASSES, -1)]), 107 | 'bbox_head.conv_pred.conv_cls.bias': 108 | ('bbox_head.conv_pred.conv_out.bias', [(0, 2), (-NUM_CLASSES, -1)]), 109 | 'bbox_head.conv_pred.conv_reg.weight': 110 | ('bbox_head.conv_pred.conv_out.weight', [(2, -NUM_CLASSES)]), 111 | 'bbox_head.conv_pred.conv_reg.bias': 112 | ('bbox_head.conv_pred.conv_out.bias', [(2, -NUM_CLASSES)]) 113 | } 114 | 115 | # Delete some useless keys 116 | for key in DEL_KEYS: 117 | converted_ckpt.pop(key) 118 | 119 | # Rename keys with specific prefix 120 | RENAME_KEYS = dict() 121 | for old_key in converted_ckpt.keys(): 122 | for rename_prefix in RENAME_PREFIX.keys(): 123 | if rename_prefix in old_key: 124 | new_key = old_key.replace(rename_prefix, 125 | RENAME_PREFIX[rename_prefix]) 126 | RENAME_KEYS[new_key] = old_key 127 | for new_key, old_key in RENAME_KEYS.items(): 128 | converted_ckpt[new_key] = 
converted_ckpt.pop(old_key) 129 | 130 | # Extract weights and rename the keys 131 | for new_key, (old_key, indices) in EXTRACT_KEYS.items(): 132 | cur_layers = orig_ckpt[old_key] 133 | converted_layers = [] 134 | for (start, end) in indices: 135 | if end != -1: 136 | converted_layers.append(cur_layers[start:end]) 137 | else: 138 | converted_layers.append(cur_layers[start:]) 139 | converted_layers = torch.cat(converted_layers, 0) 140 | converted_ckpt[new_key] = converted_layers 141 | if old_key in converted_ckpt.keys(): 142 | converted_ckpt.pop(old_key) 143 | 144 | # Check the converted checkpoint by loading to the model 145 | load_state_dict(model, converted_ckpt, strict=True) 146 | checkpoint['state_dict'] = converted_ckpt 147 | torch.save(checkpoint, args.out) 148 | 149 | 150 | if __name__ == '__main__': 151 | main() 152 | -------------------------------------------------------------------------------- /plugin/models/losses/detr_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn as nn 3 | from torch.nn import functional as F 4 | from mmdet.models.losses import l1_loss 5 | from mmdet.models.losses.utils import weighted_loss 6 | import mmcv 7 | 8 | from mmdet.models.builder import LOSSES 9 | 10 | 11 | @mmcv.jit(derivate=True, coderize=True) 12 | @weighted_loss 13 | def smooth_l1_loss(pred, target, beta=1.0): 14 | """Smooth L1 loss. 15 | Args: 16 | pred (torch.Tensor): The prediction. 17 | target (torch.Tensor): The learning target of the prediction. 18 | beta (float, optional): The threshold in the piecewise function. 19 | Defaults to 1.0. 20 | Returns: 21 | torch.Tensor: Calculated loss 22 | """ 23 | assert beta > 0 24 | if target.numel() == 0: 25 | return pred.sum() * 0 26 | 27 | assert pred.size() == target.size() 28 | diff = torch.abs(pred - target) 29 | loss = torch.where(diff < beta, 0.5 * diff * diff / beta, 30 | diff - 0.5 * beta) 31 | 32 | return loss 33 | 34 | 35 | @LOSSES.register_module() 36 | class LinesLoss(nn.Module): 37 | 38 | def __init__(self, reduction='mean', loss_weight=1.0, beta=0.5): 39 | """ 40 | L1 loss. The same as the smooth L1 loss 41 | Args: 42 | reduction (str, optional): The method to reduce the loss. 43 | Options are "none", "mean" and "sum". 44 | loss_weight (float, optional): The weight of loss. 45 | """ 46 | 47 | super(LinesLoss, self).__init__() 48 | self.reduction = reduction 49 | self.loss_weight = loss_weight 50 | self.beta = beta 51 | 52 | def forward(self, 53 | pred, 54 | target, 55 | weight=None, 56 | avg_factor=None, 57 | reduction_override=None): 58 | """Forward function. 59 | 60 | Args: 61 | pred (torch.Tensor): The prediction. 62 | shape: [bs, ...] 63 | target (torch.Tensor): The learning target of the prediction. 64 | shape: [bs, ...] 65 | weight (torch.Tensor, optional): The weight of loss for each 66 | prediction. Defaults to None. 67 | it's useful when the predictions are not all valid. 68 | avg_factor (int, optional): Average factor that is used to average 69 | the loss. Defaults to None. 70 | reduction_override (str, optional): The reduction method used to 71 | override the original reduction method of the loss. 72 | Defaults to None. 
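Returns:
    torch.Tensor: the smooth-L1 loss between ``pred`` and ``target``,
        reduced according to ``reduction`` and scaled by ``self.loss_weight``
        (as implemented below).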
73 | """ 74 | assert reduction_override in (None, 'none', 'mean', 'sum') 75 | reduction = ( 76 | reduction_override if reduction_override else self.reduction) 77 | 78 | loss = smooth_l1_loss( 79 | pred, target, weight, reduction=reduction, avg_factor=avg_factor, beta=self.beta) 80 | 81 | return loss*self.loss_weight 82 | 83 | 84 | @mmcv.jit(derivate=True, coderize=True) 85 | @weighted_loss 86 | def bce(pred, label, class_weight=None): 87 | """ 88 | pred: B,nquery,npts 89 | label: B,nquery,npts 90 | """ 91 | 92 | if label.numel() == 0: 93 | return pred.sum() * 0 94 | assert pred.size() == label.size() 95 | 96 | loss = F.binary_cross_entropy_with_logits( 97 | pred, label.float(), pos_weight=class_weight, reduction='none') 98 | 99 | return loss 100 | 101 | 102 | @LOSSES.register_module() 103 | class MasksLoss(nn.Module): 104 | 105 | def __init__(self, reduction='mean', loss_weight=1.0): 106 | super(MasksLoss, self).__init__() 107 | self.reduction = reduction 108 | self.loss_weight = loss_weight 109 | 110 | def forward(self, 111 | pred, 112 | target, 113 | weight=None, 114 | avg_factor=None, 115 | reduction_override=None): 116 | """Forward function. 117 | 118 | Args: 119 | xxx 120 | """ 121 | assert reduction_override in (None, 'none', 'mean', 'sum') 122 | reduction = ( 123 | reduction_override if reduction_override else self.reduction) 124 | 125 | loss = bce(pred, target, weight, reduction=reduction, 126 | avg_factor=avg_factor) 127 | 128 | return loss*self.loss_weight 129 | 130 | @mmcv.jit(derivate=True, coderize=True) 131 | @weighted_loss 132 | def ce(pred, label, class_weight=None): 133 | """ 134 | pred: B*nquery,npts 135 | label: B*nquery, 136 | """ 137 | 138 | if label.numel() == 0: 139 | return pred.sum() * 0 140 | 141 | loss = F.cross_entropy( 142 | pred, label, weight=class_weight, reduction='none') 143 | 144 | return loss 145 | 146 | 147 | @LOSSES.register_module() 148 | class LenLoss(nn.Module): 149 | 150 | def __init__(self, reduction='mean', loss_weight=1.0): 151 | super(LenLoss, self).__init__() 152 | self.reduction = reduction 153 | self.loss_weight = loss_weight 154 | 155 | def forward(self, 156 | pred, 157 | target, 158 | weight=None, 159 | avg_factor=None, 160 | reduction_override=None): 161 | """Forward function. 
162 | 163 | Args: 164 | xxx 165 | """ 166 | assert reduction_override in (None, 'none', 'mean', 'sum') 167 | reduction = ( 168 | reduction_override if reduction_override else self.reduction) 169 | 170 | loss = ce(pred, target, weight, reduction=reduction, 171 | avg_factor=avg_factor) 172 | 173 | return loss*self.loss_weight 174 | -------------------------------------------------------------------------------- /tools/data_converter/argoverse_converter.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | from multiprocessing import Pool 3 | import multiprocessing 4 | from random import sample 5 | import time 6 | import mmcv 7 | import logging 8 | from pathlib import Path 9 | from os import path as osp 10 | import os 11 | from av2.datasets.sensor.av2_sensor_dataloader import AV2SensorDataLoader 12 | from tqdm import tqdm 13 | import argparse 14 | 15 | CAM_NAMES = ['ring_front_center', 'ring_front_right', 'ring_front_left', 16 | 'ring_rear_right','ring_rear_left', 'ring_side_right', 'ring_side_left', 17 | # 'stereo_front_left', 'stereo_front_right', 18 | ] 19 | 20 | FAIL_LOGS = [ 21 | '01bb304d-7bd8-35f8-bbef-7086b688e35e', 22 | '453e5558-6363-38e3-bf9b-42b5ba0a6f1d' 23 | ] 24 | 25 | def parse_args(): 26 | parser = argparse.ArgumentParser(description='Data converter arg parser') 27 | parser.add_argument( 28 | '--data-root', 29 | type=str, 30 | help='specify the root path of dataset') 31 | parser.add_argument( 32 | '--nproc', 33 | type=int, 34 | default=64, 35 | required=False, 36 | help='workers to process data') 37 | args = parser.parse_args() 38 | return args 39 | 40 | def create_av2_infos_mp(root_path, 41 | info_prefix, 42 | dest_path=None, 43 | split='train', 44 | num_multithread=64): 45 | """Create info file of av2 dataset. 46 | 47 | Given the raw data, generate its related info file in pkl format. 48 | 49 | Args: 50 | root_path (str): Path of the data root. 51 | info_prefix (str): Prefix of the info file to be generated. 52 | dest_path (str): Path to store generated file, default to root_path 53 | split (str): Split of the data. 
54 | Default: 'train' 55 | """ 56 | root_path = osp.join(root_path, split) 57 | if dest_path is None: 58 | dest_path = root_path 59 | 60 | loader = AV2SensorDataLoader(Path(root_path), Path(root_path)) 61 | log_ids = list(loader.get_log_ids()) 62 | for l in FAIL_LOGS: 63 | if l in log_ids: 64 | log_ids.remove(l) 65 | 66 | print('collecting samples...') 67 | start_time = time.time() 68 | print('num cpu:', multiprocessing.cpu_count()) 69 | print(f'using {num_multithread} threads') 70 | 71 | # to supress logging from av2.utils.synchronization_database 72 | sdb_logger = logging.getLogger('av2.utils.synchronization_database') 73 | prev_level = sdb_logger.level 74 | sdb_logger.setLevel(logging.CRITICAL) 75 | 76 | # FIXME: need to check the order 77 | pool = Pool(num_multithread) 78 | fn = partial(get_data_from_logid, loader=loader, data_root=root_path) 79 | rt = pool.map_async(fn, log_ids) 80 | pool.close() 81 | pool.join() 82 | results = rt.get() 83 | 84 | samples = [] 85 | discarded = 0 86 | sample_idx = 0 87 | for _samples, _discarded in results: 88 | for i in range(len(_samples)): 89 | _samples[i]['sample_idx'] = sample_idx 90 | sample_idx += 1 91 | samples += _samples 92 | discarded += _discarded 93 | 94 | sdb_logger.setLevel(prev_level) 95 | print(f'{len(samples)} available samples, {discarded} samples discarded') 96 | 97 | id2map = {} 98 | for log_id in log_ids: 99 | map_path_dir = osp.join(root_path, log_id, 'map') 100 | map_fname = str(list(Path(map_path_dir).glob("log_map_archive_*.json"))[0]) 101 | map_fname = osp.join(map_path_dir, map_fname) 102 | id2map[log_id] = map_fname 103 | 104 | print('collected in {}s'.format(time.time()-start_time)) 105 | infos = dict(samples=samples, id2map=id2map) 106 | 107 | info_path = osp.join(dest_path, 108 | '{}_map_infos_{}.pkl'.format(info_prefix, split)) 109 | print(f'saving results to {info_path}') 110 | mmcv.dump(infos, info_path) 111 | 112 | def get_data_from_logid(log_id, loader: AV2SensorDataLoader, data_root): 113 | samples = [] 114 | discarded = 0 115 | 116 | # We use lidar timestamps to query all sensors. 
117 | # The frequency is 10Hz 118 | cam_timestamps = loader._sdb.per_log_lidar_timestamps_index[log_id] 119 | for ts in cam_timestamps: 120 | cam_ring_fpath = [loader.get_closest_img_fpath( 121 | log_id, cam_name, ts 122 | ) for cam_name in CAM_NAMES] 123 | lidar_fpath = loader.get_closest_lidar_fpath(log_id, ts) 124 | 125 | # If bad sensor synchronization, discard the sample 126 | if None in cam_ring_fpath or lidar_fpath is None: 127 | discarded += 1 128 | continue 129 | 130 | cams = {} 131 | for i, cam_name in enumerate(CAM_NAMES): 132 | pinhole_cam = loader.get_log_pinhole_camera(log_id, cam_name) 133 | cams[cam_name] = dict( 134 | img_fpath=str(cam_ring_fpath[i]), 135 | intrinsics=pinhole_cam.intrinsics.K, 136 | extrinsics=pinhole_cam.extrinsics, 137 | ) 138 | 139 | city_SE3_ego = loader.get_city_SE3_ego(log_id, int(ts)) 140 | e2g_translation = city_SE3_ego.translation 141 | e2g_rotation = city_SE3_ego.rotation 142 | 143 | samples.append(dict( 144 | e2g_translation=e2g_translation, 145 | e2g_rotation=e2g_rotation, 146 | cams=cams, 147 | lidar_fpath=str(lidar_fpath), 148 | # map_fpath=map_fname, 149 | timestamp=str(ts), 150 | log_id=log_id)) 151 | 152 | return samples, discarded 153 | 154 | 155 | if __name__ == '__main__': 156 | args = parse_args() 157 | for name in ['train', 'val', 'test']: 158 | create_av2_infos_mp( 159 | root_path=args.data_root, 160 | split=name, 161 | info_prefix='av2', 162 | dest_path=args.data_root,) -------------------------------------------------------------------------------- /plugin/models/assigner/assigner.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from mmdet.core.bbox.builder import BBOX_ASSIGNERS 4 | from mmdet.core.bbox.assigners import AssignResult 5 | from mmdet.core.bbox.assigners import BaseAssigner 6 | from mmdet.core.bbox.match_costs import build_match_cost 7 | 8 | try: 9 | from scipy.optimize import linear_sum_assignment 10 | except ImportError: 11 | linear_sum_assignment = None 12 | 13 | 14 | @BBOX_ASSIGNERS.register_module() 15 | class HungarianLinesAssigner(BaseAssigner): 16 | """ 17 | Computes one-to-one matching between predictions and ground truth. 18 | This class computes an assignment between the targets and the predictions 19 | based on the costs. The costs are weighted sum of three components: 20 | classification cost and regression L1 cost. The 21 | targets don't include the no_object, so generally there are more 22 | predictions than targets. After the one-to-one matching, the un-matched 23 | are treated as backgrounds. Thus each query prediction will be assigned 24 | with `0` or a positive integer indicating the ground truth index: 25 | - 0: negative sample, no assigned gt 26 | - positive integer: positive sample, index (1-based) of assigned gt 27 | Args: 28 | cls_weight (int | float, optional): The scale factor for classification 29 | cost. Default 1.0. 30 | bbox_weight (int | float, optional): The scale factor for regression 31 | L1 cost. Default 1.0. 32 | """ 33 | 34 | def __init__(self, 35 | cost=dict( 36 | type='MapQueriesCost', 37 | cls_cost=dict(type='ClassificationCost', weight=1.), 38 | reg_cost=dict(type='LinesCost', weight=1.0), 39 | ), 40 | pc_range=None, 41 | **kwargs): 42 | 43 | self.pc_range = pc_range 44 | self.cost = build_match_cost(cost) 45 | 46 | def assign(self, 47 | preds: dict, 48 | gts: dict, 49 | gt_bboxes_ignore=None, 50 | eps=1e-7): 51 | """ 52 | Computes one-to-one matching based on the weighted costs. 
53 | This method assigns each query prediction to a ground truth or 54 | background. In `assigned_gt_inds`, -1 means don't care, 55 | 0 means negative sample, and a positive number is the index (1-based) 56 | of the assigned gt. 57 | The assignment is done in the following steps, and the order matters. 58 | 1. assign every prediction to -1 59 | 2. compute the weighted costs 60 | 3. do Hungarian matching on CPU based on the costs 61 | 4. assign all to 0 (background) first, then for each matched pair 62 | between predictions and gts, treat this prediction as foreground 63 | and assign the corresponding gt index (plus 1) to it. 64 | Args: 65 | preds (dict): prediction dict. 'lines' holds the predicted 66 | normalized lines, shape [num_query, num_points, 2]; its other 67 | entries (e.g. classification logits, [num_query, num_class]) 68 | are consumed by the configured match costs. 69 | gts (dict): ground-truth dict. 'lines' holds the ground-truth 70 | lines, shape [num_gt, num_points, 2]; 'labels' holds the class 71 | index of each ground-truth line, shape (num_gt,). 72 | 73 | Note: no velocity term enters the line regression cost, unlike 74 | the bbox case. 75 | 76 | gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are 77 | labelled as `ignored`. Default None. 78 | eps (int | float, optional): A value added to the denominator for 79 | numerical stability. Default 1e-7. 80 | Returns: 81 | :obj:`AssignResult`: The assigned result. 82 | """ 83 | assert gt_bboxes_ignore is None, \ 84 | 'Only case when gt_bboxes_ignore is None is supported.' 85 | num_gts, num_lines = gts['lines'].size(0), preds['lines'].size(0) 86 | 87 | # 1. assign -1 by default 88 | assigned_gt_inds = \ 89 | preds['lines'].new_full((num_lines,), -1, dtype=torch.long) 90 | assigned_labels = \ 91 | preds['lines'].new_full((num_lines,), -1, dtype=torch.long) 92 | 93 | if num_gts == 0 or num_lines == 0: 94 | # No ground truth or boxes, return empty assignment 95 | if num_gts == 0: 96 | # No ground truth, assign all to background 97 | assigned_gt_inds[:] = 0 98 | return AssignResult( 99 | num_gts, assigned_gt_inds, None, labels=assigned_labels) 100 | 101 | # 2. compute the weighted costs 102 | cost = self.cost(preds, gts) 103 | 104 | # 3. do Hungarian matching on CPU using linear_sum_assignment 105 | cost = cost.detach().cpu().numpy() 106 | if linear_sum_assignment is None: 107 | raise ImportError('Please run "pip install scipy" ' 108 | 'to install scipy first.') 109 | try: 110 | matched_row_inds, matched_col_inds = linear_sum_assignment(cost) 111 | except ValueError as err: 112 | raise ValueError('linear_sum_assignment failed, cost max {}, min {}'.format( 113 | cost.max(), cost.min())) from err 114 | matched_row_inds = torch.from_numpy(matched_row_inds).to( 115 | preds['lines'].device) 116 | matched_col_inds = torch.from_numpy(matched_col_inds).to( 117 | preds['lines'].device) 118 | 119 | # 4.
assign backgrounds and foregrounds 120 | # assign all indices to backgrounds first 121 | assigned_gt_inds[:] = 0 122 | # assign foregrounds based on matching results 123 | assigned_gt_inds[matched_row_inds] = matched_col_inds + 1 124 | assigned_labels[matched_row_inds] = gts['labels'][matched_col_inds] 125 | return AssignResult( 126 | num_gts, assigned_gt_inds, None, labels=assigned_labels) -------------------------------------------------------------------------------- /plugin/datasets/evaluation/precision_recall/tgfg.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | import numpy as np 3 | import similaritymeasures 4 | from mmdet.core.evaluation.bbox_overlaps import bbox_overlaps 5 | from scipy.spatial import distance 6 | from shapely.geometry import CAP_STYLE, JOIN_STYLE, LineString, Polygon 7 | from shapely.strtree import STRtree 8 | 9 | 10 | def tpfp_gen(gen_lines, 11 | gt_lines, 12 | threshold=0.5, 13 | coord_dim=2, 14 | metric='POR'): 15 | """Check whether generated map elements are true or false positives. 16 | 17 | Args: 18 | gen_lines (ndarray): generated lines of this sample, of shape 19 | (m, num_points * coord_dim + 1), where the last column is 20 | the confidence score. 21 | gt_lines (ndarray): GT lines of this sample, of shape 22 | (n, num_points * coord_dim). 23 | threshold (float): threshold on the polyline score to be 24 | considered as matched. Default: 0.5. 25 | coord_dim (int): dimension of each point (2 or 3). Default: 2. 26 | metric (str): line matching metric, one of 'POR', 'frechet', 27 | 'chamfer' or 'chamfer_v2'. Default: 'POR'. 28 | 29 | Returns: 30 | tuple[np.ndarray]: (tp, fp) whose elements are 0 and 1. Each 31 | array has shape (m,). 32 | """ 33 | 34 | num_gens = gen_lines.shape[0] 35 | num_gts = gt_lines.shape[0] 36 | 37 | # tp and fp 38 | tp = np.zeros((num_gens), dtype=np.float32) 39 | fp = np.zeros((num_gens), dtype=np.float32) 40 | 41 | # if there are no gt lines in this sample, then all generated lines 42 | # are false positives 43 | if num_gts == 0: 44 | fp[...] = 1 45 | return tp, fp 46 | 47 | if num_gens == 0: 48 | return tp, fp 49 | 50 | gen_scores = gen_lines[:,-1] # (num_gens,) 51 | # score matrix of shape (num_gens, num_gts) 52 | matrix = polyline_score( 53 | gen_lines[:,:-1].reshape(num_gens,-1,coord_dim), 54 | gt_lines.reshape(num_gts,-1,coord_dim),linewidth=2.,metric=metric) 55 | # for each generated line, the max score over all gts 56 | matrix_max = matrix.max(axis=1) 57 | # for each generated line, which gt matches it best 58 | matrix_argmax = matrix.argmax(axis=1) 59 | # sort all generated lines in descending order by score 60 | sort_inds = np.argsort(-gen_scores) 61 | 62 | gt_covered = np.zeros(num_gts, dtype=bool) 63 | 64 | # tp = 0 and fp = 0 means this generated line is ignored 65 | for i in sort_inds: 66 | if matrix_max[i] >= threshold: 67 | matched_gt = matrix_argmax[i] 68 | if not gt_covered[matched_gt]: 69 | gt_covered[matched_gt] = True 70 | tp[i] = 1 71 | else: 72 | fp[i] = 1 73 | else: 74 | fp[i] = 1 75 | 76 | return tp, fp 77 | 78 | 79 | def polyline_score(pred_lines, gt_lines, linewidth=1., metric='POR'): 80 | ''' 81 | Score every (pred, gt) polyline pair, with each line buffered by `linewidth`. 82 | pred_lines: num_preds, npts, 2 83 | gt_lines: num_gts, npts, 2 84 | Returns a score matrix of shape (num_preds, num_gts). 85 | ''' 86 | positive_threshold = 1.
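# Summary of the scoring logic below: every polyline is dilated by `linewidth`
# into a polygon, and an STRtree over the dilated predictions is queried so that
# only pred/GT pairs whose buffers intersect are scored. 'POR' counts, in both
# directions, the points whose nearest point on the other line is closer than
# `positive_threshold`, takes the smaller of the two counts and normalizes by the
# (fixed) number of points per line; 'frechet', 'chamfer' and 'chamfer_v2' store
# negated distances so that larger is always better.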
87 | num_preds = len(pred_lines) 88 | num_gts = len(gt_lines) 89 | line_length = pred_lines.shape[1] 90 | 91 | # gt_lines = gt_lines + np.array((1.,1.)) 92 | 93 | pred_lines_shapely = \ 94 | [LineString(i).buffer(linewidth, 95 | cap_style=CAP_STYLE.flat, join_style=JOIN_STYLE.mitre) 96 | for i in pred_lines] 97 | gt_lines_shapely =\ 98 | [LineString(i).buffer(linewidth, 99 | cap_style=CAP_STYLE.flat, join_style=JOIN_STYLE.mitre) 100 | for i in gt_lines] 101 | 102 | # construct tree 103 | tree = STRtree(pred_lines_shapely) 104 | index_by_id = dict((id(pt), i) for i, pt in enumerate(pred_lines_shapely)) 105 | 106 | if metric=='POR': 107 | iou_matrix = np.zeros((num_preds, num_gts),dtype=np.float64) 108 | elif metric=='frechet': 109 | iou_matrix = np.full((num_preds, num_gts), -100.) 110 | elif metric=='chamfer': 111 | iou_matrix = np.full((num_preds, num_gts), -100.) 112 | elif metric=='chamfer_v2': 113 | iou_matrix = np.full((num_preds, num_gts), -100.) 114 | 115 | for i, pline in enumerate(gt_lines_shapely): 116 | 117 | for o in tree.query(pline): 118 | if o.intersects(pline): 119 | pred_id = index_by_id[id(o)] 120 | 121 | if metric=='POR': 122 | dist_mat = distance.cdist( 123 | pred_lines[pred_id], gt_lines[i], 'euclidean') 124 | 125 | valid_ab = (dist_mat.min(-1) < positive_threshold).sum() 126 | valid_ba = (dist_mat.min(-2) < positive_threshold).sum() 127 | 128 | iou_matrix[pred_id, i] = min(valid_ba,valid_ab) / line_length 129 | # iou_matrix[pred_id, i] = ((valid_ba+valid_ab)/2) / line_length 130 | # assert iou_matrix[pred_id, i] <= 1. and iou_matrix[pred_id, i] >= 0. 131 | elif metric=='frechet': 132 | fdistance_1 = \ 133 | -similaritymeasures.frechet_dist(pred_lines[pred_id], gt_lines[i]) 134 | fdistance_2 = \ 135 | -similaritymeasures.frechet_dist(pred_lines[pred_id][::-1], gt_lines[i]) 136 | fdistance = max(fdistance_1,fdistance_2) 137 | iou_matrix[pred_id, i] = fdistance 138 | 139 | elif metric=='chamfer': 140 | dist_mat = distance.cdist( 141 | pred_lines[pred_id], gt_lines[i], 'euclidean') 142 | 143 | valid_ab = dist_mat.min(-1).sum() 144 | valid_ba = dist_mat.min(-2).sum() 145 | 146 | iou_matrix[pred_id, i] = -(valid_ba+valid_ab)/(2*line_length) 147 | # if iou_matrix[pred_id, i] == 0: 148 | # import ipdb; ipdb.set_trace() 149 | elif metric=='chamfer_v2': 150 | dist_mat = distance.cdist( 151 | pred_lines[pred_id], gt_lines[i], 'euclidean') 152 | 153 | valid_ab = dist_mat.min(-1).sum() 154 | valid_ba = dist_mat.min(-2).sum() 155 | 156 | iou_matrix[pred_id, i] = -(valid_ba/pred_lines[pred_id].shape[0] 157 | +valid_ab/gt_lines[i].shape[0])/2 158 | 159 | 160 | return iou_matrix 161 | -------------------------------------------------------------------------------- /tools/data_converter/nuscenes_converter.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | from collections import OrderedDict 4 | from os import path as osp 5 | from typing import List, Tuple, Union 6 | 7 | import mmcv 8 | import numpy as np 9 | from IPython import embed 10 | from mmdet3d.core.bbox.box_np_ops import points_cam2img 11 | from mmdet3d.datasets import NuScenesDataset 12 | from nuscenes.eval.common.utils import quaternion_yaw 13 | from nuscenes.nuscenes import NuScenes 14 | from nuscenes.utils.geometry_utils import view_points 15 | from pyquaternion import Quaternion 16 | from shapely.geometry import MultiPoint, box 17 | 18 | nus_categories = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle', 19 | 'bicycle', 'motorcycle', 
'pedestrian', 'traffic_cone', 20 | 'barrier') 21 | 22 | nus_attributes = ('cycle.with_rider', 'cycle.without_rider', 23 | 'pedestrian.moving', 'pedestrian.standing', 24 | 'pedestrian.sitting_lying_down', 'vehicle.moving', 25 | 'vehicle.parked', 'vehicle.stopped', 'None') 26 | 27 | def parse_args(): 28 | parser = argparse.ArgumentParser(description='Data converter arg parser') 29 | parser.add_argument( 30 | '--data-root', 31 | type=str, 32 | help='specify the root path of dataset') 33 | 34 | parser.add_argument( 35 | '-v','--version', 36 | choices=['v1.0-mini', 'v1.0-trainval', 'v1.0-test'], 37 | default='v1.0-trainval') 38 | 39 | args = parser.parse_args() 40 | return args 41 | 42 | def create_nuscenes_infos_map(root_path, 43 | dest_path=None, 44 | info_prefix='nuscenes', 45 | version='v1.0-trainval',): 46 | """Create info file for map learning task on nuscene dataset. 47 | 48 | Given the raw data, generate its related info file in pkl format. 49 | 50 | Args: 51 | root_path (str): Path of the data root. 52 | info_prefix (str): Prefix of the info file to be generated. 53 | version (str): Version of the data. 54 | Default: 'v1.0-trainval' 55 | """ 56 | from nuscenes.nuscenes import NuScenes 57 | nusc = NuScenes(version=version, dataroot=root_path, verbose=True) 58 | from nuscenes.utils import splits 59 | assert version in ['v1.0-trainval', 'v1.0-test', 'v1.0-mini'] 60 | if version == 'v1.0-trainval': 61 | train_scenes = splits.train 62 | val_scenes = splits.val 63 | elif version == 'v1.0-test': 64 | train_scenes = splits.test 65 | val_scenes = [] 66 | else: 67 | train_scenes = splits.mini_train 68 | val_scenes = splits.mini_val 69 | 70 | test = 'test' in version 71 | if test: 72 | print('test scene: {}'.format(len(train_scenes))) 73 | else: 74 | print('train scene: {}, val scene: {}'.format( 75 | len(train_scenes), len(val_scenes))) 76 | 77 | train_samples, val_samples, test_samples = [], [], [] 78 | 79 | for sample in mmcv.track_iter_progress(nusc.sample): 80 | lidar_token = sample['data']['LIDAR_TOP'] 81 | sd_rec = nusc.get('sample_data', sample['data']['LIDAR_TOP']) 82 | cs_record = nusc.get('calibrated_sensor', 83 | sd_rec['calibrated_sensor_token']) 84 | pose_record = nusc.get('ego_pose', sd_rec['ego_pose_token']) 85 | lidar_path, boxes, _ = nusc.get_sample_data(lidar_token) 86 | 87 | mmcv.check_file_exist(lidar_path) 88 | scene_record = nusc.get('scene', sample['scene_token']) 89 | log_record = nusc.get('log', scene_record['log_token']) 90 | location = log_record['location'] 91 | scene_name = scene_record['name'] 92 | 93 | info = { 94 | 'lidar_path': lidar_path, 95 | 'token': sample['token'], 96 | 'cams': {}, 97 | 'lidar2ego_translation': cs_record['translation'], 98 | 'lidar2ego_rotation': cs_record['rotation'], 99 | 'e2g_translation': pose_record['translation'], 100 | 'e2g_rotation': pose_record['rotation'], 101 | 'timestamp': sample['timestamp'], 102 | 'location': location, 103 | 'scene_name': scene_name 104 | } 105 | 106 | # obtain 6 image's information per frame 107 | camera_types = [ 108 | 'CAM_FRONT', 109 | 'CAM_FRONT_RIGHT', 110 | 'CAM_FRONT_LEFT', 111 | 'CAM_BACK', 112 | 'CAM_BACK_LEFT', 113 | 'CAM_BACK_RIGHT', 114 | ] 115 | for cam in camera_types: 116 | cam_token = sample['data'][cam] 117 | sd_rec = nusc.get('sample_data', cam_token) 118 | cs_record = nusc.get('calibrated_sensor', sd_rec['calibrated_sensor_token']) 119 | 120 | cam2ego_rotation = Quaternion(cs_record['rotation']).rotation_matrix 121 | cam2ego_translation = np.array(cs_record['translation']) 122 | 123 | 
ego2cam_rotation = cam2ego_rotation.T 124 | ego2cam_translation = ego2cam_rotation.dot(-cam2ego_translation) 125 | 126 | transform_matrix = np.eye(4) #ego2cam 127 | transform_matrix[:3, :3] = ego2cam_rotation 128 | transform_matrix[:3, 3] = ego2cam_translation 129 | 130 | cam_info = dict( 131 | extrinsics=transform_matrix, # ego2cam 132 | intrinsics=cs_record['camera_intrinsic'], 133 | img_fpath=str(nusc.get_sample_data_path(sd_rec['token'])) 134 | ) 135 | info['cams'][cam] = cam_info 136 | 137 | if scene_name in train_scenes: 138 | train_samples.append(info) 139 | elif scene_name in val_scenes: 140 | val_samples.append(info) 141 | else: 142 | test_samples.append(info) 143 | 144 | if dest_path is None: 145 | dest_path = root_path 146 | 147 | if test: 148 | info_path = osp.join(dest_path, f'{info_prefix}_map_infos_test.pkl') 149 | print(f'saving test set to {info_path}') 150 | mmcv.dump(test_samples, info_path) 151 | 152 | else: 153 | # for training set 154 | info_path = osp.join(dest_path, f'{info_prefix}_map_infos_train.pkl') 155 | print(f'saving training set to {info_path}') 156 | mmcv.dump(train_samples, info_path) 157 | 158 | # for val set 159 | info_path = osp.join(dest_path, f'{info_prefix}_map_infos_val.pkl') 160 | print(f'saving validation set to {info_path}') 161 | mmcv.dump(val_samples, info_path) 162 | 163 | 164 | if __name__ == '__main__': 165 | args = parse_args() 166 | 167 | create_nuscenes_infos_map(root_path=args.data_root, version=args.version) 168 | -------------------------------------------------------------------------------- /tools/visualization/visualize_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Dict 3 | import numpy as np 4 | import mmcv 5 | from IPython import embed 6 | from mmcv import Config 7 | from mmdet3d.datasets import build_dataset 8 | from renderer import Renderer 9 | from av2.map.map_api import ArgoverseStaticMap 10 | from pathlib import Path 11 | from shapely.geometry import Polygon, box, MultiPolygon 12 | from shapely import affinity, ops 13 | import av2.rendering.vector as vector_plotting_utils 14 | import matplotlib.pyplot as plt 15 | 16 | CAM_NAMES_AV2 = ['ring_front_center', 'ring_front_right', 'ring_front_left', 17 | 'ring_rear_right','ring_rear_left', 'ring_side_right', 'ring_side_left', 18 | ] 19 | 20 | CAM_NAMES_NUSC = ['CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_FRONT_LEFT', 21 | 'CAM_BACK', 'CAM_BACK_LEFT', 'CAM_BACK_RIGHT', 22 | ] 23 | 24 | def import_plugin(cfg): 25 | import sys 26 | sys.path.append(os.path.abspath('.')) 27 | if hasattr(cfg, 'plugin'): 28 | if cfg.plugin: 29 | import importlib 30 | 31 | def import_path(plugin_dir): 32 | _module_dir = os.path.dirname(plugin_dir) 33 | _module_dir = _module_dir.split('/') 34 | _module_path = _module_dir[0] 35 | 36 | for m in _module_dir[1:]: 37 | _module_path = _module_path + '.' 
+ m 38 | print(_module_path) 39 | plg_lib = importlib.import_module(_module_path) 40 | 41 | plugin_dirs = cfg.plugin_dir 42 | if not isinstance(plugin_dirs, list): 43 | plugin_dirs = [plugin_dirs,] 44 | for plugin_dir in plugin_dirs: 45 | import_path(plugin_dir) 46 | 47 | def get_drivable_areas(data_root, split): 48 | data_root = os.path.join(data_root, split) 49 | logs = os.listdir(data_root) 50 | cities = {} 51 | for log in logs: 52 | map_dir = os.path.join(data_root, log, 'map') 53 | map_json = str(list(Path(map_dir).glob("log_map_archive_*.json"))[0]) 54 | city = map_json.split('____')[-1].split('_')[0] 55 | avm = ArgoverseStaticMap.from_json(Path(map_json)) 56 | 57 | for _, da in avm.vector_drivable_areas.items(): 58 | polygon_xyz = da.xyz[:, :2] 59 | polygon = Polygon(polygon_xyz) 60 | 61 | if city not in cities.keys(): 62 | cities[city] = [] 63 | cities[city].append(polygon) 64 | 65 | return cities 66 | 67 | def visualize_whole_city(data_root='/nvme/argoverse2/sensor/'): 68 | cities_train = get_drivable_areas(data_root, 'train') 69 | cities_val = get_drivable_areas(data_root, 'val') 70 | cities_test = get_drivable_areas(data_root, 'test') 71 | 72 | val_area1 = box(0, -2000, 1200, 5000) 73 | val_area2 = box(4700, -2000, 5200, 5000) 74 | val_area3 = box(7000, -2000, 9000, 5000) 75 | val_area = MultiPolygon([val_area1, val_area2, val_area3]) 76 | 77 | 78 | for city in cities_train.keys(): 79 | fig = plt.figure(figsize=(10, 10)) 80 | ax = fig.add_subplot() 81 | for p in cities_train[city]: 82 | vector_plotting_utils.plot_polygon_patch_mpl(np.array(p.exterior.coords), ax, color='r', alpha=0.4) 83 | for p in cities_val[city]: 84 | vector_plotting_utils.plot_polygon_patch_mpl(np.array(p.exterior.coords), ax, color='g', alpha=0.4) 85 | # for p in cities_test[city]: 86 | # vector_plotting_utils.plot_polygon_patch_mpl(np.array(p.exterior.coords), ax, color='g', alpha=0.5) 87 | 88 | plt.axis("equal") 89 | plt.tight_layout() 90 | plt.savefig(f"./vis/drivable_areas_{city}.jpg", dpi=500) 91 | 92 | city = 'PIT' 93 | fig = plt.figure(figsize=(10, 10)) 94 | ax = fig.add_subplot() 95 | cnt = 0 96 | for p in (cities_train[city] + cities_val[city]): 97 | if p.intersection(val_area).is_empty: 98 | cnt += 1 99 | vector_plotting_utils.plot_polygon_patch_mpl(np.array(p.exterior.coords), ax, color='r', alpha=0.4) 100 | else: 101 | vector_plotting_utils.plot_polygon_patch_mpl(np.array(p.exterior.coords), ax, color='g', alpha=0.4) 102 | 103 | print(f"{cnt}/{len(cities_train[city] + cities_val[city])}") 104 | plt.axis("equal") 105 | plt.tight_layout() 106 | plt.savefig(f"./vis/drivable_areas_{city}_resplit.jpg", dpi=500) 107 | plt.close("all") 108 | 109 | 110 | def vectors_to_pcd(vectors): 111 | def _write_obj(points, out_filename): 112 | """Write points into ``obj`` format for meshlab visualization. 113 | 114 | Args: 115 | points (np.ndarray): Points in shape (N, dim). 116 | out_filename (str): Filename to be saved. 
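Each point is written as an OBJ vertex line, i.e. ``v x y z`` for
            3-dim points or ``v x y z r g b`` for 6-dim (xyz + color) points.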
117 | """ 118 | N = points.shape[0] 119 | fout = open(out_filename, 'w') 120 | for i in range(N): 121 | if points.shape[1] == 6: 122 | c = points[i, 3:].astype(int) 123 | fout.write( 124 | 'v %f %f %f %d %d %d\n' % 125 | (points[i, 0], points[i, 1], points[i, 2], c[0], c[1], c[2])) 126 | 127 | else: 128 | fout.write('v %f %f %f\n' % 129 | (points[i, 0], points[i, 1], points[i, 2])) 130 | fout.close() 131 | 132 | COLOR_MAPS_RGB = { 133 | # bgr colors 134 | 0: (0, 0, 255), 135 | 1: (255, 0, 0), 136 | 2: (0, 255, 0), 137 | 3: (51, 183, 255), 138 | } 139 | pcd = [] 140 | for label, v_list in vectors.items(): 141 | for v in v_list: 142 | for pts in v: 143 | color = COLOR_MAPS_RGB[label] 144 | pcd.append([pts[0], pts[1], pts[2], color[0], color[1], color[2]]) 145 | 146 | pcd = np.array(pcd) 147 | _write_obj(pcd, 'pcd.obj') 148 | 149 | if __name__ == '__main__': 150 | # visualize_whole_city() 151 | cfg = Config.fromfile('plugin/configs/debug_nusc.py') 152 | import_plugin(cfg) 153 | 154 | dataset = build_dataset(cfg.data.val) 155 | for i in mmcv.track_iter_progress(range(len(dataset))): 156 | data = dataset[i] 157 | # data = dataset[41] 158 | 159 | # imgs = data['img'] 160 | # vectors = data['vectors'] 161 | # semantic_mask = data['semantic_mask'] 162 | # intrinsics = data['cam_intrinsics'] 163 | # extrinsics = data['cam_extrinsics'] 164 | 165 | # cat2id = cfg.cat2id 166 | # roi_size = cfg.roi_size 167 | # renderer = Renderer(cat2id, roi_size, CAM_NAMES_NUSC) 168 | # out_dir = './vis' 169 | # os.makedirs(out_dir, exist_ok=True) 170 | # renderer.render_camera_views_from_vectors(vectors, imgs, extrinsics, intrinsics, thickness=3, out_dir=out_dir) 171 | # renderer.render_bev_from_vectors(vectors, out_dir=out_dir) 172 | # renderer.render_bev_from_mask(semantic_mask, out_dir=out_dir) -------------------------------------------------------------------------------- /tools/mmdet_train.py: -------------------------------------------------------------------------------- 1 | import random 2 | import warnings 3 | 4 | import numpy as np 5 | import torch 6 | from mmcv.parallel import MMDataParallel, MMDistributedDataParallel 7 | from mmcv.runner import (HOOKS, DistSamplerSeedHook, EpochBasedRunner, 8 | Fp16OptimizerHook, OptimizerHook, build_optimizer, 9 | build_runner) 10 | from mmcv.utils import build_from_cfg 11 | 12 | from mmdet.core import DistEvalHook, EvalHook 13 | from mmdet.datasets import (build_dataloader, build_dataset, 14 | replace_ImageToTensor) 15 | from mmdet.utils import get_root_logger 16 | 17 | 18 | def set_random_seed(seed, deterministic=False): 19 | """Set random seed. 20 | 21 | Args: 22 | seed (int): Seed to be used. 23 | deterministic (bool): Whether to set the deterministic option for 24 | CUDNN backend, i.e., set `torch.backends.cudnn.deterministic` 25 | to True and `torch.backends.cudnn.benchmark` to False. 26 | Default: False. 27 | """ 28 | random.seed(seed) 29 | np.random.seed(seed) 30 | torch.manual_seed(seed) 31 | torch.cuda.manual_seed_all(seed) 32 | if deterministic: 33 | torch.backends.cudnn.deterministic = True 34 | torch.backends.cudnn.benchmark = False 35 | 36 | 37 | def train_detector(model, 38 | dataset, 39 | cfg, 40 | distributed=False, 41 | validate=False, 42 | timestamp=None, 43 | meta=None): 44 | logger = get_root_logger(cfg.log_level) 45 | 46 | # prepare data loaders 47 | dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset] 48 | if 'imgs_per_gpu' in cfg.data: 49 | logger.warning('"imgs_per_gpu" is deprecated in MMDet V2.0. 
' 50 | 'Please use "samples_per_gpu" instead') 51 | if 'samples_per_gpu' in cfg.data: 52 | logger.warning( 53 | f'Got "imgs_per_gpu"={cfg.data.imgs_per_gpu} and ' 54 | f'"samples_per_gpu"={cfg.data.samples_per_gpu}, "imgs_per_gpu"' 55 | f'={cfg.data.imgs_per_gpu} is used in this experiments') 56 | else: 57 | logger.warning( 58 | 'Automatically set "samples_per_gpu"="imgs_per_gpu"=' 59 | f'{cfg.data.imgs_per_gpu} in this experiments') 60 | cfg.data.samples_per_gpu = cfg.data.imgs_per_gpu 61 | 62 | data_loaders = [ 63 | build_dataloader( 64 | ds, 65 | cfg.data.samples_per_gpu, 66 | cfg.data.workers_per_gpu, 67 | # cfg.gpus will be ignored if distributed 68 | len(cfg.gpu_ids), 69 | dist=distributed, 70 | seed=cfg.seed) for ds in dataset 71 | ] 72 | 73 | # put model on gpus 74 | if distributed: 75 | find_unused_parameters = cfg.get('find_unused_parameters', False) 76 | # Sets the `find_unused_parameters` parameter in 77 | # torch.nn.parallel.DistributedDataParallel 78 | model = MMDistributedDataParallel( 79 | model.cuda(), 80 | device_ids=[torch.cuda.current_device()], 81 | broadcast_buffers=False, 82 | find_unused_parameters=find_unused_parameters) 83 | else: 84 | model = MMDataParallel( 85 | model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids) 86 | 87 | # build runner 88 | optimizer = build_optimizer(model, cfg.optimizer) 89 | 90 | if 'runner' not in cfg: 91 | cfg.runner = { 92 | 'type': 'EpochBasedRunner', 93 | 'max_epochs': cfg.total_epochs 94 | } 95 | warnings.warn( 96 | 'config is now expected to have a `runner` section, ' 97 | 'please set `runner` in your config.', UserWarning) 98 | else: 99 | if 'total_epochs' in cfg: 100 | assert cfg.total_epochs == cfg.runner.max_epochs 101 | 102 | runner = build_runner( 103 | cfg.runner, 104 | default_args=dict( 105 | model=model, 106 | optimizer=optimizer, 107 | work_dir=cfg.work_dir, 108 | logger=logger, 109 | meta=meta)) 110 | 111 | # an ugly workaround to make .log and .log.json filenames the same 112 | runner.timestamp = timestamp 113 | 114 | # fp16 setting 115 | fp16_cfg = cfg.get('fp16', None) 116 | if fp16_cfg is not None: 117 | optimizer_config = Fp16OptimizerHook( 118 | **cfg.optimizer_config, **fp16_cfg, distributed=distributed) 119 | elif distributed and 'type' not in cfg.optimizer_config: 120 | optimizer_config = OptimizerHook(**cfg.optimizer_config) 121 | else: 122 | optimizer_config = cfg.optimizer_config 123 | 124 | # register hooks 125 | runner.register_training_hooks(cfg.lr_config, optimizer_config, 126 | cfg.checkpoint_config, cfg.log_config, 127 | cfg.get('momentum_config', None)) 128 | if distributed: 129 | if isinstance(runner, EpochBasedRunner): 130 | runner.register_hook(DistSamplerSeedHook()) 131 | 132 | # register eval hooks 133 | if validate: 134 | # Support batch_size > 1 in validation 135 | val_samples_per_gpu = cfg.data.val.pop('samples_per_gpu', 1) 136 | if val_samples_per_gpu > 1: 137 | # Replace 'ImageToTensor' to 'DefaultFormatBundle' 138 | cfg.data.val.pipeline = replace_ImageToTensor( 139 | cfg.data.val.pipeline) 140 | val_dataset = build_dataset(cfg.data.val, dict(test_mode=True)) 141 | val_dataloader = build_dataloader( 142 | val_dataset, 143 | samples_per_gpu=val_samples_per_gpu, 144 | workers_per_gpu=cfg.data.workers_per_gpu, 145 | dist=distributed, 146 | shuffle=False) 147 | eval_cfg = cfg.get('evaluation', {}) 148 | eval_cfg['by_epoch'] = cfg.runner['type'] != 'IterBasedRunner' 149 | eval_hook = DistEvalHook if distributed else EvalHook 150 | runner.register_hook(eval_hook(val_dataloader, **eval_cfg)) 151 | 
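# A hypothetical example of what the block below consumes (illustrative only,
# not a config shipped with this repo):
#   custom_hooks = [dict(type='EMAHook', priority='NORMAL')]
# each entry is built via the HOOKS registry and registered with its priority.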
152 | # user-defined hooks 153 | if cfg.get('custom_hooks', None): 154 | custom_hooks = cfg.custom_hooks 155 | assert isinstance(custom_hooks, list), \ 156 | f'custom_hooks expect list type, but got {type(custom_hooks)}' 157 | for hook_cfg in cfg.custom_hooks: 158 | assert isinstance(hook_cfg, dict), \ 159 | 'Each item in custom_hooks expects dict type, but got ' \ 160 | f'{type(hook_cfg)}' 161 | hook_cfg = hook_cfg.copy() 162 | priority = hook_cfg.pop('priority', 'NORMAL') 163 | hook = build_from_cfg(hook_cfg, HOOKS) 164 | runner.register_hook(hook, priority=priority) 165 | 166 | if cfg.resume_from: 167 | runner.resume(cfg.resume_from) 168 | elif cfg.load_from: 169 | runner.load_checkpoint(cfg.load_from) 170 | runner.run(data_loaders, cfg.workflow) 171 | -------------------------------------------------------------------------------- /tools/visualization/renderer.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import os 3 | from IPython import embed 4 | import av2.geometry.interpolate as interp_utils 5 | import numpy as np 6 | import copy 7 | import cv2 8 | import matplotlib.pyplot as plt 9 | from PIL import Image 10 | 11 | 12 | def remove_nan_values(uv): 13 | is_u_valid = np.logical_not(np.isnan(uv[:, 0])) 14 | is_v_valid = np.logical_not(np.isnan(uv[:, 1])) 15 | is_uv_valid = np.logical_and(is_u_valid, is_v_valid) 16 | 17 | uv_valid = uv[is_uv_valid] 18 | return uv_valid 19 | 20 | def points_ego2img(pts_ego, extrinsics, intrinsics): 21 | pts_ego_4d = np.concatenate([pts_ego, np.ones([len(pts_ego), 1])], axis=-1) 22 | pts_cam_4d = extrinsics @ pts_ego_4d.T 23 | 24 | uv = (intrinsics @ pts_cam_4d[:3, :]).T 25 | uv = remove_nan_values(uv) 26 | depth = uv[:, 2] 27 | uv = uv[:, :2] / uv[:, 2].reshape(-1, 1) 28 | 29 | return uv, depth 30 | 31 | def draw_polyline_ego_on_img(polyline_ego, img_bgr, extrinsics, intrinsics, color_bgr, thickness): 32 | if polyline_ego.shape[1] == 2: 33 | zeros = np.zeros((polyline_ego.shape[0], 1)) 34 | polyline_ego = np.concatenate([polyline_ego, zeros], axis=1) 35 | 36 | polyline_ego = interp_utils.interp_arc(t=500, points=polyline_ego) 37 | 38 | uv, depth = points_ego2img(polyline_ego, extrinsics, intrinsics) 39 | 40 | h, w, c = img_bgr.shape 41 | 42 | is_valid_x = np.logical_and(0 <= uv[:, 0], uv[:, 0] < w - 1) 43 | is_valid_y = np.logical_and(0 <= uv[:, 1], uv[:, 1] < h - 1) 44 | is_valid_z = depth > 0 45 | is_valid_points = np.logical_and.reduce([is_valid_x, is_valid_y, is_valid_z]) 46 | 47 | if is_valid_points.sum() == 0: 48 | return 49 | 50 | uv = np.round(uv[is_valid_points]).astype(np.int32) 51 | 52 | draw_visible_polyline_cv2( 53 | copy.deepcopy(uv), 54 | valid_pts_bool=np.ones((len(uv), 1), dtype=bool), 55 | image=img_bgr, 56 | color=color_bgr, 57 | thickness_px=thickness, 58 | ) 59 | 60 | def draw_visible_polyline_cv2(line, valid_pts_bool, image, color, thickness_px): 61 | """Draw a polyline onto an image using given line segments. 62 | 63 | Args: 64 | line: Array of shape (K, 2) representing the coordinates of line. 65 | valid_pts_bool: Array of shape (K,) representing which polyline coordinates are valid for rendering. 66 | For example, if the coordinate is occluded, a user might specify that it is invalid. 67 | Line segments touching an invalid vertex will not be rendered. 68 | image: Array of shape (H, W, 3), representing a 3-channel BGR image 69 | color: Tuple of shape (3,) with a BGR format color 70 | thickness_px: thickness (in pixels) to use when rendering the polyline. 
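Note: the polyline is drawn directly onto ``image`` (modified in place)
        using anti-aliased ``cv2.line`` segments.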
71 | """ 72 | line = np.round(line).astype(int) # type: ignore 73 | for i in range(len(line) - 1): 74 | 75 | if (not valid_pts_bool[i]) or (not valid_pts_bool[i + 1]): 76 | continue 77 | 78 | x1 = line[i][0] 79 | y1 = line[i][1] 80 | x2 = line[i + 1][0] 81 | y2 = line[i + 1][1] 82 | 83 | # Use anti-aliasing (AA) for curves 84 | image = cv2.line(image, pt1=(x1, y1), pt2=(x2, y2), color=color, thickness=thickness_px, lineType=cv2.LINE_AA) 85 | 86 | 87 | COLOR_MAPS_BGR = { 88 | # bgr colors 89 | 'divider': (0, 0, 255), 90 | 'boundary': (0, 255, 0), 91 | 'ped_crossing': (255, 0, 0), 92 | 'centerline': (51, 183, 255), 93 | 'drivable_area': (171, 255, 255) 94 | } 95 | 96 | COLOR_MAPS_PLT = { 97 | 'divider': 'r', 98 | 'boundary': 'g', 99 | 'ped_crossing': 'b', 100 | 'centerline': 'orange', 101 | 'drivable_area': 'y', 102 | } 103 | 104 | CAM_NAMES_AV2 = ['ring_front_center', 'ring_front_right', 'ring_front_left', 105 | 'ring_rear_right','ring_rear_left', 'ring_side_right', 'ring_side_left', 106 | ] 107 | 108 | class Renderer(object): 109 | def __init__(self, cat2id, roi_size, cam_names=CAM_NAMES_AV2): 110 | self.roi_size = roi_size 111 | self.cat2id = cat2id 112 | self.id2cat = {v: k for k, v in cat2id.items()} 113 | self.cam_names = cam_names 114 | 115 | def render_bev_from_vectors(self, vectors, out_dir): 116 | car_img = Image.open('icon/car.png') 117 | map_path = os.path.join(out_dir, 'map.jpg') 118 | 119 | plt.figure(figsize=(self.roi_size[0], self.roi_size[1])) 120 | plt.xlim(-self.roi_size[0] / 2, self.roi_size[0] / 2) 121 | plt.ylim(-self.roi_size[1] / 2, self.roi_size[1] / 2) 122 | plt.axis('off') 123 | plt.imshow(car_img, extent=[-1.5, 1.5, -1.2, 1.2]) 124 | 125 | for label, vector_list in vectors.items(): 126 | cat = self.id2cat[label] 127 | color = COLOR_MAPS_PLT[cat] 128 | for vector in vector_list: 129 | pts = vector[:, :2] 130 | x = np.array([pt[0] for pt in pts]) 131 | y = np.array([pt[1] for pt in pts]) 132 | plt.quiver(x[:-1], y[:-1], x[1:] - x[:-1], y[1:] - y[:-1], angles='xy', color=color, 133 | scale_units='xy', scale=1) 134 | 135 | plt.savefig(map_path, bbox_inches='tight', dpi=40) 136 | plt.close() 137 | 138 | def render_camera_views_from_vectors(self, vectors, imgs, extrinsics, intrinsics, thickness, out_dir): 139 | for i in range(len(imgs)): 140 | img = imgs[i] 141 | extrinsic = extrinsics[i] 142 | intrinsic = intrinsics[i] 143 | # img_bgr = copy.deepcopy(img.numpy().transpose((1, 2, 0))) 144 | img_bgr = copy.deepcopy(img) 145 | 146 | for label, vector_list in vectors.items(): 147 | cat = self.id2cat[label] 148 | color = COLOR_MAPS_BGR[cat] 149 | for vector in vector_list: 150 | img_bgr = np.ascontiguousarray(img_bgr) 151 | draw_polyline_ego_on_img(vector, img_bgr, extrinsic, intrinsic, 152 | color, thickness) 153 | 154 | out_path = osp.join(out_dir, self.cam_names[i]) + '.jpg' 155 | cv2.imwrite(out_path, img_bgr) 156 | 157 | def render_bev_from_mask(self, semantic_mask, out_dir): 158 | c, h, w = semantic_mask.shape 159 | bev_img = np.ones((3, h, w), dtype=np.uint8) * 255 160 | drivable_area_mask = semantic_mask[self.cat2id['drivable_area']] 161 | valid = drivable_area_mask == 1 162 | bev_img[:, valid] = np.array(COLOR_MAPS_BGR['drivable_area']).reshape(3, 1) 163 | 164 | for label in range(c): 165 | cat = self.id2cat[label] 166 | if cat == 'drivable_area': 167 | continue 168 | mask = semantic_mask[label] 169 | valid = mask == 1 170 | bev_img[:, valid] = np.array(COLOR_MAPS_BGR[cat]).reshape(3, 1) 171 | 172 | bev_img_flipud = np.array([np.flipud(i) for i in bev_img], 
dtype=np.uint8) 173 | out_path = osp.join(out_dir, 'semantic_map.jpg') 174 | cv2.imwrite(out_path, bev_img_flipud.transpose((1, 2, 0))) 175 | -------------------------------------------------------------------------------- /plugin/datasets/pipelines/transform.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import mmcv 3 | 4 | from mmdet.datasets.builder import PIPELINES 5 | 6 | 7 | @PIPELINES.register_module(force=True) 8 | class Normalize3D(object): 9 | """Normalize the image. 10 | Added key is "img_norm_cfg". 11 | Args: 12 | mean (sequence): Mean values of 3 channels. 13 | std (sequence): Std values of 3 channels. 14 | to_rgb (bool): Whether to convert the image from BGR to RGB, 15 | default is true. 16 | """ 17 | 18 | def __init__(self, mean, std, to_rgb=True): 19 | self.mean = np.array(mean, dtype=np.float32) 20 | self.std = np.array(std, dtype=np.float32) 21 | self.to_rgb = to_rgb 22 | 23 | def __call__(self, results): 24 | """Call function to normalize images. 25 | Args: 26 | results (dict): Result dict from loading pipeline. 27 | Returns: 28 | dict: Normalized results, 'img_norm_cfg' key is added into 29 | result dict. 30 | """ 31 | for key in results.get('img_fields', ['img']): 32 | results[key] = [mmcv.imnormalize( 33 | img, self.mean, self.std, self.to_rgb) for img in results[key]] 34 | results['img_norm_cfg'] = dict( 35 | mean=self.mean, std=self.std, to_rgb=self.to_rgb) 36 | return results 37 | 38 | def __repr__(self): 39 | repr_str = self.__class__.__name__ 40 | repr_str += f'(mean={self.mean}, std={self.std}, to_rgb={self.to_rgb})' 41 | return repr_str 42 | 43 | 44 | @PIPELINES.register_module(force=True) 45 | class PadMultiViewImages(object): 46 | """Pad multi-view images and change intrinsics 47 | There are two padding modes: (1) pad to a fixed size and (2) pad to the 48 | minimum size that is divisible by some number. 49 | Added keys are "pad_shape", "pad_fixed_size", "pad_size_divisor", 50 | Changed keys is 'cam_intrinsics', 51 | 52 | Args: 53 | size (tuple, optional): Fixed padding size, (h, w). 54 | size_divisor (int, optional): The divisor of padded size. 55 | pad_val (float, optional): Padding value, 0 by default. 
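# A numpy-only sketch of the per-view normalization Normalize3D delegates to mmcv.imnormalize:
# optionally flip BGR to RGB, subtract the channel means, divide by the channel stds. The
# mean/std values are just example ImageNet statistics, not taken from any config in this repo.
import numpy as np

def normalize_views(imgs, mean, std, to_rgb=True):
    mean, std = np.asarray(mean, np.float32), np.asarray(std, np.float32)
    out = []
    for img in imgs:                                # one (H, W, 3) array per camera view
        img = img.astype(np.float32)
        if to_rgb:
            img = img[..., ::-1]                    # BGR -> RGB channel flip
        out.append((img - mean) / std)
    return out

views = [np.random.randint(0, 256, (450, 800, 3), dtype=np.uint8) for _ in range(6)]
normed = normalize_views(views, mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375])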
56 | """ 57 | 58 | def __init__(self, size=None, size_divisor=None, pad_val=0, change_intrinsics=False): 59 | self.size = size 60 | self.size_divisor = size_divisor 61 | self.pad_val = pad_val 62 | # only one of size and size_divisor should be valid 63 | assert size is not None or size_divisor is not None 64 | assert size is None or size_divisor is None 65 | 66 | self.change_intrinsics = change_intrinsics 67 | 68 | def _pad_img(self, results): 69 | """Pad images according to ``self.size``.""" 70 | original_shape = [img.shape for img in results['img']] 71 | 72 | for key in results.get('img_fields', ['img']): 73 | if self.size is not None: 74 | padded_img = [mmcv.impad( 75 | img, shape=self.size, pad_val=self.pad_val) for img in results[key]] 76 | elif self.size_divisor is not None: 77 | padded_img = [mmcv.impad_to_multiple( 78 | img, self.size_divisor, pad_val=self.pad_val) for img in results[key]] 79 | results[key] = padded_img 80 | 81 | if self.change_intrinsics: 82 | post_intrinsics, post_ego2imgs = [], [] 83 | for img, oshape, cam_intrinsic, ego2img in zip(results['img'], original_shape, results['cam_intrinsics'], results['ego2img']): 84 | scaleW = img.shape[1] / oshape[1] 85 | scaleH = img.shape[0] / oshape[0] 86 | 87 | rot_resize_matrix = np.array([ 88 | [scaleW, 0, 0, 0], 89 | [0, scaleH, 0, 0], 90 | [0, 0, 1, 0], 91 | [0, 0, 0, 1]]) 92 | 93 | post_intrinsic = rot_resize_matrix[:3, :3] @ cam_intrinsic 94 | post_ego2img = rot_resize_matrix @ ego2img 95 | post_intrinsics.append(post_intrinsic) 96 | post_ego2imgs.append(post_ego2img) 97 | 98 | results.update({ 99 | 'cam_intrinsics': post_intrinsics, 100 | 'ego2img': post_ego2imgs, 101 | }) 102 | 103 | results['img_shape'] = [img.shape for img in padded_img] 104 | results['img_fixed_size'] = self.size 105 | results['img_size_divisor'] = self.size_divisor 106 | 107 | def __call__(self, results): 108 | """Call function to pad images, masks, semantic segmentation maps. 109 | Args: 110 | results (dict): Result dict from loading pipeline. 111 | Returns: 112 | dict: Updated result dict. 113 | """ 114 | self._pad_img(results) 115 | return results 116 | 117 | def __repr__(self): 118 | repr_str = self.__class__.__name__ 119 | repr_str += f'(size={self.size}, ' 120 | repr_str += f'size_divisor={self.size_divisor}, ' 121 | repr_str += f'pad_val={self.pad_val})' 122 | return repr_str 123 | 124 | 125 | @PIPELINES.register_module(force=True) 126 | class ResizeMultiViewImages(object): 127 | """Resize mulit-view images and change intrinsics 128 | Changed keys is 'cam_intrinsics' 129 | 130 | Args: 131 | size (tuple, optional): Fixed padding size, (h, w). 132 | size_divisor (int, optional): The divisor of padded size. 133 | pad_val (float, optional): Padding value, 0 by default. 
134 | """ 135 | 136 | def __init__(self, size, change_intrinsics=False): 137 | self.size = size 138 | self.change_intrinsics = change_intrinsics 139 | 140 | def __call__(self, results: dict): 141 | 142 | new_imgs, post_intrinsics, post_ego2imgs = [], [], [] 143 | 144 | for img, cam_intrinsic, ego2img in zip(results['img'], results['cam_intrinsics'], results['ego2img']): 145 | tmp, scaleW, scaleH = mmcv.imresize(img, 146 | # mmcv.imresize expect (w, h) shape 147 | (self.size[1], self.size[0]), 148 | return_scale=True) 149 | new_imgs.append(tmp) 150 | 151 | rot_resize_matrix = np.array([ 152 | [scaleW, 0, 0, 0], 153 | [0, scaleH, 0, 0], 154 | [0, 0, 1, 0], 155 | [0, 0, 0, 1]]) 156 | post_intrinsic = rot_resize_matrix[:3, :3] @ cam_intrinsic 157 | post_ego2img = rot_resize_matrix @ ego2img 158 | post_intrinsics.append(post_intrinsic) 159 | post_ego2imgs.append(post_ego2img) 160 | 161 | results['img'] = new_imgs 162 | results['img_shape'] = [img.shape for img in new_imgs] 163 | if self.change_intrinsics: 164 | results.update({ 165 | 'cam_intrinsics': post_intrinsics, 166 | 'ego2img': post_ego2imgs, 167 | }) 168 | 169 | return results 170 | 171 | def __repr__(self): 172 | repr_str = self.__class__.__name__ 173 | return repr_str 174 | -------------------------------------------------------------------------------- /plugin/models/heads/map_element_detector.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from mmcv.cnn import Conv2d, Linear 6 | from mmcv.runner import force_fp32 7 | from torch.distributions.categorical import Categorical 8 | 9 | from mmdet.core import multi_apply, reduce_mean 10 | from mmdet.models import HEADS 11 | from .detr_bbox import DETRBboxHead 12 | from mmdet.models.utils.transformer import inverse_sigmoid 13 | 14 | @HEADS.register_module(force=True) 15 | class MapElementDetector(DETRBboxHead): 16 | 17 | def __init__(self, *args, **kwargs): 18 | super(MapElementDetector, self).__init__(*args, **kwargs) 19 | 20 | def _init_embedding(self): 21 | 22 | self.label_embed = nn.Embedding( 23 | self.num_classes, self.embed_dims) 24 | 25 | self.img_coord_embed = nn.Linear(2, self.embed_dims) 26 | 27 | # query_pos_embed & query_embed 28 | self.query_embedding = nn.Embedding(self.num_query, 29 | self.embed_dims*2) 30 | 31 | # for bbox parameter xstart, ystart, xend, yend 32 | self.bbox_embedding = nn.Embedding( self.bbox_size, 33 | self.embed_dims*2) 34 | 35 | def _init_branch(self,): 36 | """Initialize classification branch and regression branch of head.""" 37 | 38 | fc_cls = Linear(self.embed_dims*self.bbox_size, self.cls_out_channels) 39 | # fc_cls = Linear(self.embed_dims, self.cls_out_channels) 40 | 41 | reg_branch = [] 42 | for _ in range(self.num_reg_fcs): 43 | reg_branch.append(Linear(self.embed_dims, self.embed_dims)) 44 | reg_branch.append(nn.LayerNorm(self.embed_dims)) 45 | reg_branch.append(nn.ReLU()) 46 | 47 | if self.discrete_output: 48 | reg_branch.append(nn.Linear( 49 | self.embed_dims, max(self.canvas_size), bias=True,)) 50 | else: 51 | reg_branch.append(nn.Linear( 52 | self.embed_dims, self.coord_dim, bias=True,)) 53 | 54 | reg_branch = nn.Sequential(*reg_branch) 55 | # add sigmoid or not 56 | 57 | def _get_clones(module, N): 58 | return nn.ModuleList([copy.deepcopy(module) for i in range(N)]) 59 | 60 | num_pred = self.transformer.decoder.num_layers 61 | 62 | if self.iterative: 63 | fc_cls = _get_clones(fc_cls, num_pred) 64 | 
reg_branch = _get_clones(reg_branch, num_pred) 65 | else: 66 | reg_branch = nn.ModuleList( 67 | [reg_branch for _ in range(num_pred)]) 68 | fc_cls = nn.ModuleList( 69 | [fc_cls for _ in range(num_pred)]) 70 | 71 | self.pre_branches = nn.ModuleDict([ 72 | ('cls', fc_cls), 73 | ('reg', reg_branch), ]) 74 | 75 | def _prepare_context(self, batch, context): 76 | """Prepare class label and vertex context.""" 77 | 78 | global_context_embedding = None 79 | if self.separate_detect: 80 | global_context_embedding = self.label_embed(batch['class_label']) 81 | 82 | # Image context 83 | if self.separate_detect: 84 | image_embeddings = assign_bev( 85 | context['bev_embeddings'], batch['batch_idx']) 86 | else: 87 | image_embeddings = context['bev_embeddings'] 88 | 89 | image_embeddings = self.input_proj( 90 | image_embeddings) # only change feature size 91 | 92 | # Pass images through encoder 93 | device = image_embeddings.device 94 | 95 | # Add 2D coordinate grid embedding 96 | B, C, H, W = image_embeddings.shape 97 | Ws = torch.linspace(-1., 1., W) 98 | Hs = torch.linspace(-1., 1., H) 99 | image_coords = torch.stack( 100 | torch.meshgrid(Hs, Ws), dim=-1).to(device) 101 | image_coord_embeddings = self.img_coord_embed(image_coords) 102 | 103 | image_embeddings += image_coord_embeddings[None].permute(0, 3, 1, 2) 104 | 105 | # Reshape spatial grid to sequence 106 | sequential_context_embeddings = image_embeddings.reshape( 107 | B, C, H, W) 108 | 109 | return (global_context_embedding, sequential_context_embeddings) 110 | 111 | def forward(self, batch, context, img_metas=None, multi_scale=False): 112 | ''' 113 | Args: 114 | bev_feature (List[Tensor]): shape [B, C, H, W] 115 | feature in bev view 116 | img_metas 117 | 118 | Outs: 119 | preds_dict (Dict): 120 | all_cls_scores (Tensor): Classification score of all 121 | decoder layers, has shape 122 | [nb_dec, bs, num_query, cls_out_channels]. 123 | all_lines_preds (Tensor): 124 | [nb_dec, bs, num_query, num_points, 2]. 
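# A stand-alone sketch of the 2D coordinate-grid embedding that _prepare_context adds to the
# BEV features above, assuming a recent PyTorch; the tensor sizes are invented for illustration.
import torch
import torch.nn as nn

B, C, H, W = 2, 256, 25, 50
feats = torch.randn(B, C, H, W)                      # stand-in for the projected BEV features
coord_embed = nn.Linear(2, C)                        # plays the role of img_coord_embed

ys = torch.linspace(-1., 1., H)
xs = torch.linspace(-1., 1., W)
grid = torch.stack(torch.meshgrid(ys, xs, indexing='ij'), dim=-1)   # (H, W, 2) in [-1, 1]
pos = coord_embed(grid)                                             # (H, W, C)
feats = feats + pos.permute(2, 0, 1)[None]                          # broadcast over the batch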
125 | ''' 126 | 127 | (global_context_embedding, sequential_context_embeddings) =\ 128 | self._prepare_context(batch, context) 129 | 130 | x = sequential_context_embeddings 131 | B, C, H, W = x.shape 132 | 133 | query_embedding = self.query_embedding.weight[None,:,None].repeat(B, 1, self.bbox_size, 1) 134 | bbox_embed = self.bbox_embedding.weight 135 | query_embedding = query_embedding + bbox_embed[None,None] 136 | query_embedding = query_embedding.view(B, -1, C*2) 137 | 138 | img_masks = x.new_zeros((B, H, W)) 139 | pos_embed = self.positional_encoding(img_masks) 140 | 141 | # outs_dec: [nb_dec, bs, num_query, embed_dim] 142 | hs, init_reference, inter_references = self.transformer( 143 | [x,], 144 | [img_masks.type(torch.bool)], 145 | query_embedding, 146 | [pos_embed], 147 | reg_branches= self.reg_branches if self.iterative else None, # noqa:E501 148 | cls_branches= None, # noqa:E501 149 | ) 150 | outs_dec = hs.permute(0, 2, 1, 3) 151 | 152 | outputs = [] 153 | for i, (query_feat) in enumerate(outs_dec): 154 | if i == 0: 155 | reference = init_reference 156 | else: 157 | reference = inter_references[i - 1] 158 | outputs.append(self.get_prediction(i,query_feat,reference)) 159 | 160 | return outputs 161 | 162 | def get_prediction(self, level, query_feat, reference): 163 | 164 | bs, num_query, h = query_feat.shape 165 | query_feat = query_feat.view(bs, -1, self.bbox_size,h) 166 | 167 | ocls = self.pre_branches['cls'][level](query_feat.flatten(-2)) 168 | # ocls = ocls.mean(-2) 169 | reference = inverse_sigmoid(reference) 170 | reference = reference.view(bs, -1, self.bbox_size,self.coord_dim) 171 | 172 | tmp = self.pre_branches['reg'][level](query_feat) 173 | tmp[...,:self.kp_coord_dim] = tmp[...,:self.kp_coord_dim] + reference[...,:self.kp_coord_dim] 174 | lines = tmp.sigmoid() # bs, num_query, self.bbox_size,2 175 | 176 | lines = lines * self.canvas_size[:self.coord_dim] 177 | lines = lines.flatten(-2) 178 | 179 | return dict( 180 | lines=lines, # [bs, num_query, bboxsize*2] 181 | scores=ocls, # [bs, num_query, num_class] 182 | embeddings= query_feat, # [bs, num_query, bbox_size, h] 183 | ) 184 | -------------------------------------------------------------------------------- /tools/mmdet_test.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import pickle 3 | import shutil 4 | import tempfile 5 | import time 6 | 7 | import mmcv 8 | import torch 9 | import torch.distributed as dist 10 | from mmcv.image import tensor2imgs 11 | from mmcv.runner import get_dist_info 12 | 13 | from mmdet.core import encode_mask_results 14 | 15 | 16 | def single_gpu_test(model, 17 | data_loader, 18 | show=False, 19 | out_dir=None, 20 | show_score_thr=0.3): 21 | model.eval() 22 | results = [] 23 | dataset = data_loader.dataset 24 | prog_bar = mmcv.ProgressBar(len(dataset)) 25 | for i, data in enumerate(data_loader): 26 | with torch.no_grad(): 27 | result = model(return_loss=False, rescale=True, **data) 28 | 29 | batch_size = len(result) 30 | if show or out_dir: 31 | if batch_size == 1 and isinstance(data['img'][0], torch.Tensor): 32 | img_tensor = data['img'][0] 33 | else: 34 | img_tensor = data['img'][0].data[0] 35 | img_metas = data['img_metas'][0].data[0] 36 | imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg']) 37 | assert len(imgs) == len(img_metas) 38 | 39 | for i, (img, img_meta) in enumerate(zip(imgs, img_metas)): 40 | h, w, _ = img_meta['img_shape'] 41 | img_show = img[:h, :w, :] 42 | 43 | ori_h, ori_w = 
img_meta['ori_shape'][:-1] 44 | img_show = mmcv.imresize(img_show, (ori_w, ori_h)) 45 | 46 | if out_dir: 47 | out_file = osp.join(out_dir, img_meta['ori_filename']) 48 | else: 49 | out_file = None 50 | 51 | model.module.show_result( 52 | img_show, 53 | result[i], 54 | show=show, 55 | out_file=out_file, 56 | score_thr=show_score_thr) 57 | 58 | # encode mask results 59 | if isinstance(result[0], tuple): 60 | result = [(bbox_results, encode_mask_results(mask_results)) 61 | for bbox_results, mask_results in result] 62 | results.extend(result) 63 | 64 | for _ in range(batch_size): 65 | prog_bar.update() 66 | return results 67 | 68 | 69 | def multi_gpu_test(model, data_loader, tmpdir=None, gpu_collect=False): 70 | """Test model with multiple gpus. 71 | 72 | This method tests model with multiple gpus and collects the results 73 | under two different modes: gpu and cpu modes. By setting 'gpu_collect=True' 74 | it encodes results to gpu tensors and use gpu communication for results 75 | collection. On cpu mode it saves the results on different gpus to 'tmpdir' 76 | and collects them by the rank 0 worker. 77 | 78 | Args: 79 | model (nn.Module): Model to be tested. 80 | data_loader (nn.Dataloader): Pytorch data loader. 81 | tmpdir (str): Path of directory to save the temporary results from 82 | different gpus under cpu mode. 83 | gpu_collect (bool): Option to use either gpu or cpu to collect results. 84 | 85 | Returns: 86 | list: The prediction results. 87 | """ 88 | model.eval() 89 | results = [] 90 | dataset = data_loader.dataset 91 | rank, world_size = get_dist_info() 92 | if rank == 0: 93 | prog_bar = mmcv.ProgressBar(len(dataset)) 94 | time.sleep(2) # This line can prevent deadlock problem in some cases. 95 | for i, data in enumerate(data_loader): 96 | with torch.no_grad(): 97 | result = model(return_loss=False, rescale=True, **data) 98 | # encode mask results 99 | # if isinstance(result[0], tuple): 100 | # result = [(bbox_results, encode_mask_results(mask_results)) 101 | # for bbox_results, mask_results in result] 102 | results.extend(result) 103 | 104 | if rank == 0: 105 | batch_size = len(result) 106 | for _ in range(batch_size * world_size): 107 | prog_bar.update() 108 | 109 | # collect results from all ranks 110 | if gpu_collect: 111 | results = collect_results_gpu(results, len(dataset)) 112 | else: 113 | results = collect_results_cpu(results, len(dataset), tmpdir) 114 | return results 115 | 116 | 117 | def collect_results_cpu(result_part, size, tmpdir=None): 118 | rank, world_size = get_dist_info() 119 | # create a tmp dir if it is not specified 120 | if tmpdir is None: 121 | MAX_LEN = 512 122 | # 32 is whitespace 123 | dir_tensor = torch.full((MAX_LEN, ), 124 | 32, 125 | dtype=torch.uint8, 126 | device='cuda') 127 | if rank == 0: 128 | mmcv.mkdir_or_exist('.dist_test') 129 | tmpdir = tempfile.mkdtemp(dir='.dist_test') 130 | tmpdir = torch.tensor( 131 | bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda') 132 | dir_tensor[:len(tmpdir)] = tmpdir 133 | dist.broadcast(dir_tensor, 0) 134 | tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip() 135 | else: 136 | mmcv.mkdir_or_exist(tmpdir) 137 | # dump the part result to the dir 138 | mmcv.dump(result_part, osp.join(tmpdir, f'part_{rank}.pkl')) 139 | dist.barrier() 140 | # collect all parts 141 | if rank != 0: 142 | return None 143 | else: 144 | # load results of all parts from tmp dir 145 | part_list = [] 146 | for i in range(world_size): 147 | part_file = osp.join(tmpdir, f'part_{i}.pkl') 148 | 
part_list.append(mmcv.load(part_file)) 149 | # sort the results 150 | ordered_results = [] 151 | for res in zip(*part_list): 152 | ordered_results.extend(list(res)) 153 | # the dataloader may pad some samples 154 | ordered_results = ordered_results[:size] 155 | # remove tmp dir 156 | shutil.rmtree(tmpdir) 157 | return ordered_results 158 | 159 | 160 | def collect_results_gpu(result_part, size): 161 | rank, world_size = get_dist_info() 162 | # dump result part to tensor with pickle 163 | part_tensor = torch.tensor( 164 | bytearray(pickle.dumps(result_part)), dtype=torch.uint8, device='cuda') 165 | # gather all result part tensor shape 166 | shape_tensor = torch.tensor(part_tensor.shape, device='cuda') 167 | shape_list = [shape_tensor.clone() for _ in range(world_size)] 168 | dist.all_gather(shape_list, shape_tensor) 169 | # padding result part tensor to max length 170 | shape_max = torch.tensor(shape_list).max() 171 | part_send = torch.zeros(shape_max, dtype=torch.uint8, device='cuda') 172 | part_send[:shape_tensor[0]] = part_tensor 173 | part_recv_list = [ 174 | part_tensor.new_zeros(shape_max) for _ in range(world_size) 175 | ] 176 | # gather all result part 177 | dist.all_gather(part_recv_list, part_send) 178 | 179 | if rank == 0: 180 | part_list = [] 181 | for recv, shape in zip(part_recv_list, shape_list): 182 | part_list.append( 183 | pickle.loads(recv[:shape[0]].cpu().numpy().tobytes())) 184 | # sort the results 185 | ordered_results = [] 186 | for res in zip(*part_list): 187 | ordered_results.extend(list(res)) 188 | # the dataloader may pad some samples 189 | ordered_results = ordered_results[:size] 190 | return ordered_results 191 | -------------------------------------------------------------------------------- /plugin/models/augmentation/sythesis_det.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import random 5 | 6 | 7 | class NoiseSythesis(nn.Module): 8 | 9 | def __init__(self, 10 | p, scale=0.01, shift_scale=(8,5), 11 | scaling_size=(0.1,0.1), canvas_size=(200, 100), 12 | bbox_type='sce', 13 | poly_coord_dim=2, 14 | bbox_coord_dim=2, 15 | quantify=True): 16 | super(NoiseSythesis, self).__init__() 17 | 18 | self.p = p 19 | self.scale = scale 20 | self.bbox_type = bbox_type 21 | self.quantify = quantify 22 | 23 | self.poly_coord_dim = poly_coord_dim 24 | self.bbox_coord_dim = bbox_coord_dim 25 | 26 | self.transforms = [self.random_shifting, self.random_scaling] 27 | # self.transforms = [self.random_scaling] 28 | 29 | self.register_buffer('canvas_size', torch.tensor(canvas_size)) 30 | self.register_buffer('shift_scale', torch.tensor(shift_scale).float()) 31 | self.register_buffer('scaling_size', torch.tensor(scaling_size)) 32 | 33 | def random_scaling(self, bbox): 34 | ''' 35 | bbox: B, paramter_num, 2 36 | ''' 37 | device = bbox.device 38 | dtype = bbox.dtype 39 | B = bbox.shape[0] 40 | 41 | noise = (torch.rand(B, device=device)*2-1)[:,None,None] # [-1,1] 42 | scale = self.scaling_size.to(device) 43 | scale = (noise * scale) + 1 44 | 45 | scaled_bbox = bbox * scale 46 | 47 | # recenterization 48 | coffset = scaled_bbox.mean(-2) - bbox.float().mean(-2) 49 | scaled_bbox = scaled_bbox - coffset[:,None] 50 | 51 | return scaled_bbox.round().type(dtype) 52 | 53 | def random_shifting(self, bbox): 54 | ''' 55 | bbox: B, paramter_num, 2 56 | ''' 57 | device = bbox.device 58 | batch_size = bbox.shape[0] 59 | 60 | shift_scale = self.shift_scale 61 | scale = 
(bbox.max(1)[0] - bbox.min(1)[0]) * 0.1 62 | scale = torch.where(scale < shift_scale, scale, shift_scale) 63 | 64 | noise = (torch.rand(batch_size, 2, device=device)*2-1) # [-1,1] 65 | offset = (noise * scale).round().type(bbox.dtype) 66 | 67 | shifted_bbox = bbox + offset[:, None] 68 | 69 | return shifted_bbox 70 | 71 | def gaussian_noise_bbox(self, bbox): 72 | 73 | dtype = bbox.dtype 74 | batch_size = bbox.shape[0] 75 | 76 | scale = (self.canvas_size * self.scale)[:self.bbox_coord_dim] 77 | 78 | noisy_bbox = torch.normal(bbox.type(torch.float), scale) 79 | 80 | if self.quantify: 81 | noisy_bbox = noisy_bbox.round().type(dtype) 82 | # prevent out of bound case 83 | for i in range(self.bbox_coord_dim): 84 | noisy_bbox[...,i] =\ 85 | torch.clamp(noisy_bbox[...,0],1,self.canvas_size[i]) 86 | else: 87 | noisy_bbox = noisy_bbox.type(torch.float) 88 | 89 | return noisy_bbox 90 | 91 | def gaussian_noise_poly(self, polyline, polyline_mask): 92 | 93 | device = polyline.device 94 | batchsize = polyline.shape[0] 95 | scale = self.canvas_size * self.scale 96 | 97 | polyline = F.pad(polyline,(0,self.poly_coord_dim-1)) 98 | polyline = polyline.view(batchsize,-1, self.poly_coord_dim) 99 | mask = F.pad(polyline_mask[:,1:],(0,self.poly_coord_dim)) 100 | 101 | noisy_polyline = torch.normal(polyline.type(torch.float), scale) 102 | 103 | if self.quantify: 104 | noisy_polyline = noisy_polyline.round().type(polyline.dtype) 105 | 106 | # prevent out of bound case 107 | for i in range(self.poly_coord_dim): 108 | noisy_polyline[...,i] =\ 109 | torch.clamp(noisy_polyline[...,i],0,self.canvas_size[i]) 110 | 111 | else: 112 | noisy_polyline = noisy_polyline.type(torch.float) 113 | 114 | noisy_polyline = noisy_polyline.view(batchsize,-1) * mask 115 | noisy_polyline = noisy_polyline[:,:-(self.poly_coord_dim-1)] 116 | 117 | return noisy_polyline 118 | 119 | def random_apply(self, bbox): 120 | 121 | for t in self.transforms: 122 | 123 | if self.p < torch.rand(1): 124 | continue 125 | 126 | bbox = t(bbox) 127 | 128 | # prevent out of bound case 129 | bbox[...,0] =\ 130 | torch.clamp(bbox[...,0],0,self.canvas_size[0]) 131 | 132 | bbox[...,1] =\ 133 | torch.clamp(bbox[...,1],0,self.canvas_size[1]) 134 | 135 | return bbox 136 | 137 | def simple_aug(self, batch): 138 | 139 | # augment bbox 140 | if self.bbox_type in ['sce', 'xyxy']: 141 | fbbox = batch['bbox_flat'] 142 | seq_len = fbbox.shape[0] 143 | bbox = fbbox.view(seq_len, -1, 2) 144 | bbox = self.gaussian_noise_bbox(bbox) 145 | fbbox_aug = bbox.view(seq_len, -1) 146 | 147 | aug_mask = torch.rand(fbbox.shape,device=fbbox.device) 148 | fbbox = torch.where(aug_mask] -- Ordered points to define the vectorized line. 127 | "pts_num": , -- Number of points in this line. 
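# A toy example of one entry in the vectorized submission format described here: each predicted
# map element is de-normalized with the evaluation patch size/origin and stored with its point
# count, class id and confidence. The token, patch and coordinates are invented for illustration.
import numpy as np

patch_size = np.array([30.0, 60.0])                  # assumed (x, y) extent of the BEV patch
origin = np.array([-15.0, -30.0])

pred_line = np.array([[0.10, 0.20], [0.50, 0.25], [0.90, 0.30]])   # normalized [0, 1] coords
vector = {
    'pts': pred_line * patch_size + origin,          # back to ego-frame metres
    'pts_num': len(pred_line),
    'type': 1,                                       # 0: ped crossing, 1: divider, 2: boundary
    'confidence_level': 0.87,
}
submissions = {'results': {'sample_token_0000': {'vectors': [vector]}}}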
128 | "type": <0, 1, 2> -- Type of the line: 0: ped; 1: divider; 2: boundary 129 | "confidence_level": -- Confidence level for prediction (used by Average Precision) 130 | } 131 | ''' 132 | 133 | if case is None: 134 | continue 135 | 136 | vector_lines = [] 137 | for i in range(case['nline']): 138 | vector = case['lines'][i] * patch_size + origin 139 | vector_lines.append({ 140 | 'pts': vector, 141 | 'pts_num': len(case['lines'][i]), 142 | 'type': case['labels'][i], 143 | 'confidence_level': case['scores'][i], 144 | }) 145 | submissions['results'][case['token']] = {} 146 | submissions['results'][case['token']]['vectors'] = vector_lines 147 | 148 | if 'groundTruth' in case: 149 | 150 | submissions['groundTruth'][case['token']] = {} 151 | vector_lines = [] 152 | for i in range(case['groundTruth']['nline']): 153 | line = case['groundTruth']['lines'][i] * \ 154 | patch_size + origin 155 | 156 | vector_lines.append({ 157 | 'pts': line, 158 | 'pts_num': len(case['groundTruth']['lines'][i]), 159 | 'type': case['groundTruth']['labels'][i], 160 | 'confidence_level': 1., 161 | }) 162 | submissions['groundTruth'][case['token'] 163 | ]['vectors'] = vector_lines 164 | 165 | # Use pickle format to minimize submission file size. 166 | print('Done!') 167 | mmcv.mkdir_or_exist(prefix) 168 | res_path = os.path.join(prefix, '{}.pkl'.format(name)) 169 | mmcv.dump(submissions, res_path) 170 | 171 | return res_path 172 | 173 | def evaluate(self, 174 | results, 175 | logger=None, 176 | name=None, 177 | **kwargs): 178 | ''' 179 | Args: 180 | results (list[Tensor]): List of results. 181 | eval_cfg (Dict): Config of test dataset. 182 | output_format (str): Model output format, should be either 'raster' or 'vector'. 183 | 184 | Returns: 185 | dict: Evaluation results. 186 | ''' 187 | 188 | print('len of the results', len(results)) 189 | name = 'results_nuscence' if name is None else name 190 | result_path = self.format_results( 191 | results, name, prefix=self.work_dir, patch_size=self.eval_cfg.patch_size, origin=self.eval_cfg.origin) 192 | 193 | self.eval_cfg.evaluation_cfg['result_path'] = result_path 194 | self.eval_cfg.evaluation_cfg['ann_file'] = self.ann_file 195 | 196 | mean_ap = eval_chamfer( 197 | self.eval_cfg.evaluation_cfg, update=True, logger=logger) 198 | 199 | result_dict = { 200 | 'mAP': mean_ap, 201 | } 202 | 203 | print('VectormapNet Evaluation Results:') 204 | print(result_dict) 205 | 206 | return result_dict 207 | -------------------------------------------------------------------------------- /plugin/models/backbones/point_pillar.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | # import torch_scatter 4 | 5 | from .pointpillar_utils.voxel import points_to_voxels 6 | 7 | 8 | class PillarBlock(nn.Module): 9 | def __init__(self, idims=64, dims=64, num_layers=1, 10 | stride=1): 11 | super(PillarBlock, self).__init__() 12 | layers = [] 13 | self.idims = idims 14 | self.stride = stride 15 | for i in range(num_layers): 16 | layers.append(nn.Conv2d(self.idims, dims, 3, stride=self.stride, 17 | padding=1, bias=False)) 18 | layers.append(nn.BatchNorm2d(dims)) 19 | layers.append(nn.ReLU(inplace=True)) 20 | self.idims = dims 21 | self.stride = 1 22 | self.layers = nn.Sequential(*layers) 23 | 24 | def forward(self, x): 25 | return self.layers(x) 26 | 27 | 28 | class PointNet(nn.Module): 29 | def __init__(self, idims=64, odims=64): 30 | super(PointNet, self).__init__() 31 | self.pointnet = nn.Sequential( 32 | nn.Conv1d(idims, 
odims, kernel_size=1, bias=False), 33 | nn.BatchNorm1d(odims), 34 | nn.ReLU(inplace=True) 35 | ) 36 | 37 | def forward(self, points_feature, points_mask): 38 | batch_size, num_points, num_dims = points_feature.shape 39 | points_feature = points_feature.permute(0, 2, 1) 40 | mask = points_mask.view(batch_size, 1, num_points) 41 | return self.pointnet(points_feature) * mask 42 | 43 | 44 | class PointPillar(nn.Module): 45 | def __init__(self, C, xbound, ybound, zbound, embedded_dim=16, direction_dim=37): 46 | super(PointPillar, self).__init__() 47 | self.xbound = xbound 48 | self.ybound = ybound 49 | self.zbound = zbound 50 | self.embedded_dim = embedded_dim 51 | self.pn = PointNet(15, 64) 52 | self.block1 = PillarBlock(64, dims=64, num_layers=2, stride=1) 53 | self.block2 = PillarBlock(64, dims=128, num_layers=3, stride=2) 54 | self.block3 = PillarBlock(128, 256, num_layers=3, stride=2) 55 | self.up1 = nn.Sequential( 56 | nn.Conv2d(64, 64, 3, padding=1, bias=False), 57 | nn.BatchNorm2d(64), 58 | nn.ReLU(inplace=True) 59 | ) 60 | self.up2 = nn.Sequential( 61 | nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True), 62 | nn.Conv2d(128, 128, 3, stride=1, padding=1, bias=False), 63 | nn.BatchNorm2d(128), 64 | nn.ReLU(inplace=True) 65 | ) 66 | self.up3 = nn.Sequential( 67 | nn.Upsample(scale_factor=4, mode='bilinear', align_corners=True), 68 | nn.Conv2d(256, 256, 3, stride=1, padding=1, bias=False), 69 | nn.BatchNorm2d(256), 70 | nn.ReLU(inplace=True) 71 | ) 72 | self.conv_out = nn.Sequential( 73 | nn.Conv2d(448, 256, 3, padding=1, bias=False), 74 | nn.BatchNorm2d(256), 75 | nn.ReLU(inplace=True), 76 | nn.Conv2d(256, 128, 3, padding=1, bias=False), 77 | nn.BatchNorm2d(128), 78 | nn.ReLU(inplace=True), 79 | nn.Conv2d(128, C, 1), 80 | ) 81 | self.instance_conv_out = nn.Sequential( 82 | nn.Conv2d(448, 256, 3, padding=1, bias=False), 83 | nn.BatchNorm2d(256), 84 | nn.ReLU(inplace=True), 85 | nn.Conv2d(256, 128, 3, padding=1, bias=False), 86 | nn.BatchNorm2d(128), 87 | nn.ReLU(inplace=True), 88 | nn.Conv2d(128, embedded_dim, 1), 89 | ) 90 | self.direction_conv_out = nn.Sequential( 91 | nn.Conv2d(448, 256, 3, padding=1, bias=False), 92 | nn.BatchNorm2d(256), 93 | nn.ReLU(inplace=True), 94 | nn.Conv2d(256, 128, 3, padding=1, bias=False), 95 | nn.BatchNorm2d(128), 96 | nn.ReLU(inplace=True), 97 | nn.Conv2d(128, direction_dim, 1), 98 | ) 99 | 100 | def forward(self, points, points_mask, 101 | x, rots, trans, intrins, post_rots, post_trans, translation, yaw_pitch_roll): 102 | points_xyz = points[:, :, :3] 103 | points_feature = points[:, :, 3:] 104 | voxels = points_to_voxels( 105 | points_xyz, points_mask, self.xbound, self.ybound, self.zbound 106 | ) 107 | points_feature = torch.cat( 108 | [points, # 5 109 | torch.unsqueeze(voxels['voxel_point_count'], dim=-1), # 1 110 | voxels['local_points_xyz'], # 3 111 | voxels['point_centroids'], # 3 112 | points_xyz - voxels['voxel_centers'], # 3 113 | ], dim=-1 114 | ) 115 | points_feature = self.pn(points_feature, voxels['points_mask']) 116 | voxel_feature = torch_scatter.scatter_mean( 117 | points_feature, 118 | torch.unsqueeze(voxels['voxel_indices'], dim=1), 119 | dim=2, 120 | dim_size=voxels['num_voxels']) 121 | batch_size = points.size(0) 122 | voxel_feature = voxel_feature.view(batch_size, -1, voxels['grid_size'][0], voxels['grid_size'][1]) 123 | voxel_feature1 = self.block1(voxel_feature) 124 | voxel_feature2 = self.block2(voxel_feature1) 125 | voxel_feature3 = self.block3(voxel_feature2) 126 | voxel_feature1 = self.up1(voxel_feature1) 127 | 
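# A stand-alone sketch of the pillar scatter step used in this file (averaging per-point
# features into their voxel columns). It uses index_add_ instead of torch_scatter.scatter_mean,
# since the torch_scatter import at the top of this file is commented out; shapes are invented.
import torch

num_points, feat_dim, num_voxels = 8, 4, 3
point_feats = torch.randn(num_points, feat_dim)
voxel_idx = torch.tensor([0, 0, 1, 2, 2, 2, 1, 0])   # which pillar each point falls into

sums = torch.zeros(num_voxels, feat_dim).index_add_(0, voxel_idx, point_feats)
counts = torch.zeros(num_voxels).index_add_(0, voxel_idx, torch.ones(num_points)).clamp(min=1)
pillar_feats = sums / counts[:, None]                # equivalent to a per-voxel scatter mean
# The encoder then reshapes such pillar features into a (B, C, H, W) BEV grid for the 2D CNN.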
voxel_feature2 = self.up2(voxel_feature2) 128 | voxel_feature3 = self.up3(voxel_feature3) 129 | voxel_feature = torch.cat([voxel_feature1, voxel_feature2, voxel_feature3], dim=1) 130 | return self.conv_out(voxel_feature).transpose(3, 2), self.instance_conv_out(voxel_feature).transpose(3, 2), self.direction_conv_out(voxel_feature).transpose(3, 2) 131 | 132 | 133 | class PointPillarEncoder(nn.Module): 134 | def __init__(self, C, xbound, ybound, zbound): 135 | super(PointPillarEncoder, self).__init__() 136 | self.xbound = xbound 137 | self.ybound = ybound 138 | self.zbound = zbound 139 | self.pn = PointNet(15, 64) 140 | self.block1 = PillarBlock(64, dims=64, num_layers=2, stride=1) 141 | self.block2 = PillarBlock(64, dims=128, num_layers=3, stride=2) 142 | self.block3 = PillarBlock(128, 256, num_layers=3, stride=2) 143 | self.up1 = nn.Sequential( 144 | nn.Conv2d(64, 64, 3, padding=1, bias=False), 145 | nn.BatchNorm2d(64), 146 | nn.ReLU(inplace=True) 147 | ) 148 | self.up2 = nn.Sequential( 149 | nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True), 150 | nn.Conv2d(128, 128, 3, stride=1, padding=1, bias=False), 151 | nn.BatchNorm2d(128), 152 | nn.ReLU(inplace=True) 153 | ) 154 | self.up3 = nn.Sequential( 155 | nn.Upsample(scale_factor=4, mode='bilinear', align_corners=True), 156 | nn.Conv2d(256, 256, 3, stride=1, padding=1, bias=False), 157 | nn.BatchNorm2d(256), 158 | nn.ReLU(inplace=True) 159 | ) 160 | self.conv_out = nn.Sequential( 161 | nn.Conv2d(448, 256, 3, padding=1, bias=False), 162 | nn.BatchNorm2d(256), 163 | nn.ReLU(inplace=True), 164 | nn.Conv2d(256, 128, 3, padding=1, bias=False), 165 | nn.BatchNorm2d(128), 166 | nn.ReLU(inplace=True), 167 | nn.Conv2d(128, C, 1), 168 | ) 169 | 170 | def forward(self, points, points_mask): 171 | points_xyz = points[:, :, :3] 172 | points_feature = points[:, :, 3:] 173 | voxels = points_to_voxels( 174 | points_xyz, points_mask, self.xbound, self.ybound, self.zbound 175 | ) 176 | points_feature = torch.cat( 177 | [points, # 5 178 | torch.unsqueeze(voxels['voxel_point_count'], dim=-1), # 1 179 | voxels['local_points_xyz'], # 3 180 | voxels['point_centroids'], # 3 181 | points_xyz - voxels['voxel_centers'], # 3 182 | ], dim=-1 183 | ) 184 | points_feature = self.pn(points_feature, voxels['points_mask']) 185 | voxel_feature = torch_scatter.scatter_mean( 186 | points_feature, 187 | torch.unsqueeze(voxels['voxel_indices'], dim=1), 188 | dim=2, 189 | dim_size=voxels['num_voxels']) 190 | batch_size = points.size(0) 191 | voxel_feature = voxel_feature.view(batch_size, -1, voxels['grid_size'][0], voxels['grid_size'][1]) 192 | voxel_feature1 = self.block1(voxel_feature) 193 | voxel_feature2 = self.block2(voxel_feature1) 194 | voxel_feature3 = self.block3(voxel_feature2) 195 | voxel_feature1 = self.up1(voxel_feature1) 196 | voxel_feature2 = self.up2(voxel_feature2) 197 | voxel_feature3 = self.up3(voxel_feature3) 198 | voxel_feature = torch.cat([voxel_feature1, voxel_feature2, voxel_feature3], dim=1) 199 | return self.conv_out(voxel_feature).transpose(3, 2) 200 | -------------------------------------------------------------------------------- /tools/analysis_tools/analyze_logs.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import numpy as np 4 | import seaborn as sns 5 | from collections import defaultdict 6 | from matplotlib import pyplot as plt 7 | 8 | 9 | def cal_train_time(log_dicts, args): 10 | for i, log_dict in enumerate(log_dicts): 11 | print(f'{"-" * 5}Analyze train 
time of {args.json_logs[i]}{"-" * 5}') 12 | all_times = [] 13 | for epoch in log_dict.keys(): 14 | if args.include_outliers: 15 | all_times.append(log_dict[epoch]['time']) 16 | else: 17 | all_times.append(log_dict[epoch]['time'][1:]) 18 | all_times = np.array(all_times) 19 | epoch_ave_time = all_times.mean(-1) 20 | slowest_epoch = epoch_ave_time.argmax() 21 | fastest_epoch = epoch_ave_time.argmin() 22 | std_over_epoch = epoch_ave_time.std() 23 | print(f'slowest epoch {slowest_epoch + 1}, ' 24 | f'average time is {epoch_ave_time[slowest_epoch]:.4f}') 25 | print(f'fastest epoch {fastest_epoch + 1}, ' 26 | f'average time is {epoch_ave_time[fastest_epoch]:.4f}') 27 | print(f'time std over epochs is {std_over_epoch:.4f}') 28 | print(f'average iter time: {np.mean(all_times):.4f} s/iter') 29 | print() 30 | 31 | 32 | def plot_curve(log_dicts, args): 33 | if args.backend is not None: 34 | plt.switch_backend(args.backend) 35 | sns.set_style(args.style) 36 | # if legend is None, use {filename}_{key} as legend 37 | legend = args.legend 38 | if legend is None: 39 | legend = [] 40 | for json_log in args.json_logs: 41 | for metric in args.keys: 42 | legend.append(f'{json_log}_{metric}') 43 | assert len(legend) == (len(args.json_logs) * len(args.keys)) 44 | metrics = args.keys 45 | 46 | num_metrics = len(metrics) 47 | for i, log_dict in enumerate(log_dicts): 48 | epochs = list(log_dict.keys()) 49 | for j, metric in enumerate(metrics): 50 | print(f'plot curve of {args.json_logs[i]}, metric is {metric}') 51 | if metric not in log_dict[epochs[args.interval - 1]]: 52 | raise KeyError( 53 | f'{args.json_logs[i]} does not contain metric {metric}') 54 | 55 | if args.mode == 'eval': 56 | if min(epochs) == args.interval: 57 | x0 = args.interval 58 | else: 59 | # if current training is resumed from previous checkpoint 60 | # we lost information in early epochs 61 | # `xs` should start according to `min(epochs)` 62 | if min(epochs) % args.interval == 0: 63 | x0 = min(epochs) 64 | else: 65 | # find the first epoch that do eval 66 | x0 = min(epochs) + args.interval - \ 67 | min(epochs) % args.interval 68 | xs = np.arange(x0, max(epochs) + 1, args.interval) 69 | ys = [] 70 | for epoch in epochs[args.interval - 1::args.interval]: 71 | ys += log_dict[epoch][metric] 72 | 73 | # if training is aborted before eval of the last epoch 74 | # `xs` and `ys` will have different length and cause an error 75 | # check if `ys[-1]` is empty here 76 | if not log_dict[epoch][metric]: 77 | xs = xs[:-1] 78 | 79 | ax = plt.gca() 80 | ax.set_xticks(xs) 81 | plt.xlabel('epoch') 82 | plt.plot(xs, ys, label=legend[i * num_metrics + j], marker='o') 83 | else: 84 | xs = [] 85 | ys = [] 86 | num_iters_per_epoch = \ 87 | log_dict[epochs[args.interval-1]]['iter'][-1] 88 | for epoch in epochs[args.interval - 1::args.interval]: 89 | iters = log_dict[epoch]['iter'] 90 | if log_dict[epoch]['mode'][-1] == 'val': 91 | iters = iters[:-1] 92 | xs.append( 93 | np.array(iters) + (epoch - 1) * num_iters_per_epoch) 94 | ys.append(np.array(log_dict[epoch][metric][:len(iters)])) 95 | xs = np.concatenate(xs) 96 | ys = np.concatenate(ys) 97 | plt.xlabel('iter') 98 | plt.plot( 99 | xs, ys, label=legend[i * num_metrics + j], linewidth=0.5) 100 | plt.legend() 101 | if args.title is not None: 102 | plt.title(args.title) 103 | if args.out is None: 104 | plt.show() 105 | else: 106 | print(f'save curve to: {args.out}') 107 | plt.savefig(args.out) 108 | plt.cla() 109 | 110 | 111 | def add_plot_parser(subparsers): 112 | parser_plt = subparsers.add_parser( 113 | 
'plot_curve', help='parser for plotting curves') 114 | parser_plt.add_argument( 115 | 'json_logs', 116 | type=str, 117 | nargs='+', 118 | help='path of train log in json format') 119 | parser_plt.add_argument( 120 | '--keys', 121 | type=str, 122 | nargs='+', 123 | default=['mAP_0.25'], 124 | help='the metric that you want to plot') 125 | parser_plt.add_argument('--title', type=str, help='title of figure') 126 | parser_plt.add_argument( 127 | '--legend', 128 | type=str, 129 | nargs='+', 130 | default=None, 131 | help='legend of each plot') 132 | parser_plt.add_argument( 133 | '--backend', type=str, default=None, help='backend of plt') 134 | parser_plt.add_argument( 135 | '--style', type=str, default='dark', help='style of plt') 136 | parser_plt.add_argument('--out', type=str, default=None) 137 | parser_plt.add_argument('--mode', type=str, default='train') 138 | parser_plt.add_argument('--interval', type=int, default=1) 139 | 140 | 141 | def add_time_parser(subparsers): 142 | parser_time = subparsers.add_parser( 143 | 'cal_train_time', 144 | help='parser for computing the average time per training iteration') 145 | parser_time.add_argument( 146 | 'json_logs', 147 | type=str, 148 | nargs='+', 149 | help='path of train log in json format') 150 | parser_time.add_argument( 151 | '--include-outliers', 152 | action='store_true', 153 | help='include the first value of every epoch when computing ' 154 | 'the average time') 155 | 156 | 157 | def parse_args(): 158 | parser = argparse.ArgumentParser(description='Analyze Json Log') 159 | # currently only support plot curve and calculate average train time 160 | subparsers = parser.add_subparsers(dest='task', help='task parser') 161 | add_plot_parser(subparsers) 162 | add_time_parser(subparsers) 163 | args = parser.parse_args() 164 | return args 165 | 166 | 167 | def load_json_logs(json_logs): 168 | # load and convert json_logs to log_dict, key is epoch, value is a sub dict 169 | # keys of sub dict is different metrics, e.g. 
memory, bbox_mAP 170 | # value of sub dict is a list of corresponding values of all iterations 171 | log_dicts = [dict() for _ in json_logs] 172 | for json_log, log_dict in zip(json_logs, log_dicts): 173 | with open(json_log, 'r') as log_file: 174 | for line in log_file: 175 | log = json.loads(line.strip()) 176 | # skip lines without `epoch` field 177 | if 'epoch' not in log: 178 | continue 179 | epoch = log.pop('epoch') 180 | if epoch not in log_dict: 181 | log_dict[epoch] = defaultdict(list) 182 | for k, v in log.items(): 183 | log_dict[epoch][k].append(v) 184 | return log_dicts 185 | 186 | 187 | def main(): 188 | args = parse_args() 189 | 190 | json_logs = args.json_logs 191 | for json_log in json_logs: 192 | assert json_log.endswith('.json') 193 | 194 | log_dicts = load_json_logs(json_logs) 195 | 196 | eval(args.task)(log_dicts, args) 197 | 198 | 199 | if __name__ == '__main__': 200 | main() 201 | -------------------------------------------------------------------------------- /tools/misc/browse_dataset.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import numpy as np 3 | import warnings 4 | from mmcv import Config, DictAction, mkdir_or_exist, track_iter_progress 5 | from os import path as osp 6 | 7 | from mmdet3d.core.bbox import (Box3DMode, CameraInstance3DBoxes, Coord3DMode, 8 | DepthInstance3DBoxes, LiDARInstance3DBoxes) 9 | from mmdet3d.core.visualizer import (show_multi_modality_result, show_result, 10 | show_seg_result) 11 | from mmdet3d.datasets import build_dataset 12 | 13 | 14 | def parse_args(): 15 | parser = argparse.ArgumentParser(description='Browse a dataset') 16 | parser.add_argument('config', help='train config file path') 17 | parser.add_argument( 18 | '--skip-type', 19 | type=str, 20 | nargs='+', 21 | default=['Normalize'], 22 | help='skip some useless pipeline') 23 | parser.add_argument( 24 | '--output-dir', 25 | default=None, 26 | type=str, 27 | help='If there is no display interface, you can save it') 28 | parser.add_argument( 29 | '--task', 30 | type=str, 31 | choices=['det', 'seg', 'multi_modality-det', 'mono-det'], 32 | help='Determine the visualization method depending on the task.') 33 | parser.add_argument( 34 | '--online', 35 | action='store_true', 36 | help='Whether to perform online visualization. Note that you often ' 37 | 'need a monitor to do so.') 38 | parser.add_argument( 39 | '--cfg-options', 40 | nargs='+', 41 | action=DictAction, 42 | help='override some settings in the used config, the key-value pair ' 43 | 'in xxx=yyy format will be merged into config file. If the value to ' 44 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 45 | 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' 46 | 'Note that the quotation marks are necessary and that no white space ' 47 | 'is allowed.') 48 | args = parser.parse_args() 49 | return args 50 | 51 | 52 | def build_data_cfg(config_path, skip_type, cfg_options): 53 | """Build data config for loading visualization data.""" 54 | cfg = Config.fromfile(config_path) 55 | if cfg_options is not None: 56 | cfg.merge_from_dict(cfg_options) 57 | # import modules from string list. 
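# A tiny illustration of the log_dict structure that load_json_logs in analyze_logs.py above
# builds from a JSON-lines training log; the two log lines below are invented.
import json
from collections import defaultdict

raw_lines = [
    '{"mode": "train", "epoch": 1, "iter": 50, "lr": 1e-4, "loss": 12.3, "time": 0.41}',
    '{"mode": "train", "epoch": 1, "iter": 100, "lr": 1e-4, "loss": 10.8, "time": 0.39}',
]
log_dict = {}
for line in raw_lines:
    log = json.loads(line)
    epoch = log.pop('epoch')
    log_dict.setdefault(epoch, defaultdict(list))
    for k, v in log.items():
        log_dict[epoch][k].append(v)
# log_dict[1]['loss'] == [12.3, 10.8]; cal_train_time and plot_curve read these per-epoch lists.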
58 | if cfg.get('custom_imports', None): 59 | from mmcv.utils import import_modules_from_strings 60 | import_modules_from_strings(**cfg['custom_imports']) 61 | # extract inner dataset of `RepeatDataset` as `cfg.data.train` 62 | # so we don't need to worry about it later 63 | if cfg.data.train['type'] == 'RepeatDataset': 64 | cfg.data.train = cfg.data.train.dataset 65 | train_data_cfg = cfg.data.train 66 | # eval_pipeline purely consists of loading functions 67 | # use eval_pipeline for data loading 68 | train_data_cfg['pipeline'] = [ 69 | x for x in cfg.eval_pipeline if x['type'] not in skip_type 70 | ] 71 | 72 | return cfg 73 | 74 | 75 | def to_depth_mode(points, bboxes): 76 | """Convert points and bboxes to Depth Coord and Depth Box mode.""" 77 | if points is not None: 78 | points = Coord3DMode.convert_point(points.copy(), Coord3DMode.LIDAR, 79 | Coord3DMode.DEPTH) 80 | if bboxes is not None: 81 | bboxes = Box3DMode.convert(bboxes.clone(), Box3DMode.LIDAR, 82 | Box3DMode.DEPTH) 83 | return points, bboxes 84 | 85 | 86 | def show_det_data(idx, dataset, out_dir, filename, show=False): 87 | """Visualize 3D point cloud and 3D bboxes.""" 88 | example = dataset.prepare_train_data(idx) 89 | points = example['points']._data.numpy() 90 | gt_bboxes = dataset.get_ann_info(idx)['gt_bboxes_3d'].tensor 91 | if dataset.box_mode_3d != Box3DMode.DEPTH: 92 | points, gt_bboxes = to_depth_mode(points, gt_bboxes) 93 | show_result( 94 | points, 95 | gt_bboxes.clone(), 96 | None, 97 | out_dir, 98 | filename, 99 | show=show, 100 | snapshot=True) 101 | 102 | 103 | def show_seg_data(idx, dataset, out_dir, filename, show=False): 104 | """Visualize 3D point cloud and segmentation mask.""" 105 | example = dataset.prepare_train_data(idx) 106 | points = example['points']._data.numpy() 107 | gt_seg = example['pts_semantic_mask']._data.numpy() 108 | show_seg_result( 109 | points, 110 | gt_seg.copy(), 111 | None, 112 | out_dir, 113 | filename, 114 | np.array(dataset.PALETTE), 115 | dataset.ignore_index, 116 | show=show, 117 | snapshot=True) 118 | 119 | 120 | def show_proj_bbox_img(idx, 121 | dataset, 122 | out_dir, 123 | filename, 124 | show=False, 125 | is_nus_mono=False): 126 | """Visualize 3D bboxes on 2D image by projection.""" 127 | try: 128 | example = dataset.prepare_train_data(idx) 129 | except AttributeError: # for Mono-3D datasets 130 | example = dataset.prepare_train_img(idx) 131 | gt_bboxes = dataset.get_ann_info(idx)['gt_bboxes_3d'] 132 | img_metas = example['img_metas']._data 133 | img = example['img']._data.numpy() 134 | # need to transpose channel to first dim 135 | img = img.transpose(1, 2, 0) 136 | # no 3D gt bboxes, just show img 137 | if gt_bboxes.tensor.shape[0] == 0: 138 | gt_bboxes = None 139 | if isinstance(gt_bboxes, DepthInstance3DBoxes): 140 | show_multi_modality_result( 141 | img, 142 | gt_bboxes, 143 | None, 144 | None, 145 | out_dir, 146 | filename, 147 | box_mode='depth', 148 | img_metas=img_metas, 149 | show=show) 150 | elif isinstance(gt_bboxes, LiDARInstance3DBoxes): 151 | show_multi_modality_result( 152 | img, 153 | gt_bboxes, 154 | None, 155 | img_metas['lidar2img'], 156 | out_dir, 157 | filename, 158 | box_mode='lidar', 159 | img_metas=img_metas, 160 | show=show) 161 | elif isinstance(gt_bboxes, CameraInstance3DBoxes): 162 | show_multi_modality_result( 163 | img, 164 | gt_bboxes, 165 | None, 166 | img_metas['cam_intrinsic'], 167 | out_dir, 168 | filename, 169 | box_mode='camera', 170 | img_metas=img_metas, 171 | show=show) 172 | else: 173 | # can't project, just show img 174 | 
warnings.warn( 175 | f'unrecognized gt box type {type(gt_bboxes)}, only show image') 176 | show_multi_modality_result( 177 | img, None, None, None, out_dir, filename, show=show) 178 | 179 | 180 | def main(): 181 | args = parse_args() 182 | 183 | if args.output_dir is not None: 184 | mkdir_or_exist(args.output_dir) 185 | 186 | cfg = build_data_cfg(args.config, args.skip_type, args.cfg_options) 187 | try: 188 | dataset = build_dataset( 189 | cfg.data.train, default_args=dict(filter_empty_gt=False)) 190 | except TypeError: # seg dataset doesn't have `filter_empty_gt` key 191 | dataset = build_dataset(cfg.data.train) 192 | data_infos = dataset.data_infos 193 | dataset_type = cfg.dataset_type 194 | 195 | # configure visualization mode 196 | vis_task = args.task # 'det', 'seg', 'multi_modality-det', 'mono-det' 197 | 198 | for idx, data_info in enumerate(track_iter_progress(data_infos)): 199 | if dataset_type in ['KittiDataset', 'WaymoDataset']: 200 | data_path = data_info['point_cloud']['velodyne_path'] 201 | elif dataset_type in [ 202 | 'ScanNetDataset', 'SUNRGBDDataset', 'ScanNetSegDataset', 203 | 'S3DISSegDataset' 204 | ]: 205 | data_path = data_info['pts_path'] 206 | elif dataset_type in ['NuScenesDataset', 'LyftDataset']: 207 | data_path = data_info['lidar_path'] 208 | elif dataset_type in ['NuScenesMonoDataset']: 209 | data_path = data_info['file_name'] 210 | else: 211 | raise NotImplementedError( 212 | f'unsupported dataset type {dataset_type}') 213 | 214 | file_name = osp.splitext(osp.basename(data_path))[0] 215 | 216 | if vis_task in ['det', 'multi_modality-det']: 217 | # show 3D bboxes on 3D point clouds 218 | show_det_data( 219 | idx, dataset, args.output_dir, file_name, show=args.online) 220 | if vis_task in ['multi_modality-det', 'mono-det']: 221 | # project 3D bboxes to 2D image 222 | show_proj_bbox_img( 223 | idx, 224 | dataset, 225 | args.output_dir, 226 | file_name, 227 | show=args.online, 228 | is_nus_mono=(dataset_type == 'NuScenesMonoDataset')) 229 | elif vis_task in ['seg']: 230 | # show 3D segmentation mask on 3D point clouds 231 | show_seg_data( 232 | idx, dataset, args.output_dir, file_name, show=args.online) 233 | 234 | 235 | if __name__ == '__main__': 236 | main() 237 | -------------------------------------------------------------------------------- /plugin/datasets/pipelines/loading.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | import numpy as np 3 | from mmdet.datasets.builder import PIPELINES 4 | from pyquaternion import Quaternion 5 | 6 | @PIPELINES.register_module(force=True) 7 | class LoadMultiViewImagesFromFiles(object): 8 | """Load multi channel images from a list of separate channel files. 9 | 10 | Expects results['img_filename'] to be a list of filenames. 11 | 12 | Args: 13 | to_float32 (bool): Whether to convert the img to float32. 14 | Defaults to False. 15 | color_type (str): Color type of the file. Defaults to 'unchanged'. 16 | """ 17 | 18 | def __init__(self, to_float32=False, color_type='unchanged'): 19 | self.to_float32 = to_float32 20 | self.color_type = color_type 21 | 22 | def __call__(self, results): 23 | """Call function to load multi-view image from files. 24 | 25 | Args: 26 | results (dict): Result dict containing multi-view image filenames. 27 | 28 | Returns: 29 | dict: The result dict containing the multi-view image data. \ 30 | Added keys and values are described below. 31 | 32 | - filename (str): Multi-view image filenames. 33 | - img (np.ndarray): Multi-view image arrays. 
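# A minimal sketch of the results dict this loading step produces for one sample, using random
# arrays in place of files read with mmcv.imread; the image size and camera count are invented.
import numpy as np

imgs = [np.zeros((450, 800, 3), dtype=np.uint8) for _ in range(6)]   # six surround-view cameras
results = {
    'img': imgs,
    'img_shape': [im.shape for im in imgs],
    'ori_shape': [im.shape for im in imgs],
    'pad_shape': [im.shape for im in imgs],
    'img_norm_cfg': dict(mean=np.zeros(3, np.float32), std=np.ones(3, np.float32), to_rgb=False),
    'img_fields': ['img'],
}
# Later transforms (Normalize3D, ResizeMultiViewImages, PadMultiViewImages) read and update
# these per-view lists.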
34 | - img_shape (tuple[int]): Shape of multi-view image arrays. 35 | - ori_shape (tuple[int]): Shape of original image arrays. 36 | - pad_shape (tuple[int]): Shape of padded image arrays. 37 | - scale_factor (float): Scale factor. 38 | - img_norm_cfg (dict): Normalization configuration of images. 39 | """ 40 | filename = results['img_filenames'] 41 | img = [mmcv.imread(name, self.color_type) for name in filename] 42 | if self.to_float32: 43 | img = [i.astype(np.float32) for i in img] 44 | results['img'] = img 45 | results['img_shape'] = [i.shape for i in img] 46 | results['ori_shape'] = [i.shape for i in img] 47 | # Set initial values for default meta_keys 48 | results['pad_shape'] = [i.shape for i in img] 49 | # results['scale_factor'] = 1.0 50 | num_channels = 1 if len(img[0].shape) < 3 else img[0].shape[2] 51 | results['img_norm_cfg'] = dict( 52 | mean=np.zeros(num_channels, dtype=np.float32), 53 | std=np.ones(num_channels, dtype=np.float32), 54 | to_rgb=False) 55 | results['img_fields'] = ['img'] 56 | return results 57 | 58 | def __repr__(self): 59 | """str: Return a string that describes the module.""" 60 | return f'{self.__class__.__name__} (to_float32={self.to_float32}, '\ 61 | f"color_type='{self.color_type}')" 62 | 63 | # FIXME 64 | @PIPELINES.register_module() 65 | class LoadPointsFromMultiSweepsHdmapNet(object): 66 | """Load points from multiple sweeps. 67 | 68 | This is usually used for nuScenes dataset to utilize previous sweeps. 69 | 70 | Args: 71 | sweeps_num (int): Number of sweeps. Defaults to 10. 72 | load_dim (int): Dimension number of the loaded points. Defaults to 5. 73 | use_dim (list[int]): Which dimension to use. Defaults to [0, 1, 2, 4]. 74 | file_client_args (dict): Config dict of file clients, refer to 75 | https://github.com/open-mmlab/mmcv/blob/master/mmcv/fileio/file_client.py 76 | for more details. Defaults to dict(backend='disk'). 77 | pad_empty_sweeps (bool): Whether to repeat keyframe when 78 | sweeps is empty. Defaults to False. 79 | remove_close (bool): Whether to remove close points. 80 | Defaults to False. 81 | test_mode (bool): If test_model=True used for testing, it will not 82 | randomly sample sweeps but select the nearest N frames. 83 | Defaults to False. 84 | """ 85 | 86 | def __init__(self, 87 | sweeps_num=10, 88 | load_dim=5, 89 | use_dim=[0, 1, 2, 4], 90 | file_client_args=dict(backend='disk'), 91 | pad_empty_sweeps=False, 92 | remove_close=False, 93 | test_mode=False): 94 | self.load_dim = load_dim 95 | self.sweeps_num = sweeps_num 96 | self.use_dim = use_dim 97 | self.file_client_args = file_client_args.copy() 98 | self.file_client = None 99 | self.pad_empty_sweeps = pad_empty_sweeps 100 | self.remove_close = remove_close 101 | self.test_mode = test_mode 102 | 103 | def _load_points(self, pts_filename): 104 | """Private function to load point clouds data. 105 | 106 | Args: 107 | pts_filename (str): Filename of point clouds data. 108 | 109 | Returns: 110 | np.ndarray: An array containing point clouds data. 
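# A sketch of how a raw nuScenes-style lidar sweep is decoded once its bytes are available:
# load_dim=5 columns are (x, y, z, intensity, ring index), and the fifth column is later reused
# to store the time lag to the keyframe. The file path below is hypothetical.
import numpy as np

load_dim = 5
pts_filename = 'data/nuscenes/sweeps/LIDAR_TOP/example_sweep.bin'    # hypothetical path
try:
    points = np.fromfile(pts_filename, dtype=np.float32).reshape(-1, load_dim)
except FileNotFoundError:
    points = np.zeros((0, load_dim), dtype=np.float32)               # placeholder for the sketch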
111 | """ 112 | if self.file_client is None: 113 | self.file_client = mmcv.FileClient(**self.file_client_args) 114 | try: 115 | pts_bytes = self.file_client.get(pts_filename) 116 | points = np.frombuffer(pts_bytes, dtype=np.float32) 117 | except ConnectionError: 118 | mmcv.check_file_exist(pts_filename) 119 | if pts_filename.endswith('.npy'): 120 | points = np.load(pts_filename) 121 | else: 122 | points = np.fromfile(pts_filename, dtype=np.float32) 123 | return points 124 | 125 | def _remove_close(self, points, radius=1.0): 126 | """Removes point too close within a certain radius from origin. 127 | 128 | Args: 129 | points (np.ndarray | :obj:`BasePoints`): Sweep points. 130 | radius (float): Radius below which points are removed. 131 | Defaults to 1.0. 132 | 133 | Returns: 134 | np.ndarray: Points after removing. 135 | """ 136 | if isinstance(points, np.ndarray): 137 | points_numpy = points 138 | elif isinstance(points, BasePoints): 139 | points_numpy = points.tensor.numpy() 140 | else: 141 | raise NotImplementedError 142 | x_filt = np.abs(points_numpy[:, 0]) < radius 143 | y_filt = np.abs(points_numpy[:, 1]) < radius 144 | not_close = np.logical_not(np.logical_and(x_filt, y_filt)) 145 | return points[not_close] 146 | 147 | def __call__(self, results): 148 | """Call function to load multi-sweep point clouds from files. 149 | 150 | Args: 151 | results (dict): Result dict containing multi-sweep point cloud \ 152 | filenames. 153 | 154 | Returns: 155 | dict: The result dict containing the multi-sweep points data. \ 156 | Added key and value are described below. 157 | 158 | - points (np.ndarray | :obj:`BasePoints`): Multi-sweep point \ 159 | cloud arrays. 160 | """ 161 | l2e_r = results['lidar2ego_r'] 162 | l2e_t = results['lidar2ego_t'] 163 | l2e_r_mat = Quaternion(l2e_r).rotation_matrix 164 | 165 | points = results['points'] 166 | points.tensor[:, 4] = 0 167 | 168 | points.tensor[:, :3] = points.tensor[:, :3] @ l2e_r_mat.T 169 | points.tensor[:, :3] = points.tensor[:, :3] + np.array(l2e_t) 170 | 171 | sweep_points_list = [points] 172 | ts = results['timestamp'] 173 | 174 | if self.pad_empty_sweeps and len(results['sweeps']) == 0: 175 | for i in range(self.sweeps_num): 176 | if self.remove_close: 177 | sweep_points_list.append(self._remove_close(points)) 178 | else: 179 | sweep_points_list.append(points) 180 | else: 181 | if len(results['sweeps']) <= self.sweeps_num: 182 | choices = np.arange(len(results['sweeps'])) 183 | elif self.test_mode: 184 | choices = np.arange(self.sweeps_num) 185 | else: 186 | choices = np.random.choice( 187 | len(results['sweeps']), self.sweeps_num, replace=False) 188 | for idx in choices: 189 | sweep = results['sweeps'][idx] 190 | points_sweep = self._load_points(sweep['data_path']) 191 | points_sweep = np.copy(points_sweep).reshape(-1, self.load_dim) 192 | if self.remove_close: 193 | points_sweep = self._remove_close(points_sweep,radius=2.2) 194 | sweep_ts = sweep['timestamp'] / 1e6 195 | 196 | # history ego to current ego 197 | sensor2ego_r = (l2e_r_mat @ sweep['sensor2lidar_rotation']) 198 | sensor2ego_t = sweep['sensor2lidar_translation'] + \ 199 | l2e_t @ np.linalg.inv(l2e_r_mat).T 200 | 201 | points_sweep[:, :3] = points_sweep[:, :3] @ sensor2ego_r.T 202 | points_sweep[:, :3] += sensor2ego_t 203 | 204 | points_sweep[:, 4] = ts - sweep_ts 205 | points_sweep = points.new_point(points_sweep) 206 | sweep_points_list.append(points_sweep) 207 | 208 | points = points.cat(sweep_points_list) 209 | points = points[:, self.use_dim] 210 | results['points'] = points 211 | 
return results 212 | 213 | def __repr__(self): 214 | """str: Return a string that describes the module.""" 215 | return f'{self.__class__.__name__}(sweeps_num={self.sweeps_num})' -------------------------------------------------------------------------------- /plugin/models/mapers/vectormapnet.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from torch.nn.utils.rnn import pad_sequence 7 | from torchvision.models.resnet import resnet18, resnet50 8 | 9 | from mmdet3d.models.builder import (build_backbone, build_head, 10 | build_neck) 11 | 12 | from mmdet3d.models.builder import DETECTORS as MAPPERS 13 | from .base_mapper import BaseMapper 14 | 15 | 16 | @MAPPERS.register_module() 17 | class VectorMapNet(BaseMapper): 18 | 19 | def __init__(self, 20 | backbone_cfg=dict(), 21 | head_cfg=dict( 22 | vert_net_cfg=dict(), 23 | face_net_cfg=dict(), 24 | ), 25 | neck_input_channels=128, 26 | neck_cfg=None, 27 | with_auxiliary_head=False, 28 | only_det=False, 29 | train_cfg=None, 30 | test_cfg=None, 31 | pretrained=None, 32 | model_name=None, **kwargs): 33 | super(VectorMapNet, self).__init__() 34 | 35 | 36 | #Attribute 37 | self.model_name = model_name 38 | self.last_epoch = None 39 | self.only_det = only_det 40 | 41 | self.backbone = build_backbone(backbone_cfg) 42 | 43 | if neck_cfg is not None: 44 | self.neck_neck = build_backbone(neck_cfg.backbone) 45 | self.neck_neck.conv1 = nn.Conv2d( 46 | neck_input_channels, 64, kernel_size=7, stride=2, padding=3, bias=False) 47 | self.neck_project = build_neck(neck_cfg.neck) 48 | self.neck = self.multiscale_neck 49 | else: 50 | trunk = resnet18(pretrained=False, zero_init_residual=True) 51 | self.neck = nn.Sequential( 52 | nn.Conv2d(neck_input_channels, 64, kernel_size=(7, 7), stride=( 53 | 2, 2), padding=(3, 3), bias=False), 54 | nn.BatchNorm2d(64), 55 | nn.ReLU(inplace=True), 56 | nn.MaxPool2d(kernel_size=3, stride=2, padding=1, 57 | dilation=1, ceil_mode=False), 58 | trunk.layer1, 59 | nn.Conv2d(64, 128, kernel_size=1, bias=False), 60 | ) 61 | 62 | # BEV 63 | if hasattr(self.backbone,'bev_w'): 64 | self.bev_w = self.backbone.bev_w 65 | self.bev_h = self.backbone.bev_h 66 | 67 | 68 | self.head = build_head(head_cfg) 69 | 70 | def multiscale_neck(self, bev_embedding): 71 | 72 | multi_feat = self.neck_neck(bev_embedding) 73 | multi_feat = self.neck_project(multi_feat) 74 | 75 | return multi_feat 76 | 77 | def forward_train(self, img, polys, points=None, img_metas=None, **kwargs): 78 | ''' 79 | Args: 80 | img: torch.Tensor of shape [B, N, 3, H, W] 81 | N: number of cams 82 | vectors: list[list[Tuple(lines, length, label)]] 83 | - lines: np.array of shape [num_points, 2]. 
84 | - length: int 85 | - label: int 86 | len(vectors) = batch_size 87 | len(vectors[_b]) = num of lines in sample _b 88 | img_metas: 89 | img_metas['lidar2img']: [B, N, 4, 4] 90 | 91 | Out: 92 | loss, log_vars, num_sample 93 | ''' 94 | # prepare labels and images 95 | batch, img, img_metas, valid_idx, points = self.batch_data( 96 | polys, img, img_metas, img.device, points) 97 | 98 | # corner cases use hard code to prevent code fail 99 | if self.last_epoch is None: 100 | self.last_epoch = [batch, img, img_metas, valid_idx, points] 101 | 102 | if len(valid_idx)==0: 103 | batch, img, img_metas, valid_idx, points = self.last_epoch 104 | else: 105 | del self.last_epoch 106 | self.last_epoch = [batch, img, img_metas, valid_idx, points] 107 | 108 | # Backbone 109 | _bev_feats = self.backbone(img, img_metas=img_metas, points=points) 110 | img_shape = \ 111 | [_bev_feats.shape[2:] for i in range(_bev_feats.shape[0])] 112 | 113 | # Neck 114 | bev_feats = self.neck(_bev_feats) 115 | 116 | preds_dict, losses_dict = \ 117 | self.head(batch, 118 | context={ 119 | 'bev_embeddings': bev_feats, 120 | 'batch_input_shape': _bev_feats.shape[2:], 121 | 'img_shape': img_shape, 122 | 'raw_bev_embeddings': _bev_feats}, 123 | only_det=self.only_det) 124 | 125 | # format outputs 126 | loss = 0 127 | for name, var in losses_dict.items(): 128 | loss = loss + var 129 | 130 | # update the log 131 | log_vars = {k: v.item() for k, v in losses_dict.items()} 132 | log_vars.update({'total': loss.item()}) 133 | 134 | num_sample = img.size(0) 135 | 136 | return loss, log_vars, num_sample 137 | 138 | @torch.no_grad() 139 | def forward_test(self, img, polys, points=None, img_metas=None, **kwargs): 140 | ''' 141 | inference pipeline 142 | ''' 143 | 144 | # prepare labels and images 145 | batch, img, img_metas, valid_idx, points = self.batch_data( 146 | polys, img, img_metas, img.device, points) 147 | 148 | # corner cases use hard code to prevent code fail 149 | if len(valid_idx)==0: 150 | return [None] 151 | 152 | token = [] 153 | for img_meta in img_metas: 154 | token.append(img_meta['sample_idx']) 155 | 156 | _bev_feats = self.backbone(img, img_metas, points=points) 157 | img_shape = [_bev_feats.shape[2:] for i in range(_bev_feats.shape[0])] 158 | # Neck 159 | bev_feats = self.neck(_bev_feats) 160 | 161 | context = {'bev_embeddings': bev_feats, 162 | 'batch_input_shape': _bev_feats.shape[2:], 163 | 'img_shape': img_shape, # XXX 164 | 'raw_bev_embeddings': _bev_feats} 165 | 166 | preds_dict = self.head(batch, 167 | context=context, 168 | condition_on_det=True, 169 | gt_condition=False, 170 | only_det=self.only_det) 171 | 172 | # Hard Code 173 | if preds_dict is None: 174 | return [None] 175 | 176 | results_list = self.head.post_process(preds_dict, token, batch, only_det=self.only_det) 177 | 178 | return results_list 179 | 180 | def batch_data(self, polys, imgs, img_metas, device, points=None): 181 | 182 | # filter none vector's case 183 | valid_idx = [i for i in range(len(polys)) if len(polys[i])] 184 | imgs = imgs[valid_idx] 185 | img_metas = [img_metas[i] for i in valid_idx] 186 | polys = [polys[i] for i in valid_idx] 187 | 188 | if points is not None: 189 | points = [points[i] for i in valid_idx] 190 | points = self.batch_points(points) 191 | 192 | if len(valid_idx) == 0: 193 | return None, None, None, valid_idx, None 194 | 195 | batch = {} 196 | batch['det'] = format_det(polys,device) 197 | batch['gen'] = format_gen(polys,device) 198 | 199 | return batch, imgs, img_metas, valid_idx, points 200 | 201 | def 
batch_points(self, points): 202 | 203 | pad_points = pad_sequence(points, batch_first=True) 204 | 205 | points_mask = torch.zeros_like(pad_points[:,:,0]).bool() 206 | for i in range(len(points)): 207 | valid_num = points[i].shape[0] 208 | points_mask[i][:valid_num] = True 209 | 210 | return (pad_points, points_mask) 211 | 212 | 213 | def format_det(polys, device): 214 | 215 | batch = { 216 | 'class_label':[], 217 | 'batch_idx':[], 218 | 'bbox': [], 219 | } 220 | 221 | for batch_idx, poly in enumerate(polys): 222 | 223 | keypoint_label = torch.from_numpy(poly['det_label']).to(device) 224 | keypoint = torch.from_numpy(poly['keypoint']).to(device) 225 | 226 | batch['class_label'].append(keypoint_label) 227 | batch['bbox'].append(keypoint) 228 | 229 | return batch 230 | 231 | 232 | def format_gen(polys,device): 233 | 234 | line_cls = [] 235 | polylines, polyline_masks, polyline_weights = [], [], [] 236 | bbox, line_cls, line_bs_idx = [], [], [] 237 | 238 | for batch_idx, poly in enumerate(polys): 239 | 240 | # convert to cuda tensor 241 | for k in poly.keys(): 242 | if isinstance(poly[k],np.ndarray): 243 | poly[k] = torch.from_numpy(poly[k]).to(device) 244 | else: 245 | poly[k] = [torch.from_numpy(v).to(device) for v in poly[k]] 246 | 247 | line_cls += poly['gen_label'] 248 | line_bs_idx += [batch_idx]*len(poly['gen_label']) 249 | 250 | # condition 251 | bbox += poly['qkeypoint'] 252 | 253 | # out 254 | polylines += poly['polylines'] 255 | polyline_masks += poly['polyline_masks'] 256 | polyline_weights += poly['polyline_weights'] 257 | 258 | batch = {} 259 | batch['lines_bs_idx'] = torch.tensor( 260 | line_bs_idx, dtype=torch.long, device=device) 261 | batch['lines_cls'] = torch.tensor( 262 | line_cls, dtype=torch.long, device=device) 263 | batch['bbox_flat'] = torch.stack(bbox, 0) 264 | 265 | # padding 266 | batch['polylines'] = pad_sequence(polylines, batch_first=True) 267 | batch['polyline_masks'] = pad_sequence(polyline_masks, batch_first=True) 268 | batch['polyline_weights'] = pad_sequence(polyline_weights, batch_first=True) 269 | 270 | return batch 271 | 272 | -------------------------------------------------------------------------------- /tools/train.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import argparse 4 | import copy 5 | import mmcv 6 | import os 7 | import time 8 | import torch 9 | import warnings 10 | from mmcv import Config, DictAction 11 | from mmcv.runner import get_dist_info, init_dist 12 | from os import path as osp 13 | 14 | from mmdet import __version__ as mmdet_version 15 | from mmdet3d import __version__ as mmdet3d_version 16 | from mmdet3d.apis import train_model 17 | from mmdet3d.datasets import build_dataset 18 | from mmdet3d.utils import collect_env, get_root_logger 19 | from mmseg import __version__ as mmseg_version 20 | 21 | # warper 22 | from mmdet_train import set_random_seed 23 | # from builder import build_model 24 | from mmdet3d.models import build_model 25 | 26 | 27 | def parse_args(): 28 | parser = argparse.ArgumentParser(description='Train a detector') 29 | parser.add_argument('config', help='train config file path') 30 | parser.add_argument('--work-dir', help='the dir to save logs and models') 31 | parser.add_argument( 32 | '--resume-from', help='the checkpoint file to resume from') 33 | parser.add_argument( 34 | '--no-validate', 35 | action='store_true', 36 | help='whether not to evaluate the checkpoint during training') 37 | group_gpus = 
parser.add_mutually_exclusive_group() 38 | group_gpus.add_argument( 39 | '--gpus', 40 | type=int, 41 | help='number of gpus to use ' 42 | '(only applicable to non-distributed training)') 43 | group_gpus.add_argument( 44 | '--gpu-ids', 45 | type=int, 46 | nargs='+', 47 | help='ids of gpus to use ' 48 | '(only applicable to non-distributed training)') 49 | parser.add_argument('--seed', type=int, default=0, help='random seed') 50 | parser.add_argument( 51 | '--deterministic', 52 | action='store_true', 53 | help='whether to set deterministic options for CUDNN backend.') 54 | parser.add_argument( 55 | '--options', 56 | nargs='+', 57 | action=DictAction, 58 | help='override some settings in the used config, the key-value pair ' 59 | 'in xxx=yyy format will be merged into config file (deprecate), ' 60 | 'change to --cfg-options instead.') 61 | parser.add_argument( 62 | '--cfg-options', 63 | nargs='+', 64 | action=DictAction, 65 | help='override some settings in the used config, the key-value pair ' 66 | 'in xxx=yyy format will be merged into config file. If the value to ' 67 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 68 | 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' 69 | 'Note that the quotation marks are necessary and that no white space ' 70 | 'is allowed.') 71 | parser.add_argument( 72 | '--launcher', 73 | choices=['none', 'pytorch', 'slurm', 'mpi'], 74 | default='none', 75 | help='job launcher') 76 | parser.add_argument('--local_rank', type=int, default=0) 77 | parser.add_argument( 78 | '--autoscale-lr', 79 | action='store_true', 80 | help='automatically scale lr with the number of gpus') 81 | args = parser.parse_args() 82 | if 'LOCAL_RANK' not in os.environ: 83 | os.environ['LOCAL_RANK'] = str(args.local_rank) 84 | 85 | if args.options and args.cfg_options: 86 | raise ValueError( 87 | '--options and --cfg-options cannot be both specified, ' 88 | '--options is deprecated in favor of --cfg-options') 89 | if args.options: 90 | warnings.warn('--options is deprecated in favor of --cfg-options') 91 | args.cfg_options = args.options 92 | 93 | return args 94 | 95 | 96 | def main(): 97 | args = parse_args() 98 | 99 | cfg = Config.fromfile(args.config) 100 | if args.cfg_options is not None: 101 | cfg.merge_from_dict(args.cfg_options) 102 | # import modules from string list. 103 | if cfg.get('custom_imports', None): 104 | from mmcv.utils import import_modules_from_strings 105 | import_modules_from_strings(**cfg['custom_imports']) 106 | 107 | # set cudnn_benchmark 108 | if cfg.get('cudnn_benchmark', False): 109 | torch.backends.cudnn.benchmark = True 110 | 111 | # import modules from plguin/xx, registry will be updated 112 | import sys 113 | sys.path.append(os.path.abspath('.')) 114 | if hasattr(cfg, 'plugin'): 115 | if cfg.plugin: 116 | import importlib 117 | if hasattr(cfg, 'plugin_dir'): 118 | def import_path(plugin_dir): 119 | _module_dir = os.path.dirname(plugin_dir) 120 | _module_dir = _module_dir.split('/') 121 | _module_path = _module_dir[0] 122 | 123 | for m in _module_dir[1:]: 124 | _module_path = _module_path + '.' 
+ m 125 | print(_module_path) 126 | plg_lib = importlib.import_module(_module_path) 127 | 128 | plugin_dirs = cfg.plugin_dir 129 | if not isinstance(plugin_dirs,list): 130 | plugin_dirs = [plugin_dirs,] 131 | for plugin_dir in plugin_dirs: 132 | import_path(plugin_dir) 133 | 134 | else: 135 | # import dir is the dirpath for the config file 136 | _module_dir = os.path.dirname(args.config) 137 | _module_dir = _module_dir.split('/') 138 | _module_path = _module_dir[0] 139 | for m in _module_dir[1:]: 140 | _module_path = _module_path + '.' + m 141 | print(_module_path) 142 | plg_lib = importlib.import_module(_module_path) 143 | 144 | # work_dir is determined in this priority: CLI > segment in file > filename 145 | if args.work_dir is not None: 146 | # update configs according to CLI args if args.work_dir is not None 147 | cfg.work_dir = args.work_dir 148 | elif cfg.get('work_dir', None) is None: 149 | # use config filename as default work_dir if cfg.work_dir is None 150 | cfg.work_dir = osp.join('./work_dirs', 151 | osp.splitext(osp.basename(args.config))[0]) 152 | if args.resume_from is not None: 153 | cfg.resume_from = args.resume_from 154 | if args.gpu_ids is not None: 155 | cfg.gpu_ids = args.gpu_ids 156 | else: 157 | cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus) 158 | 159 | if args.autoscale_lr: 160 | # apply the linear scaling rule (https://arxiv.org/abs/1706.02677) 161 | cfg.optimizer['lr'] = cfg.optimizer['lr'] * len(cfg.gpu_ids) / 8 162 | 163 | # init distributed env first, since logger depends on the dist info. 164 | if args.launcher == 'none': 165 | distributed = False 166 | else: 167 | distributed = True 168 | init_dist(args.launcher, **cfg.dist_params) 169 | # re-set gpu_ids with distributed training mode 170 | _, world_size = get_dist_info() 171 | cfg.gpu_ids = range(world_size) 172 | 173 | # create work_dir 174 | mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir)) 175 | # dump config 176 | cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config))) 177 | # init the logger before other steps 178 | timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime()) 179 | log_file = osp.join(cfg.work_dir, f'{timestamp}.log') 180 | # specify logger name, if we still use 'mmdet', the output info will be 181 | # filtered and won't be saved in the log_file 182 | # TODO: ugly workaround to judge whether we are training det or seg model 183 | if cfg.model.type in ['EncoderDecoder3D']: 184 | logger_name = 'mmseg' 185 | else: 186 | logger_name = 'mmdet' 187 | logger = get_root_logger( 188 | log_file=log_file, log_level=cfg.log_level, name=logger_name) 189 | 190 | # init the meta dict to record some important information such as 191 | # environment info and seed, which will be logged 192 | meta = dict() 193 | # log env info 194 | env_info_dict = collect_env() 195 | env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()]) 196 | dash_line = '-' * 60 + '\n' 197 | logger.info('Environment info:\n' + dash_line + env_info + '\n' + 198 | dash_line) 199 | meta['env_info'] = env_info 200 | meta['config'] = cfg.pretty_text 201 | 202 | # log some basic info 203 | logger.info(f'Distributed training: {distributed}') 204 | logger.info(f'Config:\n{cfg.pretty_text}') 205 | 206 | # set random seeds 207 | if args.seed is not None: 208 | logger.info(f'Set random seed to {args.seed}, ' 209 | f'deterministic: {args.deterministic}') 210 | set_random_seed(args.seed, deterministic=args.deterministic) 211 | cfg.seed = args.seed 212 | meta['seed'] = args.seed 213 | meta['exp_name'] = 
osp.basename(args.config) 214 | 215 | model = build_model( 216 | cfg.model, 217 | train_cfg=cfg.get('train_cfg'), 218 | test_cfg=cfg.get('test_cfg')) 219 | model.init_weights() 220 | 221 | logger.info(f'Model:\n{model}') 222 | cfg.data.train.work_dir = cfg.work_dir 223 | cfg.data.val.work_dir = cfg.work_dir 224 | datasets = [build_dataset(cfg.data.train)] 225 | if len(cfg.workflow) == 2: 226 | val_dataset = copy.deepcopy(cfg.data.val) 227 | # in case we use a dataset wrapper 228 | if 'dataset' in cfg.data.train: 229 | val_dataset.pipeline = cfg.data.train.dataset.pipeline 230 | else: 231 | val_dataset.pipeline = cfg.data.train.pipeline 232 | # set test_mode=False here in deep copied config 233 | # which do not affect AP/AR calculation later 234 | # refer to https://mmdetection3d.readthedocs.io/en/latest/tutorials/customize_runtime.html#customize-workflow # noqa 235 | val_dataset.test_mode = False 236 | datasets.append(build_dataset(val_dataset)) 237 | if cfg.checkpoint_config is not None: 238 | # save mmdet version, config file content and class names in 239 | # checkpoints as meta data 240 | cfg.checkpoint_config.meta = dict( 241 | mmdet_version=mmdet_version, 242 | mmseg_version=mmseg_version, 243 | mmdet3d_version=mmdet3d_version, 244 | config=cfg.pretty_text, 245 | CLASSES=None, 246 | PALETTE=datasets[0].PALETTE # for segmentors 247 | if hasattr(datasets[0], 'PALETTE') else None) 248 | # add an attribute for visualization convenience 249 | # model.CLASSES = datasets[0].CLASSES 250 | train_model( 251 | model, 252 | datasets, 253 | cfg, 254 | distributed=distributed, 255 | validate=(not args.no_validate), 256 | timestamp=timestamp, 257 | meta=meta) 258 | 259 | 260 | if __name__ == '__main__': 261 | main() 262 | --------------------------------------------------------------------------------
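
Note on the plugin-import step in tools/train.py above: when the loaded config sets `plugin = True` and provides a `plugin_dir`, the script appends the repository root to `sys.path`, converts each plugin directory path into a dotted module path, and imports it so that registry decorators such as `@MAPPERS.register_module()` on `VectorMapNet` run before `build_model` is called. The snippet below is a minimal standalone sketch of that conversion, not a file from this repository; the helper name `import_plugin` and the example value `'plugin/'` are illustrative assumptions, and running it presumes you are in the repository root with mmdet3d and the other requirements installed.

import importlib
import os
import sys


def import_plugin(plugin_dir: str):
    """Sketch of the import_path() logic used in tools/train.py.

    'plugin/' -> 'plugin', 'plugin/models/' -> 'plugin.models'; importing the
    resulting module path triggers the mmdet/mmdet3d registry decorators.
    """
    module_dir = os.path.dirname(plugin_dir)        # drops the trailing slash
    module_path = '.'.join(module_dir.split('/'))   # file path -> dotted module path
    return importlib.import_module(module_path)


if __name__ == '__main__':
    # Assumption: executed from the repository root, as tools/train.py ensures
    # via sys.path.append(os.path.abspath('.')).
    sys.path.append(os.path.abspath('.'))
    import_plugin('plugin/')  # registers VectorMapNet, MapElementDetector, etc.

In the configs this is driven by the two fields that tools/train.py reads, e.g. `plugin = True` and `plugin_dir = 'plugin/'`; the exact values shown here are assumptions for illustration.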