├── LICENSE ├── README.md ├── debug_test.py ├── debug_train.py ├── docker └── Dockerfile ├── docs ├── can_bus.ipynb ├── getting_started.md ├── install.md └── prepare_dataset.md ├── figs ├── 000868a72138448191b4092f75ed7776.jpg ├── 0141260a339d4b37addb55818bbae718.jpg ├── arch.png └── sota_results.png ├── icon └── car.png ├── projects ├── __init__.py ├── configs │ ├── _base_ │ │ ├── datasets │ │ │ ├── coco_instance.py │ │ │ ├── kitti-3d-3class.py │ │ │ ├── kitti-3d-car.py │ │ │ ├── lyft-3d.py │ │ │ ├── nuim_instance.py │ │ │ ├── nus-3d.py │ │ │ ├── nus-mono3d.py │ │ │ ├── range100_lyft-3d.py │ │ │ ├── s3dis-3d-5class.py │ │ │ ├── s3dis_seg-3d-13class.py │ │ │ ├── scannet-3d-18class.py │ │ │ ├── scannet_seg-3d-20class.py │ │ │ ├── sunrgbd-3d-10class.py │ │ │ ├── waymoD5-3d-3class.py │ │ │ └── waymoD5-3d-car.py │ │ ├── default_runtime.py │ │ ├── models │ │ │ ├── 3dssd.py │ │ │ ├── cascade_mask_rcnn_r50_fpn.py │ │ │ ├── centerpoint_01voxel_second_secfpn_nus.py │ │ │ ├── centerpoint_02pillar_second_secfpn_nus.py │ │ │ ├── fcos3d.py │ │ │ ├── groupfree3d.py │ │ │ ├── h3dnet.py │ │ │ ├── hv_pointpillars_fpn_lyft.py │ │ │ ├── hv_pointpillars_fpn_nus.py │ │ │ ├── hv_pointpillars_fpn_range100_lyft.py │ │ │ ├── hv_pointpillars_secfpn_kitti.py │ │ │ ├── hv_pointpillars_secfpn_waymo.py │ │ │ ├── hv_second_secfpn_kitti.py │ │ │ ├── hv_second_secfpn_waymo.py │ │ │ ├── imvotenet_image.py │ │ │ ├── mask_rcnn_r50_fpn.py │ │ │ ├── paconv_cuda_ssg.py │ │ │ ├── paconv_ssg.py │ │ │ ├── parta2.py │ │ │ ├── pointnet2_msg.py │ │ │ ├── pointnet2_ssg.py │ │ │ └── votenet.py │ │ └── schedules │ │ │ ├── cosine.py │ │ │ ├── cyclic_20e.py │ │ │ ├── cyclic_40e.py │ │ │ ├── mmdet_schedule_1x.py │ │ │ ├── schedule_2x.py │ │ │ ├── schedule_3x.py │ │ │ ├── seg_cosine_150e.py │ │ │ ├── seg_cosine_200e.py │ │ │ └── seg_cosine_50e.py │ ├── bevformer │ │ ├── bevformer_base.py │ │ ├── bevformer_base_seg_det_150x150.py │ │ ├── bevformer_small.py │ │ ├── bevformer_small_det.py │ │ ├── bevformer_small_seg.py │ │ ├── bevformer_small_seg_det.py │ │ ├── bevformer_small_seg_det_300x300.py │ │ └── bevformer_tiny.py │ ├── bevformer_fp16 │ │ ├── bevformer_small_seg_det_fp16.py │ │ └── bevformer_tiny_fp16.py │ └── datasets │ │ ├── custom_lyft-3d.py │ │ ├── custom_nus-3d.py │ │ └── custom_waymo-3d.py └── mmdet3d_plugin │ ├── __init__.py │ ├── apis │ ├── __init__.py │ └── test.py │ ├── bevformer │ ├── __init__.py │ ├── apis │ │ ├── __init__.py │ │ ├── mmdet_train.py │ │ ├── test.py │ │ └── train.py │ ├── dense_heads │ │ ├── __init__.py │ │ ├── bevformer_head.py │ │ ├── bevformer_seg_head.py │ │ └── loss_utils.py │ ├── detectors │ │ ├── __init__.py │ │ ├── bevformer.py │ │ └── bevformer_fp16.py │ ├── hooks │ │ ├── __init__.py │ │ └── custom_hooks.py │ ├── loss │ │ ├── __init__.py │ │ └── loss.py │ ├── modules │ │ ├── TransformerLSS.py │ │ ├── __init__.py │ │ ├── builder.py │ │ ├── custom_base_transformer_layer.py │ │ ├── decoder.py │ │ ├── encoder.py │ │ ├── multi_scale_deformable_attn_function.py │ │ ├── seg_subnet.py │ │ ├── seg_sugnet.py │ │ ├── seg_sunet.py │ │ ├── spatial_cross_attention.py │ │ ├── temporal_self_attention.py │ │ └── transformer.py │ └── runner │ │ ├── __init__.py │ │ ├── epoch_based_runner.py │ │ └── loss │ │ └── __init__.py │ ├── core │ ├── bbox │ │ ├── assigners │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ └── hungarian_assigner_3d.cpython-38.pyc │ │ │ └── hungarian_assigner_3d.py │ │ ├── coders │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-38.pyc │ │ 
│ │ └── nms_free_coder.cpython-38.pyc │ │ │ └── nms_free_coder.py │ │ ├── match_costs │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-38.pyc │ │ │ │ └── match_cost.cpython-38.pyc │ │ │ └── match_cost.py │ │ └── util.py │ └── evaluation │ │ ├── __init__.py │ │ ├── eval_hooks.py │ │ └── kitti2waymo.py │ ├── datasets │ ├── __init__.py │ ├── builder.py │ ├── mtl_nuscenes_dataset_ego.py │ ├── nuscenes_dataset.py │ ├── nuscenes_mono_dataset.py │ ├── nuscnes_eval.py │ ├── pipelines │ │ ├── __init__.py │ │ ├── bevsegmentation.py │ │ ├── binimg.py │ │ ├── formating.py │ │ ├── loading.py │ │ ├── motion_labels.py │ │ ├── rasterize.py │ │ └── transform_3d.py │ ├── samplers │ │ ├── __init__.py │ │ ├── distributed_sampler.py │ │ ├── group_sampler.py │ │ └── sampler.py │ └── utils │ │ ├── __init__.py │ │ ├── geometry.py │ │ ├── instance.py │ │ ├── rasterize.py │ │ ├── vector_map.py │ │ └── warper.py │ ├── metrics │ ├── __init__.py │ └── metrics.py │ └── models │ ├── backbones │ ├── __init__.py │ └── vovnet.py │ ├── hooks │ ├── __init__.py │ └── hooks.py │ ├── opt │ ├── __init__.py │ └── adamw.py │ └── utils │ ├── __init__.py │ ├── bricks.py │ ├── grid_mask.py │ ├── position_embedding.py │ └── visual.py ├── requirements.txt ├── tools ├── __init__.py ├── analysis_tools │ ├── __init__.py │ ├── analyze_logs.py │ ├── benchmark.py │ ├── get_params.py │ └── visual.py ├── create_data.py ├── data_converter │ ├── __init__.py │ ├── create_gt_database.py │ ├── indoor_converter.py │ ├── kitti_converter.py │ ├── kitti_data_utils.py │ ├── lyft_converter.py │ ├── lyft_data_fixer.py │ ├── nuimage_converter.py │ ├── nuscenes_converter.py │ ├── s3dis_data_utils.py │ ├── scannet_data_utils.py │ ├── sunrgbd_data_utils.py │ └── waymo_converter.py ├── debug_train.py ├── dist_test.sh ├── dist_train.sh ├── fp16 │ ├── dist_train.sh │ └── train.py ├── misc │ ├── browse_dataset.py │ ├── fuse_conv_bn.py │ ├── print_config.py │ └── visualize_results.py ├── model_converters │ ├── convert_votenet_checkpoints.py │ ├── publish_model.py │ └── regnet2mmdet.py ├── test.py └── train.py ├── utils ├── compose_map_det_res.py ├── compose_small_base_vidio.py ├── compose_vidio.py ├── padding_seg_to_det.py └── test_grid_sample.py └── visual_det_seg.py /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | 2 | ARG CUDA_VERSION=11.3.1 3 | ARG OS_VERSION=20.04 4 | # pull a prebuilt image 5 | FROM nvidia/cuda:${CUDA_VERSION}-cudnn8-devel-ubuntu${OS_VERSION} 6 | 7 | # setup timezone 8 | ENV TZ=US/Pacific 9 | RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone 10 | 11 | SHELL ["/bin/bash", "-c"] 12 | 13 | # Required to build Ubuntu 20.04 without user prompts with DLFW container 14 | ENV DEBIAN_FRONTEND=noninteractive 15 | 16 | # Install requried libraries 17 | RUN apt-get update && apt-get install -y software-properties-common 18 | RUN add-apt-repository ppa:ubuntu-toolchain-r/test 19 | RUN apt-get update && apt-get install -y --no-install-recommends \ 20 | libcurl4-openssl-dev \ 21 | wget \ 22 | zlib1g-dev \ 23 | git \ 24 | sudo \ 25 | ssh \ 26 | libssl-dev \ 27 | pbzip2 \ 28 | pv \ 29 | bzip2 \ 30 | unzip \ 31 | devscripts \ 32 | lintian \ 33 | fakeroot \ 34 | dh-make \ 35 | build-essential \ 36 | curl \ 37 | ca-certificates \ 38 | libx11-6 \ 39 | nano \ 40 | graphviz \ 41 | libgl1-mesa-glx \ 42 | openssh-server \ 43 | apt-transport-https 44 | 45 | # Install other dependencies 46 | RUN apt-get update && apt-get install -y --no-install-recommends \ 47 | 
libgtk2.0-0 \ 48 | libcanberra-gtk-module \ 49 | libsm6 libxext6 libxrender-dev \ 50 | libgtk2.0-dev pkg-config \ 51 | libopenmpi-dev \ 52 | && sudo rm -rf /var/lib/apt/lists/* 53 | 54 | # Install Miniconda 55 | RUN wget \ 56 | https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \ 57 | && mkdir /root/.conda \ 58 | && bash Miniconda3-latest-Linux-x86_64.sh -b \ 59 | && rm -f Miniconda3-latest-Linux-x86_64.sh 60 | 61 | ENV CONDA_DEFAULT_ENV=${project} 62 | ENV CONDA_PREFIX=/root/miniconda3/envs/$CONDA_DEFAULT_ENV 63 | ENV PATH=/root/miniconda3/bin:$CONDA_PREFIX/bin:$PATH 64 | 65 | # install python 3.8 66 | RUN conda install python=3.8 67 | RUN alias python='/root/miniconda3/envs/bin/python3.8' 68 | 69 | # Set environment and working directory 70 | ENV CUDA_HOME=/usr/local/cuda 71 | ENV LD_LIBRARY_PATH=$CUDA_HOME/lib64:$CUDA_HOME/extras/CUPTI/lib64/:$LD_LIBRARY_PATH 72 | ENV PATH=$CUDA_HOME/bin:$PATH 73 | ENV CFLAGS="-I$CUDA_HOME/include $CFLAGS" 74 | ENV FORCE_CUDA="1" 75 | ENV PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/root/miniconda3/envs/bin:$PATH 76 | 77 | # install pytorch 78 | RUN pip install torch==1.9.1+cu111 torchvision==0.10.1+cu111 torchaudio==0.9.1 -f https://download.pytorch.org/whl/torch_stable.html 79 | 80 | # install opencv 81 | RUN python -m pip install opencv-python==4.5.5.62 82 | 83 | # install gcc 84 | RUN conda install -c omgarcia gcc-6 85 | 86 | # install torchpack 87 | RUN git clone https://github.com/zhijian-liu/torchpack.git 88 | RUN cd torchpack && python -m pip install -e . 89 | 90 | # install other dependencies 91 | RUN python -m pip install mmcv-full==1.4.0 -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html 92 | RUN python -m pip install pillow==8.4.0 \ 93 | tqdm \ 94 | mmdet==2.14.0 \ 95 | mmsegmentation==0.14.1 \ 96 | numba \ 97 | mpi4py \ 98 | nuscenes-devkit \ 99 | setuptools==59.5.0 100 | 101 | # install mmdetection3d from source 102 | ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX" 103 | ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all" 104 | ENV CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" 105 | 106 | COPY . /BEVFormer 107 | 108 | RUN apt-get update && apt-get install -y ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 \ 109 | && apt-get clean \ 110 | && rm -rf /var/lib/apt/lists/* 111 | RUN cd /BEVFormer && git clone https://github.com/open-mmlab/mmdetection3d.git && \ 112 | cd /BEVFormer/mmdetection3d && \ 113 | git checkout v0.17.1 && \ 114 | python -m pip install -r requirements/build.txt && \ 115 | python -m pip install --no-cache-dir -e . 
116 | 117 | # install timm 118 | RUN python -m pip install timm 119 | RUN pip install timm==0.6.12 torchmetrics==0.3.2 prettytable==3.5.0 120 | 121 | # libraries path 122 | RUN ln -s /usr/local/cuda/lib64/libcusolver.so.11 /usr/local/cuda/lib64/libcusolver.so.10 123 | 124 | WORKDIR /BEVFormer 125 | USER root 126 | RUN ["/bin/bash"] -------------------------------------------------------------------------------- /docs/getting_started.md: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | 3 | **Please ensure you have prepared the environment and the nuScenes dataset.** 4 | 5 | # Train and Test 6 | 7 | Train BEVFormer with 8 GPUs 8 | ``` 9 | ./tools/dist_train.sh ./projects/configs/bevformer/bevformer_base.py 8 10 | ``` 11 | 12 | Evaluate BEVFormer with 8 GPUs 13 | ``` 14 | ./tools/dist_test.sh ./projects/configs/bevformer/bevformer_base.py ./path/to/ckpts.pth 8 15 | ``` 16 | Note: evaluating with a single GPU can yield slightly higher performance, because continuous video sequences may be truncated when evaluation is split across multiple GPUs. By default we report scores evaluated with 8 GPUs. 17 | 18 | 19 | 20 | # Using FP16 to train the model 21 | The above training script does not support FP16 training, 22 | so we provide a separate script to train BEVFormer with FP16. 23 | 24 | ``` 25 | ./tools/fp16/dist_train.sh ./projects/configs/bevformer_fp16/bevformer_tiny_fp16.py 8 26 | ``` 27 | 28 | 29 | # Visualization 30 | 31 | See [visual.py](../tools/analysis_tools/visual.py). -------------------------------------------------------------------------------- /docs/install.md: -------------------------------------------------------------------------------- 1 | # Step-by-step installation instructions 2 | 3 | These instructions follow https://mmdetection3d.readthedocs.io/en/latest/getting_started.html#installation 4 | 5 | 6 | 7 | **a. Create a conda virtual environment and activate it.** 8 | ```shell 9 | conda create -n open-mmlab python=3.8 -y 10 | conda activate open-mmlab 11 | ``` 12 | 13 | **b. Install PyTorch and torchvision following the [official instructions](https://pytorch.org/).** 14 | ```shell 15 | pip install torch==1.9.1+cu111 torchvision==0.10.1+cu111 torchaudio==0.9.1 -f https://download.pytorch.org/whl/torch_stable.html 16 | # Recommended torch>=1.9 17 | 18 | ``` 19 | 20 | **c. Install gcc>=5 in the conda env (optional).** 21 | ```shell 22 | conda install -c omgarcia gcc-6 # gcc-6.2 23 | ``` 24 | 25 | **d. Install mmcv-full.** 26 | ```shell 27 | pip install mmcv-full==1.4.0 28 | # pip install mmcv-full==1.4.0 -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html 29 | ``` 30 | 31 | **e. Install mmdet and mmseg.** 32 | ```shell 33 | pip install mmdet==2.14.0 34 | pip install mmsegmentation==0.14.1 35 | ``` 36 | 37 | **f. Install mmdet3d from source code.** 38 | ```shell 39 | git clone https://github.com/open-mmlab/mmdetection3d.git 40 | cd mmdetection3d 41 | git checkout v0.17.1 # Other versions may not be compatible. 42 | python setup.py install 43 | ``` 44 | 45 | **g. Install timm.** 46 | ```shell 47 | pip install timm 48 | ``` 49 | 50 | 51 | **h. Clone BEVFormer.** 52 | ``` 53 | git clone https://github.com/fundamentalvision/BEVFormer.git 54 | ``` 55 |
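Before moving on to the pretrained weights, it can be worth sanity-checking that the packages installed above import cleanly and report the expected versions. The snippet below is an optional, illustrative check that is not part of the original instructions; the version strings are simply the ones pinned in the steps above.

```python
# Optional environment sanity check (illustrative only; the expected versions
# are the ones pinned in this guide, adjust if you installed different ones).
import torch
import mmcv
import mmdet
import mmseg

print('torch      ', torch.__version__, '| CUDA available:', torch.cuda.is_available())  # expect 1.9.1+cu111, True
print('mmcv-full  ', mmcv.__version__)   # expect 1.4.0
print('mmdet      ', mmdet.__version__)  # expect 2.14.0
print('mmseg      ', mmseg.__version__)  # expect 0.14.1
```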
56 | **i. Prepare pretrained models.** 57 | ```shell 58 | cd bevformer 59 | mkdir ckpts 60 | 61 | cd ckpts && wget https://github.com/zhiqi-li/storage/releases/download/v1.0/r101_dcn_fcos3d_pretrain.pth 62 | ``` 63 | 64 | Note: this pretrained model is the same as the one used in [detr3d](https://github.com/WangYueFt/detr3d). 65 | -------------------------------------------------------------------------------- /docs/prepare_dataset.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## NuScenes 4 | Download the nuScenes V1.0 full dataset and the CAN bus expansion data [HERE](https://www.nuscenes.org/download), then prepare the nuScenes data as follows. 5 | 6 | 7 | **Download CAN bus expansion** 8 | ``` 9 | # download 'can_bus.zip' 10 | unzip can_bus.zip 11 | # move can_bus to the data dir 12 | ``` 13 | 14 | **Prepare nuScenes data** 15 | 16 | *We generate custom annotation files, which are different from mmdet3d's.* 17 | ``` 18 | python tools/create_data.py nuscenes --root-path ./data/nuscenes --out-dir ./data/nuscenes --extra-tag nuscenes --version v1.0 --canbus ./data 19 | ``` 20 | 21 | Running the above command generates `nuscenes_infos_temporal_{train,val}.pkl`. 22 | 23 | **Folder structure** 24 | ``` 25 | bevformer 26 | ├── projects/ 27 | ├── tools/ 28 | ├── configs/ 29 | ├── ckpts/ 30 | │   ├── r101_dcn_fcos3d_pretrain.pth 31 | ├── data/ 32 | │   ├── can_bus/ 33 | │   ├── nuscenes/ 34 | │   │   ├── maps/ 35 | │   │   ├── samples/ 36 | │   │   ├── sweeps/ 37 | │   │   ├── v1.0-test/ 38 | │   │   ├── v1.0-trainval/ 39 | │   │   ├── nuscenes_infos_temporal_train.pkl 40 | │   │   ├── nuscenes_infos_temporal_val.pkl 41 | ``` 42 | -------------------------------------------------------------------------------- /figs/000868a72138448191b4092f75ed7776.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bin-ze/BEVFormer_segmentation_detection/8e2a1beee39946f393322a293f127a489b093377/figs/000868a72138448191b4092f75ed7776.jpg -------------------------------------------------------------------------------- /figs/0141260a339d4b37addb55818bbae718.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bin-ze/BEVFormer_segmentation_detection/8e2a1beee39946f393322a293f127a489b093377/figs/0141260a339d4b37addb55818bbae718.jpg -------------------------------------------------------------------------------- /figs/arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bin-ze/BEVFormer_segmentation_detection/8e2a1beee39946f393322a293f127a489b093377/figs/arch.png -------------------------------------------------------------------------------- /figs/sota_results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bin-ze/BEVFormer_segmentation_detection/8e2a1beee39946f393322a293f127a489b093377/figs/sota_results.png -------------------------------------------------------------------------------- /icon/car.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bin-ze/BEVFormer_segmentation_detection/8e2a1beee39946f393322a293f127a489b093377/icon/car.png -------------------------------------------------------------------------------- /projects/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/Bin-ze/BEVFormer_segmentation_detection/8e2a1beee39946f393322a293f127a489b093377/projects/__init__.py -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/coco_instance.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CocoDataset' 2 | data_root = 'data/coco/' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 8 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 9 | dict(type='RandomFlip', flip_ratio=0.5), 10 | dict(type='Normalize', **img_norm_cfg), 11 | dict(type='Pad', size_divisor=32), 12 | dict(type='DefaultFormatBundle'), 13 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 14 | ] 15 | test_pipeline = [ 16 | dict(type='LoadImageFromFile'), 17 | dict( 18 | type='MultiScaleFlipAug', 19 | img_scale=(1333, 800), 20 | flip=False, 21 | transforms=[ 22 | dict(type='Resize', keep_ratio=True), 23 | dict(type='RandomFlip'), 24 | dict(type='Normalize', **img_norm_cfg), 25 | dict(type='Pad', size_divisor=32), 26 | dict(type='ImageToTensor', keys=['img']), 27 | dict(type='Collect', keys=['img']), 28 | ]) 29 | ] 30 | data = dict( 31 | samples_per_gpu=2, 32 | workers_per_gpu=2, 33 | train=dict( 34 | type=dataset_type, 35 | ann_file=data_root + 'annotations/instances_train2017.json', 36 | img_prefix=data_root + 'train2017/', 37 | pipeline=train_pipeline), 38 | val=dict( 39 | type=dataset_type, 40 | ann_file=data_root + 'annotations/instances_val2017.json', 41 | img_prefix=data_root + 'val2017/', 42 | pipeline=test_pipeline), 43 | test=dict( 44 | type=dataset_type, 45 | ann_file=data_root + 'annotations/instances_val2017.json', 46 | img_prefix=data_root + 'val2017/', 47 | pipeline=test_pipeline)) 48 | evaluation = dict(metric=['bbox', 'segm']) 49 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/kitti-3d-3class.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'KittiDataset' 3 | data_root = 'data/kitti/' 4 | class_names = ['Pedestrian', 'Cyclist', 'Car'] 5 | point_cloud_range = [0, -40, -3, 70.4, 40, 1] 6 | input_modality = dict(use_lidar=True, use_camera=False) 7 | db_sampler = dict( 8 | data_root=data_root, 9 | info_path=data_root + 'kitti_dbinfos_train.pkl', 10 | rate=1.0, 11 | prepare=dict( 12 | filter_by_difficulty=[-1], 13 | filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)), 14 | classes=class_names, 15 | sample_groups=dict(Car=12, Pedestrian=6, Cyclist=6)) 16 | 17 | file_client_args = dict(backend='disk') 18 | # Uncomment the following if use ceph or other file clients. 19 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient 20 | # for more details. 
21 | # file_client_args = dict( 22 | # backend='petrel', path_mapping=dict(data='s3://kitti_data/')) 23 | 24 | train_pipeline = [ 25 | dict( 26 | type='LoadPointsFromFile', 27 | coord_type='LIDAR', 28 | load_dim=4, 29 | use_dim=4, 30 | file_client_args=file_client_args), 31 | dict( 32 | type='LoadAnnotations3D', 33 | with_bbox_3d=True, 34 | with_label_3d=True, 35 | file_client_args=file_client_args), 36 | dict(type='ObjectSample', db_sampler=db_sampler), 37 | dict( 38 | type='ObjectNoise', 39 | num_try=100, 40 | translation_std=[1.0, 1.0, 0.5], 41 | global_rot_range=[0.0, 0.0], 42 | rot_range=[-0.78539816, 0.78539816]), 43 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), 44 | dict( 45 | type='GlobalRotScaleTrans', 46 | rot_range=[-0.78539816, 0.78539816], 47 | scale_ratio_range=[0.95, 1.05]), 48 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), 49 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), 50 | dict(type='PointShuffle'), 51 | dict(type='DefaultFormatBundle3D', class_names=class_names), 52 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 53 | ] 54 | test_pipeline = [ 55 | dict( 56 | type='LoadPointsFromFile', 57 | coord_type='LIDAR', 58 | load_dim=4, 59 | use_dim=4, 60 | file_client_args=file_client_args), 61 | dict( 62 | type='MultiScaleFlipAug3D', 63 | img_scale=(1333, 800), 64 | pts_scale_ratio=1, 65 | flip=False, 66 | transforms=[ 67 | dict( 68 | type='GlobalRotScaleTrans', 69 | rot_range=[0, 0], 70 | scale_ratio_range=[1., 1.], 71 | translation_std=[0, 0, 0]), 72 | dict(type='RandomFlip3D'), 73 | dict( 74 | type='PointsRangeFilter', point_cloud_range=point_cloud_range), 75 | dict( 76 | type='DefaultFormatBundle3D', 77 | class_names=class_names, 78 | with_label=False), 79 | dict(type='Collect3D', keys=['points']) 80 | ]) 81 | ] 82 | # construct a pipeline for data and gt loading in show function 83 | # please keep its loading function consistent with test_pipeline (e.g. client) 84 | eval_pipeline = [ 85 | dict( 86 | type='LoadPointsFromFile', 87 | coord_type='LIDAR', 88 | load_dim=4, 89 | use_dim=4, 90 | file_client_args=file_client_args), 91 | dict( 92 | type='DefaultFormatBundle3D', 93 | class_names=class_names, 94 | with_label=False), 95 | dict(type='Collect3D', keys=['points']) 96 | ] 97 | 98 | data = dict( 99 | samples_per_gpu=6, 100 | workers_per_gpu=4, 101 | train=dict( 102 | type='RepeatDataset', 103 | times=2, 104 | dataset=dict( 105 | type=dataset_type, 106 | data_root=data_root, 107 | ann_file=data_root + 'kitti_infos_train.pkl', 108 | split='training', 109 | pts_prefix='velodyne_reduced', 110 | pipeline=train_pipeline, 111 | modality=input_modality, 112 | classes=class_names, 113 | test_mode=False, 114 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset 115 | # and box_type_3d='Depth' in sunrgbd and scannet dataset. 
116 | box_type_3d='LiDAR')), 117 | val=dict( 118 | type=dataset_type, 119 | data_root=data_root, 120 | ann_file=data_root + 'kitti_infos_val.pkl', 121 | split='training', 122 | pts_prefix='velodyne_reduced', 123 | pipeline=test_pipeline, 124 | modality=input_modality, 125 | classes=class_names, 126 | test_mode=True, 127 | box_type_3d='LiDAR'), 128 | test=dict( 129 | type=dataset_type, 130 | data_root=data_root, 131 | ann_file=data_root + 'kitti_infos_val.pkl', 132 | split='training', 133 | pts_prefix='velodyne_reduced', 134 | pipeline=test_pipeline, 135 | modality=input_modality, 136 | classes=class_names, 137 | test_mode=True, 138 | box_type_3d='LiDAR')) 139 | 140 | evaluation = dict(interval=1, pipeline=eval_pipeline) 141 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/kitti-3d-car.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'KittiDataset' 3 | data_root = 'data/kitti/' 4 | class_names = ['Car'] 5 | point_cloud_range = [0, -40, -3, 70.4, 40, 1] 6 | input_modality = dict(use_lidar=True, use_camera=False) 7 | db_sampler = dict( 8 | data_root=data_root, 9 | info_path=data_root + 'kitti_dbinfos_train.pkl', 10 | rate=1.0, 11 | prepare=dict(filter_by_difficulty=[-1], filter_by_min_points=dict(Car=5)), 12 | classes=class_names, 13 | sample_groups=dict(Car=15)) 14 | 15 | file_client_args = dict(backend='disk') 16 | # Uncomment the following if use ceph or other file clients. 17 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient 18 | # for more details. 19 | # file_client_args = dict( 20 | # backend='petrel', path_mapping=dict(data='s3://kitti_data/')) 21 | 22 | train_pipeline = [ 23 | dict( 24 | type='LoadPointsFromFile', 25 | coord_type='LIDAR', 26 | load_dim=4, 27 | use_dim=4, 28 | file_client_args=file_client_args), 29 | dict( 30 | type='LoadAnnotations3D', 31 | with_bbox_3d=True, 32 | with_label_3d=True, 33 | file_client_args=file_client_args), 34 | dict(type='ObjectSample', db_sampler=db_sampler), 35 | dict( 36 | type='ObjectNoise', 37 | num_try=100, 38 | translation_std=[1.0, 1.0, 0.5], 39 | global_rot_range=[0.0, 0.0], 40 | rot_range=[-0.78539816, 0.78539816]), 41 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), 42 | dict( 43 | type='GlobalRotScaleTrans', 44 | rot_range=[-0.78539816, 0.78539816], 45 | scale_ratio_range=[0.95, 1.05]), 46 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), 47 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), 48 | dict(type='PointShuffle'), 49 | dict(type='DefaultFormatBundle3D', class_names=class_names), 50 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 51 | ] 52 | test_pipeline = [ 53 | dict( 54 | type='LoadPointsFromFile', 55 | coord_type='LIDAR', 56 | load_dim=4, 57 | use_dim=4, 58 | file_client_args=file_client_args), 59 | dict( 60 | type='MultiScaleFlipAug3D', 61 | img_scale=(1333, 800), 62 | pts_scale_ratio=1, 63 | flip=False, 64 | transforms=[ 65 | dict( 66 | type='GlobalRotScaleTrans', 67 | rot_range=[0, 0], 68 | scale_ratio_range=[1., 1.], 69 | translation_std=[0, 0, 0]), 70 | dict(type='RandomFlip3D'), 71 | dict( 72 | type='PointsRangeFilter', point_cloud_range=point_cloud_range), 73 | dict( 74 | type='DefaultFormatBundle3D', 75 | class_names=class_names, 76 | with_label=False), 77 | dict(type='Collect3D', keys=['points']) 78 | ]) 79 | ] 80 | # construct a pipeline for data and gt loading in show 
function 81 | # please keep its loading function consistent with test_pipeline (e.g. client) 82 | eval_pipeline = [ 83 | dict( 84 | type='LoadPointsFromFile', 85 | coord_type='LIDAR', 86 | load_dim=4, 87 | use_dim=4, 88 | file_client_args=file_client_args), 89 | dict( 90 | type='DefaultFormatBundle3D', 91 | class_names=class_names, 92 | with_label=False), 93 | dict(type='Collect3D', keys=['points']) 94 | ] 95 | 96 | data = dict( 97 | samples_per_gpu=6, 98 | workers_per_gpu=4, 99 | train=dict( 100 | type='RepeatDataset', 101 | times=2, 102 | dataset=dict( 103 | type=dataset_type, 104 | data_root=data_root, 105 | ann_file=data_root + 'kitti_infos_train.pkl', 106 | split='training', 107 | pts_prefix='velodyne_reduced', 108 | pipeline=train_pipeline, 109 | modality=input_modality, 110 | classes=class_names, 111 | test_mode=False, 112 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset 113 | # and box_type_3d='Depth' in sunrgbd and scannet dataset. 114 | box_type_3d='LiDAR')), 115 | val=dict( 116 | type=dataset_type, 117 | data_root=data_root, 118 | ann_file=data_root + 'kitti_infos_val.pkl', 119 | split='training', 120 | pts_prefix='velodyne_reduced', 121 | pipeline=test_pipeline, 122 | modality=input_modality, 123 | classes=class_names, 124 | test_mode=True, 125 | box_type_3d='LiDAR'), 126 | test=dict( 127 | type=dataset_type, 128 | data_root=data_root, 129 | ann_file=data_root + 'kitti_infos_val.pkl', 130 | split='training', 131 | pts_prefix='velodyne_reduced', 132 | pipeline=test_pipeline, 133 | modality=input_modality, 134 | classes=class_names, 135 | test_mode=True, 136 | box_type_3d='LiDAR')) 137 | 138 | evaluation = dict(interval=1, pipeline=eval_pipeline) 139 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/lyft-3d.py: -------------------------------------------------------------------------------- 1 | # If point cloud range is changed, the models should also change their point 2 | # cloud range accordingly 3 | point_cloud_range = [-80, -80, -5, 80, 80, 3] 4 | # For Lyft we usually do 9-class detection 5 | class_names = [ 6 | 'car', 'truck', 'bus', 'emergency_vehicle', 'other_vehicle', 'motorcycle', 7 | 'bicycle', 'pedestrian', 'animal' 8 | ] 9 | dataset_type = 'LyftDataset' 10 | data_root = 'data/lyft/' 11 | # Input modality for Lyft dataset, this is consistent with the submission 12 | # format which requires the information in input_modality. 13 | input_modality = dict( 14 | use_lidar=True, 15 | use_camera=False, 16 | use_radar=False, 17 | use_map=False, 18 | use_external=False) 19 | file_client_args = dict(backend='disk') 20 | # Uncomment the following if use ceph or other file clients. 21 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient 22 | # for more details. 
23 | # file_client_args = dict( 24 | # backend='petrel', 25 | # path_mapping=dict({ 26 | # './data/lyft/': 's3://lyft/lyft/', 27 | # 'data/lyft/': 's3://lyft/lyft/' 28 | # })) 29 | train_pipeline = [ 30 | dict( 31 | type='LoadPointsFromFile', 32 | coord_type='LIDAR', 33 | load_dim=5, 34 | use_dim=5, 35 | file_client_args=file_client_args), 36 | dict( 37 | type='LoadPointsFromMultiSweeps', 38 | sweeps_num=10, 39 | file_client_args=file_client_args), 40 | dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True), 41 | dict( 42 | type='GlobalRotScaleTrans', 43 | rot_range=[-0.3925, 0.3925], 44 | scale_ratio_range=[0.95, 1.05], 45 | translation_std=[0, 0, 0]), 46 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), 47 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), 48 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), 49 | dict(type='PointShuffle'), 50 | dict(type='DefaultFormatBundle3D', class_names=class_names), 51 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 52 | ] 53 | test_pipeline = [ 54 | dict( 55 | type='LoadPointsFromFile', 56 | coord_type='LIDAR', 57 | load_dim=5, 58 | use_dim=5, 59 | file_client_args=file_client_args), 60 | dict( 61 | type='LoadPointsFromMultiSweeps', 62 | sweeps_num=10, 63 | file_client_args=file_client_args), 64 | dict( 65 | type='MultiScaleFlipAug3D', 66 | img_scale=(1333, 800), 67 | pts_scale_ratio=1, 68 | flip=False, 69 | transforms=[ 70 | dict( 71 | type='GlobalRotScaleTrans', 72 | rot_range=[0, 0], 73 | scale_ratio_range=[1., 1.], 74 | translation_std=[0, 0, 0]), 75 | dict(type='RandomFlip3D'), 76 | dict( 77 | type='PointsRangeFilter', point_cloud_range=point_cloud_range), 78 | dict( 79 | type='DefaultFormatBundle3D', 80 | class_names=class_names, 81 | with_label=False), 82 | dict(type='Collect3D', keys=['points']) 83 | ]) 84 | ] 85 | # construct a pipeline for data and gt loading in show function 86 | # please keep its loading function consistent with test_pipeline (e.g. client) 87 | eval_pipeline = [ 88 | dict( 89 | type='LoadPointsFromFile', 90 | coord_type='LIDAR', 91 | load_dim=5, 92 | use_dim=5, 93 | file_client_args=file_client_args), 94 | dict( 95 | type='LoadPointsFromMultiSweeps', 96 | sweeps_num=10, 97 | file_client_args=file_client_args), 98 | dict( 99 | type='DefaultFormatBundle3D', 100 | class_names=class_names, 101 | with_label=False), 102 | dict(type='Collect3D', keys=['points']) 103 | ] 104 | 105 | data = dict( 106 | samples_per_gpu=2, 107 | workers_per_gpu=2, 108 | train=dict( 109 | type=dataset_type, 110 | data_root=data_root, 111 | ann_file=data_root + 'lyft_infos_train.pkl', 112 | pipeline=train_pipeline, 113 | classes=class_names, 114 | modality=input_modality, 115 | test_mode=False), 116 | val=dict( 117 | type=dataset_type, 118 | data_root=data_root, 119 | ann_file=data_root + 'lyft_infos_val.pkl', 120 | pipeline=test_pipeline, 121 | classes=class_names, 122 | modality=input_modality, 123 | test_mode=True), 124 | test=dict( 125 | type=dataset_type, 126 | data_root=data_root, 127 | ann_file=data_root + 'lyft_infos_test.pkl', 128 | pipeline=test_pipeline, 129 | classes=class_names, 130 | modality=input_modality, 131 | test_mode=True)) 132 | # For Lyft dataset, we usually evaluate the model at the end of training. 133 | # Since the models are trained by 24 epochs by default, we set evaluation 134 | # interval to be 24. Please change the interval accordingly if you do not 135 | # use a default schedule. 
136 | evaluation = dict(interval=24, pipeline=eval_pipeline) 137 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/nuim_instance.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CocoDataset' 2 | data_root = 'data/nuimages/' 3 | class_names = [ 4 | 'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle', 5 | 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier' 6 | ] 7 | img_norm_cfg = dict( 8 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 9 | train_pipeline = [ 10 | dict(type='LoadImageFromFile'), 11 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 12 | dict( 13 | type='Resize', 14 | img_scale=[(1280, 720), (1920, 1080)], 15 | multiscale_mode='range', 16 | keep_ratio=True), 17 | dict(type='RandomFlip', flip_ratio=0.5), 18 | dict(type='Normalize', **img_norm_cfg), 19 | dict(type='Pad', size_divisor=32), 20 | dict(type='DefaultFormatBundle'), 21 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 22 | ] 23 | test_pipeline = [ 24 | dict(type='LoadImageFromFile'), 25 | dict( 26 | type='MultiScaleFlipAug', 27 | img_scale=(1600, 900), 28 | flip=False, 29 | transforms=[ 30 | dict(type='Resize', keep_ratio=True), 31 | dict(type='RandomFlip'), 32 | dict(type='Normalize', **img_norm_cfg), 33 | dict(type='Pad', size_divisor=32), 34 | dict(type='ImageToTensor', keys=['img']), 35 | dict(type='Collect', keys=['img']), 36 | ]) 37 | ] 38 | data = dict( 39 | samples_per_gpu=2, 40 | workers_per_gpu=2, 41 | train=dict( 42 | type=dataset_type, 43 | ann_file=data_root + 'annotations/nuimages_v1.0-train.json', 44 | img_prefix=data_root, 45 | classes=class_names, 46 | pipeline=train_pipeline), 47 | val=dict( 48 | type=dataset_type, 49 | ann_file=data_root + 'annotations/nuimages_v1.0-val.json', 50 | img_prefix=data_root, 51 | classes=class_names, 52 | pipeline=test_pipeline), 53 | test=dict( 54 | type=dataset_type, 55 | ann_file=data_root + 'annotations/nuimages_v1.0-val.json', 56 | img_prefix=data_root, 57 | classes=class_names, 58 | pipeline=test_pipeline)) 59 | evaluation = dict(metric=['bbox', 'segm']) 60 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/nus-mono3d.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CustomNuScenesMonoDataset' 2 | data_root = 'data/nuscenes/' 3 | class_names = [ 4 | 'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle', 5 | 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier' 6 | ] 7 | # Input modality for nuScenes dataset, this is consistent with the submission 8 | # format which requires the information in input_modality. 
9 | input_modality = dict( 10 | use_lidar=False, 11 | use_camera=True, 12 | use_radar=False, 13 | use_map=False, 14 | use_external=False) 15 | img_norm_cfg = dict( 16 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 17 | train_pipeline = [ 18 | dict(type='LoadImageFromFileMono3D'), 19 | dict( 20 | type='LoadAnnotations3D', 21 | with_bbox=True, 22 | with_label=True, 23 | with_attr_label=True, 24 | with_bbox_3d=True, 25 | with_label_3d=True, 26 | with_bbox_depth=True), 27 | dict(type='Resize', img_scale=(1600, 900), keep_ratio=True), 28 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='Pad', size_divisor=32), 31 | dict(type='DefaultFormatBundle3D', class_names=class_names), 32 | dict( 33 | type='Collect3D', 34 | keys=[ 35 | 'img', 'gt_bboxes', 'gt_labels', 'attr_labels', 'gt_bboxes_3d', 36 | 'gt_labels_3d', 'centers2d', 'depths' 37 | ]), 38 | ] 39 | test_pipeline = [ 40 | dict(type='LoadImageFromFileMono3D'), 41 | dict( 42 | type='MultiScaleFlipAug', 43 | scale_factor=1.0, 44 | flip=False, 45 | transforms=[ 46 | dict(type='RandomFlip3D'), 47 | dict(type='Normalize', **img_norm_cfg), 48 | dict(type='Pad', size_divisor=32), 49 | dict( 50 | type='DefaultFormatBundle3D', 51 | class_names=class_names, 52 | with_label=False), 53 | dict(type='Collect3D', keys=['img']), 54 | ]) 55 | ] 56 | # construct a pipeline for data and gt loading in show function 57 | # please keep its loading function consistent with test_pipeline (e.g. client) 58 | eval_pipeline = [ 59 | dict(type='LoadImageFromFileMono3D'), 60 | dict( 61 | type='DefaultFormatBundle3D', 62 | class_names=class_names, 63 | with_label=False), 64 | dict(type='Collect3D', keys=['img']) 65 | ] 66 | 67 | data = dict( 68 | samples_per_gpu=2, 69 | workers_per_gpu=2, 70 | train=dict( 71 | type=dataset_type, 72 | data_root=data_root, 73 | ann_file=data_root + 'nuscenes_infos_train_mono3d.coco.json', 74 | img_prefix=data_root, 75 | classes=class_names, 76 | pipeline=train_pipeline, 77 | modality=input_modality, 78 | test_mode=False, 79 | box_type_3d='Camera'), 80 | val=dict( 81 | type=dataset_type, 82 | data_root=data_root, 83 | ann_file=data_root + 'nuscenes_infos_val_mono3d.coco.json', 84 | img_prefix=data_root, 85 | classes=class_names, 86 | pipeline=test_pipeline, 87 | modality=input_modality, 88 | test_mode=True, 89 | box_type_3d='Camera'), 90 | test=dict( 91 | type=dataset_type, 92 | data_root=data_root, 93 | ann_file=data_root + 'nuscenes_infos_val_mono3d.coco.json', 94 | img_prefix=data_root, 95 | classes=class_names, 96 | pipeline=test_pipeline, 97 | modality=input_modality, 98 | test_mode=True, 99 | box_type_3d='Camera')) 100 | evaluation = dict(interval=2) 101 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/range100_lyft-3d.py: -------------------------------------------------------------------------------- 1 | # If point cloud range is changed, the models should also change their point 2 | # cloud range accordingly 3 | point_cloud_range = [-100, -100, -5, 100, 100, 3] 4 | # For Lyft we usually do 9-class detection 5 | class_names = [ 6 | 'car', 'truck', 'bus', 'emergency_vehicle', 'other_vehicle', 'motorcycle', 7 | 'bicycle', 'pedestrian', 'animal' 8 | ] 9 | dataset_type = 'LyftDataset' 10 | data_root = 'data/lyft/' 11 | # Input modality for Lyft dataset, this is consistent with the submission 12 | # format which requires the information in input_modality. 
13 | input_modality = dict( 14 | use_lidar=True, 15 | use_camera=False, 16 | use_radar=False, 17 | use_map=False, 18 | use_external=False) 19 | file_client_args = dict(backend='disk') 20 | # Uncomment the following if use ceph or other file clients. 21 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient 22 | # for more details. 23 | # file_client_args = dict( 24 | # backend='petrel', 25 | # path_mapping=dict({ 26 | # './data/lyft/': 's3://lyft/lyft/', 27 | # 'data/lyft/': 's3://lyft/lyft/' 28 | # })) 29 | train_pipeline = [ 30 | dict( 31 | type='LoadPointsFromFile', 32 | coord_type='LIDAR', 33 | load_dim=5, 34 | use_dim=5, 35 | file_client_args=file_client_args), 36 | dict( 37 | type='LoadPointsFromMultiSweeps', 38 | sweeps_num=10, 39 | file_client_args=file_client_args), 40 | dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True), 41 | dict( 42 | type='GlobalRotScaleTrans', 43 | rot_range=[-0.3925, 0.3925], 44 | scale_ratio_range=[0.95, 1.05], 45 | translation_std=[0, 0, 0]), 46 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), 47 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), 48 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), 49 | dict(type='PointShuffle'), 50 | dict(type='DefaultFormatBundle3D', class_names=class_names), 51 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 52 | ] 53 | test_pipeline = [ 54 | dict( 55 | type='LoadPointsFromFile', 56 | coord_type='LIDAR', 57 | load_dim=5, 58 | use_dim=5, 59 | file_client_args=file_client_args), 60 | dict( 61 | type='LoadPointsFromMultiSweeps', 62 | sweeps_num=10, 63 | file_client_args=file_client_args), 64 | dict( 65 | type='MultiScaleFlipAug3D', 66 | img_scale=(1333, 800), 67 | pts_scale_ratio=1, 68 | flip=False, 69 | transforms=[ 70 | dict( 71 | type='GlobalRotScaleTrans', 72 | rot_range=[0, 0], 73 | scale_ratio_range=[1., 1.], 74 | translation_std=[0, 0, 0]), 75 | dict(type='RandomFlip3D'), 76 | dict( 77 | type='PointsRangeFilter', point_cloud_range=point_cloud_range), 78 | dict( 79 | type='DefaultFormatBundle3D', 80 | class_names=class_names, 81 | with_label=False), 82 | dict(type='Collect3D', keys=['points']) 83 | ]) 84 | ] 85 | # construct a pipeline for data and gt loading in show function 86 | # please keep its loading function consistent with test_pipeline (e.g. 
client) 87 | eval_pipeline = [ 88 | dict( 89 | type='LoadPointsFromFile', 90 | coord_type='LIDAR', 91 | load_dim=5, 92 | use_dim=5, 93 | file_client_args=file_client_args), 94 | dict( 95 | type='LoadPointsFromMultiSweeps', 96 | sweeps_num=10, 97 | file_client_args=file_client_args), 98 | dict( 99 | type='DefaultFormatBundle3D', 100 | class_names=class_names, 101 | with_label=False), 102 | dict(type='Collect3D', keys=['points']) 103 | ] 104 | 105 | data = dict( 106 | samples_per_gpu=2, 107 | workers_per_gpu=2, 108 | train=dict( 109 | type=dataset_type, 110 | data_root=data_root, 111 | ann_file=data_root + 'lyft_infos_train.pkl', 112 | pipeline=train_pipeline, 113 | classes=class_names, 114 | modality=input_modality, 115 | test_mode=False), 116 | val=dict( 117 | type=dataset_type, 118 | data_root=data_root, 119 | ann_file=data_root + 'lyft_infos_val.pkl', 120 | pipeline=test_pipeline, 121 | classes=class_names, 122 | modality=input_modality, 123 | test_mode=True), 124 | test=dict( 125 | type=dataset_type, 126 | data_root=data_root, 127 | ann_file=data_root + 'lyft_infos_test.pkl', 128 | pipeline=test_pipeline, 129 | classes=class_names, 130 | modality=input_modality, 131 | test_mode=True)) 132 | # For Lyft dataset, we usually evaluate the model at the end of training. 133 | # Since the models are trained by 24 epochs by default, we set evaluation 134 | # interval to be 24. Please change the interval accordingly if you do not 135 | # use a default schedule. 136 | evaluation = dict(interval=24, pipeline=eval_pipeline) 137 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/s3dis-3d-5class.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'S3DISDataset' 3 | data_root = './data/s3dis/' 4 | class_names = ('table', 'chair', 'sofa', 'bookcase', 'board') 5 | train_area = [1, 2, 3, 4, 6] 6 | test_area = 5 7 | 8 | train_pipeline = [ 9 | dict( 10 | type='LoadPointsFromFile', 11 | coord_type='DEPTH', 12 | shift_height=True, 13 | load_dim=6, 14 | use_dim=[0, 1, 2, 3, 4, 5]), 15 | dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True), 16 | dict(type='PointSample', num_points=40000), 17 | dict( 18 | type='RandomFlip3D', 19 | sync_2d=False, 20 | flip_ratio_bev_horizontal=0.5, 21 | flip_ratio_bev_vertical=0.5), 22 | dict( 23 | type='GlobalRotScaleTrans', 24 | # following ScanNet dataset the rotation range is 5 degrees 25 | rot_range=[-0.087266, 0.087266], 26 | scale_ratio_range=[1.0, 1.0], 27 | shift_height=True), 28 | dict(type='DefaultFormatBundle3D', class_names=class_names), 29 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 30 | ] 31 | test_pipeline = [ 32 | dict( 33 | type='LoadPointsFromFile', 34 | coord_type='DEPTH', 35 | shift_height=True, 36 | load_dim=6, 37 | use_dim=[0, 1, 2, 3, 4, 5]), 38 | dict( 39 | type='MultiScaleFlipAug3D', 40 | img_scale=(1333, 800), 41 | pts_scale_ratio=1, 42 | flip=False, 43 | transforms=[ 44 | dict( 45 | type='GlobalRotScaleTrans', 46 | rot_range=[0, 0], 47 | scale_ratio_range=[1., 1.], 48 | translation_std=[0, 0, 0]), 49 | dict( 50 | type='RandomFlip3D', 51 | sync_2d=False, 52 | flip_ratio_bev_horizontal=0.5, 53 | flip_ratio_bev_vertical=0.5), 54 | dict(type='PointSample', num_points=40000), 55 | dict( 56 | type='DefaultFormatBundle3D', 57 | class_names=class_names, 58 | with_label=False), 59 | dict(type='Collect3D', keys=['points']) 60 | ]) 61 | ] 62 | # construct a pipeline for data and 
gt loading in show function 63 | # please keep its loading function consistent with test_pipeline (e.g. client) 64 | eval_pipeline = [ 65 | dict( 66 | type='LoadPointsFromFile', 67 | coord_type='DEPTH', 68 | shift_height=False, 69 | load_dim=6, 70 | use_dim=[0, 1, 2, 3, 4, 5]), 71 | dict( 72 | type='DefaultFormatBundle3D', 73 | class_names=class_names, 74 | with_label=False), 75 | dict(type='Collect3D', keys=['points']) 76 | ] 77 | 78 | data = dict( 79 | samples_per_gpu=8, 80 | workers_per_gpu=4, 81 | train=dict( 82 | type='RepeatDataset', 83 | times=5, 84 | dataset=dict( 85 | type='ConcatDataset', 86 | datasets=[ 87 | dict( 88 | type=dataset_type, 89 | data_root=data_root, 90 | ann_file=data_root + f's3dis_infos_Area_{i}.pkl', 91 | pipeline=train_pipeline, 92 | filter_empty_gt=False, 93 | classes=class_names, 94 | box_type_3d='Depth') for i in train_area 95 | ], 96 | separate_eval=False)), 97 | val=dict( 98 | type=dataset_type, 99 | data_root=data_root, 100 | ann_file=data_root + f's3dis_infos_Area_{test_area}.pkl', 101 | pipeline=test_pipeline, 102 | classes=class_names, 103 | test_mode=True, 104 | box_type_3d='Depth'), 105 | test=dict( 106 | type=dataset_type, 107 | data_root=data_root, 108 | ann_file=data_root + f's3dis_infos_Area_{test_area}.pkl', 109 | pipeline=test_pipeline, 110 | classes=class_names, 111 | test_mode=True, 112 | box_type_3d='Depth')) 113 | 114 | evaluation = dict(pipeline=eval_pipeline) 115 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/s3dis_seg-3d-13class.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'S3DISSegDataset' 3 | data_root = './data/s3dis/' 4 | class_names = ('ceiling', 'floor', 'wall', 'beam', 'column', 'window', 'door', 5 | 'table', 'chair', 'sofa', 'bookcase', 'board', 'clutter') 6 | num_points = 4096 7 | train_area = [1, 2, 3, 4, 6] 8 | test_area = 5 9 | train_pipeline = [ 10 | dict( 11 | type='LoadPointsFromFile', 12 | coord_type='DEPTH', 13 | shift_height=False, 14 | use_color=True, 15 | load_dim=6, 16 | use_dim=[0, 1, 2, 3, 4, 5]), 17 | dict( 18 | type='LoadAnnotations3D', 19 | with_bbox_3d=False, 20 | with_label_3d=False, 21 | with_mask_3d=False, 22 | with_seg_3d=True), 23 | dict( 24 | type='PointSegClassMapping', 25 | valid_cat_ids=tuple(range(len(class_names))), 26 | max_cat_id=13), 27 | dict( 28 | type='IndoorPatchPointSample', 29 | num_points=num_points, 30 | block_size=1.0, 31 | ignore_index=len(class_names), 32 | use_normalized_coord=True, 33 | enlarge_size=0.2, 34 | min_unique_num=None), 35 | dict(type='NormalizePointsColor', color_mean=None), 36 | dict(type='DefaultFormatBundle3D', class_names=class_names), 37 | dict(type='Collect3D', keys=['points', 'pts_semantic_mask']) 38 | ] 39 | test_pipeline = [ 40 | dict( 41 | type='LoadPointsFromFile', 42 | coord_type='DEPTH', 43 | shift_height=False, 44 | use_color=True, 45 | load_dim=6, 46 | use_dim=[0, 1, 2, 3, 4, 5]), 47 | dict(type='NormalizePointsColor', color_mean=None), 48 | dict( 49 | # a wrapper in order to successfully call test function 50 | # actually we don't perform test-time-aug 51 | type='MultiScaleFlipAug3D', 52 | img_scale=(1333, 800), 53 | pts_scale_ratio=1, 54 | flip=False, 55 | transforms=[ 56 | dict( 57 | type='GlobalRotScaleTrans', 58 | rot_range=[0, 0], 59 | scale_ratio_range=[1., 1.], 60 | translation_std=[0, 0, 0]), 61 | dict( 62 | type='RandomFlip3D', 63 | sync_2d=False, 64 | flip_ratio_bev_horizontal=0.0, 65 | 
flip_ratio_bev_vertical=0.0), 66 | dict( 67 | type='DefaultFormatBundle3D', 68 | class_names=class_names, 69 | with_label=False), 70 | dict(type='Collect3D', keys=['points']) 71 | ]) 72 | ] 73 | # construct a pipeline for data and gt loading in show function 74 | # please keep its loading function consistent with test_pipeline (e.g. client) 75 | # we need to load gt seg_mask! 76 | eval_pipeline = [ 77 | dict( 78 | type='LoadPointsFromFile', 79 | coord_type='DEPTH', 80 | shift_height=False, 81 | use_color=True, 82 | load_dim=6, 83 | use_dim=[0, 1, 2, 3, 4, 5]), 84 | dict( 85 | type='LoadAnnotations3D', 86 | with_bbox_3d=False, 87 | with_label_3d=False, 88 | with_mask_3d=False, 89 | with_seg_3d=True), 90 | dict( 91 | type='PointSegClassMapping', 92 | valid_cat_ids=tuple(range(len(class_names))), 93 | max_cat_id=13), 94 | dict( 95 | type='DefaultFormatBundle3D', 96 | with_label=False, 97 | class_names=class_names), 98 | dict(type='Collect3D', keys=['points', 'pts_semantic_mask']) 99 | ] 100 | 101 | data = dict( 102 | samples_per_gpu=8, 103 | workers_per_gpu=4, 104 | # train on area 1, 2, 3, 4, 6 105 | # test on area 5 106 | train=dict( 107 | type=dataset_type, 108 | data_root=data_root, 109 | ann_files=[ 110 | data_root + f's3dis_infos_Area_{i}.pkl' for i in train_area 111 | ], 112 | pipeline=train_pipeline, 113 | classes=class_names, 114 | test_mode=False, 115 | ignore_index=len(class_names), 116 | scene_idxs=[ 117 | data_root + f'seg_info/Area_{i}_resampled_scene_idxs.npy' 118 | for i in train_area 119 | ]), 120 | val=dict( 121 | type=dataset_type, 122 | data_root=data_root, 123 | ann_files=data_root + f's3dis_infos_Area_{test_area}.pkl', 124 | pipeline=test_pipeline, 125 | classes=class_names, 126 | test_mode=True, 127 | ignore_index=len(class_names), 128 | scene_idxs=data_root + 129 | f'seg_info/Area_{test_area}_resampled_scene_idxs.npy'), 130 | test=dict( 131 | type=dataset_type, 132 | data_root=data_root, 133 | ann_files=data_root + f's3dis_infos_Area_{test_area}.pkl', 134 | pipeline=test_pipeline, 135 | classes=class_names, 136 | test_mode=True, 137 | ignore_index=len(class_names))) 138 | 139 | evaluation = dict(pipeline=eval_pipeline) 140 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/scannet-3d-18class.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'ScanNetDataset' 3 | data_root = './data/scannet/' 4 | class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window', 5 | 'bookshelf', 'picture', 'counter', 'desk', 'curtain', 6 | 'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub', 7 | 'garbagebin') 8 | train_pipeline = [ 9 | dict( 10 | type='LoadPointsFromFile', 11 | coord_type='DEPTH', 12 | shift_height=True, 13 | load_dim=6, 14 | use_dim=[0, 1, 2]), 15 | dict( 16 | type='LoadAnnotations3D', 17 | with_bbox_3d=True, 18 | with_label_3d=True, 19 | with_mask_3d=True, 20 | with_seg_3d=True), 21 | dict(type='GlobalAlignment', rotation_axis=2), 22 | dict( 23 | type='PointSegClassMapping', 24 | valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 25 | 36, 39), 26 | max_cat_id=40), 27 | dict(type='PointSample', num_points=40000), 28 | dict( 29 | type='RandomFlip3D', 30 | sync_2d=False, 31 | flip_ratio_bev_horizontal=0.5, 32 | flip_ratio_bev_vertical=0.5), 33 | dict( 34 | type='GlobalRotScaleTrans', 35 | rot_range=[-0.087266, 0.087266], 36 | scale_ratio_range=[1.0, 1.0], 37 | shift_height=True), 38 | 
dict(type='DefaultFormatBundle3D', class_names=class_names), 39 | dict( 40 | type='Collect3D', 41 | keys=[ 42 | 'points', 'gt_bboxes_3d', 'gt_labels_3d', 'pts_semantic_mask', 43 | 'pts_instance_mask' 44 | ]) 45 | ] 46 | test_pipeline = [ 47 | dict( 48 | type='LoadPointsFromFile', 49 | coord_type='DEPTH', 50 | shift_height=True, 51 | load_dim=6, 52 | use_dim=[0, 1, 2]), 53 | dict(type='GlobalAlignment', rotation_axis=2), 54 | dict( 55 | type='MultiScaleFlipAug3D', 56 | img_scale=(1333, 800), 57 | pts_scale_ratio=1, 58 | flip=False, 59 | transforms=[ 60 | dict( 61 | type='GlobalRotScaleTrans', 62 | rot_range=[0, 0], 63 | scale_ratio_range=[1., 1.], 64 | translation_std=[0, 0, 0]), 65 | dict( 66 | type='RandomFlip3D', 67 | sync_2d=False, 68 | flip_ratio_bev_horizontal=0.5, 69 | flip_ratio_bev_vertical=0.5), 70 | dict(type='PointSample', num_points=40000), 71 | dict( 72 | type='DefaultFormatBundle3D', 73 | class_names=class_names, 74 | with_label=False), 75 | dict(type='Collect3D', keys=['points']) 76 | ]) 77 | ] 78 | # construct a pipeline for data and gt loading in show function 79 | # please keep its loading function consistent with test_pipeline (e.g. client) 80 | eval_pipeline = [ 81 | dict( 82 | type='LoadPointsFromFile', 83 | coord_type='DEPTH', 84 | shift_height=False, 85 | load_dim=6, 86 | use_dim=[0, 1, 2]), 87 | dict(type='GlobalAlignment', rotation_axis=2), 88 | dict( 89 | type='DefaultFormatBundle3D', 90 | class_names=class_names, 91 | with_label=False), 92 | dict(type='Collect3D', keys=['points']) 93 | ] 94 | 95 | data = dict( 96 | samples_per_gpu=8, 97 | workers_per_gpu=4, 98 | train=dict( 99 | type='RepeatDataset', 100 | times=5, 101 | dataset=dict( 102 | type=dataset_type, 103 | data_root=data_root, 104 | ann_file=data_root + 'scannet_infos_train.pkl', 105 | pipeline=train_pipeline, 106 | filter_empty_gt=False, 107 | classes=class_names, 108 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset 109 | # and box_type_3d='Depth' in sunrgbd and scannet dataset. 
110 | box_type_3d='Depth')), 111 | val=dict( 112 | type=dataset_type, 113 | data_root=data_root, 114 | ann_file=data_root + 'scannet_infos_val.pkl', 115 | pipeline=test_pipeline, 116 | classes=class_names, 117 | test_mode=True, 118 | box_type_3d='Depth'), 119 | test=dict( 120 | type=dataset_type, 121 | data_root=data_root, 122 | ann_file=data_root + 'scannet_infos_val.pkl', 123 | pipeline=test_pipeline, 124 | classes=class_names, 125 | test_mode=True, 126 | box_type_3d='Depth')) 127 | 128 | evaluation = dict(pipeline=eval_pipeline) 129 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/scannet_seg-3d-20class.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'ScanNetSegDataset' 3 | data_root = './data/scannet/' 4 | class_names = ('wall', 'floor', 'cabinet', 'bed', 'chair', 'sofa', 'table', 5 | 'door', 'window', 'bookshelf', 'picture', 'counter', 'desk', 6 | 'curtain', 'refrigerator', 'showercurtrain', 'toilet', 'sink', 7 | 'bathtub', 'otherfurniture') 8 | num_points = 8192 9 | train_pipeline = [ 10 | dict( 11 | type='LoadPointsFromFile', 12 | coord_type='DEPTH', 13 | shift_height=False, 14 | use_color=True, 15 | load_dim=6, 16 | use_dim=[0, 1, 2, 3, 4, 5]), 17 | dict( 18 | type='LoadAnnotations3D', 19 | with_bbox_3d=False, 20 | with_label_3d=False, 21 | with_mask_3d=False, 22 | with_seg_3d=True), 23 | dict( 24 | type='PointSegClassMapping', 25 | valid_cat_ids=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 26 | 33, 34, 36, 39), 27 | max_cat_id=40), 28 | dict( 29 | type='IndoorPatchPointSample', 30 | num_points=num_points, 31 | block_size=1.5, 32 | ignore_index=len(class_names), 33 | use_normalized_coord=False, 34 | enlarge_size=0.2, 35 | min_unique_num=None), 36 | dict(type='NormalizePointsColor', color_mean=None), 37 | dict(type='DefaultFormatBundle3D', class_names=class_names), 38 | dict(type='Collect3D', keys=['points', 'pts_semantic_mask']) 39 | ] 40 | test_pipeline = [ 41 | dict( 42 | type='LoadPointsFromFile', 43 | coord_type='DEPTH', 44 | shift_height=False, 45 | use_color=True, 46 | load_dim=6, 47 | use_dim=[0, 1, 2, 3, 4, 5]), 48 | dict(type='NormalizePointsColor', color_mean=None), 49 | dict( 50 | # a wrapper in order to successfully call test function 51 | # actually we don't perform test-time-aug 52 | type='MultiScaleFlipAug3D', 53 | img_scale=(1333, 800), 54 | pts_scale_ratio=1, 55 | flip=False, 56 | transforms=[ 57 | dict( 58 | type='GlobalRotScaleTrans', 59 | rot_range=[0, 0], 60 | scale_ratio_range=[1., 1.], 61 | translation_std=[0, 0, 0]), 62 | dict( 63 | type='RandomFlip3D', 64 | sync_2d=False, 65 | flip_ratio_bev_horizontal=0.0, 66 | flip_ratio_bev_vertical=0.0), 67 | dict( 68 | type='DefaultFormatBundle3D', 69 | class_names=class_names, 70 | with_label=False), 71 | dict(type='Collect3D', keys=['points']) 72 | ]) 73 | ] 74 | # construct a pipeline for data and gt loading in show function 75 | # please keep its loading function consistent with test_pipeline (e.g. client) 76 | # we need to load gt seg_mask! 
77 | eval_pipeline = [ 78 | dict( 79 | type='LoadPointsFromFile', 80 | coord_type='DEPTH', 81 | shift_height=False, 82 | use_color=True, 83 | load_dim=6, 84 | use_dim=[0, 1, 2, 3, 4, 5]), 85 | dict( 86 | type='LoadAnnotations3D', 87 | with_bbox_3d=False, 88 | with_label_3d=False, 89 | with_mask_3d=False, 90 | with_seg_3d=True), 91 | dict( 92 | type='PointSegClassMapping', 93 | valid_cat_ids=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 94 | 33, 34, 36, 39), 95 | max_cat_id=40), 96 | dict( 97 | type='DefaultFormatBundle3D', 98 | with_label=False, 99 | class_names=class_names), 100 | dict(type='Collect3D', keys=['points', 'pts_semantic_mask']) 101 | ] 102 | 103 | data = dict( 104 | samples_per_gpu=8, 105 | workers_per_gpu=4, 106 | train=dict( 107 | type=dataset_type, 108 | data_root=data_root, 109 | ann_file=data_root + 'scannet_infos_train.pkl', 110 | pipeline=train_pipeline, 111 | classes=class_names, 112 | test_mode=False, 113 | ignore_index=len(class_names), 114 | scene_idxs=data_root + 'seg_info/train_resampled_scene_idxs.npy'), 115 | val=dict( 116 | type=dataset_type, 117 | data_root=data_root, 118 | ann_file=data_root + 'scannet_infos_val.pkl', 119 | pipeline=test_pipeline, 120 | classes=class_names, 121 | test_mode=True, 122 | ignore_index=len(class_names)), 123 | test=dict( 124 | type=dataset_type, 125 | data_root=data_root, 126 | ann_file=data_root + 'scannet_infos_val.pkl', 127 | pipeline=test_pipeline, 128 | classes=class_names, 129 | test_mode=True, 130 | ignore_index=len(class_names))) 131 | 132 | evaluation = dict(pipeline=eval_pipeline) 133 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/sunrgbd-3d-10class.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'SUNRGBDDataset' 2 | data_root = 'data/sunrgbd/' 3 | class_names = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser', 4 | 'night_stand', 'bookshelf', 'bathtub') 5 | train_pipeline = [ 6 | dict( 7 | type='LoadPointsFromFile', 8 | coord_type='DEPTH', 9 | shift_height=True, 10 | load_dim=6, 11 | use_dim=[0, 1, 2]), 12 | dict(type='LoadAnnotations3D'), 13 | dict( 14 | type='RandomFlip3D', 15 | sync_2d=False, 16 | flip_ratio_bev_horizontal=0.5, 17 | ), 18 | dict( 19 | type='GlobalRotScaleTrans', 20 | rot_range=[-0.523599, 0.523599], 21 | scale_ratio_range=[0.85, 1.15], 22 | shift_height=True), 23 | dict(type='PointSample', num_points=20000), 24 | dict(type='DefaultFormatBundle3D', class_names=class_names), 25 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 26 | ] 27 | test_pipeline = [ 28 | dict( 29 | type='LoadPointsFromFile', 30 | coord_type='DEPTH', 31 | shift_height=True, 32 | load_dim=6, 33 | use_dim=[0, 1, 2]), 34 | dict( 35 | type='MultiScaleFlipAug3D', 36 | img_scale=(1333, 800), 37 | pts_scale_ratio=1, 38 | flip=False, 39 | transforms=[ 40 | dict( 41 | type='GlobalRotScaleTrans', 42 | rot_range=[0, 0], 43 | scale_ratio_range=[1., 1.], 44 | translation_std=[0, 0, 0]), 45 | dict( 46 | type='RandomFlip3D', 47 | sync_2d=False, 48 | flip_ratio_bev_horizontal=0.5, 49 | ), 50 | dict(type='PointSample', num_points=20000), 51 | dict( 52 | type='DefaultFormatBundle3D', 53 | class_names=class_names, 54 | with_label=False), 55 | dict(type='Collect3D', keys=['points']) 56 | ]) 57 | ] 58 | # construct a pipeline for data and gt loading in show function 59 | # please keep its loading function consistent with test_pipeline (e.g. 
client) 60 | eval_pipeline = [ 61 | dict( 62 | type='LoadPointsFromFile', 63 | coord_type='DEPTH', 64 | shift_height=False, 65 | load_dim=6, 66 | use_dim=[0, 1, 2]), 67 | dict( 68 | type='DefaultFormatBundle3D', 69 | class_names=class_names, 70 | with_label=False), 71 | dict(type='Collect3D', keys=['points']) 72 | ] 73 | 74 | data = dict( 75 | samples_per_gpu=16, 76 | workers_per_gpu=4, 77 | train=dict( 78 | type='RepeatDataset', 79 | times=5, 80 | dataset=dict( 81 | type=dataset_type, 82 | data_root=data_root, 83 | ann_file=data_root + 'sunrgbd_infos_train.pkl', 84 | pipeline=train_pipeline, 85 | classes=class_names, 86 | filter_empty_gt=False, 87 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset 88 | # and box_type_3d='Depth' in sunrgbd and scannet dataset. 89 | box_type_3d='Depth')), 90 | val=dict( 91 | type=dataset_type, 92 | data_root=data_root, 93 | ann_file=data_root + 'sunrgbd_infos_val.pkl', 94 | pipeline=test_pipeline, 95 | classes=class_names, 96 | test_mode=True, 97 | box_type_3d='Depth'), 98 | test=dict( 99 | type=dataset_type, 100 | data_root=data_root, 101 | ann_file=data_root + 'sunrgbd_infos_val.pkl', 102 | pipeline=test_pipeline, 103 | classes=class_names, 104 | test_mode=True, 105 | box_type_3d='Depth')) 106 | 107 | evaluation = dict(pipeline=eval_pipeline) 108 | -------------------------------------------------------------------------------- /projects/configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | checkpoint_config = dict(interval=1) 2 | # yapf:disable push 3 | # By default we use textlogger hook and tensorboard 4 | # For more loggers see 5 | # https://mmcv.readthedocs.io/en/latest/api.html#mmcv.runner.LoggerHook 6 | log_config = dict( 7 | interval=50, 8 | hooks=[ 9 | dict(type='TextLoggerHook'), 10 | dict(type='TensorboardLoggerHook') 11 | ]) 12 | # yapf:enable 13 | dist_params = dict(backend='nccl') 14 | log_level = 'INFO' 15 | work_dir = None 16 | load_from = None 17 | resume_from = None 18 | workflow = [('train', 1)] 19 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/3dssd.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='SSD3DNet', 3 | backbone=dict( 4 | type='PointNet2SAMSG', 5 | in_channels=4, 6 | num_points=(4096, 512, (256, 256)), 7 | radii=((0.2, 0.4, 0.8), (0.4, 0.8, 1.6), (1.6, 3.2, 4.8)), 8 | num_samples=((32, 32, 64), (32, 32, 64), (32, 32, 32)), 9 | sa_channels=(((16, 16, 32), (16, 16, 32), (32, 32, 64)), 10 | ((64, 64, 128), (64, 64, 128), (64, 96, 128)), 11 | ((128, 128, 256), (128, 192, 256), (128, 256, 256))), 12 | aggregation_channels=(64, 128, 256), 13 | fps_mods=(('D-FPS'), ('FS'), ('F-FPS', 'D-FPS')), 14 | fps_sample_range_lists=((-1), (-1), (512, -1)), 15 | norm_cfg=dict(type='BN2d', eps=1e-3, momentum=0.1), 16 | sa_cfg=dict( 17 | type='PointSAModuleMSG', 18 | pool_mod='max', 19 | use_xyz=True, 20 | normalize_xyz=False)), 21 | bbox_head=dict( 22 | type='SSD3DHead', 23 | in_channels=256, 24 | vote_module_cfg=dict( 25 | in_channels=256, 26 | num_points=256, 27 | gt_per_seed=1, 28 | conv_channels=(128, ), 29 | conv_cfg=dict(type='Conv1d'), 30 | norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1), 31 | with_res_feat=False, 32 | vote_xyz_range=(3.0, 3.0, 2.0)), 33 | vote_aggregation_cfg=dict( 34 | type='PointSAModuleMSG', 35 | num_point=256, 36 | radii=(4.8, 6.4), 37 | sample_nums=(16, 32), 38 | mlp_channels=((256, 256, 256, 512), (256, 256, 
512, 1024)), 39 | norm_cfg=dict(type='BN2d', eps=1e-3, momentum=0.1), 40 | use_xyz=True, 41 | normalize_xyz=False, 42 | bias=True), 43 | pred_layer_cfg=dict( 44 | in_channels=1536, 45 | shared_conv_channels=(512, 128), 46 | cls_conv_channels=(128, ), 47 | reg_conv_channels=(128, ), 48 | conv_cfg=dict(type='Conv1d'), 49 | norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1), 50 | bias=True), 51 | conv_cfg=dict(type='Conv1d'), 52 | norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1), 53 | objectness_loss=dict( 54 | type='CrossEntropyLoss', 55 | use_sigmoid=True, 56 | reduction='sum', 57 | loss_weight=1.0), 58 | center_loss=dict( 59 | type='SmoothL1Loss', reduction='sum', loss_weight=1.0), 60 | dir_class_loss=dict( 61 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 62 | dir_res_loss=dict( 63 | type='SmoothL1Loss', reduction='sum', loss_weight=1.0), 64 | size_res_loss=dict( 65 | type='SmoothL1Loss', reduction='sum', loss_weight=1.0), 66 | corner_loss=dict( 67 | type='SmoothL1Loss', reduction='sum', loss_weight=1.0), 68 | vote_loss=dict(type='SmoothL1Loss', reduction='sum', loss_weight=1.0)), 69 | # model training and testing settings 70 | train_cfg=dict( 71 | sample_mod='spec', pos_distance_thr=10.0, expand_dims_length=0.05), 72 | test_cfg=dict( 73 | nms_cfg=dict(type='nms', iou_thr=0.1), 74 | sample_mod='spec', 75 | score_thr=0.0, 76 | per_class_proposal=True, 77 | max_output_num=100)) 78 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/centerpoint_01voxel_second_secfpn_nus.py: -------------------------------------------------------------------------------- 1 | voxel_size = [0.1, 0.1, 0.2] 2 | model = dict( 3 | type='CenterPoint', 4 | pts_voxel_layer=dict( 5 | max_num_points=10, voxel_size=voxel_size, max_voxels=(90000, 120000)), 6 | pts_voxel_encoder=dict(type='HardSimpleVFE', num_features=5), 7 | pts_middle_encoder=dict( 8 | type='SparseEncoder', 9 | in_channels=5, 10 | sparse_shape=[41, 1024, 1024], 11 | output_channels=128, 12 | order=('conv', 'norm', 'act'), 13 | encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128, 14 | 128)), 15 | encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, [0, 1, 1]), (0, 0)), 16 | block_type='basicblock'), 17 | pts_backbone=dict( 18 | type='SECOND', 19 | in_channels=256, 20 | out_channels=[128, 256], 21 | layer_nums=[5, 5], 22 | layer_strides=[1, 2], 23 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), 24 | conv_cfg=dict(type='Conv2d', bias=False)), 25 | pts_neck=dict( 26 | type='SECONDFPN', 27 | in_channels=[128, 256], 28 | out_channels=[256, 256], 29 | upsample_strides=[1, 2], 30 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), 31 | upsample_cfg=dict(type='deconv', bias=False), 32 | use_conv_for_no_stride=True), 33 | pts_bbox_head=dict( 34 | type='CenterHead', 35 | in_channels=sum([256, 256]), 36 | tasks=[ 37 | dict(num_class=1, class_names=['car']), 38 | dict(num_class=2, class_names=['truck', 'construction_vehicle']), 39 | dict(num_class=2, class_names=['bus', 'trailer']), 40 | dict(num_class=1, class_names=['barrier']), 41 | dict(num_class=2, class_names=['motorcycle', 'bicycle']), 42 | dict(num_class=2, class_names=['pedestrian', 'traffic_cone']), 43 | ], 44 | common_heads=dict( 45 | reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)), 46 | share_conv_channel=64, 47 | bbox_coder=dict( 48 | type='CenterPointBBoxCoder', 49 | post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], 50 | max_num=500, 51 | score_threshold=0.1, 52 | out_size_factor=8, 53 | 
voxel_size=voxel_size[:2], 54 | code_size=9), 55 | separate_head=dict( 56 | type='SeparateHead', init_bias=-2.19, final_kernel=3), 57 | loss_cls=dict(type='GaussianFocalLoss', reduction='mean'), 58 | loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25), 59 | norm_bbox=True), 60 | # model training and testing settings 61 | train_cfg=dict( 62 | pts=dict( 63 | grid_size=[1024, 1024, 40], 64 | voxel_size=voxel_size, 65 | out_size_factor=8, 66 | dense_reg=1, 67 | gaussian_overlap=0.1, 68 | max_objs=500, 69 | min_radius=2, 70 | code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2])), 71 | test_cfg=dict( 72 | pts=dict( 73 | post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], 74 | max_per_img=500, 75 | max_pool_nms=False, 76 | min_radius=[4, 12, 10, 1, 0.85, 0.175], 77 | score_threshold=0.1, 78 | out_size_factor=8, 79 | voxel_size=voxel_size[:2], 80 | nms_type='rotate', 81 | pre_max_size=1000, 82 | post_max_size=83, 83 | nms_thr=0.2))) 84 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/centerpoint_02pillar_second_secfpn_nus.py: -------------------------------------------------------------------------------- 1 | voxel_size = [0.2, 0.2, 8] 2 | model = dict( 3 | type='CenterPoint', 4 | pts_voxel_layer=dict( 5 | max_num_points=20, voxel_size=voxel_size, max_voxels=(30000, 40000)), 6 | pts_voxel_encoder=dict( 7 | type='PillarFeatureNet', 8 | in_channels=5, 9 | feat_channels=[64], 10 | with_distance=False, 11 | voxel_size=(0.2, 0.2, 8), 12 | norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01), 13 | legacy=False), 14 | pts_middle_encoder=dict( 15 | type='PointPillarsScatter', in_channels=64, output_shape=(512, 512)), 16 | pts_backbone=dict( 17 | type='SECOND', 18 | in_channels=64, 19 | out_channels=[64, 128, 256], 20 | layer_nums=[3, 5, 5], 21 | layer_strides=[2, 2, 2], 22 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), 23 | conv_cfg=dict(type='Conv2d', bias=False)), 24 | pts_neck=dict( 25 | type='SECONDFPN', 26 | in_channels=[64, 128, 256], 27 | out_channels=[128, 128, 128], 28 | upsample_strides=[0.5, 1, 2], 29 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), 30 | upsample_cfg=dict(type='deconv', bias=False), 31 | use_conv_for_no_stride=True), 32 | pts_bbox_head=dict( 33 | type='CenterHead', 34 | in_channels=sum([128, 128, 128]), 35 | tasks=[ 36 | dict(num_class=1, class_names=['car']), 37 | dict(num_class=2, class_names=['truck', 'construction_vehicle']), 38 | dict(num_class=2, class_names=['bus', 'trailer']), 39 | dict(num_class=1, class_names=['barrier']), 40 | dict(num_class=2, class_names=['motorcycle', 'bicycle']), 41 | dict(num_class=2, class_names=['pedestrian', 'traffic_cone']), 42 | ], 43 | common_heads=dict( 44 | reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)), 45 | share_conv_channel=64, 46 | bbox_coder=dict( 47 | type='CenterPointBBoxCoder', 48 | post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], 49 | max_num=500, 50 | score_threshold=0.1, 51 | out_size_factor=4, 52 | voxel_size=voxel_size[:2], 53 | code_size=9), 54 | separate_head=dict( 55 | type='SeparateHead', init_bias=-2.19, final_kernel=3), 56 | loss_cls=dict(type='GaussianFocalLoss', reduction='mean'), 57 | loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25), 58 | norm_bbox=True), 59 | # model training and testing settings 60 | train_cfg=dict( 61 | pts=dict( 62 | grid_size=[512, 512, 1], 63 | voxel_size=voxel_size, 64 | out_size_factor=4, 65 | dense_reg=1, 66 | gaussian_overlap=0.1, 67 
| max_objs=500, 68 | min_radius=2, 69 | code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2])), 70 | test_cfg=dict( 71 | pts=dict( 72 | post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], 73 | max_per_img=500, 74 | max_pool_nms=False, 75 | min_radius=[4, 12, 10, 1, 0.85, 0.175], 76 | score_threshold=0.1, 77 | pc_range=[-51.2, -51.2], 78 | out_size_factor=4, 79 | voxel_size=voxel_size[:2], 80 | nms_type='rotate', 81 | pre_max_size=1000, 82 | post_max_size=83, 83 | nms_thr=0.2))) 84 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/fcos3d.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='FCOSMono3D', 3 | pretrained='open-mmlab://detectron2/resnet101_caffe', 4 | backbone=dict( 5 | type='ResNet', 6 | depth=101, 7 | num_stages=4, 8 | out_indices=(0, 1, 2, 3), 9 | frozen_stages=1, 10 | norm_cfg=dict(type='BN', requires_grad=False), 11 | norm_eval=True, 12 | style='caffe'), 13 | neck=dict( 14 | type='FPN', 15 | in_channels=[256, 512, 1024, 2048], 16 | out_channels=256, 17 | start_level=1, 18 | add_extra_convs='on_output', 19 | num_outs=5, 20 | relu_before_extra_convs=True), 21 | bbox_head=dict( 22 | type='FCOSMono3DHead', 23 | num_classes=10, 24 | in_channels=256, 25 | stacked_convs=2, 26 | feat_channels=256, 27 | use_direction_classifier=True, 28 | diff_rad_by_sin=True, 29 | pred_attrs=True, 30 | pred_velo=True, 31 | dir_offset=0.7854, # pi/4 32 | strides=[8, 16, 32, 64, 128], 33 | group_reg_dims=(2, 1, 3, 1, 2), # offset, depth, size, rot, velo 34 | cls_branch=(256, ), 35 | reg_branch=( 36 | (256, ), # offset 37 | (256, ), # depth 38 | (256, ), # size 39 | (256, ), # rot 40 | () # velo 41 | ), 42 | dir_branch=(256, ), 43 | attr_branch=(256, ), 44 | loss_cls=dict( 45 | type='FocalLoss', 46 | use_sigmoid=True, 47 | gamma=2.0, 48 | alpha=0.25, 49 | loss_weight=1.0), 50 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), 51 | loss_dir=dict( 52 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 53 | loss_attr=dict( 54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 55 | loss_centerness=dict( 56 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 57 | norm_on_bbox=True, 58 | centerness_on_reg=True, 59 | center_sampling=True, 60 | conv_bias=True, 61 | dcn_on_last_conv=True), 62 | train_cfg=dict( 63 | allowed_border=0, 64 | code_weight=[1.0, 1.0, 0.2, 1.0, 1.0, 1.0, 1.0, 0.05, 0.05], 65 | pos_weight=-1, 66 | debug=False), 67 | test_cfg=dict( 68 | use_rotate_nms=True, 69 | nms_across_levels=False, 70 | nms_pre=1000, 71 | nms_thr=0.8, 72 | score_thr=0.05, 73 | min_bbox_size=0, 74 | max_per_img=200)) 75 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/groupfree3d.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='GroupFree3DNet', 3 | backbone=dict( 4 | type='PointNet2SASSG', 5 | in_channels=3, 6 | num_points=(2048, 1024, 512, 256), 7 | radius=(0.2, 0.4, 0.8, 1.2), 8 | num_samples=(64, 32, 16, 16), 9 | sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256), 10 | (128, 128, 256)), 11 | fp_channels=((256, 256), (256, 288)), 12 | norm_cfg=dict(type='BN2d'), 13 | sa_cfg=dict( 14 | type='PointSAModule', 15 | pool_mod='max', 16 | use_xyz=True, 17 | normalize_xyz=True)), 18 | bbox_head=dict( 19 | type='GroupFree3DHead', 20 | in_channels=288, 21 | num_decoder_layers=6, 22 | 
num_proposal=256, 23 | transformerlayers=dict( 24 | type='BaseTransformerLayer', 25 | attn_cfgs=dict( 26 | type='GroupFree3DMHA', 27 | embed_dims=288, 28 | num_heads=8, 29 | attn_drop=0.1, 30 | dropout_layer=dict(type='Dropout', drop_prob=0.1)), 31 | ffn_cfgs=dict( 32 | embed_dims=288, 33 | feedforward_channels=2048, 34 | ffn_drop=0.1, 35 | act_cfg=dict(type='ReLU', inplace=True)), 36 | operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 'ffn', 37 | 'norm')), 38 | pred_layer_cfg=dict( 39 | in_channels=288, shared_conv_channels=(288, 288), bias=True), 40 | sampling_objectness_loss=dict( 41 | type='FocalLoss', 42 | use_sigmoid=True, 43 | gamma=2.0, 44 | alpha=0.25, 45 | loss_weight=8.0), 46 | objectness_loss=dict( 47 | type='FocalLoss', 48 | use_sigmoid=True, 49 | gamma=2.0, 50 | alpha=0.25, 51 | loss_weight=1.0), 52 | center_loss=dict( 53 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0), 54 | dir_class_loss=dict( 55 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 56 | dir_res_loss=dict( 57 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0), 58 | size_class_loss=dict( 59 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 60 | size_res_loss=dict( 61 | type='SmoothL1Loss', beta=1.0, reduction='sum', loss_weight=10.0), 62 | semantic_loss=dict( 63 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)), 64 | # model training and testing settings 65 | train_cfg=dict(sample_mod='kps'), 66 | test_cfg=dict( 67 | sample_mod='kps', 68 | nms_thr=0.25, 69 | score_thr=0.0, 70 | per_class_proposal=True, 71 | prediction_stages='last')) 72 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/hv_pointpillars_fpn_lyft.py: -------------------------------------------------------------------------------- 1 | _base_ = './hv_pointpillars_fpn_nus.py' 2 | 3 | # model settings (based on nuScenes model settings) 4 | # Voxel size for voxel encoder 5 | # Usually voxel size is changed consistently with the point cloud range 6 | # If point cloud range is modified, do remember to change all related 7 | # keys in the config. 8 | model = dict( 9 | pts_voxel_layer=dict( 10 | max_num_points=20, 11 | point_cloud_range=[-80, -80, -5, 80, 80, 3], 12 | max_voxels=(60000, 60000)), 13 | pts_voxel_encoder=dict( 14 | feat_channels=[64], point_cloud_range=[-80, -80, -5, 80, 80, 3]), 15 | pts_middle_encoder=dict(output_shape=[640, 640]), 16 | pts_bbox_head=dict( 17 | num_classes=9, 18 | anchor_generator=dict( 19 | ranges=[[-80, -80, -1.8, 80, 80, -1.8]], custom_values=[]), 20 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7)), 21 | # model training settings (based on nuScenes model settings) 22 | train_cfg=dict(pts=dict(code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]))) 23 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/hv_pointpillars_fpn_nus.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | # Voxel size for voxel encoder 3 | # Usually voxel size is changed consistently with the point cloud range 4 | # If point cloud range is modified, do remember to change all related 5 | # keys in the config. 
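# As a quick sanity check of that consistency: with the values below,
# (50 - (-50)) / 0.25 = 400 BEV cells per side, which is why pts_middle_encoder
# uses output_shape=[400, 400]; widening the point cloud range would mean
# enlarging output_shape (and the anchor_generator ranges) to match.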
6 | voxel_size = [0.25, 0.25, 8] 7 | model = dict( 8 | type='MVXFasterRCNN', 9 | pts_voxel_layer=dict( 10 | max_num_points=64, 11 | point_cloud_range=[-50, -50, -5, 50, 50, 3], 12 | voxel_size=voxel_size, 13 | max_voxels=(30000, 40000)), 14 | pts_voxel_encoder=dict( 15 | type='HardVFE', 16 | in_channels=4, 17 | feat_channels=[64, 64], 18 | with_distance=False, 19 | voxel_size=voxel_size, 20 | with_cluster_center=True, 21 | with_voxel_center=True, 22 | point_cloud_range=[-50, -50, -5, 50, 50, 3], 23 | norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)), 24 | pts_middle_encoder=dict( 25 | type='PointPillarsScatter', in_channels=64, output_shape=[400, 400]), 26 | pts_backbone=dict( 27 | type='SECOND', 28 | in_channels=64, 29 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 30 | layer_nums=[3, 5, 5], 31 | layer_strides=[2, 2, 2], 32 | out_channels=[64, 128, 256]), 33 | pts_neck=dict( 34 | type='FPN', 35 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 36 | act_cfg=dict(type='ReLU'), 37 | in_channels=[64, 128, 256], 38 | out_channels=256, 39 | start_level=0, 40 | num_outs=3), 41 | pts_bbox_head=dict( 42 | type='Anchor3DHead', 43 | num_classes=10, 44 | in_channels=256, 45 | feat_channels=256, 46 | use_direction_classifier=True, 47 | anchor_generator=dict( 48 | type='AlignedAnchor3DRangeGenerator', 49 | ranges=[[-50, -50, -1.8, 50, 50, -1.8]], 50 | scales=[1, 2, 4], 51 | sizes=[ 52 | [0.8660, 2.5981, 1.], # 1.5/sqrt(3) 53 | [0.5774, 1.7321, 1.], # 1/sqrt(3) 54 | [1., 1., 1.], 55 | [0.4, 0.4, 1], 56 | ], 57 | custom_values=[0, 0], 58 | rotations=[0, 1.57], 59 | reshape_out=True), 60 | assigner_per_size=False, 61 | diff_rad_by_sin=True, 62 | dir_offset=0.7854, # pi/4 63 | dir_limit_offset=0, 64 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9), 65 | loss_cls=dict( 66 | type='FocalLoss', 67 | use_sigmoid=True, 68 | gamma=2.0, 69 | alpha=0.25, 70 | loss_weight=1.0), 71 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), 72 | loss_dir=dict( 73 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 74 | # model training and testing settings 75 | train_cfg=dict( 76 | pts=dict( 77 | assigner=dict( 78 | type='MaxIoUAssigner', 79 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 80 | pos_iou_thr=0.6, 81 | neg_iou_thr=0.3, 82 | min_pos_iou=0.3, 83 | ignore_iof_thr=-1), 84 | allowed_border=0, 85 | code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2], 86 | pos_weight=-1, 87 | debug=False)), 88 | test_cfg=dict( 89 | pts=dict( 90 | use_rotate_nms=True, 91 | nms_across_levels=False, 92 | nms_pre=1000, 93 | nms_thr=0.2, 94 | score_thr=0.05, 95 | min_bbox_size=0, 96 | max_num=500))) 97 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/hv_pointpillars_fpn_range100_lyft.py: -------------------------------------------------------------------------------- 1 | _base_ = './hv_pointpillars_fpn_nus.py' 2 | 3 | # model settings (based on nuScenes model settings) 4 | # Voxel size for voxel encoder 5 | # Usually voxel size is changed consistently with the point cloud range 6 | # If point cloud range is modified, do remember to change all related 7 | # keys in the config. 
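# Only the keys that differ from the nuScenes base config are overridden here;
# voxel_size stays at 0.25 from _base_, so the +/-100 m range below gives
# (100 - (-100)) / 0.25 = 800 BEV cells per side, matching output_shape=[800, 800].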
8 | model = dict( 9 | pts_voxel_layer=dict( 10 | max_num_points=20, 11 | point_cloud_range=[-100, -100, -5, 100, 100, 3], 12 | max_voxels=(60000, 60000)), 13 | pts_voxel_encoder=dict( 14 | feat_channels=[64], point_cloud_range=[-100, -100, -5, 100, 100, 3]), 15 | pts_middle_encoder=dict(output_shape=[800, 800]), 16 | pts_bbox_head=dict( 17 | num_classes=9, 18 | anchor_generator=dict( 19 | ranges=[[-100, -100, -1.8, 100, 100, -1.8]], custom_values=[]), 20 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7)), 21 | # model training settings (based on nuScenes model settings) 22 | train_cfg=dict(pts=dict(code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]))) 23 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/hv_pointpillars_secfpn_kitti.py: -------------------------------------------------------------------------------- 1 | voxel_size = [0.16, 0.16, 4] 2 | 3 | model = dict( 4 | type='VoxelNet', 5 | voxel_layer=dict( 6 | max_num_points=32, # max_points_per_voxel 7 | point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1], 8 | voxel_size=voxel_size, 9 | max_voxels=(16000, 40000) # (training, testing) max_voxels 10 | ), 11 | voxel_encoder=dict( 12 | type='PillarFeatureNet', 13 | in_channels=4, 14 | feat_channels=[64], 15 | with_distance=False, 16 | voxel_size=voxel_size, 17 | point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1]), 18 | middle_encoder=dict( 19 | type='PointPillarsScatter', in_channels=64, output_shape=[496, 432]), 20 | backbone=dict( 21 | type='SECOND', 22 | in_channels=64, 23 | layer_nums=[3, 5, 5], 24 | layer_strides=[2, 2, 2], 25 | out_channels=[64, 128, 256]), 26 | neck=dict( 27 | type='SECONDFPN', 28 | in_channels=[64, 128, 256], 29 | upsample_strides=[1, 2, 4], 30 | out_channels=[128, 128, 128]), 31 | bbox_head=dict( 32 | type='Anchor3DHead', 33 | num_classes=3, 34 | in_channels=384, 35 | feat_channels=384, 36 | use_direction_classifier=True, 37 | anchor_generator=dict( 38 | type='Anchor3DRangeGenerator', 39 | ranges=[ 40 | [0, -39.68, -0.6, 70.4, 39.68, -0.6], 41 | [0, -39.68, -0.6, 70.4, 39.68, -0.6], 42 | [0, -39.68, -1.78, 70.4, 39.68, -1.78], 43 | ], 44 | sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]], 45 | rotations=[0, 1.57], 46 | reshape_out=False), 47 | diff_rad_by_sin=True, 48 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'), 49 | loss_cls=dict( 50 | type='FocalLoss', 51 | use_sigmoid=True, 52 | gamma=2.0, 53 | alpha=0.25, 54 | loss_weight=1.0), 55 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0), 56 | loss_dir=dict( 57 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 58 | # model training and testing settings 59 | train_cfg=dict( 60 | assigner=[ 61 | dict( # for Pedestrian 62 | type='MaxIoUAssigner', 63 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 64 | pos_iou_thr=0.5, 65 | neg_iou_thr=0.35, 66 | min_pos_iou=0.35, 67 | ignore_iof_thr=-1), 68 | dict( # for Cyclist 69 | type='MaxIoUAssigner', 70 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 71 | pos_iou_thr=0.5, 72 | neg_iou_thr=0.35, 73 | min_pos_iou=0.35, 74 | ignore_iof_thr=-1), 75 | dict( # for Car 76 | type='MaxIoUAssigner', 77 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 78 | pos_iou_thr=0.6, 79 | neg_iou_thr=0.45, 80 | min_pos_iou=0.45, 81 | ignore_iof_thr=-1), 82 | ], 83 | allowed_border=0, 84 | pos_weight=-1, 85 | debug=False), 86 | test_cfg=dict( 87 | use_rotate_nms=True, 88 | nms_across_levels=False, 89 | nms_thr=0.01, 90 | score_thr=0.1, 91 | min_bbox_size=0, 92 | 
nms_pre=100, 93 | max_num=50)) 94 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/hv_pointpillars_secfpn_waymo.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | # Voxel size for voxel encoder 3 | # Usually voxel size is changed consistently with the point cloud range 4 | # If point cloud range is modified, do remember to change all related 5 | # keys in the config. 6 | voxel_size = [0.32, 0.32, 6] 7 | model = dict( 8 | type='MVXFasterRCNN', 9 | pts_voxel_layer=dict( 10 | max_num_points=20, 11 | point_cloud_range=[-74.88, -74.88, -2, 74.88, 74.88, 4], 12 | voxel_size=voxel_size, 13 | max_voxels=(32000, 32000)), 14 | pts_voxel_encoder=dict( 15 | type='HardVFE', 16 | in_channels=5, 17 | feat_channels=[64], 18 | with_distance=False, 19 | voxel_size=voxel_size, 20 | with_cluster_center=True, 21 | with_voxel_center=True, 22 | point_cloud_range=[-74.88, -74.88, -2, 74.88, 74.88, 4], 23 | norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)), 24 | pts_middle_encoder=dict( 25 | type='PointPillarsScatter', in_channels=64, output_shape=[468, 468]), 26 | pts_backbone=dict( 27 | type='SECOND', 28 | in_channels=64, 29 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 30 | layer_nums=[3, 5, 5], 31 | layer_strides=[1, 2, 2], 32 | out_channels=[64, 128, 256]), 33 | pts_neck=dict( 34 | type='SECONDFPN', 35 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 36 | in_channels=[64, 128, 256], 37 | upsample_strides=[1, 2, 4], 38 | out_channels=[128, 128, 128]), 39 | pts_bbox_head=dict( 40 | type='Anchor3DHead', 41 | num_classes=3, 42 | in_channels=384, 43 | feat_channels=384, 44 | use_direction_classifier=True, 45 | anchor_generator=dict( 46 | type='AlignedAnchor3DRangeGenerator', 47 | ranges=[[-74.88, -74.88, -0.0345, 74.88, 74.88, -0.0345], 48 | [-74.88, -74.88, -0.1188, 74.88, 74.88, -0.1188], 49 | [-74.88, -74.88, 0, 74.88, 74.88, 0]], 50 | sizes=[ 51 | [2.08, 4.73, 1.77], # car 52 | [0.84, 1.81, 1.77], # cyclist 53 | [0.84, 0.91, 1.74] # pedestrian 54 | ], 55 | rotations=[0, 1.57], 56 | reshape_out=False), 57 | diff_rad_by_sin=True, 58 | dir_offset=0.7854, # pi/4 59 | dir_limit_offset=0, 60 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7), 61 | loss_cls=dict( 62 | type='FocalLoss', 63 | use_sigmoid=True, 64 | gamma=2.0, 65 | alpha=0.25, 66 | loss_weight=1.0), 67 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), 68 | loss_dir=dict( 69 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 70 | # model training and testing settings 71 | train_cfg=dict( 72 | pts=dict( 73 | assigner=[ 74 | dict( # car 75 | type='MaxIoUAssigner', 76 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 77 | pos_iou_thr=0.55, 78 | neg_iou_thr=0.4, 79 | min_pos_iou=0.4, 80 | ignore_iof_thr=-1), 81 | dict( # cyclist 82 | type='MaxIoUAssigner', 83 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 84 | pos_iou_thr=0.5, 85 | neg_iou_thr=0.3, 86 | min_pos_iou=0.3, 87 | ignore_iof_thr=-1), 88 | dict( # pedestrian 89 | type='MaxIoUAssigner', 90 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 91 | pos_iou_thr=0.5, 92 | neg_iou_thr=0.3, 93 | min_pos_iou=0.3, 94 | ignore_iof_thr=-1), 95 | ], 96 | allowed_border=0, 97 | code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 98 | pos_weight=-1, 99 | debug=False)), 100 | test_cfg=dict( 101 | pts=dict( 102 | use_rotate_nms=True, 103 | nms_across_levels=False, 104 | nms_pre=4096, 105 | 
nms_thr=0.25, 106 | score_thr=0.1, 107 | min_bbox_size=0, 108 | max_num=500))) 109 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/hv_second_secfpn_kitti.py: -------------------------------------------------------------------------------- 1 | voxel_size = [0.05, 0.05, 0.1] 2 | 3 | model = dict( 4 | type='VoxelNet', 5 | voxel_layer=dict( 6 | max_num_points=5, 7 | point_cloud_range=[0, -40, -3, 70.4, 40, 1], 8 | voxel_size=voxel_size, 9 | max_voxels=(16000, 40000)), 10 | voxel_encoder=dict(type='HardSimpleVFE'), 11 | middle_encoder=dict( 12 | type='SparseEncoder', 13 | in_channels=4, 14 | sparse_shape=[41, 1600, 1408], 15 | order=('conv', 'norm', 'act')), 16 | backbone=dict( 17 | type='SECOND', 18 | in_channels=256, 19 | layer_nums=[5, 5], 20 | layer_strides=[1, 2], 21 | out_channels=[128, 256]), 22 | neck=dict( 23 | type='SECONDFPN', 24 | in_channels=[128, 256], 25 | upsample_strides=[1, 2], 26 | out_channels=[256, 256]), 27 | bbox_head=dict( 28 | type='Anchor3DHead', 29 | num_classes=3, 30 | in_channels=512, 31 | feat_channels=512, 32 | use_direction_classifier=True, 33 | anchor_generator=dict( 34 | type='Anchor3DRangeGenerator', 35 | ranges=[ 36 | [0, -40.0, -0.6, 70.4, 40.0, -0.6], 37 | [0, -40.0, -0.6, 70.4, 40.0, -0.6], 38 | [0, -40.0, -1.78, 70.4, 40.0, -1.78], 39 | ], 40 | sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]], 41 | rotations=[0, 1.57], 42 | reshape_out=False), 43 | diff_rad_by_sin=True, 44 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'), 45 | loss_cls=dict( 46 | type='FocalLoss', 47 | use_sigmoid=True, 48 | gamma=2.0, 49 | alpha=0.25, 50 | loss_weight=1.0), 51 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0), 52 | loss_dir=dict( 53 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 54 | # model training and testing settings 55 | train_cfg=dict( 56 | assigner=[ 57 | dict( # for Pedestrian 58 | type='MaxIoUAssigner', 59 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 60 | pos_iou_thr=0.35, 61 | neg_iou_thr=0.2, 62 | min_pos_iou=0.2, 63 | ignore_iof_thr=-1), 64 | dict( # for Cyclist 65 | type='MaxIoUAssigner', 66 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 67 | pos_iou_thr=0.35, 68 | neg_iou_thr=0.2, 69 | min_pos_iou=0.2, 70 | ignore_iof_thr=-1), 71 | dict( # for Car 72 | type='MaxIoUAssigner', 73 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 74 | pos_iou_thr=0.6, 75 | neg_iou_thr=0.45, 76 | min_pos_iou=0.45, 77 | ignore_iof_thr=-1), 78 | ], 79 | allowed_border=0, 80 | pos_weight=-1, 81 | debug=False), 82 | test_cfg=dict( 83 | use_rotate_nms=True, 84 | nms_across_levels=False, 85 | nms_thr=0.01, 86 | score_thr=0.1, 87 | min_bbox_size=0, 88 | nms_pre=100, 89 | max_num=50)) 90 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/hv_second_secfpn_waymo.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | # Voxel size for voxel encoder 3 | # Usually voxel size is changed consistently with the point cloud range 4 | # If point cloud range is modified, do remember to change all related 5 | # keys in the config. 
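# Consistency check with the values below: 153.6 / 0.08 = 1920 voxels along x,
# 102.4 / 0.08 = 1280 along y and 6 / 0.1 = 60 along z; middle_encoder's
# sparse_shape=[61, 1280, 1920] lists them in (z, y, x) order with one extra
# voxel along z, as in the KITTI config above (sparse_shape=[41, 1600, 1408]).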
6 | voxel_size = [0.08, 0.08, 0.1] 7 | model = dict( 8 | type='VoxelNet', 9 | voxel_layer=dict( 10 | max_num_points=10, 11 | point_cloud_range=[-76.8, -51.2, -2, 76.8, 51.2, 4], 12 | voxel_size=voxel_size, 13 | max_voxels=(80000, 90000)), 14 | voxel_encoder=dict(type='HardSimpleVFE', num_features=5), 15 | middle_encoder=dict( 16 | type='SparseEncoder', 17 | in_channels=5, 18 | sparse_shape=[61, 1280, 1920], 19 | order=('conv', 'norm', 'act')), 20 | backbone=dict( 21 | type='SECOND', 22 | in_channels=384, 23 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 24 | layer_nums=[5, 5], 25 | layer_strides=[1, 2], 26 | out_channels=[128, 256]), 27 | neck=dict( 28 | type='SECONDFPN', 29 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 30 | in_channels=[128, 256], 31 | upsample_strides=[1, 2], 32 | out_channels=[256, 256]), 33 | bbox_head=dict( 34 | type='Anchor3DHead', 35 | num_classes=3, 36 | in_channels=512, 37 | feat_channels=512, 38 | use_direction_classifier=True, 39 | anchor_generator=dict( 40 | type='AlignedAnchor3DRangeGenerator', 41 | ranges=[[-76.8, -51.2, -0.0345, 76.8, 51.2, -0.0345], 42 | [-76.8, -51.2, 0, 76.8, 51.2, 0], 43 | [-76.8, -51.2, -0.1188, 76.8, 51.2, -0.1188]], 44 | sizes=[ 45 | [2.08, 4.73, 1.77], # car 46 | [0.84, 0.91, 1.74], # pedestrian 47 | [0.84, 1.81, 1.77] # cyclist 48 | ], 49 | rotations=[0, 1.57], 50 | reshape_out=False), 51 | diff_rad_by_sin=True, 52 | dir_offset=0.7854, # pi/4 53 | dir_limit_offset=0, 54 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7), 55 | loss_cls=dict( 56 | type='FocalLoss', 57 | use_sigmoid=True, 58 | gamma=2.0, 59 | alpha=0.25, 60 | loss_weight=1.0), 61 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), 62 | loss_dir=dict( 63 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 64 | # model training and testing settings 65 | train_cfg=dict( 66 | assigner=[ 67 | dict( # car 68 | type='MaxIoUAssigner', 69 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 70 | pos_iou_thr=0.55, 71 | neg_iou_thr=0.4, 72 | min_pos_iou=0.4, 73 | ignore_iof_thr=-1), 74 | dict( # pedestrian 75 | type='MaxIoUAssigner', 76 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 77 | pos_iou_thr=0.5, 78 | neg_iou_thr=0.3, 79 | min_pos_iou=0.3, 80 | ignore_iof_thr=-1), 81 | dict( # cyclist 82 | type='MaxIoUAssigner', 83 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 84 | pos_iou_thr=0.5, 85 | neg_iou_thr=0.3, 86 | min_pos_iou=0.3, 87 | ignore_iof_thr=-1) 88 | ], 89 | allowed_border=0, 90 | code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 91 | pos_weight=-1, 92 | debug=False), 93 | test_cfg=dict( 94 | use_rotate_nms=True, 95 | nms_across_levels=False, 96 | nms_pre=4096, 97 | nms_thr=0.25, 98 | score_thr=0.1, 99 | min_bbox_size=0, 100 | max_num=500)) 101 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/imvotenet_image.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='ImVoteNet', 3 | img_backbone=dict( 4 | type='ResNet', 5 | depth=50, 6 | num_stages=4, 7 | out_indices=(0, 1, 2, 3), 8 | frozen_stages=1, 9 | norm_cfg=dict(type='BN', requires_grad=False), 10 | norm_eval=True, 11 | style='caffe'), 12 | img_neck=dict( 13 | type='FPN', 14 | in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | num_outs=5), 17 | img_rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=256, 20 | feat_channels=256, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | 
scales=[8], 24 | ratios=[0.5, 1.0, 2.0], 25 | strides=[4, 8, 16, 32, 64]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | img_roi_head=dict( 34 | type='StandardRoIHead', 35 | bbox_roi_extractor=dict( 36 | type='SingleRoIExtractor', 37 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 38 | out_channels=256, 39 | featmap_strides=[4, 8, 16, 32]), 40 | bbox_head=dict( 41 | type='Shared2FCBBoxHead', 42 | in_channels=256, 43 | fc_out_channels=1024, 44 | roi_feat_size=7, 45 | num_classes=10, 46 | bbox_coder=dict( 47 | type='DeltaXYWHBBoxCoder', 48 | target_means=[0., 0., 0., 0.], 49 | target_stds=[0.1, 0.1, 0.2, 0.2]), 50 | reg_class_agnostic=False, 51 | loss_cls=dict( 52 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 53 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 54 | 55 | # model training and testing settings 56 | train_cfg=dict( 57 | img_rpn=dict( 58 | assigner=dict( 59 | type='MaxIoUAssigner', 60 | pos_iou_thr=0.7, 61 | neg_iou_thr=0.3, 62 | min_pos_iou=0.3, 63 | match_low_quality=True, 64 | ignore_iof_thr=-1), 65 | sampler=dict( 66 | type='RandomSampler', 67 | num=256, 68 | pos_fraction=0.5, 69 | neg_pos_ub=-1, 70 | add_gt_as_proposals=False), 71 | allowed_border=-1, 72 | pos_weight=-1, 73 | debug=False), 74 | img_rpn_proposal=dict( 75 | nms_across_levels=False, 76 | nms_pre=2000, 77 | nms_post=1000, 78 | max_per_img=1000, 79 | nms=dict(type='nms', iou_threshold=0.7), 80 | min_bbox_size=0), 81 | img_rcnn=dict( 82 | assigner=dict( 83 | type='MaxIoUAssigner', 84 | pos_iou_thr=0.5, 85 | neg_iou_thr=0.5, 86 | min_pos_iou=0.5, 87 | match_low_quality=False, 88 | ignore_iof_thr=-1), 89 | sampler=dict( 90 | type='RandomSampler', 91 | num=512, 92 | pos_fraction=0.25, 93 | neg_pos_ub=-1, 94 | add_gt_as_proposals=True), 95 | pos_weight=-1, 96 | debug=False)), 97 | test_cfg=dict( 98 | img_rpn=dict( 99 | nms_across_levels=False, 100 | nms_pre=1000, 101 | nms_post=1000, 102 | max_per_img=1000, 103 | nms=dict(type='nms', iou_threshold=0.7), 104 | min_bbox_size=0), 105 | img_rcnn=dict( 106 | score_thr=0.05, 107 | nms=dict(type='nms', iou_threshold=0.5), 108 | max_per_img=100))) 109 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/mask_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='MaskRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 35 | roi_head=dict( 36 | 
type='StandardRoIHead', 37 | bbox_roi_extractor=dict( 38 | type='SingleRoIExtractor', 39 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 40 | out_channels=256, 41 | featmap_strides=[4, 8, 16, 32]), 42 | bbox_head=dict( 43 | type='Shared2FCBBoxHead', 44 | in_channels=256, 45 | fc_out_channels=1024, 46 | roi_feat_size=7, 47 | num_classes=80, 48 | bbox_coder=dict( 49 | type='DeltaXYWHBBoxCoder', 50 | target_means=[0., 0., 0., 0.], 51 | target_stds=[0.1, 0.1, 0.2, 0.2]), 52 | reg_class_agnostic=False, 53 | loss_cls=dict( 54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 55 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 56 | mask_roi_extractor=dict( 57 | type='SingleRoIExtractor', 58 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 59 | out_channels=256, 60 | featmap_strides=[4, 8, 16, 32]), 61 | mask_head=dict( 62 | type='FCNMaskHead', 63 | num_convs=4, 64 | in_channels=256, 65 | conv_out_channels=256, 66 | num_classes=80, 67 | loss_mask=dict( 68 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 69 | # model training and testing settings 70 | train_cfg=dict( 71 | rpn=dict( 72 | assigner=dict( 73 | type='MaxIoUAssigner', 74 | pos_iou_thr=0.7, 75 | neg_iou_thr=0.3, 76 | min_pos_iou=0.3, 77 | match_low_quality=True, 78 | ignore_iof_thr=-1), 79 | sampler=dict( 80 | type='RandomSampler', 81 | num=256, 82 | pos_fraction=0.5, 83 | neg_pos_ub=-1, 84 | add_gt_as_proposals=False), 85 | allowed_border=-1, 86 | pos_weight=-1, 87 | debug=False), 88 | rpn_proposal=dict( 89 | nms_across_levels=False, 90 | nms_pre=2000, 91 | nms_post=1000, 92 | max_num=1000, 93 | nms_thr=0.7, 94 | min_bbox_size=0), 95 | rcnn=dict( 96 | assigner=dict( 97 | type='MaxIoUAssigner', 98 | pos_iou_thr=0.5, 99 | neg_iou_thr=0.5, 100 | min_pos_iou=0.5, 101 | match_low_quality=True, 102 | ignore_iof_thr=-1), 103 | sampler=dict( 104 | type='RandomSampler', 105 | num=512, 106 | pos_fraction=0.25, 107 | neg_pos_ub=-1, 108 | add_gt_as_proposals=True), 109 | mask_size=28, 110 | pos_weight=-1, 111 | debug=False)), 112 | test_cfg=dict( 113 | rpn=dict( 114 | nms_across_levels=False, 115 | nms_pre=1000, 116 | nms_post=1000, 117 | max_num=1000, 118 | nms_thr=0.7, 119 | min_bbox_size=0), 120 | rcnn=dict( 121 | score_thr=0.05, 122 | nms=dict(type='nms', iou_threshold=0.5), 123 | max_per_img=100, 124 | mask_thr_binary=0.5))) 125 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/paconv_cuda_ssg.py: -------------------------------------------------------------------------------- 1 | _base_ = './paconv_ssg.py' 2 | 3 | model = dict( 4 | backbone=dict( 5 | sa_cfg=dict( 6 | type='PAConvCUDASAModule', 7 | scorenet_cfg=dict(mlp_channels=[8, 16, 16])))) 8 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/paconv_ssg.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='EncoderDecoder3D', 4 | backbone=dict( 5 | type='PointNet2SASSG', 6 | in_channels=9, # [xyz, rgb, normalized_xyz] 7 | num_points=(1024, 256, 64, 16), 8 | radius=(None, None, None, None), # use kNN instead of ball query 9 | num_samples=(32, 32, 32, 32), 10 | sa_channels=((32, 32, 64), (64, 64, 128), (128, 128, 256), (256, 256, 11 | 512)), 12 | fp_channels=(), 13 | norm_cfg=dict(type='BN2d', momentum=0.1), 14 | sa_cfg=dict( 15 | type='PAConvSAModule', 16 | pool_mod='max', 17 | use_xyz=True, 18 | normalize_xyz=False, 19 
| paconv_num_kernels=[16, 16, 16], 20 | paconv_kernel_input='w_neighbor', 21 | scorenet_input='w_neighbor_dist', 22 | scorenet_cfg=dict( 23 | mlp_channels=[16, 16, 16], 24 | score_norm='softmax', 25 | temp_factor=1.0, 26 | last_bn=False))), 27 | decode_head=dict( 28 | type='PAConvHead', 29 | # PAConv model's decoder takes skip connections from beckbone 30 | # different from PointNet++, it also concats input features in the last 31 | # level of decoder, leading to `128 + 6` as the channel number 32 | fp_channels=((768, 256, 256), (384, 256, 256), (320, 256, 128), 33 | (128 + 6, 128, 128, 128)), 34 | channels=128, 35 | dropout_ratio=0.5, 36 | conv_cfg=dict(type='Conv1d'), 37 | norm_cfg=dict(type='BN1d'), 38 | act_cfg=dict(type='ReLU'), 39 | loss_decode=dict( 40 | type='CrossEntropyLoss', 41 | use_sigmoid=False, 42 | class_weight=None, # should be modified with dataset 43 | loss_weight=1.0)), 44 | # correlation loss to regularize PAConv's kernel weights 45 | loss_regularization=dict( 46 | type='PAConvRegularizationLoss', reduction='sum', loss_weight=10.0), 47 | # model training and testing settings 48 | train_cfg=dict(), 49 | test_cfg=dict(mode='slide')) 50 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/pointnet2_msg.py: -------------------------------------------------------------------------------- 1 | _base_ = './pointnet2_ssg.py' 2 | 3 | # model settings 4 | model = dict( 5 | backbone=dict( 6 | _delete_=True, 7 | type='PointNet2SAMSG', 8 | in_channels=6, # [xyz, rgb], should be modified with dataset 9 | num_points=(1024, 256, 64, 16), 10 | radii=((0.05, 0.1), (0.1, 0.2), (0.2, 0.4), (0.4, 0.8)), 11 | num_samples=((16, 32), (16, 32), (16, 32), (16, 32)), 12 | sa_channels=(((16, 16, 32), (32, 32, 64)), ((64, 64, 128), (64, 96, 13 | 128)), 14 | ((128, 196, 256), (128, 196, 256)), ((256, 256, 512), 15 | (256, 384, 512))), 16 | aggregation_channels=(None, None, None, None), 17 | fps_mods=(('D-FPS'), ('D-FPS'), ('D-FPS'), ('D-FPS')), 18 | fps_sample_range_lists=((-1), (-1), (-1), (-1)), 19 | dilated_group=(False, False, False, False), 20 | out_indices=(0, 1, 2, 3), 21 | sa_cfg=dict( 22 | type='PointSAModuleMSG', 23 | pool_mod='max', 24 | use_xyz=True, 25 | normalize_xyz=False)), 26 | decode_head=dict( 27 | fp_channels=((1536, 256, 256), (512, 256, 256), (352, 256, 128), 28 | (128, 128, 128, 128)))) 29 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/pointnet2_ssg.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='EncoderDecoder3D', 4 | backbone=dict( 5 | type='PointNet2SASSG', 6 | in_channels=6, # [xyz, rgb], should be modified with dataset 7 | num_points=(1024, 256, 64, 16), 8 | radius=(0.1, 0.2, 0.4, 0.8), 9 | num_samples=(32, 32, 32, 32), 10 | sa_channels=((32, 32, 64), (64, 64, 128), (128, 128, 256), (256, 256, 11 | 512)), 12 | fp_channels=(), 13 | norm_cfg=dict(type='BN2d'), 14 | sa_cfg=dict( 15 | type='PointSAModule', 16 | pool_mod='max', 17 | use_xyz=True, 18 | normalize_xyz=False)), 19 | decode_head=dict( 20 | type='PointNet2Head', 21 | fp_channels=((768, 256, 256), (384, 256, 256), (320, 256, 128), 22 | (128, 128, 128, 128)), 23 | channels=128, 24 | dropout_ratio=0.5, 25 | conv_cfg=dict(type='Conv1d'), 26 | norm_cfg=dict(type='BN1d'), 27 | act_cfg=dict(type='ReLU'), 28 | loss_decode=dict( 29 | type='CrossEntropyLoss', 30 | use_sigmoid=False, 31 | class_weight=None, # 
should be modified with dataset 32 | loss_weight=1.0)), 33 | # model training and testing settings 34 | train_cfg=dict(), 35 | test_cfg=dict(mode='slide')) 36 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/votenet.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='VoteNet', 3 | backbone=dict( 4 | type='PointNet2SASSG', 5 | in_channels=4, 6 | num_points=(2048, 1024, 512, 256), 7 | radius=(0.2, 0.4, 0.8, 1.2), 8 | num_samples=(64, 32, 16, 16), 9 | sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256), 10 | (128, 128, 256)), 11 | fp_channels=((256, 256), (256, 256)), 12 | norm_cfg=dict(type='BN2d'), 13 | sa_cfg=dict( 14 | type='PointSAModule', 15 | pool_mod='max', 16 | use_xyz=True, 17 | normalize_xyz=True)), 18 | bbox_head=dict( 19 | type='VoteHead', 20 | vote_module_cfg=dict( 21 | in_channels=256, 22 | vote_per_seed=1, 23 | gt_per_seed=3, 24 | conv_channels=(256, 256), 25 | conv_cfg=dict(type='Conv1d'), 26 | norm_cfg=dict(type='BN1d'), 27 | norm_feats=True, 28 | vote_loss=dict( 29 | type='ChamferDistance', 30 | mode='l1', 31 | reduction='none', 32 | loss_dst_weight=10.0)), 33 | vote_aggregation_cfg=dict( 34 | type='PointSAModule', 35 | num_point=256, 36 | radius=0.3, 37 | num_sample=16, 38 | mlp_channels=[256, 128, 128, 128], 39 | use_xyz=True, 40 | normalize_xyz=True), 41 | pred_layer_cfg=dict( 42 | in_channels=128, shared_conv_channels=(128, 128), bias=True), 43 | conv_cfg=dict(type='Conv1d'), 44 | norm_cfg=dict(type='BN1d'), 45 | objectness_loss=dict( 46 | type='CrossEntropyLoss', 47 | class_weight=[0.2, 0.8], 48 | reduction='sum', 49 | loss_weight=5.0), 50 | center_loss=dict( 51 | type='ChamferDistance', 52 | mode='l2', 53 | reduction='sum', 54 | loss_src_weight=10.0, 55 | loss_dst_weight=10.0), 56 | dir_class_loss=dict( 57 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 58 | dir_res_loss=dict( 59 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0), 60 | size_class_loss=dict( 61 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 62 | size_res_loss=dict( 63 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0 / 3.0), 64 | semantic_loss=dict( 65 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)), 66 | # model training and testing settings 67 | train_cfg=dict( 68 | pos_distance_thr=0.3, neg_distance_thr=0.6, sample_mod='vote'), 69 | test_cfg=dict( 70 | sample_mod='seed', 71 | nms_thr=0.25, 72 | score_thr=0.05, 73 | per_class_proposal=True)) 74 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/cosine.py: -------------------------------------------------------------------------------- 1 | # This schedule is mainly used by models with dynamic voxelization 2 | # optimizer 3 | lr = 0.003 # max learning rate 4 | optimizer = dict( 5 | type='AdamW', 6 | lr=lr, 7 | betas=(0.95, 0.99), # the momentum is change during training 8 | weight_decay=0.001) 9 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 10 | 11 | lr_config = dict( 12 | policy='CosineAnnealing', 13 | warmup='linear', 14 | warmup_iters=1000, 15 | warmup_ratio=1.0 / 10, 16 | min_lr_ratio=1e-5) 17 | 18 | momentum_config = None 19 | 20 | runner = dict(type='EpochBasedRunner', max_epochs=40) 21 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/cyclic_20e.py: 
-------------------------------------------------------------------------------- 1 | # For nuScenes dataset, we usually evaluate the model at the end of training. 2 | # Since the models are trained by 20 epochs by default, we set evaluation 3 | # interval to be 20. Please change the interval accordingly if you do not 4 | # use a default schedule. 5 | # optimizer 6 | # This schedule is mainly used by models on nuScenes dataset 7 | optimizer = dict(type='AdamW', lr=1e-4, weight_decay=0.01) 8 | # max_norm=10 is better for SECOND 9 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 10 | lr_config = dict( 11 | policy='cyclic', 12 | target_ratio=(10, 1e-4), 13 | cyclic_times=1, 14 | step_ratio_up=0.4, 15 | ) 16 | momentum_config = dict( 17 | policy='cyclic', 18 | target_ratio=(0.85 / 0.95, 1), 19 | cyclic_times=1, 20 | step_ratio_up=0.4, 21 | ) 22 | 23 | # runtime settings 24 | runner = dict(type='EpochBasedRunner', max_epochs=20) 25 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/cyclic_40e.py: -------------------------------------------------------------------------------- 1 | # The schedule is usually used by models trained on KITTI dataset 2 | 3 | # The learning rate set in the cyclic schedule is the initial learning rate 4 | # rather than the max learning rate. Since the target_ratio is (10, 1e-4), 5 | # the learning rate will change from 0.0018 to 0.018, then go to 0.0018*1e-4 6 | lr = 0.0018 7 | # The optimizer follows the setting in SECOND.Pytorch, but here we use 8 | # the official AdamW optimizer implemented by PyTorch. 9 | optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01) 10 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 11 | # We use cyclic learning rate and momentum schedule following SECOND.Pytorch 12 | # https://github.com/traveller59/second.pytorch/blob/3aba19c9688274f75ebb5e576f65cfe54773c021/torchplus/train/learning_schedules_fastai.py#L69 # noqa 13 | # We implement them in mmcv, for more details, please refer to 14 | # https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/lr_updater.py#L327 # noqa 15 | # https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/momentum_updater.py#L130 # noqa 16 | lr_config = dict( 17 | policy='cyclic', 18 | target_ratio=(10, 1e-4), 19 | cyclic_times=1, 20 | step_ratio_up=0.4, 21 | ) 22 | momentum_config = dict( 23 | policy='cyclic', 24 | target_ratio=(0.85 / 0.95, 1), 25 | cyclic_times=1, 26 | step_ratio_up=0.4, 27 | ) 28 | # Although the max_epochs is 40, this schedule is usually used with 29 | # RepeatDataset with repeat ratio N, thus the actual max epoch 30 | # number could be Nx40 31 | runner = dict(type='EpochBasedRunner', max_epochs=40) 32 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/mmdet_schedule_1x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[8, 11]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=12) 12 | --------------------------------------------------------------------------------
/projects/configs/_base_/schedules/schedule_2x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used by models on nuScenes dataset 3 | optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.01) 4 | # max_norm=10 is better for SECOND 5 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 6 | lr_config = dict( 7 | policy='step', 8 | warmup='linear', 9 | warmup_iters=1000, 10 | warmup_ratio=1.0 / 1000, 11 | step=[20, 23]) 12 | momentum_config = None 13 | # runtime settings 14 | runner = dict(type='EpochBasedRunner', max_epochs=24) 15 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/schedule_3x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used by models on indoor dataset, 3 | # e.g., VoteNet on SUNRGBD and ScanNet 4 | lr = 0.008 # max learning rate 5 | optimizer = dict(type='AdamW', lr=lr, weight_decay=0.01) 6 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 7 | lr_config = dict(policy='step', warmup=None, step=[24, 32]) 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=36) 10 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/seg_cosine_150e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used on S3DIS dataset in segmentation task 3 | optimizer = dict(type='SGD', lr=0.2, weight_decay=0.0001, momentum=0.9) 4 | optimizer_config = dict(grad_clip=None) 5 | lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=0.002) 6 | momentum_config = None 7 | 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=150) 10 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/seg_cosine_200e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used on ScanNet dataset in segmentation task 3 | optimizer = dict(type='Adam', lr=0.001, weight_decay=0.01) 4 | optimizer_config = dict(grad_clip=None) 5 | lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5) 6 | momentum_config = None 7 | 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=200) 10 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/seg_cosine_50e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used on S3DIS dataset in segmentation task 3 | optimizer = dict(type='Adam', lr=0.001, weight_decay=0.001) 4 | optimizer_config = dict(grad_clip=None) 5 | lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5) 6 | momentum_config = None 7 | 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=50) 10 | -------------------------------------------------------------------------------- /projects/configs/datasets/custom_lyft-3d.py: -------------------------------------------------------------------------------- 1 | # If point cloud range is changed, the models should also change their point 2 | # cloud range accordingly 3 | point_cloud_range = [-80, -80, -5, 80, 80, 3] 4 | # For Lyft we usually do 9-class detection 5 | class_names = [ 6 
| 'car', 'truck', 'bus', 'emergency_vehicle', 'other_vehicle', 'motorcycle', 7 | 'bicycle', 'pedestrian', 'animal' 8 | ] 9 | dataset_type = 'CustomLyftDataset' 10 | data_root = 'data/lyft/' 11 | # Input modality for Lyft dataset, this is consistent with the submission 12 | # format which requires the information in input_modality. 13 | input_modality = dict( 14 | use_lidar=True, 15 | use_camera=False, 16 | use_radar=False, 17 | use_map=False, 18 | use_external=True) 19 | file_client_args = dict(backend='disk') 20 | # Uncomment the following if use ceph or other file clients. 21 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient 22 | # for more details. 23 | # file_client_args = dict( 24 | # backend='petrel', 25 | # path_mapping=dict({ 26 | # './data/lyft/': 's3://lyft/lyft/', 27 | # 'data/lyft/': 's3://lyft/lyft/' 28 | # })) 29 | train_pipeline = [ 30 | dict( 31 | type='LoadPointsFromFile', 32 | coord_type='LIDAR', 33 | load_dim=5, 34 | use_dim=5, 35 | file_client_args=file_client_args), 36 | dict( 37 | type='LoadPointsFromMultiSweeps', 38 | sweeps_num=10, 39 | file_client_args=file_client_args), 40 | dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True), 41 | dict( 42 | type='GlobalRotScaleTrans', 43 | rot_range=[-0.3925, 0.3925], 44 | scale_ratio_range=[0.95, 1.05], 45 | translation_std=[0, 0, 0]), 46 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), 47 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), 48 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), 49 | dict(type='PointShuffle'), 50 | dict(type='DefaultFormatBundle3D', class_names=class_names), 51 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 52 | ] 53 | test_pipeline = [ 54 | dict( 55 | type='LoadPointsFromFile', 56 | coord_type='LIDAR', 57 | load_dim=5, 58 | use_dim=5, 59 | file_client_args=file_client_args), 60 | dict( 61 | type='LoadPointsFromMultiSweeps', 62 | sweeps_num=10, 63 | file_client_args=file_client_args), 64 | dict( 65 | type='MultiScaleFlipAug3D', 66 | img_scale=(1333, 800), 67 | pts_scale_ratio=1, 68 | flip=False, 69 | transforms=[ 70 | dict( 71 | type='GlobalRotScaleTrans', 72 | rot_range=[0, 0], 73 | scale_ratio_range=[1., 1.], 74 | translation_std=[0, 0, 0]), 75 | dict(type='RandomFlip3D'), 76 | dict( 77 | type='PointsRangeFilter', point_cloud_range=point_cloud_range), 78 | dict( 79 | type='DefaultFormatBundle3D', 80 | class_names=class_names, 81 | with_label=False), 82 | dict(type='Collect3D', keys=['points']) 83 | ]) 84 | ] 85 | # construct a pipeline for data and gt loading in show function 86 | # please keep its loading function consistent with test_pipeline (e.g. 
client) 87 | eval_pipeline = [ 88 | dict( 89 | type='LoadPointsFromFile', 90 | coord_type='LIDAR', 91 | load_dim=5, 92 | use_dim=5, 93 | file_client_args=file_client_args), 94 | dict( 95 | type='LoadPointsFromMultiSweeps', 96 | sweeps_num=10, 97 | file_client_args=file_client_args), 98 | dict( 99 | type='DefaultFormatBundle3D', 100 | class_names=class_names, 101 | with_label=False), 102 | dict(type='Collect3D', keys=['points']) 103 | ] 104 | 105 | data = dict( 106 | samples_per_gpu=2, 107 | workers_per_gpu=2, 108 | train=dict( 109 | type=dataset_type, 110 | data_root=data_root, 111 | ann_file=data_root + 'lyft_infos_train.pkl', 112 | pipeline=train_pipeline, 113 | classes=class_names, 114 | modality=input_modality, 115 | test_mode=False), 116 | val=dict( 117 | type=dataset_type, 118 | data_root=data_root, 119 | ann_file=data_root + 'lyft_infos_val.pkl', 120 | pipeline=test_pipeline, 121 | classes=class_names, 122 | modality=input_modality, 123 | test_mode=True), 124 | test=dict( 125 | type=dataset_type, 126 | data_root=data_root, 127 | ann_file=data_root + 'lyft_infos_val.pkl', 128 | pipeline=test_pipeline, 129 | classes=class_names, 130 | modality=input_modality, 131 | test_mode=True)) 132 | # For Lyft dataset, we usually evaluate the model at the end of training. 133 | # Since the models are trained by 24 epochs by default, we set evaluation 134 | # interval to be 24. Please change the interval accordingly if you do not 135 | # use a default schedule. 136 | evaluation = dict(interval=24, pipeline=eval_pipeline) -------------------------------------------------------------------------------- /projects/configs/datasets/custom_waymo-3d.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | # D5 in the config name means the whole dataset is divided into 5 folds 3 | # We only use one fold for efficient experiments 4 | dataset_type = 'CustomWaymoDataset' 5 | data_root = 'data/waymo/kitti_format/' 6 | file_client_args = dict(backend='disk') 7 | # Uncomment the following if use ceph or other file clients. 8 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient 9 | # for more details. 
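# Illustrative sketch (assumed standard mmcv/mmdet3d behaviour, not part of this config):
# the file_client_args dict above is forwarded to the loading transforms, which hand it to
# mmcv's FileClient, so switching to the commented-out petrel backend below only changes
# this dict, not the pipeline code. For example:
#
#   import numpy as np
#   from mmcv.fileio import FileClient
#
#   client = FileClient(**file_client_args)
#   raw = client.get('data/waymo/kitti_format/training/velodyne/0000000.bin')  # hypothetical path
#   points = np.frombuffer(raw, dtype=np.float32).reshape(-1, 5)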
10 | # file_client_args = dict( 11 | # backend='petrel', path_mapping=dict(data='s3://waymo_data/')) 12 | 13 | img_norm_cfg = dict( 14 | mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) 15 | class_names = ['Car', 'Pedestrian', 'Cyclist'] 16 | point_cloud_range = [-74.88, -74.88, -2, 74.88, 74.88, 4] 17 | input_modality = dict(use_lidar=False, use_camera=True) 18 | db_sampler = dict( 19 | data_root=data_root, 20 | info_path=data_root + 'waymo_dbinfos_train.pkl', 21 | rate=1.0, 22 | prepare=dict( 23 | filter_by_difficulty=[-1], 24 | filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)), 25 | classes=class_names, 26 | sample_groups=dict(Car=15, Pedestrian=10, Cyclist=10), 27 | points_loader=dict( 28 | type='LoadPointsFromFile', 29 | coord_type='LIDAR', 30 | load_dim=5, 31 | use_dim=[0, 1, 2, 3, 4], 32 | file_client_args=file_client_args)) 33 | 34 | 35 | 36 | train_pipeline = [ 37 | dict(type='LoadMultiViewImageFromFiles', to_float32=True), 38 | dict(type='PhotoMetricDistortionMultiViewImage'), 39 | dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True, with_attr_label=False), 40 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), 41 | dict(type='ObjectNameFilter', classes=class_names), 42 | dict(type='NormalizeMultiviewImage', **img_norm_cfg), 43 | dict(type='PadMultiViewImage', size_divisor=32), 44 | dict(type='DefaultFormatBundle3D', class_names=class_names), 45 | dict(type='CustomCollect3D', keys=['gt_bboxes_3d', 'gt_labels_3d', 'img']) 46 | ] 47 | 48 | 49 | test_pipeline = [ 50 | dict(type='LoadMultiViewImageFromFiles', to_float32=True), 51 | dict(type='NormalizeMultiviewImage', **img_norm_cfg), 52 | dict(type='PadMultiViewImage', size_divisor=32), 53 | dict( 54 | type='MultiScaleFlipAug3D', 55 | img_scale=(1920, 1280), 56 | pts_scale_ratio=1, 57 | flip=False, 58 | transforms=[ 59 | dict( 60 | type='DefaultFormatBundle3D', 61 | class_names=class_names, 62 | with_label=False), 63 | dict(type='CustomCollect3D', keys=['img']) 64 | ]) 65 | ] 66 | 67 | 68 | # construct a pipeline for data and gt loading in show function 69 | # please keep its loading function consistent with test_pipeline (e.g. client) 70 | 71 | data = dict( 72 | samples_per_gpu=2, 73 | workers_per_gpu=4, 74 | train=dict( 75 | type='RepeatDataset', 76 | times=2, 77 | dataset=dict( 78 | type=dataset_type, 79 | data_root=data_root, 80 | ann_file=data_root + 'waymo_infos_train.pkl', 81 | split='training', 82 | pipeline=train_pipeline, 83 | modality=input_modality, 84 | classes=class_names, 85 | test_mode=False, 86 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset 87 | # and box_type_3d='Depth' in sunrgbd and scannet dataset. 
88 | box_type_3d='LiDAR', 89 | # load one frame every five frames 90 | load_interval=5)), 91 | val=dict( 92 | type=dataset_type, 93 | data_root=data_root, 94 | ann_file=data_root + 'waymo_infos_val.pkl', 95 | split='training', 96 | pipeline=test_pipeline, 97 | modality=input_modality, 98 | classes=class_names, 99 | test_mode=True, 100 | box_type_3d='LiDAR'), 101 | test=dict( 102 | type=dataset_type, 103 | data_root=data_root, 104 | ann_file=data_root + 'waymo_infos_val.pkl', 105 | split='training', 106 | pipeline=test_pipeline, 107 | modality=input_modality, 108 | classes=class_names, 109 | test_mode=True, 110 | box_type_3d='LiDAR')) 111 | 112 | evaluation = dict(interval=24, pipeline=test_pipeline) -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/__init__.py: -------------------------------------------------------------------------------- 1 | from .core.bbox.assigners.hungarian_assigner_3d import HungarianAssigner3D 2 | from .core.bbox.coders.nms_free_coder import NMSFreeCoder 3 | from .core.bbox.match_costs import BBox3DL1Cost 4 | from .core.evaluation.eval_hooks import CustomDistEvalHook 5 | from .datasets.pipelines import ( 6 | PhotoMetricDistortionMultiViewImage, PadMultiViewImage, 7 | NormalizeMultiviewImage, CustomCollect3D) 8 | from .models.backbones.vovnet import VoVNet 9 | from .models.utils import * 10 | from .models.opt.adamw import AdamW2 11 | from .models.hooks import * 12 | from .bevformer import * 13 | from .metrics import * 14 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/apis/__init__.py: -------------------------------------------------------------------------------- 1 | from .test import multi_gpu_test, single_gpu_test 2 | 3 | __all__ = [ 4 | 'multi_gpu_test', 'single_gpu_test' 5 | ] -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .dense_heads import * 3 | from .detectors import * 4 | from .modules import * 5 | from .runner import * 6 | from .hooks import * 7 | from .loss import * -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/apis/__init__.py: -------------------------------------------------------------------------------- 1 | from .train import custom_train_model 2 | from .mmdet_train import custom_train_detector 3 | # from .test import custom_multi_gpu_test -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/apis/train.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------------- 2 | # Copyright (c) OpenMMLab. All rights reserved. 3 | # --------------------------------------------- 4 | # Modified by Zhiqi Li 5 | # --------------------------------------------- 6 | 7 | from .mmdet_train import custom_train_detector 8 | from mmseg.apis import train_segmentor 9 | from mmdet.apis import train_detector 10 | 11 | def custom_train_model(model, 12 | dataset, 13 | cfg, 14 | distributed=False, 15 | validate=False, 16 | timestamp=None, 17 | eval_model=None, 18 | meta=None): 19 | """A function wrapper for launching model training according to cfg. 20 | 21 | Because we need different eval_hook in runner. Should be deprecated in the 22 | future. 
23 | """ 24 | if cfg.model.type in ['EncoderDecoder3D']: 25 | assert False 26 | else: 27 | custom_train_detector( 28 | model, 29 | dataset, 30 | cfg, 31 | distributed=distributed, 32 | validate=validate, 33 | timestamp=timestamp, 34 | eval_model=eval_model, 35 | meta=meta) 36 | 37 | 38 | def train_model(model, 39 | dataset, 40 | cfg, 41 | distributed=False, 42 | validate=False, 43 | timestamp=None, 44 | meta=None): 45 | """A function wrapper for launching model training according to cfg. 46 | 47 | Because we need different eval_hook in runner. Should be deprecated in the 48 | future. 49 | """ 50 | if cfg.model.type in ['EncoderDecoder3D']: 51 | train_segmentor( 52 | model, 53 | dataset, 54 | cfg, 55 | distributed=distributed, 56 | validate=validate, 57 | timestamp=timestamp, 58 | meta=meta) 59 | else: 60 | train_detector( 61 | model, 62 | dataset, 63 | cfg, 64 | distributed=distributed, 65 | validate=validate, 66 | timestamp=timestamp, 67 | meta=meta) 68 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/dense_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .bevformer_head import BEVFormerHead 2 | from .bevformer_seg_head import BEVFormerHead_seg -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | from .bevformer import BEVFormer 2 | from .bevformer_fp16 import BEVFormer_fp16 -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/detectors/bevformer_fp16.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------------- 2 | # Copyright (c) OpenMMLab. All rights reserved. 3 | # --------------------------------------------- 4 | # Modified by Zhiqi Li 5 | # --------------------------------------------- 6 | 7 | 8 | import torch 9 | from mmcv.runner import force_fp32, auto_fp16 10 | from mmdet.models import DETECTORS 11 | from mmdet3d.core import bbox3d2result 12 | from mmdet3d.models.detectors.mvx_two_stage import MVXTwoStageDetector 13 | from projects.mmdet3d_plugin.models.utils.grid_mask import GridMask 14 | from projects.mmdet3d_plugin.bevformer.detectors.bevformer import BEVFormer 15 | import time 16 | import copy 17 | import numpy as np 18 | import mmdet3d 19 | from projects.mmdet3d_plugin.models.utils.bricks import run_time 20 | 21 | 22 | @DETECTORS.register_module() 23 | class BEVFormer_fp16(BEVFormer): 24 | """ 25 | The default version of BEVFormer currently cannot support FP16. 26 | We provide this version to resolve this issue. 27 | """ 28 | 29 | @auto_fp16(apply_to=('img', 'prev_bev', 'points')) 30 | def forward_train(self, 31 | points=None, 32 | img_metas=None, 33 | gt_bboxes_3d=None, 34 | gt_labels_3d=None, 35 | semantic_indices=None, 36 | gt_labels=None, 37 | gt_bboxes=None, 38 | img=None, 39 | proposals=None, 40 | gt_bboxes_ignore=None, 41 | img_depth=None, 42 | img_mask=None, 43 | prev_bev=None, 44 | ): 45 | """Forward training function. 46 | Args: 47 | points (list[torch.Tensor], optional): Points of each sample. 48 | Defaults to None. 49 | img_metas (list[dict], optional): Meta information of each sample. 50 | Defaults to None. 51 | gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`], optional): 52 | Ground truth 3D boxes.
Defaults to None. 53 | gt_labels_3d (list[torch.Tensor], optional): Ground truth labels 54 | of 3D boxes. Defaults to None. 55 | gt_labels (list[torch.Tensor], optional): Ground truth labels 56 | of 2D boxes in images. Defaults to None. 57 | gt_bboxes (list[torch.Tensor], optional): Ground truth 2D boxes in 58 | images. Defaults to None. 59 | img (torch.Tensor, optional): Images of each sample with shape 60 | (N, C, H, W). Defaults to None. 61 | proposals (list[torch.Tensor], optional): Predicted proposals 62 | used for training Fast RCNN. Defaults to None. 63 | gt_bboxes_ignore (list[torch.Tensor], optional): Ground truth 64 | 2D boxes in images to be ignored. Defaults to None. 65 | Returns: 66 | dict: Losses of different branches. 67 | """ 68 | 69 | img_feats = self.extract_feat(img=img, img_metas=img_metas) 70 | 71 | losses = dict() 72 | losses_pts = self.forward_pts_train(img_feats, gt_bboxes_3d, 73 | gt_labels_3d, img_metas, 74 | gt_bboxes_ignore, prev_bev=prev_bev) 75 | losses.update(losses_pts) 76 | return losses 77 | 78 | 79 | def val_step(self, data, optimizer): 80 | """ 81 | In BEVFormer_fp16, we use this `val_step` function to infer the `prev_bev`. 82 | This is not the standard function of `val_step`. 83 | """ 84 | 85 | img = data['img'] 86 | img_metas = data['img_metas'] 87 | img_feats = self.extract_feat(img=img, img_metas=img_metas) 88 | prev_bev = data.get('prev_bev', None) 89 | prev_bev = self.pts_bbox_head(img_feats, img_metas, prev_bev=prev_bev, only_bev=True) 90 | return prev_bev -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/hooks/__init__.py: -------------------------------------------------------------------------------- 1 | from .custom_hooks import TransferWeight -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/hooks/custom_hooks.py: -------------------------------------------------------------------------------- 1 | from mmcv.runner.hooks.hook import HOOKS, Hook 2 | from projects.mmdet3d_plugin.models.utils import run_time 3 | 4 | 5 | @HOOKS.register_module() 6 | class TransferWeight(Hook): 7 | 8 | def __init__(self, every_n_inters=1): 9 | self.every_n_inters=every_n_inters 10 | 11 | def after_train_iter(self, runner): 12 | if self.every_n_inner_iters(runner, self.every_n_inters): 13 | runner.eval_model.load_state_dict(runner.model.state_dict()) 14 | 15 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/loss/__init__.py: -------------------------------------------------------------------------------- 1 | from .loss import SimpleLoss -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .transformer import PerceptionTransformer 2 | from .spatial_cross_attention import SpatialCrossAttention, MSDeformableAttention3D 3 | from .temporal_self_attention import TemporalSelfAttention 4 | from .encoder import BEVFormerEncoder, BEVFormerLayer 5 | from .decoder import DetectionTransformerDecoder 6 | from .seg_subnet import SegEncode, DeconvEncode, SegEncode_v1 7 | from .TransformerLSS import TransformerLSS 8 | 9 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/modules/builder.py:
-------------------------------------------------------------------------------- 1 | from mmcv.utils import Registry, build_from_cfg 2 | 3 | SEG_ENCODER = Registry('seg_encoder') 4 | 5 | def build_seg_encoder(cfg, **default_args): 6 | """Builder of segmentation encoder.""" 7 | return build_from_cfg(cfg, SEG_ENCODER, default_args) 8 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/modules/seg_sugnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from torchvision.models.resnet import resnet18 5 | from ..modules.builder import SEG_ENCODER 6 | 7 | class Up(nn.Module): 8 | def __init__(self, in_channels, out_channels, scale_factor=2): 9 | super().__init__() 10 | 11 | self.up = nn.Upsample(scale_factor=scale_factor, mode='bilinear', 12 | align_corners=True) 13 | 14 | self.conv = nn.Sequential( 15 | nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, bias=False), 16 | nn.BatchNorm2d(out_channels), 17 | nn.ReLU(inplace=True), 18 | nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1, bias=False), 19 | nn.BatchNorm2d(out_channels), 20 | nn.ReLU(inplace=True) 21 | ) 22 | 23 | def forward(self, x1, x2): 24 | x1 = self.up(x1) 25 | x1 = torch.cat([x2, x1], dim=1) # concatenate along the channel dimension to make up for information lost in the downsampling convolutions 26 | return self.conv(x1) 27 | 28 | @SEG_ENCODER.register_module() 29 | class SegEncode(nn.Module): 30 | def __init__(self, inC, outC, size): 31 | super(SegEncode, self).__init__() 32 | trunk = resnet18(pretrained=False, zero_init_residual=True) 33 | self.conv1 = nn.Conv2d(inC, 64, kernel_size=7, stride=2, padding=3, bias=False) 34 | self.bn1 = trunk.bn1 35 | self.relu = trunk.relu 36 | self.up_sampler = nn.Upsample(size=size, mode='bilinear', align_corners=True) 37 | self.layer1 = trunk.layer1 38 | self.layer2 = trunk.layer2 39 | self.layer3 = trunk.layer3 40 | 41 | self.up1 = Up(64 + 256, 256, scale_factor=4) 42 | self.up2 = nn.Sequential( 43 | nn.Upsample(scale_factor=2, mode='bilinear', 44 | align_corners=True), 45 | nn.Conv2d(256, 128, kernel_size=3, padding=1, bias=False), 46 | nn.BatchNorm2d(128), 47 | nn.ReLU(inplace=True), 48 | nn.Conv2d(128, outC, kernel_size=1, padding=0), 49 | ) 50 | 51 | def forward(self, x): #torch.Size([2, 256, 200, 400]) 52 | x = self.up_sampler(x) 53 | x = self.conv1(x) #torch.Size([2, 64, 200, 400]) 54 | x = self.bn1(x) 55 | x = self.relu(x) 56 | 57 | x1 = self.layer1(x) #torch.Size([2, 64, 100, 200]) 58 | x = self.layer2(x1) #torch.Size([2, 128, 50, 100]) 59 | x2 = self.layer3(x) #torch.Size([2, 256, 25, 50]) 60 | 61 | x = self.up1(x2, x1) #torch.Size([2, 256, 100, 200]) 62 | x = self.up2(x) #torch.Size([2, 4, 200, 400]) semantic segmentation prediction feature map 63 | 64 | return x 65 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/modules/seg_sunet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from torchvision.models.resnet import resnet18 5 | from ..modules.builder import SEG_ENCODER 6 | 7 | class Up(nn.Module): 8 | def __init__(self, in_channels, out_channels, scale_factor=2): 9 | super().__init__() 10 | 11 | self.up = nn.Upsample(scale_factor=scale_factor, mode='bilinear', 12 | align_corners=True) 13 | 14 | self.conv = nn.Sequential( 15 | nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, bias=False), 16 | nn.BatchNorm2d(out_channels), 17 | nn.ReLU(inplace=True), 18 |
nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1, bias=False), 19 | nn.BatchNorm2d(out_channels), 20 | nn.ReLU(inplace=True) 21 | ) 22 | 23 | def forward(self, x1, x2): 24 | x1 = self.up(x1) 25 | x1 = torch.cat([x2, x1], dim=1) # concatenate along the channel dimension to make up for information lost in the downsampling convolutions 26 | return self.conv(x1) 27 | 28 | @SEG_ENCODER.register_module() 29 | class SegEncode(nn.Module): 30 | def __init__(self, inC, outC, size): 31 | super(SegEncode, self).__init__() 32 | trunk = resnet18(pretrained=False, zero_init_residual=True) 33 | self.conv1 = nn.Conv2d(inC, 64, kernel_size=7, stride=2, padding=3, bias=False) 34 | self.bn1 = trunk.bn1 35 | self.relu = trunk.relu 36 | self.up_sampler = nn.Upsample(size=size, mode='bilinear', align_corners=True) 37 | self.layer1 = trunk.layer1 38 | self.layer2 = trunk.layer2 39 | self.layer3 = trunk.layer3 40 | 41 | self.up1 = Up(64 + 256, 256, scale_factor=4) 42 | self.up2 = nn.Sequential( 43 | nn.Upsample(scale_factor=2, mode='bilinear', 44 | align_corners=True), 45 | nn.Conv2d(256, 128, kernel_size=3, padding=1, bias=False), 46 | nn.BatchNorm2d(128), 47 | nn.ReLU(inplace=True), 48 | nn.Conv2d(128, outC, kernel_size=1, padding=0), 49 | ) 50 | 51 | def forward(self, x): #torch.Size([2, 256, 200, 400]) 52 | x = self.up_sampler(x) 53 | x = self.conv1(x) #torch.Size([2, 64, 200, 400]) 54 | x = self.bn1(x) 55 | x = self.relu(x) 56 | 57 | x1 = self.layer1(x) #torch.Size([2, 64, 100, 200]) 58 | x = self.layer2(x1) #torch.Size([2, 128, 50, 100]) 59 | x2 = self.layer3(x) #torch.Size([2, 256, 25, 50]) 60 | 61 | x = self.up1(x2, x1) #torch.Size([2, 256, 100, 200]) 62 | x = self.up2(x) #torch.Size([2, 4, 200, 400]) semantic segmentation prediction feature map 63 | 64 | return x 65 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/runner/__init__.py: -------------------------------------------------------------------------------- 1 | from .epoch_based_runner import EpochBasedRunner_video -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/runner/epoch_based_runner.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | # --------------------------------------------- 3 | # Modified by Zhiqi Li 4 | # --------------------------------------------- 5 | 6 | import os.path as osp 7 | import torch 8 | import mmcv 9 | from mmcv.runner.base_runner import BaseRunner 10 | from mmcv.runner.epoch_based_runner import EpochBasedRunner 11 | from mmcv.runner.builder import RUNNERS 12 | from mmcv.runner.checkpoint import save_checkpoint 13 | from mmcv.runner.utils import get_host_info 14 | from pprint import pprint 15 | from mmcv.parallel.data_container import DataContainer 16 | 17 | 18 | @RUNNERS.register_module() 19 | class EpochBasedRunner_video(EpochBasedRunner): 20 | 21 | ''' 22 | # basic logic 23 | 24 | input_sequence = [a, b, c] # given a sequence of samples 25 | 26 | prev_bev = None 27 | for each in input_sequence[:-1]: 28 | prev_bev = eval_model(each, prev_bev) # inference only. 29 | 30 | model(input_sequence[-1], prev_bev) # train the last sample.
31 | ''' 32 | 33 | def __init__(self, 34 | model, 35 | eval_model=None, 36 | batch_processor=None, 37 | optimizer=None, 38 | work_dir=None, 39 | logger=None, 40 | meta=None, 41 | keys=['gt_bboxes_3d', 'gt_labels_3d', 'img'], 42 | max_iters=None, 43 | max_epochs=None): 44 | super().__init__(model, 45 | batch_processor, 46 | optimizer, 47 | work_dir, 48 | logger, 49 | meta, 50 | max_iters, 51 | max_epochs) 52 | keys.append('img_metas') 53 | self.keys = keys 54 | self.eval_model = eval_model 55 | self.eval_model.eval() 56 | 57 | def run_iter(self, data_batch, train_mode, **kwargs): 58 | if self.batch_processor is not None: 59 | assert False 60 | # outputs = self.batch_processor( 61 | # self.model, data_batch, train_mode=train_mode, **kwargs) 62 | elif train_mode: 63 | 64 | num_samples = data_batch['img'].data[0].size(1) 65 | data_list = [] 66 | prev_bev = None 67 | for i in range(num_samples): 68 | data = {} 69 | for key in self.keys: 70 | if key not in ['img_metas', 'img', 'points']: 71 | data[key] = data_batch[key] 72 | else: 73 | if key == 'img': 74 | data['img'] = DataContainer(data=[data_batch['img'].data[0][:, i]], cpu_only=data_batch['img'].cpu_only, stack=True) 75 | elif key == 'img_metas': 76 | data['img_metas'] = DataContainer(data=[[each[i] for each in data_batch['img_metas'].data[0]]], cpu_only=data_batch['img_metas'].cpu_only) 77 | else: 78 | assert False 79 | data_list.append(data) 80 | with torch.no_grad(): 81 | for i in range(num_samples-1): 82 | if data_list[i]['img_metas'].data[0][0]['prev_bev_exists']: 83 | data_list[i]['prev_bev'] = DataContainer(data=[prev_bev], cpu_only=False) 84 | prev_bev = self.eval_model.val_step(data_list[i], self.optimizer, **kwargs) 85 | if data_list[-1]['img_metas'].data[0][0]['prev_bev_exists']: 86 | data_list[-1]['prev_bev'] = DataContainer(data=[prev_bev], cpu_only=False) 87 | outputs = self.model.train_step(data_list[-1], self.optimizer, **kwargs) 88 | else: 89 | assert False 90 | # outputs = self.model.val_step(data_batch, self.optimizer, **kwargs) 91 | 92 | if not isinstance(outputs, dict): 93 | raise TypeError('"batch_processor()" or "model.train_step()"' 94 | 'and "model.val_step()" must return a dict') 95 | if 'log_vars' in outputs: 96 | self.log_buffer.update(outputs['log_vars'], outputs['num_samples']) 97 | self.outputs = outputs -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/runner/loss/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bin-ze/BEVFormer_segmentation_detection/8e2a1beee39946f393322a293f127a489b093377/projects/mmdet3d_plugin/bevformer/runner/loss/__init__.py -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/assigners/__init__.py: -------------------------------------------------------------------------------- 1 | from .hungarian_assigner_3d import HungarianAssigner3D 2 | 3 | __all__ = ['HungarianAssigner3D'] 4 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bin-ze/BEVFormer_segmentation_detection/8e2a1beee39946f393322a293f127a489b093377/projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/__init__.cpython-38.pyc 
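# Illustrative sketch (simplified, not an exact excerpt) of the temporal training scheme
# implemented by EpochBasedRunner_video above: BEV features for all but the last frame of a
# clip are produced by the frozen `eval_model` without gradients, and only the last frame is
# used for the actual optimization step. `run_clip` and `frames` are hypothetical names.

import torch

def run_clip(model, eval_model, optimizer, frames):
    # `frames` is a list of per-frame data dicts, oldest first.
    prev_bev = None
    with torch.no_grad():
        for frame in frames[:-1]:
            frame['prev_bev'] = prev_bev
            prev_bev = eval_model.val_step(frame, optimizer)   # inference only
    frames[-1]['prev_bev'] = prev_bev
    return model.train_step(frames[-1], optimizer)             # gradients flow here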
-------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/hungarian_assigner_3d.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bin-ze/BEVFormer_segmentation_detection/8e2a1beee39946f393322a293f127a489b093377/projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/hungarian_assigner_3d.cpython-38.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/coders/__init__.py: -------------------------------------------------------------------------------- 1 | from .nms_free_coder import NMSFreeCoder 2 | 3 | __all__ = ['NMSFreeCoder'] 4 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/coders/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bin-ze/BEVFormer_segmentation_detection/8e2a1beee39946f393322a293f127a489b093377/projects/mmdet3d_plugin/core/bbox/coders/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/coders/__pycache__/nms_free_coder.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bin-ze/BEVFormer_segmentation_detection/8e2a1beee39946f393322a293f127a489b093377/projects/mmdet3d_plugin/core/bbox/coders/__pycache__/nms_free_coder.cpython-38.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/coders/nms_free_coder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from mmdet.core.bbox import BaseBBoxCoder 4 | from mmdet.core.bbox.builder import BBOX_CODERS 5 | from projects.mmdet3d_plugin.core.bbox.util import denormalize_bbox 6 | import numpy as np 7 | 8 | 9 | @BBOX_CODERS.register_module() 10 | class NMSFreeCoder(BaseBBoxCoder): 11 | """Bbox coder for NMS-free detector. 12 | Args: 13 | pc_range (list[float]): Range of point cloud. 14 | post_center_range (list[float]): Limit of the center. 15 | Default: None. 16 | max_num (int): Max number to be kept. Default: 100. 17 | score_threshold (float): Threshold to filter boxes based on score. 18 | Default: None. 19 | code_size (int): Code size of bboxes. Default: 9 20 | """ 21 | 22 | def __init__(self, 23 | pc_range, 24 | voxel_size=None, 25 | post_center_range=None, 26 | max_num=100, 27 | score_threshold=None, 28 | num_classes=10): 29 | self.pc_range = pc_range 30 | self.voxel_size = voxel_size 31 | self.post_center_range = post_center_range 32 | self.max_num = max_num 33 | self.score_threshold = score_threshold 34 | self.num_classes = num_classes 35 | 36 | def encode(self): 37 | 38 | pass 39 | 40 | def decode_single(self, cls_scores, bbox_preds): 41 | """Decode bboxes. 42 | Args: 43 | cls_scores (Tensor): Outputs from the classification head, \ 44 | shape [num_query, cls_out_channels]. Note \ 45 | cls_out_channels should includes background. 46 | bbox_preds (Tensor): Outputs from the regression \ 47 | head with normalized coordinate format (cx, cy, w, l, cz, h, rot_sine, rot_cosine, vx, vy). \ 48 | Shape [num_query, 9]. 49 | Returns: 50 | list[dict]: Decoded boxes. 
51 | """ 52 | max_num = self.max_num 53 | 54 | cls_scores = cls_scores.sigmoid() 55 | scores, indexs = cls_scores.view(-1).topk(max_num) 56 | labels = indexs % self.num_classes 57 | bbox_index = indexs // self.num_classes 58 | bbox_preds = bbox_preds[bbox_index] 59 | 60 | final_box_preds = denormalize_bbox(bbox_preds, self.pc_range) 61 | final_scores = scores 62 | final_preds = labels 63 | 64 | # use score threshold 65 | if self.score_threshold is not None: 66 | thresh_mask = final_scores > self.score_threshold 67 | tmp_score = self.score_threshold 68 | while thresh_mask.sum() == 0: 69 | tmp_score *= 0.9 70 | if tmp_score < 0.01: 71 | thresh_mask = final_scores > -1 72 | break 73 | thresh_mask = final_scores >= tmp_score 74 | 75 | if self.post_center_range is not None: 76 | self.post_center_range = torch.tensor( 77 | self.post_center_range, device=scores.device) 78 | mask = (final_box_preds[..., :3] >= 79 | self.post_center_range[:3]).all(1) 80 | mask &= (final_box_preds[..., :3] <= 81 | self.post_center_range[3:]).all(1) 82 | 83 | if self.score_threshold: 84 | mask &= thresh_mask 85 | 86 | boxes3d = final_box_preds[mask] 87 | scores = final_scores[mask] 88 | 89 | labels = final_preds[mask] 90 | predictions_dict = { 91 | 'bboxes': boxes3d, 92 | 'scores': scores, 93 | 'labels': labels 94 | } 95 | 96 | else: 97 | raise NotImplementedError( 98 | 'Need to reorganize output as a batch, only ' 99 | 'support post_center_range is not None for now!') 100 | return predictions_dict 101 | 102 | def decode(self, preds_dicts): 103 | """Decode bboxes. 104 | Args: 105 | all_cls_scores (Tensor): Outputs from the classification head, \ 106 | shape [nb_dec, bs, num_query, cls_out_channels]. Note \ 107 | cls_out_channels should includes background. 108 | all_bbox_preds (Tensor): Sigmoid outputs from the regression \ 109 | head with normalized coordinate format (cx, cy, w, l, cz, h, rot_sine, rot_cosine, vx, vy). \ 110 | Shape [nb_dec, bs, num_query, 9]. 111 | Returns: 112 | list[dict]: Decoded boxes. 
113 | """ 114 | all_cls_scores = preds_dicts['all_cls_scores'][-1] 115 | all_bbox_preds = preds_dicts['all_bbox_preds'][-1] 116 | 117 | batch_size = all_cls_scores.size()[0] 118 | predictions_list = [] 119 | for i in range(batch_size): 120 | predictions_list.append(self.decode_single(all_cls_scores[i], all_bbox_preds[i])) 121 | return predictions_list 122 | 123 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/match_costs/__init__.py: -------------------------------------------------------------------------------- 1 | from mmdet.core.bbox.match_costs import build_match_cost 2 | from .match_cost import BBox3DL1Cost 3 | 4 | __all__ = ['build_match_cost', 'BBox3DL1Cost'] -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/match_costs/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bin-ze/BEVFormer_segmentation_detection/8e2a1beee39946f393322a293f127a489b093377/projects/mmdet3d_plugin/core/bbox/match_costs/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/match_costs/__pycache__/match_cost.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bin-ze/BEVFormer_segmentation_detection/8e2a1beee39946f393322a293f127a489b093377/projects/mmdet3d_plugin/core/bbox/match_costs/__pycache__/match_cost.cpython-38.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/match_costs/match_cost.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from mmdet.core.bbox.match_costs.builder import MATCH_COST 3 | 4 | 5 | @MATCH_COST.register_module() 6 | class BBox3DL1Cost(object): 7 | """BBox3DL1Cost. 8 | Args: 9 | weight (int | float, optional): loss_weight 10 | """ 11 | 12 | def __init__(self, weight=1.): 13 | self.weight = weight 14 | 15 | def __call__(self, bbox_pred, gt_bboxes): 16 | """ 17 | Args: 18 | bbox_pred (Tensor): Predicted boxes with normalized coordinates 19 | (cx, cy, w, h), which are all in range [0, 1]. Shape 20 | [num_query, 4]. 21 | gt_bboxes (Tensor): Ground truth boxes with normalized 22 | coordinates (x1, y1, x2, y2). Shape [num_gt, 4]. 
23 | Returns: 24 | torch.Tensor: bbox_cost value with weight 25 | """ 26 | bbox_cost = torch.cdist(bbox_pred, gt_bboxes, p=1) 27 | return bbox_cost * self.weight -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/util.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def normalize_bbox(bboxes, pc_range): 5 | 6 | cx = bboxes[..., 0:1] 7 | cy = bboxes[..., 1:2] 8 | cz = bboxes[..., 2:3] 9 | w = bboxes[..., 3:4].log() 10 | l = bboxes[..., 4:5].log() 11 | h = bboxes[..., 5:6].log() 12 | 13 | rot = bboxes[..., 6:7] 14 | if bboxes.size(-1) > 7: 15 | vx = bboxes[..., 7:8] 16 | vy = bboxes[..., 8:9] 17 | normalized_bboxes = torch.cat( 18 | (cx, cy, w, l, cz, h, rot.sin(), rot.cos(), vx, vy), dim=-1 19 | ) 20 | else: 21 | normalized_bboxes = torch.cat( 22 | (cx, cy, w, l, cz, h, rot.sin(), rot.cos()), dim=-1 23 | ) 24 | return normalized_bboxes 25 | 26 | def denormalize_bbox(normalized_bboxes, pc_range): 27 | # rotation 28 | rot_sine = normalized_bboxes[..., 6:7] 29 | 30 | rot_cosine = normalized_bboxes[..., 7:8] 31 | rot = torch.atan2(rot_sine, rot_cosine) 32 | 33 | # center in the bev 34 | cx = normalized_bboxes[..., 0:1] 35 | cy = normalized_bboxes[..., 1:2] 36 | cz = normalized_bboxes[..., 4:5] 37 | 38 | # size 39 | w = normalized_bboxes[..., 2:3] 40 | l = normalized_bboxes[..., 3:4] 41 | h = normalized_bboxes[..., 5:6] 42 | 43 | w = w.exp() 44 | l = l.exp() 45 | h = h.exp() 46 | if normalized_bboxes.size(-1) > 8: 47 | # velocity 48 | vx = normalized_bboxes[:, 8:9] 49 | vy = normalized_bboxes[:, 9:10] 50 | denormalized_bboxes = torch.cat([cx, cy, cz, w, l, h, rot, vx, vy], dim=-1) 51 | else: 52 | denormalized_bboxes = torch.cat([cx, cy, cz, w, l, h, rot], dim=-1) 53 | return denormalized_bboxes -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .eval_hooks import CustomDistEvalHook -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/evaluation/eval_hooks.py: -------------------------------------------------------------------------------- 1 | 2 | # Note: Considering that MMCV's EvalHook updated its interface in V1.3.16, 3 | # in order to avoid strong version dependency, we did not directly 4 | # inherit EvalHook but BaseDistEvalHook. 
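# Illustrative round-trip sketch (assumed usage, not from the repo) for the normalize_bbox /
# denormalize_bbox helpers defined in core/bbox/util.py above: box sizes are stored in log
# space and yaw as (sin, cos), so decoding inverts encoding for yaw values inside (-pi, pi].
#
#   import torch
#   from projects.mmdet3d_plugin.core.bbox.util import normalize_bbox, denormalize_bbox
#
#   # one dummy box: [cx, cy, cz, w, l, h, yaw, vx, vy]
#   boxes = torch.tensor([[10.0, -4.0, -1.2, 1.9, 4.5, 1.6, 0.3, 2.0, 0.1]])
#   pc_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]   # accepted but unused by both helpers
#   encoded = normalize_bbox(boxes, pc_range)           # [..., 10], yaw encoded as sin/cos
#   decoded = denormalize_bbox(encoded, pc_range)       # [..., 9], original layout recovered
#   assert torch.allclose(boxes, decoded, atol=1e-5)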
5 | 6 | import bisect 7 | import os.path as osp 8 | 9 | import mmcv 10 | import torch.distributed as dist 11 | from mmcv.runner import DistEvalHook as BaseDistEvalHook 12 | from mmcv.runner import EvalHook as BaseEvalHook 13 | from torch.nn.modules.batchnorm import _BatchNorm 14 | from mmdet.core.evaluation.eval_hooks import DistEvalHook 15 | 16 | 17 | def _calc_dynamic_intervals(start_interval, dynamic_interval_list): 18 | assert mmcv.is_list_of(dynamic_interval_list, tuple) 19 | 20 | dynamic_milestones = [0] 21 | dynamic_milestones.extend( 22 | [dynamic_interval[0] for dynamic_interval in dynamic_interval_list]) 23 | dynamic_intervals = [start_interval] 24 | dynamic_intervals.extend( 25 | [dynamic_interval[1] for dynamic_interval in dynamic_interval_list]) 26 | return dynamic_milestones, dynamic_intervals 27 | 28 | 29 | class CustomDistEvalHook(BaseDistEvalHook): 30 | 31 | def __init__(self, *args, dynamic_intervals=None, **kwargs): 32 | super(CustomDistEvalHook, self).__init__(*args, **kwargs) 33 | self.use_dynamic_intervals = dynamic_intervals is not None 34 | if self.use_dynamic_intervals: 35 | self.dynamic_milestones, self.dynamic_intervals = \ 36 | _calc_dynamic_intervals(self.interval, dynamic_intervals) 37 | 38 | def _decide_interval(self, runner): 39 | if self.use_dynamic_intervals: 40 | progress = runner.epoch if self.by_epoch else runner.iter 41 | step = bisect.bisect(self.dynamic_milestones, (progress + 1)) 42 | # Dynamically modify the evaluation interval 43 | self.interval = self.dynamic_intervals[step - 1] 44 | 45 | def before_train_epoch(self, runner): 46 | """Evaluate the model only at the start of training by epoch.""" 47 | self._decide_interval(runner) 48 | super().before_train_epoch(runner) 49 | 50 | def before_train_iter(self, runner): 51 | self._decide_interval(runner) 52 | super().before_train_iter(runner) 53 | 54 | def _do_evaluate(self, runner): 55 | """perform evaluation and save ckpt.""" 56 | # Synchronization of BatchNorm's buffer (running_mean 57 | # and running_var) is not supported in the DDP of pytorch, 58 | # which may cause the inconsistent performance of models in 59 | # different ranks, so we broadcast BatchNorm's buffers 60 | # of rank 0 to other ranks to avoid this. 
61 | if self.broadcast_bn_buffer: 62 | model = runner.model 63 | for name, module in model.named_modules(): 64 | if isinstance(module, 65 | _BatchNorm) and module.track_running_stats: 66 | dist.broadcast(module.running_var, 0) 67 | dist.broadcast(module.running_mean, 0) 68 | 69 | if not self._should_evaluate(runner): 70 | return 71 | 72 | tmpdir = self.tmpdir 73 | if tmpdir is None: 74 | tmpdir = osp.join(runner.work_dir, '.eval_hook') 75 | 76 | from projects.mmdet3d_plugin.apis.test import multi_gpu_test # to solve circular import 77 | 78 | results = multi_gpu_test( 79 | runner.model, 80 | self.dataloader, 81 | tmpdir=tmpdir, 82 | gpu_collect=self.gpu_collect) 83 | if runner.rank == 0: 84 | if results == []: 85 | pass 86 | else: 87 | print('\n') 88 | runner.log_buffer.output['eval_iter_num'] = len(self.dataloader) 89 | 90 | key_score = self.evaluate(runner, results) 91 | 92 | if self.save_best: 93 | self._save_ckpt(runner, key_score) 94 | 95 | class EvalHook(BaseEvalHook): 96 | 97 | def _do_evaluate(self, runner): 98 | """perform evaluation and save ckpt.""" 99 | if not self._should_evaluate(runner): 100 | return 101 | 102 | from projects.mmdet3d_plugin.apis.test import single_gpu_test 103 | results = single_gpu_test(runner.model, self.dataloader, show=False) 104 | if results == []: 105 | pass 106 | else: 107 | runner.log_buffer.output['eval_iter_num'] = len(self.dataloader) 108 | key_score = self.evaluate(runner, results) 109 | if self.save_best: 110 | self._save_ckpt(runner, key_score) -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .nuscenes_dataset import CustomNuScenesDataset 2 | from .builder import custom_build_dataset 3 | 4 | __all__ = [ 5 | 'CustomNuScenesDataset' 6 | ] 7 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | from .transform_3d import ( 2 | PadMultiViewImage, NormalizeMultiviewImage, 3 | PhotoMetricDistortionMultiViewImage, CustomCollect3D, RandomScaleImageMultiViewImage) 4 | from .formating import CustomDefaultFormatBundle3D 5 | from .rasterize import RasterizeMapVectors 6 | from .loading import LoadMultiViewImageFromFiles_MTL, LoadAnnotations3D_MTL 7 | from .binimg import LSS_Segmentation 8 | from .bevsegmentation import BEVFusionSegmentation 9 | __all__ = [ 10 | 'PadMultiViewImage', 11 | 'NormalizeMultiviewImage', 12 | 'RasterizeMapVectors', 13 | 'PhotoMetricDistortionMultiViewImage', 14 | 'CustomDefaultFormatBundle3D', 15 | 'CustomCollect3D', 16 | 'RandomScaleImageMultiViewImage', 17 | 'LoadMultiViewImageFromFiles_MTL', 18 | 'LoadAnnotations3D_MTL', 19 | 'LSS_Segmentation', 20 | 'BEVFusionSegmentation' 21 | ] -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/pipelines/bevsegmentation.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Tuple 2 | 3 | import torch 4 | import numpy as np 5 | from nuscenes.map_expansion.map_api import NuScenesMap 6 | from nuscenes.map_expansion.map_api import locations as LOCATIONS 7 | from nuscenes.eval.common.utils import quaternion_yaw, Quaternion 8 | from mmdet.datasets.builder import PIPELINES 9 | 10 | 11 | @PIPELINES.register_module() 12 | class
BEVFusionSegmentation: 13 | """ 14 | Generate the BEV segmentation annotations used in the BEVFusion paper. 15 | 16 | """ 17 | def __init__( 18 | self, 19 | dataset_root, 20 | map_grid_conf, 21 | classes=('drivable_area', 'ped_crossing', 22 | 'walkway', 'stop_line', 'carpark_area', 23 | 'divider'), 24 | ): 25 | super().__init__() 26 | xbound = map_grid_conf['xbound'] 27 | ybound = map_grid_conf['ybound'] 28 | patch_h = ybound[1] - ybound[0] 29 | patch_w = xbound[1] - xbound[0] 30 | canvas_h = int(patch_h / ybound[2]) 31 | canvas_w = int(patch_w / xbound[2]) 32 | self.patch_size = (patch_h, patch_w) 33 | self.canvas_size = (canvas_h, canvas_w) 34 | self.classes = classes 35 | 36 | self.maps = {} 37 | for location in LOCATIONS: 38 | self.maps[location] = NuScenesMap(dataset_root, location) 39 | 40 | def show_seg(self, labels): 41 | 42 | mask_colors = [ 43 | np.random.randint(0, 256, (1, 3), dtype=np.uint8) 44 | for _ in range(len(self.classes) + 1) 45 | ] 46 | img = np.zeros((200, 400, 3)) 47 | 48 | for index, mask_ in enumerate(labels): 49 | color_mask = mask_colors[index] 50 | mask_ = mask_.astype(bool) 51 | img[mask_] = color_mask 52 | 53 | return img 54 | 55 | def __call__(self, results): 56 | 57 | location, ego2global_translation, ego2global_rotation = \ 58 | results['location'], results['ego2global_translation'], results['ego2global_rotation'] 59 | map_pose = ego2global_translation[:2] 60 | rotation = Quaternion(ego2global_rotation) 61 | 62 | patch_box = (map_pose[0], map_pose[1], self.patch_size[0], self.patch_size[1]) 63 | patch_angle = quaternion_yaw(rotation) / np.pi * 180 64 | 65 | mappings = {} 66 | for name in self.classes: 67 | if name == "drivable_area*": 68 | mappings[name] = ["road_segment", "lane"] 69 | elif name == "divider": 70 | mappings[name] = ["road_divider", "lane_divider"] 71 | else: 72 | mappings[name] = [name] 73 | 74 | layer_names = [] 75 | for name in mappings: 76 | layer_names.extend(mappings[name]) 77 | layer_names = list(set(layer_names)) 78 | 79 | masks = self.maps[location].get_map_mask( 80 | patch_box=patch_box, 81 | patch_angle=patch_angle, 82 | layer_names=layer_names, 83 | canvas_size=self.canvas_size, 84 | ) 85 | # masks = masks[:, ::-1, :].copy() 86 | # masks = masks.transpose(0, 2, 1) 87 | masks = masks.astype(bool) 88 | 89 | num_classes = len(self.classes) 90 | labels = np.zeros((num_classes, *self.canvas_size), dtype=np.int64) 91 | for k, name in enumerate(self.classes): 92 | for layer_name in mappings[name]: 93 | index = layer_names.index(layer_name) 94 | labels[k, masks[index]] = 1 95 | 96 | semantic_masks = labels # already one-hot encoded at this point 97 | num_cls = semantic_masks.shape[0] 98 | indices = np.arange(1, num_cls + 1).reshape(-1, 1, 1) 99 | semantic_indices = np.sum(semantic_masks * indices, axis=0) 100 | semantic_indices = np.where(semantic_indices > 5, 6, semantic_indices) 101 | 102 | results.update({ 103 | 'semantic_map': torch.from_numpy(semantic_masks), 104 | 'semantic_indices': torch.from_numpy(semantic_indices).long(), 105 | }) 106 | return results -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/pipelines/binimg.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import cv2 3 | 4 | import numpy as np 5 | 6 | from nuscenes.utils.data_classes import Box 7 | from pyquaternion import Quaternion 8 | from mmdet.datasets.builder import PIPELINES 9 | 10 | 11 | import warnings 12 | warnings.filterwarnings('ignore') 13 | 14 | 15 | @PIPELINES.register_module() 16 | class
LSS_Segmentation(object): 17 | """ 18 | Generate the BEV segmentation annotations used in the LSS paper. 19 | 20 | """ 21 | 22 | def __init__(self, 23 | map_grid_conf=None 24 | ): 25 | 26 | 27 | dx, bx, nx = self.gen_dx_bx(map_grid_conf['xbound'], map_grid_conf['ybound'], map_grid_conf['zbound']) 28 | self.dx, self.bx, self.nx = dx.numpy(), bx.numpy(), nx.numpy() 29 | 30 | @staticmethod 31 | def gen_dx_bx(xbound, ybound, zbound): 32 | dx = torch.Tensor([row[2] for row in [xbound, ybound, zbound]]) 33 | bx = torch.Tensor([row[0] + row[2] / 2.0 for row in [xbound, ybound, zbound]]) 34 | nx = torch.LongTensor([(row[1] - row[0]) / row[2] for row in [xbound, ybound, zbound]]) 35 | 36 | return dx, bx, nx 37 | 38 | def __call__(self, results): 39 | egopose = results['egopose'] 40 | trans = -np.array(egopose['translation']) 41 | rot = Quaternion(egopose['rotation']).inverse 42 | bin_img = np.zeros((self.nx[0], self.nx[1])) 43 | 44 | inst_ = results['inst'] 45 | for inst in inst_: 46 | # add category for lyft 47 | if not inst['category_name'].split('.')[0] == 'vehicle': 48 | continue 49 | box = Box(inst['translation'], inst['size'], Quaternion(inst['rotation'])) 50 | box.translate(trans) 51 | box.rotate(rot) 52 | 53 | pts = box.bottom_corners()[:2].T 54 | pts = np.round( 55 | (pts - self.bx[:2] + self.dx[:2] / 2.) / self.dx[:2] 56 | ).astype(np.int32) 57 | pts[:, [1, 0]] = pts[:, [0, 1]] 58 | cv2.fillPoly(bin_img, [pts], 1.0) 59 | 60 | 61 | results.update({ 62 | 'semantic_indices': torch.from_numpy(bin_img) 63 | }) 64 | 65 | return results 66 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/pipelines/formating.py: -------------------------------------------------------------------------------- 1 | 2 | # Copyright (c) OpenMMLab. All rights reserved. 3 | import numpy as np 4 | from mmcv.parallel import DataContainer as DC 5 | 6 | from mmdet3d.core.bbox import BaseInstance3DBoxes 7 | from mmdet3d.core.points import BasePoints 8 | from mmdet.datasets.builder import PIPELINES 9 | from mmdet.datasets.pipelines import to_tensor 10 | from mmdet3d.datasets.pipelines import DefaultFormatBundle3D 11 | 12 | @PIPELINES.register_module() 13 | class CustomDefaultFormatBundle3D(DefaultFormatBundle3D): 14 | """Default formatting bundle. 15 | It simplifies the pipeline of formatting common fields for voxels, 16 | including "proposals", "gt_bboxes", "gt_labels", "gt_masks" and 17 | "gt_semantic_seg". 18 | These fields are formatted as follows. 19 | - img: (1)transpose, (2)to tensor, (3)to DataContainer (stack=True) 20 | - proposals: (1)to tensor, (2)to DataContainer 21 | - gt_bboxes: (1)to tensor, (2)to DataContainer 22 | - gt_bboxes_ignore: (1)to tensor, (2)to DataContainer 23 | - gt_labels: (1)to tensor, (2)to DataContainer 24 | """ 25 | 26 | def __call__(self, results): 27 | """Call function to transform and format common fields in results. 28 | Args: 29 | results (dict): Result dict contains the data to convert. 30 | Returns: 31 | dict: The result dict contains the data that is formatted with 32 | default bundle.
33 | """ 34 | # Format 3D data 35 | results = super(CustomDefaultFormatBundle3D, self).__call__(results) 36 | results['gt_map_masks'] = DC( 37 | to_tensor(results['gt_map_masks']), stack=True) 38 | 39 | return results -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/pipelines/rasterize.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | from mmdet.datasets.builder import PIPELINES 5 | from ..utils import preprocess_map 6 | 7 | import warnings 8 | warnings.filterwarnings('ignore') 9 | 10 | 11 | @PIPELINES.register_module() 12 | class RasterizeMapVectors(object): 13 | """Rasterize vectorized map annotations into BEV mask tensors. 14 | 15 | Expects results['vectors'] to be a list of map vector dicts. 16 | 17 | Args: 18 | map_grid_conf (dict): BEV grid configuration with 'xbound' and 'ybound'. 19 | Defaults to None. 20 | map_thickness (int): Thickness of the rasterized map lines. Defaults to 5. 21 | """ 22 | 23 | def __init__(self, 24 | map_grid_conf=None, 25 | map_max_channel=3, 26 | map_thickness=5, 27 | map_angle_class=36 28 | ): 29 | 30 | self.map_max_channel = map_max_channel 31 | self.map_thickness = map_thickness 32 | self.map_angle_class = map_angle_class 33 | 34 | map_xbound, map_ybound = map_grid_conf['xbound'], map_grid_conf['ybound'] 35 | 36 | # patch_size: coordinate range along the y and x directions 37 | patch_h = map_ybound[1] - map_ybound[0] 38 | patch_w = map_xbound[1] - map_xbound[0] 39 | 40 | # canvas_size: BEV size (in pixels) along the y and x directions 41 | canvas_h = int(patch_h / map_ybound[2]) 42 | canvas_w = int(patch_w / map_xbound[2]) 43 | 44 | self.map_patch_size = (patch_h, patch_w) 45 | self.map_canvas_size = (canvas_h, canvas_w) 46 | 47 | def __call__(self, results): 48 | vectors = results['vectors'] 49 | for vector in vectors: 50 | vector['pts'] = vector['pts'][:, :2] 51 | 52 | semantic_masks, instance_masks, forward_masks, backward_masks = preprocess_map( 53 | vectors, self.map_patch_size, self.map_canvas_size, self.map_max_channel, self.map_thickness, self.map_angle_class) 54 | 55 | semantic_masks = semantic_masks.numpy() 56 | num_cls = semantic_masks.shape[0] 57 | indices = np.arange(1, num_cls + 1).reshape(-1, 1, 1) 58 | semantic_indices = np.sum(semantic_masks * indices, axis=0) 59 | 60 | results.update({ 61 | 'semantic_map': torch.from_numpy(semantic_masks), 62 | 'instance_map': torch.from_numpy(instance_masks), 63 | 'semantic_indices': torch.from_numpy(semantic_indices).long(), 64 | 'forward_direction': torch.from_numpy(forward_masks), 65 | 'backward_direction': torch.from_numpy(backward_masks), 66 | }) 67 | 68 | return results 69 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from .group_sampler import DistributedGroupSampler 2 | from .distributed_sampler import DistributedSampler 3 | from .sampler import SAMPLER, build_sampler 4 | 5 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/samplers/distributed_sampler.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | from torch.utils.data import DistributedSampler as _DistributedSampler 5 | from .sampler import SAMPLER 6 | 7 | 8 | @SAMPLER.register_module() 9 | class DistributedSampler(_DistributedSampler): 10 | 11 | def
__init__(self, 12 | dataset=None, 13 | num_replicas=None, 14 | rank=None, 15 | shuffle=True, 16 | seed=0): 17 | super().__init__( 18 | dataset, num_replicas=num_replicas, rank=rank, shuffle=shuffle) 19 | # for the compatibility from PyTorch 1.3+ 20 | self.seed = seed if seed is not None else 0 21 | 22 | def __iter__(self): 23 | # deterministically shuffle based on epoch 24 | if self.shuffle: 25 | assert False 26 | else: 27 | indices = torch.arange(len(self.dataset)).tolist() 28 | 29 | # add extra samples to make it evenly divisible 30 | # in case that indices is shorter than half of total_size 31 | indices = (indices * 32 | math.ceil(self.total_size / len(indices)))[:self.total_size] 33 | assert len(indices) == self.total_size 34 | 35 | # subsample 36 | per_replicas = self.total_size//self.num_replicas 37 | # indices = indices[self.rank:self.total_size:self.num_replicas] 38 | indices = indices[self.rank*per_replicas:(self.rank+1)*per_replicas] 39 | assert len(indices) == self.num_samples 40 | 41 | return iter(indices) 42 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/samplers/group_sampler.py: -------------------------------------------------------------------------------- 1 | 2 | # Copyright (c) OpenMMLab. All rights reserved. 3 | import math 4 | 5 | import numpy as np 6 | import torch 7 | from mmcv.runner import get_dist_info 8 | from torch.utils.data import Sampler 9 | from .sampler import SAMPLER 10 | import random 11 | from IPython import embed 12 | 13 | 14 | @SAMPLER.register_module() 15 | class DistributedGroupSampler(Sampler): 16 | """Sampler that restricts data loading to a subset of the dataset. 17 | It is especially useful in conjunction with 18 | :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each 19 | process can pass a DistributedSampler instance as a DataLoader sampler, 20 | and load a subset of the original dataset that is exclusive to it. 21 | .. note:: 22 | Dataset is assumed to be of constant size. 23 | Arguments: 24 | dataset: Dataset used for sampling. 25 | num_replicas (optional): Number of processes participating in 26 | distributed training. 27 | rank (optional): Rank of the current process within num_replicas. 28 | seed (int, optional): random seed used to shuffle the sampler if 29 | ``shuffle=True``. This number should be identical across all 30 | processes in the distributed group. Default: 0. 
31 | """ 32 | 33 | def __init__(self, 34 | dataset, 35 | samples_per_gpu=1, 36 | num_replicas=None, 37 | rank=None, 38 | seed=0): 39 | _rank, _num_replicas = get_dist_info() 40 | if num_replicas is None: 41 | num_replicas = _num_replicas 42 | if rank is None: 43 | rank = _rank 44 | self.dataset = dataset 45 | self.samples_per_gpu = samples_per_gpu 46 | self.num_replicas = num_replicas 47 | self.rank = rank 48 | self.epoch = 0 49 | self.seed = seed if seed is not None else 0 50 | 51 | assert hasattr(self.dataset, 'flag') 52 | self.flag = self.dataset.flag 53 | self.group_sizes = np.bincount(self.flag) 54 | 55 | self.num_samples = 0 56 | for i, j in enumerate(self.group_sizes): 57 | self.num_samples += int( 58 | math.ceil(self.group_sizes[i] * 1.0 / self.samples_per_gpu / 59 | self.num_replicas)) * self.samples_per_gpu 60 | self.total_size = self.num_samples * self.num_replicas 61 | 62 | def __iter__(self): 63 | # deterministically shuffle based on epoch 64 | g = torch.Generator() 65 | g.manual_seed(self.epoch + self.seed) 66 | 67 | indices = [] 68 | for i, size in enumerate(self.group_sizes): 69 | if size > 0: 70 | indice = np.where(self.flag == i)[0] 71 | assert len(indice) == size 72 | # add .numpy() to avoid bug when selecting indice in parrots. 73 | # TODO: check whether torch.randperm() can be replaced by 74 | # numpy.random.permutation(). 75 | indice = indice[list( 76 | torch.randperm(int(size), generator=g).numpy())].tolist() 77 | extra = int( 78 | math.ceil( 79 | size * 1.0 / self.samples_per_gpu / self.num_replicas) 80 | ) * self.samples_per_gpu * self.num_replicas - len(indice) 81 | # pad indice 82 | tmp = indice.copy() 83 | for _ in range(extra // size): 84 | indice.extend(tmp) 85 | indice.extend(tmp[:extra % size]) 86 | indices.extend(indice) 87 | 88 | assert len(indices) == self.total_size 89 | 90 | indices = [ 91 | indices[j] for i in list( 92 | torch.randperm( 93 | len(indices) // self.samples_per_gpu, generator=g)) 94 | for j in range(i * self.samples_per_gpu, (i + 1) * 95 | self.samples_per_gpu) 96 | ] 97 | 98 | # subsample 99 | offset = self.num_samples * self.rank 100 | indices = indices[offset:offset + self.num_samples] 101 | assert len(indices) == self.num_samples 102 | 103 | return iter(indices) 104 | 105 | def __len__(self): 106 | return self.num_samples 107 | 108 | def set_epoch(self, epoch): 109 | self.epoch = epoch 110 | 111 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/samplers/sampler.py: -------------------------------------------------------------------------------- 1 | from mmcv.utils.registry import Registry, build_from_cfg 2 | 3 | SAMPLER = Registry('sampler') 4 | 5 | 6 | def build_sampler(cfg, default_args): 7 | return build_from_cfg(cfg, SAMPLER, default_args) 8 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .vector_map import VectorizedLocalMap 2 | from .rasterize import preprocess_map 3 | from .warper import FeatureWarper -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | from .metrics import IntersectionOverUnion, PanopticMetric 2 | -------------------------------------------------------------------------------- 
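# Illustrative sketch (assumed usage, not from the repo) of the SAMPLER registry and
# build_sampler defined in datasets/samplers/sampler.py above: a config dict picks the
# sampler class by its registered name, while runtime-only objects are passed through
# default_args. `make_train_sampler`, `train_dataset`, `world_size` and `rank` are
# hypothetical names supplied by the caller.
from projects.mmdet3d_plugin.datasets.samplers import build_sampler

def make_train_sampler(train_dataset, world_size, rank):
    sampler_cfg = dict(type='DistributedGroupSampler', samples_per_gpu=1, seed=0)
    return build_sampler(sampler_cfg,
                         dict(dataset=train_dataset, num_replicas=world_size, rank=rank))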
/projects/mmdet3d_plugin/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .vovnet import VoVNet 2 | 3 | __all__ = ['VoVNet'] -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/hooks/__init__.py: -------------------------------------------------------------------------------- 1 | from .hooks import GradChecker -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/hooks/hooks.py: -------------------------------------------------------------------------------- 1 | from mmcv.runner.hooks.hook import HOOKS, Hook 2 | import os.path as osp 3 | import torch.distributed as dist 4 | from mmcv.runner import DistEvalHook as BaseDistEvalHook 5 | from mmcv.runner import EvalHook as BaseEvalHook 6 | from torch.nn.modules.batchnorm import _BatchNorm 7 | 8 | @HOOKS.register_module() 9 | class GradChecker(Hook): 10 | 11 | def after_train_iter(self, runner): 12 | for key, val in runner.model.named_parameters(): 13 | if val.grad is None and val.requires_grad: 14 | print('WARNING: {key}\'s parameters are not being used!'.format(key=key)) 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/opt/__init__.py: -------------------------------------------------------------------------------- 1 | from .adamw import AdamW2 -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .bricks import run_time 3 | from .grid_mask import GridMask 4 | from .position_embedding import RelPositionEmbedding 5 | from .visual import save_tensor -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/bricks.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import time 3 | from collections import defaultdict 4 | import torch 5 | time_maps = defaultdict(lambda :0.) 6 | count_maps = defaultdict(lambda :0.) 
7 | def run_time(name): 8 | def middle(fn): 9 | def wrapper(*args, **kwargs): 10 | torch.cuda.synchronize() 11 | start = time.time() 12 | res = fn(*args, **kwargs) 13 | torch.cuda.synchronize() 14 | time_maps['%s : %s'%(name, fn.__name__) ] += time.time()-start 15 | count_maps['%s : %s'%(name, fn.__name__) ] +=1 16 | print("%s : %s takes up %f "% (name, fn.__name__,time_maps['%s : %s'%(name, fn.__name__) ] /count_maps['%s : %s'%(name, fn.__name__) ] )) 17 | return res 18 | return wrapper 19 | return middle 20 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/grid_mask.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | from PIL import Image 5 | from mmcv.runner import force_fp32, auto_fp16 6 | 7 | class Grid(object): 8 | def __init__(self, use_h, use_w, rotate = 1, offset=False, ratio = 0.5, mode=0, prob = 1.): 9 | self.use_h = use_h 10 | self.use_w = use_w 11 | self.rotate = rotate 12 | self.offset = offset 13 | self.ratio = ratio 14 | self.mode=mode 15 | self.st_prob = prob 16 | self.prob = prob 17 | 18 | def set_prob(self, epoch, max_epoch): 19 | self.prob = self.st_prob * epoch / max_epoch 20 | 21 | def __call__(self, img, label): 22 | if np.random.rand() > self.prob: 23 | return img, label 24 | h = img.size(1) 25 | w = img.size(2) 26 | self.d1 = 2 27 | self.d2 = min(h, w) 28 | hh = int(1.5*h) 29 | ww = int(1.5*w) 30 | d = np.random.randint(self.d1, self.d2) 31 | if self.ratio == 1: 32 | self.l = np.random.randint(1, d) 33 | else: 34 | self.l = min(max(int(d*self.ratio+0.5),1),d-1) 35 | mask = np.ones((hh, ww), np.float32) 36 | st_h = np.random.randint(d) 37 | st_w = np.random.randint(d) 38 | if self.use_h: 39 | for i in range(hh//d): 40 | s = d*i + st_h 41 | t = min(s+self.l, hh) 42 | mask[s:t,:] *= 0 43 | if self.use_w: 44 | for i in range(ww//d): 45 | s = d*i + st_w 46 | t = min(s+self.l, ww) 47 | mask[:,s:t] *= 0 48 | 49 | r = np.random.randint(self.rotate) 50 | mask = Image.fromarray(np.uint8(mask)) 51 | mask = mask.rotate(r) 52 | mask = np.asarray(mask) 53 | mask = mask[(hh-h)//2:(hh-h)//2+h, (ww-w)//2:(ww-w)//2+w] 54 | 55 | mask = torch.from_numpy(mask).float() 56 | if self.mode == 1: 57 | mask = 1-mask 58 | 59 | mask = mask.expand_as(img) 60 | if self.offset: 61 | offset = torch.from_numpy(2 * (np.random.rand(h,w) - 0.5)).float() 62 | offset = (1 - mask) * offset 63 | img = img * mask + offset 64 | else: 65 | img = img * mask 66 | 67 | return img, label 68 | 69 | 70 | class GridMask(nn.Module): 71 | def __init__(self, use_h, use_w, rotate = 1, offset=False, ratio = 0.5, mode=0, prob = 1.): 72 | super(GridMask, self).__init__() 73 | self.use_h = use_h 74 | self.use_w = use_w 75 | self.rotate = rotate 76 | self.offset = offset 77 | self.ratio = ratio 78 | self.mode = mode 79 | self.st_prob = prob 80 | self.prob = prob 81 | self.fp16_enable = False 82 | def set_prob(self, epoch, max_epoch): 83 | self.prob = self.st_prob * epoch / max_epoch #+ 1.#0.5 84 | @auto_fp16() 85 | def forward(self, x): 86 | if np.random.rand() > self.prob or not self.training: 87 | return x 88 | n,c,h,w = x.size() 89 | x = x.view(-1,h,w) 90 | hh = int(1.5*h) 91 | ww = int(1.5*w) 92 | d = np.random.randint(2, h) 93 | self.l = min(max(int(d*self.ratio+0.5),1),d-1) 94 | mask = np.ones((hh, ww), np.float32) 95 | st_h = np.random.randint(d) 96 | st_w = np.random.randint(d) 97 | if self.use_h: 98 | for i in range(hh//d): 99 | s = d*i + st_h 
100 | t = min(s+self.l, hh) 101 | mask[s:t,:] *= 0 102 | if self.use_w: 103 | for i in range(ww//d): 104 | s = d*i + st_w 105 | t = min(s+self.l, ww) 106 | mask[:,s:t] *= 0 107 | 108 | r = np.random.randint(self.rotate) 109 | mask = Image.fromarray(np.uint8(mask)) 110 | mask = mask.rotate(r) 111 | mask = np.asarray(mask) 112 | mask = mask[(hh-h)//2:(hh-h)//2+h, (ww-w)//2:(ww-w)//2+w] 113 | 114 | mask = torch.from_numpy(mask).to(x.dtype).cuda() 115 | if self.mode == 1: 116 | mask = 1-mask 117 | mask = mask.expand_as(x) 118 | if self.offset: 119 | offset = torch.from_numpy(2 * (np.random.rand(h,w) - 0.5)).to(x.dtype).cuda() 120 | x = x * mask + offset * (1 - mask) 121 | else: 122 | x = x * mask 123 | 124 | return x.view(n,c,h,w) -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/position_embedding.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import math 4 | 5 | class RelPositionEmbedding(nn.Module): 6 | def __init__(self, num_pos_feats=64, pos_norm=True): 7 | super().__init__() 8 | self.num_pos_feats = num_pos_feats 9 | self.fc = nn.Linear(4, self.num_pos_feats,bias=False) 10 | #nn.init.orthogonal_(self.fc.weight) 11 | #self.fc.weight.requires_grad = False 12 | self.pos_norm = pos_norm 13 | if self.pos_norm: 14 | self.norm = nn.LayerNorm(self.num_pos_feats) 15 | def forward(self, tensor): 16 | #mask = nesttensor.mask 17 | B,C,H,W = tensor.shape 18 | #print('tensor.shape', tensor.shape) 19 | y_range = (torch.arange(H) / float(H - 1)).to(tensor.device) 20 | #y_axis = torch.stack((y_range, 1-y_range),dim=1) 21 | y_axis = torch.stack((torch.cos(y_range * math.pi), torch.sin(y_range * math.pi)), dim=1) 22 | y_axis = y_axis.reshape(H, 1, 2).repeat(1, W, 1).reshape(H * W, 2) 23 | 24 | x_range = (torch.arange(W) / float(W - 1)).to(tensor.device) 25 | #x_axis =torch.stack((x_range,1-x_range),dim=1) 26 | x_axis = torch.stack((torch.cos(x_range * math.pi), torch.sin(x_range * math.pi)), dim=1) 27 | x_axis = x_axis.reshape(1, W, 2).repeat(H, 1, 1).reshape(H * W, 2) 28 | x_pos = torch.cat((y_axis, x_axis), dim=1) 29 | x_pos = self.fc(x_pos) 30 | 31 | if self.pos_norm: 32 | x_pos = self.norm(x_pos) 33 | #print('xpos,', x_pos.max(),x_pos.min()) 34 | return x_pos -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/visual.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torchvision.utils import make_grid 3 | import torchvision 4 | import matplotlib.pyplot as plt 5 | import cv2 6 | 7 | 8 | def convert_color(img_path): 9 | plt.figure() 10 | img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE) 11 | plt.imsave(img_path, img, cmap=plt.get_cmap('viridis')) 12 | plt.close() 13 | 14 | 15 | def save_tensor(tensor, path, pad_value=254.0,): 16 | print('save_tensor', path) 17 | tensor = tensor.to(torch.float).detach().cpu() 18 | if tensor.type() == 'torch.BoolTensor': 19 | tensor = tensor*255 20 | if len(tensor.shape) == 3: 21 | tensor = tensor.unsqueeze(1) 22 | tensor = make_grid(tensor, pad_value=pad_value, normalize=False).permute(1, 2, 0).numpy().copy() 23 | torchvision.utils.save_image(torch.tensor(tensor).permute(2, 0, 1), path) 24 | convert_color(path) 25 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | 
torchmetrics==0.3.2 2 | prettytable 3 | shapely==1.8.5 -------------------------------------------------------------------------------- /tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bin-ze/BEVFormer_segmentation_detection/8e2a1beee39946f393322a293f127a489b093377/tools/__init__.py -------------------------------------------------------------------------------- /tools/analysis_tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Bin-ze/BEVFormer_segmentation_detection/8e2a1beee39946f393322a293f127a489b093377/tools/analysis_tools/__init__.py -------------------------------------------------------------------------------- /tools/analysis_tools/benchmark.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import time 4 | import torch 5 | from mmcv import Config 6 | from mmcv.parallel import MMDataParallel 7 | from mmcv.runner import load_checkpoint, wrap_fp16_model 8 | import sys 9 | sys.path.append('.') 10 | from projects.mmdet3d_plugin.datasets.builder import build_dataloader 11 | from projects.mmdet3d_plugin.datasets import custom_build_dataset 12 | # from mmdet3d.datasets import build_dataloader, build_dataset 13 | from mmdet3d.models import build_detector 14 | #from tools.misc.fuse_conv_bn import fuse_module 15 | 16 | 17 | def parse_args(): 18 | parser = argparse.ArgumentParser(description='MMDet benchmark a model') 19 | parser.add_argument('config', help='test config file path') 20 | parser.add_argument('--checkpoint', default=None, help='checkpoint file') 21 | parser.add_argument('--samples', default=2000, help='samples to benchmark') 22 | parser.add_argument( 23 | '--log-interval', default=50, help='interval of logging') 24 | parser.add_argument( 25 | '--fuse-conv-bn', 26 | action='store_true', 27 | help='Whether to fuse conv and bn, this will slightly increase' 28 | 'the inference speed') 29 | args = parser.parse_args() 30 | return args 31 | 32 | 33 | def main(): 34 | args = parse_args() 35 | 36 | cfg = Config.fromfile(args.config) 37 | # set cudnn_benchmark 38 | if cfg.get('cudnn_benchmark', False): 39 | torch.backends.cudnn.benchmark = True 40 | cfg.model.pretrained = None 41 | cfg.data.test.test_mode = True 42 | 43 | # build the dataloader 44 | # TODO: support multiple images per gpu (only minor changes are needed) 45 | print(cfg.data.test) 46 | dataset = custom_build_dataset(cfg.data.test) 47 | data_loader = build_dataloader( 48 | dataset, 49 | samples_per_gpu=1, 50 | workers_per_gpu=cfg.data.workers_per_gpu, 51 | dist=False, 52 | shuffle=False) 53 | 54 | # build the model and load checkpoint 55 | cfg.model.train_cfg = None 56 | model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg')) 57 | fp16_cfg = cfg.get('fp16', None) 58 | if fp16_cfg is not None: 59 | wrap_fp16_model(model) 60 | if args.checkpoint is not None: 61 | load_checkpoint(model, args.checkpoint, map_location='cpu') 62 | #if args.fuse_conv_bn: 63 | # model = fuse_module(model) 64 | 65 | model = MMDataParallel(model, device_ids=[0]) 66 | 67 | model.eval() 68 | 69 | # the first several iterations may be very slow so skip them 70 | num_warmup = 5 71 | pure_inf_time = 0 72 | 73 | # benchmark with several samples and take the average 74 | for i, data in enumerate(data_loader): 75 | torch.cuda.synchronize() 76 | start_time = time.perf_counter() 
77 | with torch.no_grad(): 78 | model(return_loss=False, rescale=True, **data) 79 | 80 | torch.cuda.synchronize() 81 | elapsed = time.perf_counter() - start_time 82 | 83 | if i >= num_warmup: 84 | pure_inf_time += elapsed 85 | if (i + 1) % args.log_interval == 0: 86 | fps = (i + 1 - num_warmup) / pure_inf_time 87 | print(f'Done image [{i + 1:<3}/ {args.samples}], ' 88 | f'fps: {fps:.1f} img / s') 89 | 90 | if (i + 1) == args.samples: 91 | pure_inf_time += elapsed 92 | fps = (i + 1 - num_warmup) / pure_inf_time 93 | print(f'Overall fps: {fps:.1f} img / s') 94 | break 95 | 96 | 97 | if __name__ == '__main__': 98 | main() 99 | -------------------------------------------------------------------------------- /tools/analysis_tools/get_params.py: -------------------------------------------------------------------------------- 1 | import torch 2 | file_path = './ckpts/bevformer_v4.pth' 3 | model = torch.load(file_path, map_location='cpu') 4 | all = 0 5 | for key in list(model['state_dict'].keys()): 6 | all += model['state_dict'][key].nelement() 7 | print(all) 8 | 9 | # smaller 63374123 10 | # v4 69140395 11 | -------------------------------------------------------------------------------- /tools/data_converter/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | -------------------------------------------------------------------------------- /tools/data_converter/lyft_data_fixer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import numpy as np 4 | import os 5 | 6 | 7 | def fix_lyft(root_folder='./data/lyft', version='v1.01'): 8 | # refer to https://www.kaggle.com/c/3d-object-detection-for-autonomous-vehicles/discussion/110000 # noqa 9 | lidar_path = 'lidar/host-a011_lidar1_1233090652702363606.bin' 10 | root_folder = os.path.join(root_folder, f'{version}-train') 11 | lidar_path = os.path.join(root_folder, lidar_path) 12 | assert os.path.isfile(lidar_path), f'Please download the complete Lyft ' \ 13 | f'dataset and make sure {lidar_path} is present.' 
14 | points = np.fromfile(lidar_path, dtype=np.float32, count=-1) 15 | try: 16 | points.reshape([-1, 5]) 17 | print(f'This fix is not required for version {version}.') 18 | except ValueError: 19 | new_points = np.array(list(points) + [100.0, 1.0], dtype='float32') 20 | new_points.tofile(lidar_path) 21 | print(f'Appended 100.0 and 1.0 to the end of {lidar_path}.') 22 | 23 | 24 | parser = argparse.ArgumentParser(description='Lyft dataset fixer arg parser') 25 | parser.add_argument( 26 | '--root-folder', 27 | type=str, 28 | default='./data/lyft', 29 | help='specify the root path of Lyft dataset') 30 | parser.add_argument( 31 | '--version', 32 | type=str, 33 | default='v1.01', 34 | help='specify Lyft dataset version') 35 | args = parser.parse_args() 36 | 37 | if __name__ == '__main__': 38 | fix_lyft(root_folder=args.root_folder, version=args.version) 39 | -------------------------------------------------------------------------------- /tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | PORT=${PORT:-29503} 7 | 8 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 9 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 10 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} --eval bbox 11 | -------------------------------------------------------------------------------- /tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | PORT=${PORT:-28509} 6 | 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 8 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 9 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} --deterministic 10 | -------------------------------------------------------------------------------- /tools/fp16/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | PORT=${PORT:-28508} 6 | 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 8 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 9 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} --deterministic 10 | -------------------------------------------------------------------------------- /tools/misc/fuse_conv_bn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | import argparse 3 | import torch 4 | from mmcv.runner import save_checkpoint 5 | from torch import nn as nn 6 | 7 | from mmdet.apis import init_model 8 | 9 | 10 | def fuse_conv_bn(conv, bn): 11 | """During inference, the functionality of batch norm layers is turned off and 12 | only the per-channel mean and var are used, which exposes the chance to 13 | fuse it with the preceding conv layers to save computation and simplify 14 | network structures.""" 15 | conv_w = conv.weight 16 | conv_b = conv.bias if conv.bias is not None else torch.zeros_like( 17 | bn.running_mean) 18 | 19 | factor = bn.weight / torch.sqrt(bn.running_var + bn.eps) 20 | conv.weight = nn.Parameter(conv_w * 21 | factor.reshape([conv.out_channels, 1, 1, 1])) 22 | conv.bias = nn.Parameter((conv_b - bn.running_mean) * factor + bn.bias) 23 | return conv 24 | 25 | 26 | def fuse_module(m): 27 | last_conv = None 28 | last_conv_name = None 29 | 30 | for name, child in m.named_children(): 31 | if isinstance(child, (nn.BatchNorm2d, nn.SyncBatchNorm)): 32 | if last_conv is None: # only fuse BN that is after Conv 33 | continue 34 | fused_conv = fuse_conv_bn(last_conv, child) 35 | m._modules[last_conv_name] = fused_conv 36 | # To reduce changes, set BN as Identity instead of deleting it. 37 | m._modules[name] = nn.Identity() 38 | last_conv = None 39 | elif isinstance(child, nn.Conv2d): 40 | last_conv = child 41 | last_conv_name = name 42 | else: 43 | fuse_module(child) 44 | return m 45 | 46 | 47 | def parse_args(): 48 | parser = argparse.ArgumentParser( 49 | description='fuse Conv and BN layers in a model') 50 | parser.add_argument('config', help='config file path') 51 | parser.add_argument('checkpoint', help='checkpoint file path') 52 | parser.add_argument('out', help='output path of the converted model') 53 | args = parser.parse_args() 54 | return args 55 | 56 | 57 | def main(): 58 | args = parse_args() 59 | # build the model from a config file and a checkpoint file 60 | model = init_model(args.config, args.checkpoint) 61 | # fuse conv and bn layers of the model 62 | fused_model = fuse_module(model) 63 | save_checkpoint(fused_model, args.out) 64 | 65 | 66 | if __name__ == '__main__': 67 | main() 68 | -------------------------------------------------------------------------------- /tools/misc/print_config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | from mmcv import Config, DictAction 4 | 5 | 6 | def parse_args(): 7 | parser = argparse.ArgumentParser(description='Print the whole config') 8 | parser.add_argument('config', help='config file path') 9 | parser.add_argument( 10 | '--options', nargs='+', action=DictAction, help='arguments in dict') 11 | args = parser.parse_args() 12 | 13 | return args 14 | 15 | 16 | def main(): 17 | args = parse_args() 18 | 19 | cfg = Config.fromfile(args.config) 20 | if args.options is not None: 21 | cfg.merge_from_dict(args.options) 22 | print(f'Config:\n{cfg.pretty_text}') 23 | 24 | 25 | if __name__ == '__main__': 26 | main() 27 | -------------------------------------------------------------------------------- /tools/misc/visualize_results.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | import argparse 3 | import mmcv 4 | from mmcv import Config 5 | 6 | from mmdet3d.datasets import build_dataset 7 | 8 | 9 | def parse_args(): 10 | parser = argparse.ArgumentParser( 11 | description='MMDet3D visualize the results') 12 | parser.add_argument('config', help='test config file path') 13 | parser.add_argument('--result', help='results file in pickle format') 14 | parser.add_argument( 15 | '--show-dir', help='directory where visualize results will be saved') 16 | args = parser.parse_args() 17 | 18 | return args 19 | 20 | 21 | def main(): 22 | args = parse_args() 23 | 24 | if args.result is not None and \ 25 | not args.result.endswith(('.pkl', '.pickle')): 26 | raise ValueError('The results file must be a pkl file.') 27 | 28 | cfg = Config.fromfile(args.config) 29 | cfg.data.test.test_mode = True 30 | 31 | # build the dataset 32 | dataset = build_dataset(cfg.data.test) 33 | results = mmcv.load(args.result) 34 | 35 | if getattr(dataset, 'show', None) is not None: 36 | # data loading pipeline for showing 37 | eval_pipeline = cfg.get('eval_pipeline', {}) 38 | if eval_pipeline: 39 | dataset.show(results, args.show_dir, pipeline=eval_pipeline) 40 | else: 41 | dataset.show(results, args.show_dir) # use default pipeline 42 | else: 43 | raise NotImplementedError( 44 | 'Show is not implemented for dataset {}!'.format( 45 | type(dataset).__name__)) 46 | 47 | 48 | if __name__ == '__main__': 49 | main() 50 | -------------------------------------------------------------------------------- /tools/model_converters/publish_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import subprocess 4 | import torch 5 | 6 | 7 | def parse_args(): 8 | parser = argparse.ArgumentParser( 9 | description='Process a checkpoint to be published') 10 | parser.add_argument('in_file', help='input checkpoint filename') 11 | parser.add_argument('out_file', help='output checkpoint filename') 12 | args = parser.parse_args() 13 | return args 14 | 15 | 16 | def process_checkpoint(in_file, out_file): 17 | checkpoint = torch.load(in_file, map_location='cpu') 18 | # remove optimizer for smaller file size 19 | if 'optimizer' in checkpoint: 20 | del checkpoint['optimizer'] 21 | # if it is necessary to remove some sensitive data in checkpoint['meta'], 22 | # add the code here. 23 | torch.save(checkpoint, out_file) 24 | sha = subprocess.check_output(['sha256sum', out_file]).decode() 25 | final_file = out_file.rstrip('.pth') + '-{}.pth'.format(sha[:8]) 26 | subprocess.Popen(['mv', out_file, final_file]) 27 | 28 | 29 | def main(): 30 | args = parse_args() 31 | process_checkpoint(args.in_file, args.out_file) 32 | 33 | 34 | if __name__ == '__main__': 35 | main() 36 | -------------------------------------------------------------------------------- /tools/model_converters/regnet2mmdet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | import argparse 3 | import torch 4 | from collections import OrderedDict 5 | 6 | 7 | def convert_stem(model_key, model_weight, state_dict, converted_names): 8 | new_key = model_key.replace('stem.conv', 'conv1') 9 | new_key = new_key.replace('stem.bn', 'bn1') 10 | state_dict[new_key] = model_weight 11 | converted_names.add(model_key) 12 | print(f'Convert {model_key} to {new_key}') 13 | 14 | 15 | def convert_head(model_key, model_weight, state_dict, converted_names): 16 | new_key = model_key.replace('head.fc', 'fc') 17 | state_dict[new_key] = model_weight 18 | converted_names.add(model_key) 19 | print(f'Convert {model_key} to {new_key}') 20 | 21 | 22 | def convert_reslayer(model_key, model_weight, state_dict, converted_names): 23 | split_keys = model_key.split('.') 24 | layer, block, module = split_keys[:3] 25 | block_id = int(block[1:]) 26 | layer_name = f'layer{int(layer[1:])}' 27 | block_name = f'{block_id - 1}' 28 | 29 | if block_id == 1 and module == 'bn': 30 | new_key = f'{layer_name}.{block_name}.downsample.1.{split_keys[-1]}' 31 | elif block_id == 1 and module == 'proj': 32 | new_key = f'{layer_name}.{block_name}.downsample.0.{split_keys[-1]}' 33 | elif module == 'f': 34 | if split_keys[3] == 'a_bn': 35 | module_name = 'bn1' 36 | elif split_keys[3] == 'b_bn': 37 | module_name = 'bn2' 38 | elif split_keys[3] == 'c_bn': 39 | module_name = 'bn3' 40 | elif split_keys[3] == 'a': 41 | module_name = 'conv1' 42 | elif split_keys[3] == 'b': 43 | module_name = 'conv2' 44 | elif split_keys[3] == 'c': 45 | module_name = 'conv3' 46 | new_key = f'{layer_name}.{block_name}.{module_name}.{split_keys[-1]}' 47 | else: 48 | raise ValueError(f'Unsupported conversion of key {model_key}') 49 | print(f'Convert {model_key} to {new_key}') 50 | state_dict[new_key] = model_weight 51 | converted_names.add(model_key) 52 | 53 | 54 | def convert(src, dst): 55 | """Convert keys in pycls pretrained RegNet models to mmdet style.""" 56 | # load caffe model 57 | regnet_model = torch.load(src) 58 | blobs = regnet_model['model_state'] 59 | # convert to pytorch style 60 | state_dict = OrderedDict() 61 | converted_names = set() 62 | for key, weight in blobs.items(): 63 | if 'stem' in key: 64 | convert_stem(key, weight, state_dict, converted_names) 65 | elif 'head' in key: 66 | convert_head(key, weight, state_dict, converted_names) 67 | elif key.startswith('s'): 68 | convert_reslayer(key, weight, state_dict, converted_names) 69 | 70 | # check if all layers are converted 71 | for key in blobs: 72 | if key not in converted_names: 73 | print(f'not converted: {key}') 74 | # save checkpoint 75 | checkpoint = dict() 76 | checkpoint['state_dict'] = state_dict 77 | torch.save(checkpoint, dst) 78 | 79 | 80 | def main(): 81 | parser = argparse.ArgumentParser(description='Convert model keys') 82 | parser.add_argument('src', help='src detectron model path') 83 | parser.add_argument('dst', help='save path') 84 | args = parser.parse_args() 85 | convert(args.src, args.dst) 86 | 87 | 88 | if __name__ == '__main__': 89 | main() 90 | -------------------------------------------------------------------------------- /utils/compose_map_det_res.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import numpy as np 4 | 5 | path = '/root/work_code/BEVFormer/visual_res_base/' 6 | 7 | visual_list = list(filter(lambda x: "camera" in x, os.listdir(path))) 8 | 9 | for img in visual_list: 10 | det_img = path + img 11 | seg_img = path + img.split('_')[0] + '.png' 12 | 13 | det_img = 
cv2.imread(det_img) 14 | seg_img = cv2.imread(seg_img) 15 | seg_img = np.rot90(seg_img, 1, [0, 1]) 16 | 17 | scale = det_img.shape[0] / seg_img.shape[0] 18 | width, height = int(scale * seg_img.shape[1]), det_img.shape[0] 19 | 20 | seg_img = cv2.resize(seg_img, (width, height), interpolation=cv2.INTER_LINEAR) 21 | 22 | # concatenate the detection and segmentation views side by side 23 | concat_img = np.hstack((det_img, seg_img)) 24 | concat_img = cv2.resize(concat_img, (concat_img.shape[1]//2, concat_img.shape[0]//2), interpolation=cv2.INTER_LINEAR) 25 | 26 | cv2.imwrite(f"visual/{img.split('_')[0]}.png", concat_img) 27 | 28 | -------------------------------------------------------------------------------- /utils/compose_small_base_vidio.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import cv2 4 | import mmcv 5 | import numpy as np 6 | 7 | from nuscenes.nuscenes import NuScenes 8 | 9 | PATH_base = '/home/guozebin/work_code/BEVFormer/visual_res_base/' 10 | PATH_small = '/home/guozebin/work_code/BEVFormer/visual_res_small/' 11 | video_path = 'small_base_compare_demo.mp4' 12 | 13 | if __name__ == '__main__': 14 | count = 0 15 | nusc = NuScenes(version='v1.0-trainval', dataroot='/home/guozebin/work_code/BEVFormer/data/nuscenes', verbose=True) 16 | bevformer_results = mmcv.load('/home/guozebin/work_code/BEVFormer/val/work_dirs/' 17 | 'bevformer_small_seg_det_300x300/Tue_Jan_31_16_25_12_2023/pts_bbox/results_nusc.json') 18 | sample_token_list = list(bevformer_results['results'].keys())[1000:2000] 19 | for id in range(0, 1000): 20 | 21 | if sample_token_list[id] + '.jpg' not in os.listdir(PATH_base): 22 | continue 23 | 24 | print(f"handle {PATH_base + sample_token_list[id] +'.jpg'}") 25 | print(f"handle {PATH_small + sample_token_list[id] +'.jpg'}") 26 | count += 1 27 | im_base = os.path.join(PATH_base, sample_token_list[id] + '.jpg') 28 | im_small = os.path.join(PATH_small, sample_token_list[id] + '.jpg') 29 | 30 | im_base = cv2.imread(im_base) 31 | # overlay the metric values 32 | im_base = cv2.putText(im_base, "mAP: 41.9, NDS: 51.3, mIoU: 44.1", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 0), 2) 33 | 34 | im_small = cv2.imread(im_small) 35 | # overlay the metric values 36 | im_small = cv2.putText(im_small, "mAP: 38.2, NDS: 48.7, mIoU: 40.4", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 0), 2) 37 | 38 | # add a black separator strip to distinguish the two results 39 | split = np.zeros([20, im_base.shape[1], 3]) 40 | # stack the two results vertically 41 | im = np.vstack((im_small, split, im_base)) 42 | if count == 1: 43 | fps, w, h = 5, im.shape[1], im.shape[0] 44 | out = cv2.VideoWriter(video_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h)) 45 | out.write(im.astype(np.uint8)) 46 | 47 | print('Done!') 48 | -------------------------------------------------------------------------------- /utils/compose_vidio.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import cv2 4 | import mmcv 5 | 6 | from nuscenes.nuscenes import NuScenes 7 | 8 | PATH = '/home/guozebin/work_code/BEVFormer/visual_res_base' 9 | video_path = 'seg_det_demo_best_v1.mp4' 10 | 11 | if __name__ == '__main__': 12 | count = 0 13 | nusc = NuScenes(version='v1.0-trainval', dataroot='/home/guozebin/work_code/BEVFormer/data/nuscenes', verbose=True) 14 | bevformer_results = mmcv.load( 15 | '/home/guozebin/work_code/BEVFormer/val/work_dirs/bevformer_small_seg_det_300x300/Tue_Jan_31_16_25_12_2023/pts_bbox/results_nusc.json') 16 | sample_token_list = list(bevformer_results['results'].keys())[1000:2000] 17 | for id in range(0, 1000): 18 | 19 | if sample_token_list[id] + '.jpg' not 
in os.listdir(PATH): 20 | continue 21 | 22 | print(f"handle {PATH + sample_token_list[id] +'.jpg'}") 23 | count += 1 24 | im = os.path.join(PATH, sample_token_list[id] + '.jpg') 25 | im = cv2.imread(im) 26 | 27 | if count == 1: 28 | fps, w, h = 5, im.shape[1], im.shape[0] 29 | out = cv2.VideoWriter(video_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h)) 30 | out.write(im) 31 | print('Done!') -------------------------------------------------------------------------------- /utils/padding_seg_to_det.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | path = '/root/work_code/BEVFormer/visual_rot/000681a060c04755a1537cf83b53ba57.png' 5 | 6 | det_grid_conf = { 7 | 'xbound': [-51.2, 51.2, 0.68], 8 | 'ybound': [-51.2, 51.2, 0.68], 9 | } 10 | 11 | map_grid_conf = { 12 | 'xbound': [-30.0, 30.0, 0.15], 13 | 'ybound': [-15.0, 15.0, 0.15], 14 | } 15 | 16 | def padding_seg_to_det(path): 17 | 18 | seg = cv2.imread(path) 19 | h, w, _ = seg.shape 20 | 21 | det_w = int((det_grid_conf['xbound'][1] - det_grid_conf['xbound'][0])/(map_grid_conf['xbound'][1] - map_grid_conf['xbound'][0]) * w) 22 | det_h = det_w 23 | 24 | new_img = np.zeros((det_h, det_w, 3)) 25 | new_img = np.where(new_img == 0, 255, 0) 26 | new_img[det_h // 2 - h // 2: det_h // 2 + h//2, det_w // 2 - w // 2: det_w // 2 + w//2, :] = seg 27 | 28 | new_img = np.rot90(new_img, 1, [0, 1]) 29 | 30 | 31 | return new_img 32 | 33 | 34 | if __name__ == '__main__': 35 | 36 | img = padding_seg_to_det(path=path) 37 | cv2.imwrite("a.jpg", img) -------------------------------------------------------------------------------- /utils/test_grid_sample.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import cv2 3 | import numpy as np 4 | 5 | PATH = '/home/guozebin/work_code/BEVFormer/visual/gt000050_000.jpg' 6 | 7 | # 8 | img = cv2.imread(PATH) 9 | img1 = torch.from_numpy(img) 10 | # test rot 11 | img = torch.rot90(img1, k=-1, dims=[0,1]) 12 | cv2.imwrite('rot90.jpg', img.numpy()) 13 | 14 | img = torch.flip(img, dims=[1]) 15 | 16 | cv2.imwrite('flip.jpg', img.numpy()) 17 | --------------------------------------------------------------------------------
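Closing reference: a minimal sketch (illustrative only, not a file in the repository) of applying the GridMask augmentation from projects/mmdet3d_plugin/models/utils/grid_mask.py above to a batch of camera images. The tensor shape and constructor values are assumptions for demonstration rather than the project's actual training configuration, and a CUDA device is required because forward() moves the mask to the GPU with .cuda().

import torch

from projects.mmdet3d_plugin.models.utils import GridMask

if torch.cuda.is_available():
    # hyper-parameters here are illustrative; the real config may differ
    grid_mask = GridMask(use_h=True, use_w=True, rotate=1, offset=False,
                         ratio=0.5, mode=1, prob=0.7)
    grid_mask.train()                         # masking is skipped when not in training mode
    imgs = torch.rand(6, 3, 480, 800).cuda()  # e.g. six surround-view camera images
    masked = grid_mask(imgs)                  # same shape; a grid pattern of pixels is zeroed
                                              # (mode selects which side of the grid is kept)
    # the drop probability can be ramped over training with grid_mask.set_prob(epoch, max_epoch)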