├── LICENSE ├── README.md ├── assets ├── exp_1.png ├── exp_2.png ├── pipeline.png ├── teaser.png └── vis.png ├── ops ├── __init__.py ├── octree_ops.cpp ├── octree_ops.py └── octree_ops_cuda.cu ├── projects ├── __init__.py ├── configs │ ├── OctreeOcc │ │ └── octreeocc_base.py │ ├── _base_ │ │ ├── datasets │ │ │ ├── coco_instance.py │ │ │ ├── kitti-3d-3class.py │ │ │ ├── kitti-3d-car.py │ │ │ ├── lyft-3d.py │ │ │ ├── nuim_instance.py │ │ │ ├── nus-3d.py │ │ │ ├── nus-mono3d.py │ │ │ ├── range100_lyft-3d.py │ │ │ ├── s3dis-3d-5class.py │ │ │ ├── s3dis_seg-3d-13class.py │ │ │ ├── scannet-3d-18class.py │ │ │ ├── scannet_seg-3d-20class.py │ │ │ ├── sunrgbd-3d-10class.py │ │ │ ├── waymoD5-3d-3class.py │ │ │ └── waymoD5-3d-car.py │ │ ├── default_runtime.py │ │ ├── models │ │ │ ├── 3dssd.py │ │ │ ├── cascade_mask_rcnn_r50_fpn.py │ │ │ ├── centerpoint_01voxel_second_secfpn_nus.py │ │ │ ├── centerpoint_02pillar_second_secfpn_nus.py │ │ │ ├── fcos3d.py │ │ │ ├── groupfree3d.py │ │ │ ├── h3dnet.py │ │ │ ├── hv_pointpillars_fpn_lyft.py │ │ │ ├── hv_pointpillars_fpn_nus.py │ │ │ ├── hv_pointpillars_fpn_range100_lyft.py │ │ │ ├── hv_pointpillars_secfpn_kitti.py │ │ │ ├── hv_pointpillars_secfpn_waymo.py │ │ │ ├── hv_second_secfpn_kitti.py │ │ │ ├── hv_second_secfpn_waymo.py │ │ │ ├── imvotenet_image.py │ │ │ ├── mask_rcnn_r50_fpn.py │ │ │ ├── paconv_cuda_ssg.py │ │ │ ├── paconv_ssg.py │ │ │ ├── parta2.py │ │ │ ├── pointnet2_msg.py │ │ │ ├── pointnet2_ssg.py │ │ │ └── votenet.py │ │ └── schedules │ │ │ ├── cosine.py │ │ │ ├── cyclic_20e.py │ │ │ ├── cyclic_40e.py │ │ │ ├── mmdet_schedule_1x.py │ │ │ ├── schedule_2x.py │ │ │ ├── schedule_3x.py │ │ │ ├── seg_cosine_150e.py │ │ │ ├── seg_cosine_200e.py │ │ │ └── seg_cosine_50e.py │ ├── datasets │ │ ├── custom_lyft-3d.py │ │ ├── custom_nus-3d.py │ │ └── custom_waymo-3d.py │ └── label_mapping │ │ ├── nuscenes.yaml │ │ ├── semantic-kitti-multiscan.yaml │ │ └── semantic-kitti.yaml └── mmdet3d_plugin │ ├── __init__.py │ ├── bevformer │ ├── __init__.py │ ├── apis │ │ ├── __init__.py │ │ ├── mmdet_train.py │ │ ├── test.py │ │ ├── test_dense.py │ │ └── train.py │ ├── dense_heads │ │ ├── __init__.py │ │ ├── depth_head.py │ │ ├── occ_head.py │ │ └── seg_head.py │ ├── detectors │ │ ├── __init__.py │ │ └── octree_occ.py │ ├── hooks │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ └── custom_hooks.cpython-37.pyc │ │ └── custom_hooks.py │ ├── modules │ │ ├── __init__.py │ │ ├── custom_base_transformer_layer.py │ │ ├── decoder.py │ │ ├── deformable_self_attention_3D_custom.py │ │ ├── multi_scale_3ddeformable_attn_function.py │ │ ├── multi_scale_deformable_attn_3D_custom_function.py │ │ ├── multi_scale_deformable_attn_function.py │ │ ├── occ_encoder.py │ │ ├── occ_mlp_decoder.py │ │ ├── occ_spatial_attention.py │ │ ├── octree_transformer.py │ │ ├── residual_block_3d.py │ │ └── spatial_cross_attention.py │ └── runner │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ └── epoch_based_runner.cpython-37.pyc │ │ └── epoch_based_runner.py │ ├── core │ ├── bbox │ │ ├── __pycache__ │ │ │ └── util.cpython-37.pyc │ │ ├── assigners │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-37.pyc │ │ │ │ └── hungarian_assigner_3d.cpython-37.pyc │ │ │ └── hungarian_assigner_3d.py │ │ ├── coders │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-37.pyc │ │ │ │ └── nms_free_coder.cpython-37.pyc │ │ │ └── nms_free_coder.py │ │ ├── match_costs │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── 
__init__.cpython-37.pyc │ │ │ │ └── match_cost.cpython-37.pyc │ │ │ └── match_cost.py │ │ └── util.py │ └── evaluation │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ └── eval_hooks.cpython-37.pyc │ │ ├── eval_hooks.py │ │ └── kitti2waymo.py │ ├── datasets │ ├── __init__.py │ ├── builder.py │ ├── nuscenes_dataset.py │ ├── nuscenes_dataset_lidarseg.py │ ├── nuscenes_dataset_occ.py │ ├── nuscenes_mono_dataset.py │ ├── nuscenes_occ.py │ ├── nuscnes_eval.py │ ├── occ_metrics.py │ ├── pipelines │ │ ├── __init__.py │ │ ├── compose.py │ │ ├── formating.py │ │ ├── loading.py │ │ └── transform_3d.py │ └── samplers │ │ ├── __init__.py │ │ ├── distributed_sampler.py │ │ ├── group_sampler.py │ │ └── sampler.py │ ├── models │ ├── backbones │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ ├── internv2_impl16.cpython-37.pyc │ │ │ └── vovnet.cpython-37.pyc │ │ ├── internv2_impl16.py │ │ ├── sam_modeling │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-37.pyc │ │ │ │ ├── common.cpython-37.pyc │ │ │ │ └── image_encoder.cpython-37.pyc │ │ │ ├── common.py │ │ │ └── image_encoder.py │ │ └── vovnet.py │ ├── hooks │ │ ├── __init__.py │ │ └── hooks.py │ ├── losses │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ ├── dice_loss.cpython-37.pyc │ │ │ ├── focal_loss.cpython-37.pyc │ │ │ ├── lovasz_losses.cpython-37.pyc │ │ │ ├── lovasz_softmax.cpython-37.pyc │ │ │ ├── nusc_param.cpython-37.pyc │ │ │ └── semkitti.cpython-37.pyc │ │ ├── dice_loss.py │ │ ├── focal_loss.py │ │ ├── lovasz_losses.py │ │ ├── lovasz_softmax.py │ │ ├── nusc_param.py │ │ └── semkitti.py │ ├── opt │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ └── adamw.cpython-37.pyc │ │ └── adamw.py │ └── utils │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── bev_visualize.cpython-37.pyc │ │ ├── bricks.cpython-37.pyc │ │ ├── grid_mask.cpython-37.pyc │ │ ├── position_embedding.cpython-37.pyc │ │ └── visual.cpython-37.pyc │ │ ├── bev_visualize.py │ │ ├── bricks.py │ │ ├── grid_mask.py │ │ ├── occupied_prob.npz │ │ ├── occupied_prob_with_l3.npz │ │ ├── position_embedding.py │ │ ├── table.npz │ │ └── visual.py │ └── ops │ └── src │ └── octree_ops_cuda.cu ├── setup.py └── tools ├── create_data.py ├── create_data_occ.py ├── data_converter ├── __init__.py ├── create_gt_database.py ├── indoor_converter.py ├── kitti_converter.py ├── kitti_data_utils.py ├── lyft_converter.py ├── lyft_data_fixer.py ├── nuimage_converter.py ├── nuscenes_converter.py ├── nuscenes_occ_converter.py ├── s3dis_data_utils.py ├── scannet_data_utils.py ├── sunrgbd_data_utils.py └── waymo_converter.py ├── dist_test.sh ├── dist_test_dense.sh ├── dist_test_occ.sh ├── dist_test_seg.sh ├── dist_train.sh ├── eval_metrics ├── lidar_seg.py └── lidar_seg_convert.py ├── fp16 ├── dist_train.sh └── train.py ├── learning_map._nus.yaml ├── misc ├── browse_dataset.py ├── fuse_conv_bn.py ├── print_config.py └── visualize_results.py ├── model_converters ├── convert_votenet_checkpoints.py ├── publish_model.py └── regnet2mmdet.py ├── test.py ├── test_dense.py └── train.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 4DVLab 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without 
limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | # OctreeOcc: Efficient and Multi-Granularity Occupancy Prediction Using Octree Queries
3 | 
4 | Yuhang Lu,
5 | Xinge Zhu,
6 | Tai Wang,
7 | Yuexin Ma
8 | 
9 | 
10 | Paper
11 | 
12 | 
13 | 
14 | ## Main Idea
15 | 
16 | 

17 | ![OctreeOcc Teaser](assets/teaser.png)
18 | 

19 | 
20 | OctreeOcc employs octree queries to provide varying granularity for different semantic regions, reducing the number of queries required for modeling and mitigating the low information density of 3D space. A rough query-count sketch follows.
21 | 
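The savings can be illustrated with rough numbers (editor's sketch; the resolutions and split ratios below are assumptions, not the exact OctreeOcc configuration):

```
# Illustrative only: a 200x200x16 dense grid vs. a three-level octree that
# starts from a 50x50x4 coarse grid and subdivides only a fraction of cells
# at each level (every subdivided cell spawns 8 children).
def dense_query_count(h, w, d):
    return h * w * d

def octree_query_count(h, w, d, split_l1=0.3, split_l2=0.3):
    l1 = h * w * d                  # coarse level-1 queries
    l2 = int(l1 * split_l1) * 8     # children of subdivided L1 cells
    l3 = int(l2 * split_l2) * 8     # children of subdivided L2 cells
    return l1 + l2 + l3

print(dense_query_count(200, 200, 16))  # 640000 dense queries
print(octree_query_count(50, 50, 4))    # 91600 queries at mixed granularity
```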

22 | ## Architecture Overview
23 | 
24 | 
25 | ![Pipeline](assets/pipeline.png)
26 | 

27 | 
28 | Given multi-view images, we first extract multi-scale image features with an image backbone. The initial octree structure is then derived from image segmentation priors, and the dense queries are transformed into octree queries. Next, the octree encoder jointly refines the octree queries and rectifies the octree structure. Finally, we decode the octree queries to obtain the occupancy prediction for the current frame. For clearer visualization, the diagram of the Iterative Structure Rectification module shows the octree query and mask in 2D form (as a quadtree); a minimal sketch of this rectification step follows.
29 | 
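In the same 2D (quadtree) form, structure rectification can be sketched as selecting which coarse cells deserve finer queries (editor's sketch; the function name and the top-k selection rule are illustrative, not the repository's actual module):

```
import torch

def rectify_structure_2d(cell_scores, split_ratio=0.25):
    # cell_scores: [H, W] predicted "needs finer granularity" score per cell.
    # Keep the highest-scoring split_ratio of cells; each selected cell is
    # subdivided into 4 children (8 in the real 3D octree), the rest stay coarse.
    k = max(1, int(cell_scores.numel() * split_ratio))
    threshold = cell_scores.flatten().topk(k).values.min()
    return cell_scores >= threshold

scores = torch.rand(10, 10)
subdivide = rectify_structure_2d(scores)
print(int(subdivide.sum()), "of", scores.numel(), "cells receive finer queries")
```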

30 | ## Performance
31 | 
32 | 
33 | ![Main Results](assets/exp_1.png)
34 | 

35 | 
36 | 

37 | ![Efficiency Results](assets/exp_2.png)
38 | 

39 | 
40 | Experiments on the Occ3D-nuScenes dataset demonstrate that our approach improves performance while substantially reducing computational overhead, even compared with 2D modeling approaches.
41 | 
42 | ## Visualization
43 | 
44 | 

45 | ![Visualization](assets/vis.png)
46 | 

47 | 
48 | Qualitative results on the Occ3D-nuScenes validation set. The first row shows the input multi-view images; the second row shows the occupancy predictions of PanoOcc, FBOCC, and our method, alongside the ground truth.
49 | 
50 | ## Coming Soon
51 | 
52 | - Detailed guidance documents
53 | - Data generation scripts
54 | 
55 | Stay tuned for updates. Thank you for your interest in our work!
56 | 
57 | 
58 | ## Citation
59 | ```
60 | @article{lu2024octreeocc,
61 |   title={Octreeocc: Efficient and multi-granularity occupancy prediction using octree queries},
62 |   author={Lu, Yuhang and Zhu, Xinge and Wang, Tai and Ma, Yuexin},
63 |   journal={Advances in Neural Information Processing Systems},
64 |   volume={37},
65 |   pages={79618--79641},
66 |   year={2024}
67 | }
68 | ```
69 | 
-------------------------------------------------------------------------------- /assets/exp_1.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/4DVLab/OctreeOcc/54afdd64b210fa45a0b1e1f255e4f88739b4e5a8/assets/exp_1.png
-------------------------------------------------------------------------------- /assets/exp_2.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/4DVLab/OctreeOcc/54afdd64b210fa45a0b1e1f255e4f88739b4e5a8/assets/exp_2.png
-------------------------------------------------------------------------------- /assets/pipeline.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/4DVLab/OctreeOcc/54afdd64b210fa45a0b1e1f255e4f88739b4e5a8/assets/pipeline.png
-------------------------------------------------------------------------------- /assets/teaser.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/4DVLab/OctreeOcc/54afdd64b210fa45a0b1e1f255e4f88739b4e5a8/assets/teaser.png
-------------------------------------------------------------------------------- /assets/vis.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/4DVLab/OctreeOcc/54afdd64b210fa45a0b1e1f255e4f88739b4e5a8/assets/vis.png
-------------------------------------------------------------------------------- /ops/__init__.py: --------------------------------------------------------------------------------
1 | from .octree_ops import create_octree_mask_l1_to_l2, create_octree_mask_l2_to_l3
2 | 
3 | __all__ = [
4 |     'create_octree_mask_l1_to_l2',
5 |     'create_octree_mask_l2_to_l3'
6 | ]
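The two exported helpers each expand an occupancy mask by one octree level, doubling every spatial dimension. A minimal usage sketch (editor's note: this assumes a CUDA device and a successful build of the extension listed below; the shapes are illustrative):

```
import torch
from ops import create_octree_mask_l1_to_l2, create_octree_mask_l2_to_l3

# [B, H, W, D] boolean layout, as enforced by the C++ shape checks below.
octree_l1 = torch.zeros(1, 50, 50, 4, dtype=torch.bool, device='cuda')
octree_l1[0, 10, 10, 1] = True

mask_l2 = create_octree_mask_l1_to_l2(octree_l1)  # -> [1, 100, 100, 8]
mask_l3 = create_octree_mask_l2_to_l3(mask_l2)    # -> [1, 200, 200, 16]
```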
bool tensor"); 26 | 27 | // 确保维度正确 28 | TORCH_CHECK(octree_l1.dim() == 4, "octree_l1 must be a 4D tensor"); 29 | TORCH_CHECK(mask_l2.dim() == 4, "mask_l2 must be a 4D tensor"); 30 | 31 | // 确保形状正确 32 | int B = octree_l1.size(0); 33 | int H = octree_l1.size(1); 34 | int W = octree_l1.size(2); 35 | int D = octree_l1.size(3); 36 | 37 | TORCH_CHECK(mask_l2.size(0) == B, "batch size mismatch"); 38 | TORCH_CHECK(mask_l2.size(1) == H*2, "height mismatch"); 39 | TORCH_CHECK(mask_l2.size(2) == W*2, "width mismatch"); 40 | TORCH_CHECK(mask_l2.size(3) == D*2, "depth mismatch"); 41 | 42 | // 调用CUDA实现 43 | const at::cuda::OptionalCUDAGuard device_guard(device_of(octree_l1)); 44 | octree_mask_l1_to_l2_forward_cuda(octree_l1, mask_l2); 45 | } 46 | 47 | void octree_mask_l2_to_l3_forward( 48 | const at::Tensor& octree_l2, 49 | at::Tensor& mask_l3) { 50 | 51 | // 确保输入在CUDA上 52 | TORCH_CHECK(octree_l2.is_cuda(), "octree_l2 must be a CUDA tensor"); 53 | TORCH_CHECK(mask_l3.is_cuda(), "mask_l3 must be a CUDA tensor"); 54 | 55 | // 确保输入是布尔类型 56 | TORCH_CHECK(octree_l2.scalar_type() == at::ScalarType::Bool, "octree_l2 must be bool tensor"); 57 | TORCH_CHECK(mask_l3.scalar_type() == at::ScalarType::Bool, "mask_l3 must be bool tensor"); 58 | 59 | // 确保维度正确 60 | TORCH_CHECK(octree_l2.dim() == 4, "octree_l2 must be a 4D tensor"); 61 | TORCH_CHECK(mask_l3.dim() == 4, "mask_l3 must be a 4D tensor"); 62 | 63 | // 确保形状正确 64 | int B = octree_l2.size(0); 65 | int H = octree_l2.size(1); 66 | int W = octree_l2.size(2); 67 | int D = octree_l2.size(3); 68 | 69 | TORCH_CHECK(mask_l3.size(0) == B, "batch size mismatch"); 70 | TORCH_CHECK(mask_l3.size(1) == H*2, "height mismatch"); 71 | TORCH_CHECK(mask_l3.size(2) == W*2, "width mismatch"); 72 | TORCH_CHECK(mask_l3.size(3) == D*2, "depth mismatch"); 73 | 74 | // 调用CUDA实现 75 | const at::cuda::OptionalCUDAGuard device_guard(device_of(octree_l2)); 76 | octree_mask_l2_to_l3_forward_cuda(octree_l2, mask_l3); 77 | } 78 | 79 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 80 | m.def("octree_mask_l1_to_l2_forward", &octree_mask_l1_to_l2_forward, "Octree mask L1 to L2 forward"); 81 | m.def("octree_mask_l2_to_l3_forward", &octree_mask_l2_to_l3_forward, "Octree mask L2 to L3 forward"); 82 | } -------------------------------------------------------------------------------- /ops/octree_ops.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from torch.autograd.function import once_differentiable 4 | from torch.utils.cpp_extension import load 5 | import os 6 | 7 | # 动态加载CUDA扩展 8 | current_dir = os.path.dirname(os.path.abspath(__file__)) 9 | _ext = load( 10 | name='octree_ops', 11 | sources=[ 12 | os.path.join(current_dir, 'octree_ops.cpp'), 13 | os.path.join(current_dir, 'octree_ops_cuda.cu') 14 | ], 15 | extra_cflags=['-O3'], 16 | verbose=True 17 | ) 18 | 19 | class OctreeMaskL1ToL2Function(Function): 20 | @staticmethod 21 | def forward(ctx, octree_l1): 22 | """从L1级别生成L2级别的八叉树掩码 23 | 24 | Args: 25 | octree_l1 (torch.Tensor): [B, H, W, D] 布尔值掩码 26 | 27 | Returns: 28 | torch.Tensor: [B, 2*H, 2*W, 2*D] 布尔值掩码 29 | """ 30 | # 确保输入是布尔值 31 | octree_l1 = octree_l1.bool() 32 | 33 | # 创建输出张量 34 | B, H, W, D = octree_l1.shape 35 | mask_l2 = torch.zeros(B, H*2, W*2, D*2, dtype=torch.bool, device=octree_l1.device) 36 | 37 | # 调用CUDA操作 38 | _ext.octree_mask_l1_to_l2_forward(octree_l1, mask_l2) 39 | 40 | return mask_l2 41 | 42 | @staticmethod 43 | @once_differentiable 44 | def backward(ctx, grad_output): 45 | # 
-------------------------------------------------------------------------------- /ops/octree_ops_cuda.cu: --------------------------------------------------------------------------------
1 | #include <torch/extension.h>
2 | #include <ATen/ATen.h>
3 | #include <ATen/cuda/CUDAContext.h>
4 | #include <cuda.h>
5 | #include <cuda_runtime.h>
6 | #include <vector>
7 | 
8 | // Thread block size
9 | const int THREADS_PER_BLOCK = 256;
10 | 
11 | // CUDA kernel implementation
12 | __global__ void octree_mask_l1_to_l2_kernel(
13 |     const bool* __restrict__ octree_l1,
14 |     bool* __restrict__ mask_l2,
15 |     const int B,
16 |     const int H,
17 |     const int W,
18 |     const int D) {
19 | 
20 |     // Compute the global thread index
21 |     const int index = blockIdx.x * blockDim.x + threadIdx.x;
22 |     const int total_elements = B * H * W * D;
23 | 
24 |     if (index >= total_elements) return;
25 | 
26 |     // Recover the 4D index into the input
27 |     const int d = index % D;
28 |     const int w = (index / D) % W;
29 |     const int h = (index / (D * W)) % H;
30 |     const int b = index / (D * W * H);
31 | 
32 |     // Read the original value
33 |     const int input_idx = ((b * H + h) * W + w) * D + d;
34 |     const bool value = octree_l1[input_idx];
35 | 
36 |     // If the value is true, mark the 8 child positions in the output
37 |     if (value) {
38 |         // Base index in the output
39 |         const int h_out = h * 2;
40 |         const int w_out = w * 2;
41 |         const int d_out = d * 2;
42 | 
43 |         // Offsets of the 8 children
44 |         const int offsets[8][3] = {
45 |             {0, 0, 0}, {0, 0, 1}, {0, 1, 0}, {1, 0, 0},
46 |             {1, 1, 1}, {0, 1, 1}, {1, 1, 0}, {1, 0, 1}
47 |         };
48 | 
49 |         // Set all 8 children
50 |         for (int i = 0; i < 8; ++i) {
51 |             const int h_offset = offsets[i][0];
52 |             const int w_offset = offsets[i][1];
53 |             const int d_offset = offsets[i][2];
54 | 
55 |             const int output_idx = ((b * (H*2) + (h_out+h_offset)) * (W*2) + (w_out+w_offset)) * (D*2) + (d_out+d_offset);
56 |             mask_l2[output_idx] = true;
57 |         }
58 |     }
59 | }
60 | 
61 | // CUDA kernel implementation - the same logic applies for L2 to L3
62 | __global__ void octree_mask_l2_to_l3_kernel(
63 |     const bool* __restrict__ octree_l2,
64 |     bool* __restrict__ mask_l3,
65 |     const int B,
66 |     const int H,
67 |     const int W,
68 |     const int D) {
69 | 
70 |     // Compute the global thread index
71 |     const int index = blockIdx.x * blockDim.x + threadIdx.x;
72 |     const int total_elements = B * H * W * D;
73 | 
74 |     if (index >= total_elements) return;
75 | 
76 |     // Recover the 4D index into the input
77 |     const int d = index % D;
78 |     const int w = (index / D) % W;
79 |     const int h = (index / (D * W)) % H;
80 |     const int b = index / (D * W * H);
81 | 
82 |     // Read the original value
83 |     const int input_idx = ((b * H + h) * W + w) * D + d;
84 |     const bool value = octree_l2[input_idx];
85 | 
86 |     // If the value is true, mark the 8 child positions in the output
87 |     if (value) {
88 |         // Base index in the output
89 |         const int h_out = h * 2;
90 |         const int w_out = w * 2;
91 |         const int d_out = d * 2;
92 | 
93 |         // Offsets of the 8 children
94 |         const int offsets[8][3] = {
95 |             {0, 0, 0}, {0, 0, 1}, {0, 1, 0}, {1, 0, 0},
96 |             {1, 1, 1}, {0, 1, 1}, {1, 1, 0}, {1, 0, 1}
97 |         };
98 | 
99 |         // Set all 8 children
100 |         for (int i = 0; i < 8; ++i) {
101 |             const int h_offset = offsets[i][0];
102 |             const int w_offset = offsets[i][1];
103 |             const int d_offset = offsets[i][2];
104 | 
105 |             const int output_idx = ((b * (H*2) + (h_out+h_offset)) * (W*2) + (w_out+w_offset)) * (D*2) + (d_out+d_offset);
106 |             mask_l3[output_idx] = true;
107 |         }
108 |     }
109 | }
110 | 
111 | // CUDA interface function
112 | void octree_mask_l1_to_l2_forward_cuda(
113 |     const at::Tensor& octree_l1,
114 |     at::Tensor& mask_l2) {
115 | 
116 |     // Get the tensor sizes
117 |     const int B = octree_l1.size(0);
118 |     const int H = octree_l1.size(1);
119 |     const int W = octree_l1.size(2);
120 |     const int D = octree_l1.size(3);
121 |     const int total_elements = B * H * W * D;
122 | 
123 |     // Compute the grid size
124 |     const int blocks = (total_elements + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
125 | 
126 |     // Launch the CUDA kernel
127 |     octree_mask_l1_to_l2_kernel<<<blocks, THREADS_PER_BLOCK>>>(
128 |         octree_l1.data_ptr<bool>(),
129 |         mask_l2.data_ptr<bool>(),
130 |         B, H, W, D
131 |     );
132 | 
133 |     // Synchronize the CUDA stream
134 |     cudaDeviceSynchronize();
135 | }
136 | 
137 | // CUDA interface function
138 | void octree_mask_l2_to_l3_forward_cuda(
139 |     const at::Tensor& octree_l2,
140 |     at::Tensor& mask_l3) {
141 | 
142 |     // Get the tensor sizes
143 |     const int B = octree_l2.size(0);
144 |     const int H = octree_l2.size(1);
145 |     const int W = octree_l2.size(2);
146 |     const int D = octree_l2.size(3);
147 |     const int total_elements = B * H * W * D;
148 | 
149 |     // Compute the grid size
150 |     const int blocks = (total_elements + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
151 | 
152 |     // Launch the CUDA kernel
153 |     octree_mask_l2_to_l3_kernel<<<blocks, THREADS_PER_BLOCK>>>(
154 |         octree_l2.data_ptr<bool>(),
155 |         mask_l3.data_ptr<bool>(),
156 |         B, H, W, D
157 |     );
158 | 
159 |     // Synchronize the CUDA stream
160 |     cudaDeviceSynchronize();
161 | }
-------------------------------------------------------------------------------- /projects/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/coco_instance.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CocoDataset' 2 | data_root = 'data/coco/' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 8 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 9 | dict(type='RandomFlip', flip_ratio=0.5), 10 | dict(type='Normalize', **img_norm_cfg), 11 | dict(type='Pad', size_divisor=32), 12 | dict(type='DefaultFormatBundle'), 13 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 14 | ] 15 | test_pipeline = [ 16 | dict(type='LoadImageFromFile'), 17 | dict( 18 | type='MultiScaleFlipAug', 19 | img_scale=(1333, 800), 20 | flip=False, 21 | transforms=[ 22 | dict(type='Resize', keep_ratio=True), 23 | dict(type='RandomFlip'), 24 | dict(type='Normalize', **img_norm_cfg), 25 | dict(type='Pad', size_divisor=32), 26 | dict(type='ImageToTensor', keys=['img']), 27 | dict(type='Collect', keys=['img']), 28 | ]) 29 | ] 30 | data = dict( 31 | samples_per_gpu=2, 32 | workers_per_gpu=2, 33 | train=dict( 34 | type=dataset_type, 35 | ann_file=data_root + 'annotations/instances_train2017.json', 36 | img_prefix=data_root + 'train2017/', 37 | 
pipeline=train_pipeline), 38 | val=dict( 39 | type=dataset_type, 40 | ann_file=data_root + 'annotations/instances_val2017.json', 41 | img_prefix=data_root + 'val2017/', 42 | pipeline=test_pipeline), 43 | test=dict( 44 | type=dataset_type, 45 | ann_file=data_root + 'annotations/instances_val2017.json', 46 | img_prefix=data_root + 'val2017/', 47 | pipeline=test_pipeline)) 48 | evaluation = dict(metric=['bbox', 'segm']) 49 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/kitti-3d-3class.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'KittiDataset' 3 | data_root = 'data/kitti/' 4 | class_names = ['Pedestrian', 'Cyclist', 'Car'] 5 | point_cloud_range = [0, -40, -3, 70.4, 40, 1] 6 | input_modality = dict(use_lidar=True, use_camera=False) 7 | db_sampler = dict( 8 | data_root=data_root, 9 | info_path=data_root + 'kitti_dbinfos_train.pkl', 10 | rate=1.0, 11 | prepare=dict( 12 | filter_by_difficulty=[-1], 13 | filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)), 14 | classes=class_names, 15 | sample_groups=dict(Car=12, Pedestrian=6, Cyclist=6)) 16 | 17 | file_client_args = dict(backend='disk') 18 | # Uncomment the following if use ceph or other file clients. 19 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient 20 | # for more details. 21 | # file_client_args = dict( 22 | # backend='petrel', path_mapping=dict(data='s3://kitti_data/')) 23 | 24 | train_pipeline = [ 25 | dict( 26 | type='LoadPointsFromFile', 27 | coord_type='LIDAR', 28 | load_dim=4, 29 | use_dim=4, 30 | file_client_args=file_client_args), 31 | dict( 32 | type='LoadAnnotations3D', 33 | with_bbox_3d=True, 34 | with_label_3d=True, 35 | file_client_args=file_client_args), 36 | dict(type='ObjectSample', db_sampler=db_sampler), 37 | dict( 38 | type='ObjectNoise', 39 | num_try=100, 40 | translation_std=[1.0, 1.0, 0.5], 41 | global_rot_range=[0.0, 0.0], 42 | rot_range=[-0.78539816, 0.78539816]), 43 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), 44 | dict( 45 | type='GlobalRotScaleTrans', 46 | rot_range=[-0.78539816, 0.78539816], 47 | scale_ratio_range=[0.95, 1.05]), 48 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), 49 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), 50 | dict(type='PointShuffle'), 51 | dict(type='DefaultFormatBundle3D', class_names=class_names), 52 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 53 | ] 54 | test_pipeline = [ 55 | dict( 56 | type='LoadPointsFromFile', 57 | coord_type='LIDAR', 58 | load_dim=4, 59 | use_dim=4, 60 | file_client_args=file_client_args), 61 | dict( 62 | type='MultiScaleFlipAug3D', 63 | img_scale=(1333, 800), 64 | pts_scale_ratio=1, 65 | flip=False, 66 | transforms=[ 67 | dict( 68 | type='GlobalRotScaleTrans', 69 | rot_range=[0, 0], 70 | scale_ratio_range=[1., 1.], 71 | translation_std=[0, 0, 0]), 72 | dict(type='RandomFlip3D'), 73 | dict( 74 | type='PointsRangeFilter', point_cloud_range=point_cloud_range), 75 | dict( 76 | type='DefaultFormatBundle3D', 77 | class_names=class_names, 78 | with_label=False), 79 | dict(type='Collect3D', keys=['points']) 80 | ]) 81 | ] 82 | # construct a pipeline for data and gt loading in show function 83 | # please keep its loading function consistent with test_pipeline (e.g. 
client) 84 | eval_pipeline = [ 85 | dict( 86 | type='LoadPointsFromFile', 87 | coord_type='LIDAR', 88 | load_dim=4, 89 | use_dim=4, 90 | file_client_args=file_client_args), 91 | dict( 92 | type='DefaultFormatBundle3D', 93 | class_names=class_names, 94 | with_label=False), 95 | dict(type='Collect3D', keys=['points']) 96 | ] 97 | 98 | data = dict( 99 | samples_per_gpu=6, 100 | workers_per_gpu=4, 101 | train=dict( 102 | type='RepeatDataset', 103 | times=2, 104 | dataset=dict( 105 | type=dataset_type, 106 | data_root=data_root, 107 | ann_file=data_root + 'kitti_infos_train.pkl', 108 | split='training', 109 | pts_prefix='velodyne_reduced', 110 | pipeline=train_pipeline, 111 | modality=input_modality, 112 | classes=class_names, 113 | test_mode=False, 114 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset 115 | # and box_type_3d='Depth' in sunrgbd and scannet dataset. 116 | box_type_3d='LiDAR')), 117 | val=dict( 118 | type=dataset_type, 119 | data_root=data_root, 120 | ann_file=data_root + 'kitti_infos_val.pkl', 121 | split='training', 122 | pts_prefix='velodyne_reduced', 123 | pipeline=test_pipeline, 124 | modality=input_modality, 125 | classes=class_names, 126 | test_mode=True, 127 | box_type_3d='LiDAR'), 128 | test=dict( 129 | type=dataset_type, 130 | data_root=data_root, 131 | ann_file=data_root + 'kitti_infos_val.pkl', 132 | split='training', 133 | pts_prefix='velodyne_reduced', 134 | pipeline=test_pipeline, 135 | modality=input_modality, 136 | classes=class_names, 137 | test_mode=True, 138 | box_type_3d='LiDAR')) 139 | 140 | evaluation = dict(interval=1, pipeline=eval_pipeline) 141 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/kitti-3d-car.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'KittiDataset' 3 | data_root = 'data/kitti/' 4 | class_names = ['Car'] 5 | point_cloud_range = [0, -40, -3, 70.4, 40, 1] 6 | input_modality = dict(use_lidar=True, use_camera=False) 7 | db_sampler = dict( 8 | data_root=data_root, 9 | info_path=data_root + 'kitti_dbinfos_train.pkl', 10 | rate=1.0, 11 | prepare=dict(filter_by_difficulty=[-1], filter_by_min_points=dict(Car=5)), 12 | classes=class_names, 13 | sample_groups=dict(Car=15)) 14 | 15 | file_client_args = dict(backend='disk') 16 | # Uncomment the following if use ceph or other file clients. 17 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient 18 | # for more details. 
19 | # file_client_args = dict( 20 | # backend='petrel', path_mapping=dict(data='s3://kitti_data/')) 21 | 22 | train_pipeline = [ 23 | dict( 24 | type='LoadPointsFromFile', 25 | coord_type='LIDAR', 26 | load_dim=4, 27 | use_dim=4, 28 | file_client_args=file_client_args), 29 | dict( 30 | type='LoadAnnotations3D', 31 | with_bbox_3d=True, 32 | with_label_3d=True, 33 | file_client_args=file_client_args), 34 | dict(type='ObjectSample', db_sampler=db_sampler), 35 | dict( 36 | type='ObjectNoise', 37 | num_try=100, 38 | translation_std=[1.0, 1.0, 0.5], 39 | global_rot_range=[0.0, 0.0], 40 | rot_range=[-0.78539816, 0.78539816]), 41 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), 42 | dict( 43 | type='GlobalRotScaleTrans', 44 | rot_range=[-0.78539816, 0.78539816], 45 | scale_ratio_range=[0.95, 1.05]), 46 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), 47 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), 48 | dict(type='PointShuffle'), 49 | dict(type='DefaultFormatBundle3D', class_names=class_names), 50 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 51 | ] 52 | test_pipeline = [ 53 | dict( 54 | type='LoadPointsFromFile', 55 | coord_type='LIDAR', 56 | load_dim=4, 57 | use_dim=4, 58 | file_client_args=file_client_args), 59 | dict( 60 | type='MultiScaleFlipAug3D', 61 | img_scale=(1333, 800), 62 | pts_scale_ratio=1, 63 | flip=False, 64 | transforms=[ 65 | dict( 66 | type='GlobalRotScaleTrans', 67 | rot_range=[0, 0], 68 | scale_ratio_range=[1., 1.], 69 | translation_std=[0, 0, 0]), 70 | dict(type='RandomFlip3D'), 71 | dict( 72 | type='PointsRangeFilter', point_cloud_range=point_cloud_range), 73 | dict( 74 | type='DefaultFormatBundle3D', 75 | class_names=class_names, 76 | with_label=False), 77 | dict(type='Collect3D', keys=['points']) 78 | ]) 79 | ] 80 | # construct a pipeline for data and gt loading in show function 81 | # please keep its loading function consistent with test_pipeline (e.g. client) 82 | eval_pipeline = [ 83 | dict( 84 | type='LoadPointsFromFile', 85 | coord_type='LIDAR', 86 | load_dim=4, 87 | use_dim=4, 88 | file_client_args=file_client_args), 89 | dict( 90 | type='DefaultFormatBundle3D', 91 | class_names=class_names, 92 | with_label=False), 93 | dict(type='Collect3D', keys=['points']) 94 | ] 95 | 96 | data = dict( 97 | samples_per_gpu=6, 98 | workers_per_gpu=4, 99 | train=dict( 100 | type='RepeatDataset', 101 | times=2, 102 | dataset=dict( 103 | type=dataset_type, 104 | data_root=data_root, 105 | ann_file=data_root + 'kitti_infos_train.pkl', 106 | split='training', 107 | pts_prefix='velodyne_reduced', 108 | pipeline=train_pipeline, 109 | modality=input_modality, 110 | classes=class_names, 111 | test_mode=False, 112 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset 113 | # and box_type_3d='Depth' in sunrgbd and scannet dataset. 
114 | box_type_3d='LiDAR')), 115 | val=dict( 116 | type=dataset_type, 117 | data_root=data_root, 118 | ann_file=data_root + 'kitti_infos_val.pkl', 119 | split='training', 120 | pts_prefix='velodyne_reduced', 121 | pipeline=test_pipeline, 122 | modality=input_modality, 123 | classes=class_names, 124 | test_mode=True, 125 | box_type_3d='LiDAR'), 126 | test=dict( 127 | type=dataset_type, 128 | data_root=data_root, 129 | ann_file=data_root + 'kitti_infos_val.pkl', 130 | split='training', 131 | pts_prefix='velodyne_reduced', 132 | pipeline=test_pipeline, 133 | modality=input_modality, 134 | classes=class_names, 135 | test_mode=True, 136 | box_type_3d='LiDAR')) 137 | 138 | evaluation = dict(interval=1, pipeline=eval_pipeline) 139 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/lyft-3d.py: -------------------------------------------------------------------------------- 1 | # If point cloud range is changed, the models should also change their point 2 | # cloud range accordingly 3 | point_cloud_range = [-80, -80, -5, 80, 80, 3] 4 | # For Lyft we usually do 9-class detection 5 | class_names = [ 6 | 'car', 'truck', 'bus', 'emergency_vehicle', 'other_vehicle', 'motorcycle', 7 | 'bicycle', 'pedestrian', 'animal' 8 | ] 9 | dataset_type = 'LyftDataset' 10 | data_root = 'data/lyft/' 11 | # Input modality for Lyft dataset, this is consistent with the submission 12 | # format which requires the information in input_modality. 13 | input_modality = dict( 14 | use_lidar=True, 15 | use_camera=False, 16 | use_radar=False, 17 | use_map=False, 18 | use_external=False) 19 | file_client_args = dict(backend='disk') 20 | # Uncomment the following if use ceph or other file clients. 21 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient 22 | # for more details. 
23 | # file_client_args = dict( 24 | # backend='petrel', 25 | # path_mapping=dict({ 26 | # './data/lyft/': 's3://lyft/lyft/', 27 | # 'data/lyft/': 's3://lyft/lyft/' 28 | # })) 29 | train_pipeline = [ 30 | dict( 31 | type='LoadPointsFromFile', 32 | coord_type='LIDAR', 33 | load_dim=5, 34 | use_dim=5, 35 | file_client_args=file_client_args), 36 | dict( 37 | type='LoadPointsFromMultiSweeps', 38 | sweeps_num=10, 39 | file_client_args=file_client_args), 40 | dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True), 41 | dict( 42 | type='GlobalRotScaleTrans', 43 | rot_range=[-0.3925, 0.3925], 44 | scale_ratio_range=[0.95, 1.05], 45 | translation_std=[0, 0, 0]), 46 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), 47 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), 48 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), 49 | dict(type='PointShuffle'), 50 | dict(type='DefaultFormatBundle3D', class_names=class_names), 51 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 52 | ] 53 | test_pipeline = [ 54 | dict( 55 | type='LoadPointsFromFile', 56 | coord_type='LIDAR', 57 | load_dim=5, 58 | use_dim=5, 59 | file_client_args=file_client_args), 60 | dict( 61 | type='LoadPointsFromMultiSweeps', 62 | sweeps_num=10, 63 | file_client_args=file_client_args), 64 | dict( 65 | type='MultiScaleFlipAug3D', 66 | img_scale=(1333, 800), 67 | pts_scale_ratio=1, 68 | flip=False, 69 | transforms=[ 70 | dict( 71 | type='GlobalRotScaleTrans', 72 | rot_range=[0, 0], 73 | scale_ratio_range=[1., 1.], 74 | translation_std=[0, 0, 0]), 75 | dict(type='RandomFlip3D'), 76 | dict( 77 | type='PointsRangeFilter', point_cloud_range=point_cloud_range), 78 | dict( 79 | type='DefaultFormatBundle3D', 80 | class_names=class_names, 81 | with_label=False), 82 | dict(type='Collect3D', keys=['points']) 83 | ]) 84 | ] 85 | # construct a pipeline for data and gt loading in show function 86 | # please keep its loading function consistent with test_pipeline (e.g. client) 87 | eval_pipeline = [ 88 | dict( 89 | type='LoadPointsFromFile', 90 | coord_type='LIDAR', 91 | load_dim=5, 92 | use_dim=5, 93 | file_client_args=file_client_args), 94 | dict( 95 | type='LoadPointsFromMultiSweeps', 96 | sweeps_num=10, 97 | file_client_args=file_client_args), 98 | dict( 99 | type='DefaultFormatBundle3D', 100 | class_names=class_names, 101 | with_label=False), 102 | dict(type='Collect3D', keys=['points']) 103 | ] 104 | 105 | data = dict( 106 | samples_per_gpu=2, 107 | workers_per_gpu=2, 108 | train=dict( 109 | type=dataset_type, 110 | data_root=data_root, 111 | ann_file=data_root + 'lyft_infos_train.pkl', 112 | pipeline=train_pipeline, 113 | classes=class_names, 114 | modality=input_modality, 115 | test_mode=False), 116 | val=dict( 117 | type=dataset_type, 118 | data_root=data_root, 119 | ann_file=data_root + 'lyft_infos_val.pkl', 120 | pipeline=test_pipeline, 121 | classes=class_names, 122 | modality=input_modality, 123 | test_mode=True), 124 | test=dict( 125 | type=dataset_type, 126 | data_root=data_root, 127 | ann_file=data_root + 'lyft_infos_test.pkl', 128 | pipeline=test_pipeline, 129 | classes=class_names, 130 | modality=input_modality, 131 | test_mode=True)) 132 | # For Lyft dataset, we usually evaluate the model at the end of training. 133 | # Since the models are trained by 24 epochs by default, we set evaluation 134 | # interval to be 24. Please change the interval accordingly if you do not 135 | # use a default schedule. 
136 | evaluation = dict(interval=24, pipeline=eval_pipeline) 137 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/nuim_instance.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CocoDataset' 2 | data_root = 'data/nuimages/' 3 | class_names = [ 4 | 'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle', 5 | 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier' 6 | ] 7 | img_norm_cfg = dict( 8 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 9 | train_pipeline = [ 10 | dict(type='LoadImageFromFile'), 11 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 12 | dict( 13 | type='Resize', 14 | img_scale=[(1280, 720), (1920, 1080)], 15 | multiscale_mode='range', 16 | keep_ratio=True), 17 | dict(type='RandomFlip', flip_ratio=0.5), 18 | dict(type='Normalize', **img_norm_cfg), 19 | dict(type='Pad', size_divisor=32), 20 | dict(type='DefaultFormatBundle'), 21 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 22 | ] 23 | test_pipeline = [ 24 | dict(type='LoadImageFromFile'), 25 | dict( 26 | type='MultiScaleFlipAug', 27 | img_scale=(1600, 900), 28 | flip=False, 29 | transforms=[ 30 | dict(type='Resize', keep_ratio=True), 31 | dict(type='RandomFlip'), 32 | dict(type='Normalize', **img_norm_cfg), 33 | dict(type='Pad', size_divisor=32), 34 | dict(type='ImageToTensor', keys=['img']), 35 | dict(type='Collect', keys=['img']), 36 | ]) 37 | ] 38 | data = dict( 39 | samples_per_gpu=2, 40 | workers_per_gpu=2, 41 | train=dict( 42 | type=dataset_type, 43 | ann_file=data_root + 'annotations/nuimages_v1.0-train.json', 44 | img_prefix=data_root, 45 | classes=class_names, 46 | pipeline=train_pipeline), 47 | val=dict( 48 | type=dataset_type, 49 | ann_file=data_root + 'annotations/nuimages_v1.0-val.json', 50 | img_prefix=data_root, 51 | classes=class_names, 52 | pipeline=test_pipeline), 53 | test=dict( 54 | type=dataset_type, 55 | ann_file=data_root + 'annotations/nuimages_v1.0-val.json', 56 | img_prefix=data_root, 57 | classes=class_names, 58 | pipeline=test_pipeline)) 59 | evaluation = dict(metric=['bbox', 'segm']) 60 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/nus-mono3d.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CustomNuScenesMonoDataset' 2 | data_root = 'data/nuscenes/' 3 | class_names = [ 4 | 'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle', 5 | 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier' 6 | ] 7 | # Input modality for nuScenes dataset, this is consistent with the submission 8 | # format which requires the information in input_modality. 
9 | input_modality = dict( 10 | use_lidar=False, 11 | use_camera=True, 12 | use_radar=False, 13 | use_map=False, 14 | use_external=False) 15 | img_norm_cfg = dict( 16 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 17 | train_pipeline = [ 18 | dict(type='LoadImageFromFileMono3D'), 19 | dict( 20 | type='LoadAnnotations3D', 21 | with_bbox=True, 22 | with_label=True, 23 | with_attr_label=True, 24 | with_bbox_3d=True, 25 | with_label_3d=True, 26 | with_bbox_depth=True), 27 | dict(type='Resize', img_scale=(1600, 900), keep_ratio=True), 28 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='Pad', size_divisor=32), 31 | dict(type='DefaultFormatBundle3D', class_names=class_names), 32 | dict( 33 | type='Collect3D', 34 | keys=[ 35 | 'img', 'gt_bboxes', 'gt_labels', 'attr_labels', 'gt_bboxes_3d', 36 | 'gt_labels_3d', 'centers2d', 'depths' 37 | ]), 38 | ] 39 | test_pipeline = [ 40 | dict(type='LoadImageFromFileMono3D'), 41 | dict( 42 | type='MultiScaleFlipAug', 43 | scale_factor=1.0, 44 | flip=False, 45 | transforms=[ 46 | dict(type='RandomFlip3D'), 47 | dict(type='Normalize', **img_norm_cfg), 48 | dict(type='Pad', size_divisor=32), 49 | dict( 50 | type='DefaultFormatBundle3D', 51 | class_names=class_names, 52 | with_label=False), 53 | dict(type='Collect3D', keys=['img']), 54 | ]) 55 | ] 56 | # construct a pipeline for data and gt loading in show function 57 | # please keep its loading function consistent with test_pipeline (e.g. client) 58 | eval_pipeline = [ 59 | dict(type='LoadImageFromFileMono3D'), 60 | dict( 61 | type='DefaultFormatBundle3D', 62 | class_names=class_names, 63 | with_label=False), 64 | dict(type='Collect3D', keys=['img']) 65 | ] 66 | 67 | data = dict( 68 | samples_per_gpu=2, 69 | workers_per_gpu=2, 70 | train=dict( 71 | type=dataset_type, 72 | data_root=data_root, 73 | ann_file=data_root + 'nuscenes_infos_train_mono3d.coco.json', 74 | img_prefix=data_root, 75 | classes=class_names, 76 | pipeline=train_pipeline, 77 | modality=input_modality, 78 | test_mode=False, 79 | box_type_3d='Camera'), 80 | val=dict( 81 | type=dataset_type, 82 | data_root=data_root, 83 | ann_file=data_root + 'nuscenes_infos_val_mono3d.coco.json', 84 | img_prefix=data_root, 85 | classes=class_names, 86 | pipeline=test_pipeline, 87 | modality=input_modality, 88 | test_mode=True, 89 | box_type_3d='Camera'), 90 | test=dict( 91 | type=dataset_type, 92 | data_root=data_root, 93 | ann_file=data_root + 'nuscenes_infos_val_mono3d.coco.json', 94 | img_prefix=data_root, 95 | classes=class_names, 96 | pipeline=test_pipeline, 97 | modality=input_modality, 98 | test_mode=True, 99 | box_type_3d='Camera')) 100 | evaluation = dict(interval=2) 101 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/range100_lyft-3d.py: -------------------------------------------------------------------------------- 1 | # If point cloud range is changed, the models should also change their point 2 | # cloud range accordingly 3 | point_cloud_range = [-100, -100, -5, 100, 100, 3] 4 | # For Lyft we usually do 9-class detection 5 | class_names = [ 6 | 'car', 'truck', 'bus', 'emergency_vehicle', 'other_vehicle', 'motorcycle', 7 | 'bicycle', 'pedestrian', 'animal' 8 | ] 9 | dataset_type = 'LyftDataset' 10 | data_root = 'data/lyft/' 11 | # Input modality for Lyft dataset, this is consistent with the submission 12 | # format which requires the information in input_modality. 
13 | input_modality = dict( 14 | use_lidar=True, 15 | use_camera=False, 16 | use_radar=False, 17 | use_map=False, 18 | use_external=False) 19 | file_client_args = dict(backend='disk') 20 | # Uncomment the following if use ceph or other file clients. 21 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient 22 | # for more details. 23 | # file_client_args = dict( 24 | # backend='petrel', 25 | # path_mapping=dict({ 26 | # './data/lyft/': 's3://lyft/lyft/', 27 | # 'data/lyft/': 's3://lyft/lyft/' 28 | # })) 29 | train_pipeline = [ 30 | dict( 31 | type='LoadPointsFromFile', 32 | coord_type='LIDAR', 33 | load_dim=5, 34 | use_dim=5, 35 | file_client_args=file_client_args), 36 | dict( 37 | type='LoadPointsFromMultiSweeps', 38 | sweeps_num=10, 39 | file_client_args=file_client_args), 40 | dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True), 41 | dict( 42 | type='GlobalRotScaleTrans', 43 | rot_range=[-0.3925, 0.3925], 44 | scale_ratio_range=[0.95, 1.05], 45 | translation_std=[0, 0, 0]), 46 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), 47 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), 48 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), 49 | dict(type='PointShuffle'), 50 | dict(type='DefaultFormatBundle3D', class_names=class_names), 51 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 52 | ] 53 | test_pipeline = [ 54 | dict( 55 | type='LoadPointsFromFile', 56 | coord_type='LIDAR', 57 | load_dim=5, 58 | use_dim=5, 59 | file_client_args=file_client_args), 60 | dict( 61 | type='LoadPointsFromMultiSweeps', 62 | sweeps_num=10, 63 | file_client_args=file_client_args), 64 | dict( 65 | type='MultiScaleFlipAug3D', 66 | img_scale=(1333, 800), 67 | pts_scale_ratio=1, 68 | flip=False, 69 | transforms=[ 70 | dict( 71 | type='GlobalRotScaleTrans', 72 | rot_range=[0, 0], 73 | scale_ratio_range=[1., 1.], 74 | translation_std=[0, 0, 0]), 75 | dict(type='RandomFlip3D'), 76 | dict( 77 | type='PointsRangeFilter', point_cloud_range=point_cloud_range), 78 | dict( 79 | type='DefaultFormatBundle3D', 80 | class_names=class_names, 81 | with_label=False), 82 | dict(type='Collect3D', keys=['points']) 83 | ]) 84 | ] 85 | # construct a pipeline for data and gt loading in show function 86 | # please keep its loading function consistent with test_pipeline (e.g. 
client) 87 | eval_pipeline = [ 88 | dict( 89 | type='LoadPointsFromFile', 90 | coord_type='LIDAR', 91 | load_dim=5, 92 | use_dim=5, 93 | file_client_args=file_client_args), 94 | dict( 95 | type='LoadPointsFromMultiSweeps', 96 | sweeps_num=10, 97 | file_client_args=file_client_args), 98 | dict( 99 | type='DefaultFormatBundle3D', 100 | class_names=class_names, 101 | with_label=False), 102 | dict(type='Collect3D', keys=['points']) 103 | ] 104 | 105 | data = dict( 106 | samples_per_gpu=2, 107 | workers_per_gpu=2, 108 | train=dict( 109 | type=dataset_type, 110 | data_root=data_root, 111 | ann_file=data_root + 'lyft_infos_train.pkl', 112 | pipeline=train_pipeline, 113 | classes=class_names, 114 | modality=input_modality, 115 | test_mode=False), 116 | val=dict( 117 | type=dataset_type, 118 | data_root=data_root, 119 | ann_file=data_root + 'lyft_infos_val.pkl', 120 | pipeline=test_pipeline, 121 | classes=class_names, 122 | modality=input_modality, 123 | test_mode=True), 124 | test=dict( 125 | type=dataset_type, 126 | data_root=data_root, 127 | ann_file=data_root + 'lyft_infos_test.pkl', 128 | pipeline=test_pipeline, 129 | classes=class_names, 130 | modality=input_modality, 131 | test_mode=True)) 132 | # For Lyft dataset, we usually evaluate the model at the end of training. 133 | # Since the models are trained by 24 epochs by default, we set evaluation 134 | # interval to be 24. Please change the interval accordingly if you do not 135 | # use a default schedule. 136 | evaluation = dict(interval=24, pipeline=eval_pipeline) 137 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/s3dis-3d-5class.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'S3DISDataset' 3 | data_root = './data/s3dis/' 4 | class_names = ('table', 'chair', 'sofa', 'bookcase', 'board') 5 | train_area = [1, 2, 3, 4, 6] 6 | test_area = 5 7 | 8 | train_pipeline = [ 9 | dict( 10 | type='LoadPointsFromFile', 11 | coord_type='DEPTH', 12 | shift_height=True, 13 | load_dim=6, 14 | use_dim=[0, 1, 2, 3, 4, 5]), 15 | dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True), 16 | dict(type='PointSample', num_points=40000), 17 | dict( 18 | type='RandomFlip3D', 19 | sync_2d=False, 20 | flip_ratio_bev_horizontal=0.5, 21 | flip_ratio_bev_vertical=0.5), 22 | dict( 23 | type='GlobalRotScaleTrans', 24 | # following ScanNet dataset the rotation range is 5 degrees 25 | rot_range=[-0.087266, 0.087266], 26 | scale_ratio_range=[1.0, 1.0], 27 | shift_height=True), 28 | dict(type='DefaultFormatBundle3D', class_names=class_names), 29 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 30 | ] 31 | test_pipeline = [ 32 | dict( 33 | type='LoadPointsFromFile', 34 | coord_type='DEPTH', 35 | shift_height=True, 36 | load_dim=6, 37 | use_dim=[0, 1, 2, 3, 4, 5]), 38 | dict( 39 | type='MultiScaleFlipAug3D', 40 | img_scale=(1333, 800), 41 | pts_scale_ratio=1, 42 | flip=False, 43 | transforms=[ 44 | dict( 45 | type='GlobalRotScaleTrans', 46 | rot_range=[0, 0], 47 | scale_ratio_range=[1., 1.], 48 | translation_std=[0, 0, 0]), 49 | dict( 50 | type='RandomFlip3D', 51 | sync_2d=False, 52 | flip_ratio_bev_horizontal=0.5, 53 | flip_ratio_bev_vertical=0.5), 54 | dict(type='PointSample', num_points=40000), 55 | dict( 56 | type='DefaultFormatBundle3D', 57 | class_names=class_names, 58 | with_label=False), 59 | dict(type='Collect3D', keys=['points']) 60 | ]) 61 | ] 62 | # construct a pipeline for data and 
gt loading in show function 63 | # please keep its loading function consistent with test_pipeline (e.g. client) 64 | eval_pipeline = [ 65 | dict( 66 | type='LoadPointsFromFile', 67 | coord_type='DEPTH', 68 | shift_height=False, 69 | load_dim=6, 70 | use_dim=[0, 1, 2, 3, 4, 5]), 71 | dict( 72 | type='DefaultFormatBundle3D', 73 | class_names=class_names, 74 | with_label=False), 75 | dict(type='Collect3D', keys=['points']) 76 | ] 77 | 78 | data = dict( 79 | samples_per_gpu=8, 80 | workers_per_gpu=4, 81 | train=dict( 82 | type='RepeatDataset', 83 | times=5, 84 | dataset=dict( 85 | type='ConcatDataset', 86 | datasets=[ 87 | dict( 88 | type=dataset_type, 89 | data_root=data_root, 90 | ann_file=data_root + f's3dis_infos_Area_{i}.pkl', 91 | pipeline=train_pipeline, 92 | filter_empty_gt=False, 93 | classes=class_names, 94 | box_type_3d='Depth') for i in train_area 95 | ], 96 | separate_eval=False)), 97 | val=dict( 98 | type=dataset_type, 99 | data_root=data_root, 100 | ann_file=data_root + f's3dis_infos_Area_{test_area}.pkl', 101 | pipeline=test_pipeline, 102 | classes=class_names, 103 | test_mode=True, 104 | box_type_3d='Depth'), 105 | test=dict( 106 | type=dataset_type, 107 | data_root=data_root, 108 | ann_file=data_root + f's3dis_infos_Area_{test_area}.pkl', 109 | pipeline=test_pipeline, 110 | classes=class_names, 111 | test_mode=True, 112 | box_type_3d='Depth')) 113 | 114 | evaluation = dict(pipeline=eval_pipeline) 115 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/s3dis_seg-3d-13class.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'S3DISSegDataset' 3 | data_root = './data/s3dis/' 4 | class_names = ('ceiling', 'floor', 'wall', 'beam', 'column', 'window', 'door', 5 | 'table', 'chair', 'sofa', 'bookcase', 'board', 'clutter') 6 | num_points = 4096 7 | train_area = [1, 2, 3, 4, 6] 8 | test_area = 5 9 | train_pipeline = [ 10 | dict( 11 | type='LoadPointsFromFile', 12 | coord_type='DEPTH', 13 | shift_height=False, 14 | use_color=True, 15 | load_dim=6, 16 | use_dim=[0, 1, 2, 3, 4, 5]), 17 | dict( 18 | type='LoadAnnotations3D', 19 | with_bbox_3d=False, 20 | with_label_3d=False, 21 | with_mask_3d=False, 22 | with_seg_3d=True), 23 | dict( 24 | type='PointSegClassMapping', 25 | valid_cat_ids=tuple(range(len(class_names))), 26 | max_cat_id=13), 27 | dict( 28 | type='IndoorPatchPointSample', 29 | num_points=num_points, 30 | block_size=1.0, 31 | ignore_index=len(class_names), 32 | use_normalized_coord=True, 33 | enlarge_size=0.2, 34 | min_unique_num=None), 35 | dict(type='NormalizePointsColor', color_mean=None), 36 | dict(type='DefaultFormatBundle3D', class_names=class_names), 37 | dict(type='Collect3D', keys=['points', 'pts_semantic_mask']) 38 | ] 39 | test_pipeline = [ 40 | dict( 41 | type='LoadPointsFromFile', 42 | coord_type='DEPTH', 43 | shift_height=False, 44 | use_color=True, 45 | load_dim=6, 46 | use_dim=[0, 1, 2, 3, 4, 5]), 47 | dict(type='NormalizePointsColor', color_mean=None), 48 | dict( 49 | # a wrapper in order to successfully call test function 50 | # actually we don't perform test-time-aug 51 | type='MultiScaleFlipAug3D', 52 | img_scale=(1333, 800), 53 | pts_scale_ratio=1, 54 | flip=False, 55 | transforms=[ 56 | dict( 57 | type='GlobalRotScaleTrans', 58 | rot_range=[0, 0], 59 | scale_ratio_range=[1., 1.], 60 | translation_std=[0, 0, 0]), 61 | dict( 62 | type='RandomFlip3D', 63 | sync_2d=False, 64 | flip_ratio_bev_horizontal=0.0, 65 | 
flip_ratio_bev_vertical=0.0), 66 | dict( 67 | type='DefaultFormatBundle3D', 68 | class_names=class_names, 69 | with_label=False), 70 | dict(type='Collect3D', keys=['points']) 71 | ]) 72 | ] 73 | # construct a pipeline for data and gt loading in show function 74 | # please keep its loading function consistent with test_pipeline (e.g. client) 75 | # we need to load gt seg_mask! 76 | eval_pipeline = [ 77 | dict( 78 | type='LoadPointsFromFile', 79 | coord_type='DEPTH', 80 | shift_height=False, 81 | use_color=True, 82 | load_dim=6, 83 | use_dim=[0, 1, 2, 3, 4, 5]), 84 | dict( 85 | type='LoadAnnotations3D', 86 | with_bbox_3d=False, 87 | with_label_3d=False, 88 | with_mask_3d=False, 89 | with_seg_3d=True), 90 | dict( 91 | type='PointSegClassMapping', 92 | valid_cat_ids=tuple(range(len(class_names))), 93 | max_cat_id=13), 94 | dict( 95 | type='DefaultFormatBundle3D', 96 | with_label=False, 97 | class_names=class_names), 98 | dict(type='Collect3D', keys=['points', 'pts_semantic_mask']) 99 | ] 100 | 101 | data = dict( 102 | samples_per_gpu=8, 103 | workers_per_gpu=4, 104 | # train on area 1, 2, 3, 4, 6 105 | # test on area 5 106 | train=dict( 107 | type=dataset_type, 108 | data_root=data_root, 109 | ann_files=[ 110 | data_root + f's3dis_infos_Area_{i}.pkl' for i in train_area 111 | ], 112 | pipeline=train_pipeline, 113 | classes=class_names, 114 | test_mode=False, 115 | ignore_index=len(class_names), 116 | scene_idxs=[ 117 | data_root + f'seg_info/Area_{i}_resampled_scene_idxs.npy' 118 | for i in train_area 119 | ]), 120 | val=dict( 121 | type=dataset_type, 122 | data_root=data_root, 123 | ann_files=data_root + f's3dis_infos_Area_{test_area}.pkl', 124 | pipeline=test_pipeline, 125 | classes=class_names, 126 | test_mode=True, 127 | ignore_index=len(class_names), 128 | scene_idxs=data_root + 129 | f'seg_info/Area_{test_area}_resampled_scene_idxs.npy'), 130 | test=dict( 131 | type=dataset_type, 132 | data_root=data_root, 133 | ann_files=data_root + f's3dis_infos_Area_{test_area}.pkl', 134 | pipeline=test_pipeline, 135 | classes=class_names, 136 | test_mode=True, 137 | ignore_index=len(class_names))) 138 | 139 | evaluation = dict(pipeline=eval_pipeline) 140 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/scannet-3d-18class.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'ScanNetDataset' 3 | data_root = './data/scannet/' 4 | class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window', 5 | 'bookshelf', 'picture', 'counter', 'desk', 'curtain', 6 | 'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub', 7 | 'garbagebin') 8 | train_pipeline = [ 9 | dict( 10 | type='LoadPointsFromFile', 11 | coord_type='DEPTH', 12 | shift_height=True, 13 | load_dim=6, 14 | use_dim=[0, 1, 2]), 15 | dict( 16 | type='LoadAnnotations3D', 17 | with_bbox_3d=True, 18 | with_label_3d=True, 19 | with_mask_3d=True, 20 | with_seg_3d=True), 21 | dict(type='GlobalAlignment', rotation_axis=2), 22 | dict( 23 | type='PointSegClassMapping', 24 | valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 25 | 36, 39), 26 | max_cat_id=40), 27 | dict(type='PointSample', num_points=40000), 28 | dict( 29 | type='RandomFlip3D', 30 | sync_2d=False, 31 | flip_ratio_bev_horizontal=0.5, 32 | flip_ratio_bev_vertical=0.5), 33 | dict( 34 | type='GlobalRotScaleTrans', 35 | rot_range=[-0.087266, 0.087266], 36 | scale_ratio_range=[1.0, 1.0], 37 | shift_height=True), 38 | 
dict(type='DefaultFormatBundle3D', class_names=class_names), 39 | dict( 40 | type='Collect3D', 41 | keys=[ 42 | 'points', 'gt_bboxes_3d', 'gt_labels_3d', 'pts_semantic_mask', 43 | 'pts_instance_mask' 44 | ]) 45 | ] 46 | test_pipeline = [ 47 | dict( 48 | type='LoadPointsFromFile', 49 | coord_type='DEPTH', 50 | shift_height=True, 51 | load_dim=6, 52 | use_dim=[0, 1, 2]), 53 | dict(type='GlobalAlignment', rotation_axis=2), 54 | dict( 55 | type='MultiScaleFlipAug3D', 56 | img_scale=(1333, 800), 57 | pts_scale_ratio=1, 58 | flip=False, 59 | transforms=[ 60 | dict( 61 | type='GlobalRotScaleTrans', 62 | rot_range=[0, 0], 63 | scale_ratio_range=[1., 1.], 64 | translation_std=[0, 0, 0]), 65 | dict( 66 | type='RandomFlip3D', 67 | sync_2d=False, 68 | flip_ratio_bev_horizontal=0.5, 69 | flip_ratio_bev_vertical=0.5), 70 | dict(type='PointSample', num_points=40000), 71 | dict( 72 | type='DefaultFormatBundle3D', 73 | class_names=class_names, 74 | with_label=False), 75 | dict(type='Collect3D', keys=['points']) 76 | ]) 77 | ] 78 | # construct a pipeline for data and gt loading in show function 79 | # please keep its loading function consistent with test_pipeline (e.g. client) 80 | eval_pipeline = [ 81 | dict( 82 | type='LoadPointsFromFile', 83 | coord_type='DEPTH', 84 | shift_height=False, 85 | load_dim=6, 86 | use_dim=[0, 1, 2]), 87 | dict(type='GlobalAlignment', rotation_axis=2), 88 | dict( 89 | type='DefaultFormatBundle3D', 90 | class_names=class_names, 91 | with_label=False), 92 | dict(type='Collect3D', keys=['points']) 93 | ] 94 | 95 | data = dict( 96 | samples_per_gpu=8, 97 | workers_per_gpu=4, 98 | train=dict( 99 | type='RepeatDataset', 100 | times=5, 101 | dataset=dict( 102 | type=dataset_type, 103 | data_root=data_root, 104 | ann_file=data_root + 'scannet_infos_train.pkl', 105 | pipeline=train_pipeline, 106 | filter_empty_gt=False, 107 | classes=class_names, 108 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset 109 | # and box_type_3d='Depth' in sunrgbd and scannet dataset. 
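# In mmdet3d's conventions, 'Depth' boxes live in a gravity-aligned frame
# (x right, y forward, z up) while 'LiDAR' boxes use x forward, y left, z up,
# so the box type must match the coord_type the points were loaded with.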
110 | box_type_3d='Depth')), 111 | val=dict( 112 | type=dataset_type, 113 | data_root=data_root, 114 | ann_file=data_root + 'scannet_infos_val.pkl', 115 | pipeline=test_pipeline, 116 | classes=class_names, 117 | test_mode=True, 118 | box_type_3d='Depth'), 119 | test=dict( 120 | type=dataset_type, 121 | data_root=data_root, 122 | ann_file=data_root + 'scannet_infos_val.pkl', 123 | pipeline=test_pipeline, 124 | classes=class_names, 125 | test_mode=True, 126 | box_type_3d='Depth')) 127 | 128 | evaluation = dict(pipeline=eval_pipeline) 129 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/scannet_seg-3d-20class.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | dataset_type = 'ScanNetSegDataset' 3 | data_root = './data/scannet/' 4 | class_names = ('wall', 'floor', 'cabinet', 'bed', 'chair', 'sofa', 'table', 5 | 'door', 'window', 'bookshelf', 'picture', 'counter', 'desk', 6 | 'curtain', 'refrigerator', 'showercurtrain', 'toilet', 'sink', 7 | 'bathtub', 'otherfurniture') 8 | num_points = 8192 9 | train_pipeline = [ 10 | dict( 11 | type='LoadPointsFromFile', 12 | coord_type='DEPTH', 13 | shift_height=False, 14 | use_color=True, 15 | load_dim=6, 16 | use_dim=[0, 1, 2, 3, 4, 5]), 17 | dict( 18 | type='LoadAnnotations3D', 19 | with_bbox_3d=False, 20 | with_label_3d=False, 21 | with_mask_3d=False, 22 | with_seg_3d=True), 23 | dict( 24 | type='PointSegClassMapping', 25 | valid_cat_ids=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 26 | 33, 34, 36, 39), 27 | max_cat_id=40), 28 | dict( 29 | type='IndoorPatchPointSample', 30 | num_points=num_points, 31 | block_size=1.5, 32 | ignore_index=len(class_names), 33 | use_normalized_coord=False, 34 | enlarge_size=0.2, 35 | min_unique_num=None), 36 | dict(type='NormalizePointsColor', color_mean=None), 37 | dict(type='DefaultFormatBundle3D', class_names=class_names), 38 | dict(type='Collect3D', keys=['points', 'pts_semantic_mask']) 39 | ] 40 | test_pipeline = [ 41 | dict( 42 | type='LoadPointsFromFile', 43 | coord_type='DEPTH', 44 | shift_height=False, 45 | use_color=True, 46 | load_dim=6, 47 | use_dim=[0, 1, 2, 3, 4, 5]), 48 | dict(type='NormalizePointsColor', color_mean=None), 49 | dict( 50 | # a wrapper in order to successfully call test function 51 | # actually we don't perform test-time-aug 52 | type='MultiScaleFlipAug3D', 53 | img_scale=(1333, 800), 54 | pts_scale_ratio=1, 55 | flip=False, 56 | transforms=[ 57 | dict( 58 | type='GlobalRotScaleTrans', 59 | rot_range=[0, 0], 60 | scale_ratio_range=[1., 1.], 61 | translation_std=[0, 0, 0]), 62 | dict( 63 | type='RandomFlip3D', 64 | sync_2d=False, 65 | flip_ratio_bev_horizontal=0.0, 66 | flip_ratio_bev_vertical=0.0), 67 | dict( 68 | type='DefaultFormatBundle3D', 69 | class_names=class_names, 70 | with_label=False), 71 | dict(type='Collect3D', keys=['points']) 72 | ]) 73 | ] 74 | # construct a pipeline for data and gt loading in show function 75 | # please keep its loading function consistent with test_pipeline (e.g. client) 76 | # we need to load gt seg_mask! 
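The `PointSegClassMapping` transform used in this file's pipelines compresses ScanNet's sparse raw label space (40 category ids) into contiguous train ids, sending every other id to the ignore index; the eval_pipeline below must apply the identical remapping so predictions and ground truth stay comparable. A minimal sketch of that lookup using only the config values above (the helper itself is illustrative, not mmdet3d API):

import numpy as np

def build_seg_label_map(valid_cat_ids, max_cat_id, ignore_index):
    """Map raw category ids 0..max_cat_id to contiguous train ids;
    ids missing from valid_cat_ids fall through to ignore_index."""
    lut = np.full(max_cat_id + 1, ignore_index, dtype=np.int64)
    for train_id, raw_id in enumerate(valid_cat_ids):
        lut[raw_id] = train_id
    return lut

# Values from this config: 20 valid classes out of 40 raw ids,
# ignore_index = len(class_names) = 20.
lut = build_seg_label_map(
    (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36, 39),
    max_cat_id=40, ignore_index=20)
print(lut[np.array([1, 13, 39])])  # -> [ 0 20 19] (wall, ignored, otherfurniture)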
77 | eval_pipeline = [ 78 | dict( 79 | type='LoadPointsFromFile', 80 | coord_type='DEPTH', 81 | shift_height=False, 82 | use_color=True, 83 | load_dim=6, 84 | use_dim=[0, 1, 2, 3, 4, 5]), 85 | dict( 86 | type='LoadAnnotations3D', 87 | with_bbox_3d=False, 88 | with_label_3d=False, 89 | with_mask_3d=False, 90 | with_seg_3d=True), 91 | dict( 92 | type='PointSegClassMapping', 93 | valid_cat_ids=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 94 | 33, 34, 36, 39), 95 | max_cat_id=40), 96 | dict( 97 | type='DefaultFormatBundle3D', 98 | with_label=False, 99 | class_names=class_names), 100 | dict(type='Collect3D', keys=['points', 'pts_semantic_mask']) 101 | ] 102 | 103 | data = dict( 104 | samples_per_gpu=8, 105 | workers_per_gpu=4, 106 | train=dict( 107 | type=dataset_type, 108 | data_root=data_root, 109 | ann_file=data_root + 'scannet_infos_train.pkl', 110 | pipeline=train_pipeline, 111 | classes=class_names, 112 | test_mode=False, 113 | ignore_index=len(class_names), 114 | scene_idxs=data_root + 'seg_info/train_resampled_scene_idxs.npy'), 115 | val=dict( 116 | type=dataset_type, 117 | data_root=data_root, 118 | ann_file=data_root + 'scannet_infos_val.pkl', 119 | pipeline=test_pipeline, 120 | classes=class_names, 121 | test_mode=True, 122 | ignore_index=len(class_names)), 123 | test=dict( 124 | type=dataset_type, 125 | data_root=data_root, 126 | ann_file=data_root + 'scannet_infos_val.pkl', 127 | pipeline=test_pipeline, 128 | classes=class_names, 129 | test_mode=True, 130 | ignore_index=len(class_names))) 131 | 132 | evaluation = dict(pipeline=eval_pipeline) 133 | -------------------------------------------------------------------------------- /projects/configs/_base_/datasets/sunrgbd-3d-10class.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'SUNRGBDDataset' 2 | data_root = 'data/sunrgbd/' 3 | class_names = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser', 4 | 'night_stand', 'bookshelf', 'bathtub') 5 | train_pipeline = [ 6 | dict( 7 | type='LoadPointsFromFile', 8 | coord_type='DEPTH', 9 | shift_height=True, 10 | load_dim=6, 11 | use_dim=[0, 1, 2]), 12 | dict(type='LoadAnnotations3D'), 13 | dict( 14 | type='RandomFlip3D', 15 | sync_2d=False, 16 | flip_ratio_bev_horizontal=0.5, 17 | ), 18 | dict( 19 | type='GlobalRotScaleTrans', 20 | rot_range=[-0.523599, 0.523599], 21 | scale_ratio_range=[0.85, 1.15], 22 | shift_height=True), 23 | dict(type='PointSample', num_points=20000), 24 | dict(type='DefaultFormatBundle3D', class_names=class_names), 25 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 26 | ] 27 | test_pipeline = [ 28 | dict( 29 | type='LoadPointsFromFile', 30 | coord_type='DEPTH', 31 | shift_height=True, 32 | load_dim=6, 33 | use_dim=[0, 1, 2]), 34 | dict( 35 | type='MultiScaleFlipAug3D', 36 | img_scale=(1333, 800), 37 | pts_scale_ratio=1, 38 | flip=False, 39 | transforms=[ 40 | dict( 41 | type='GlobalRotScaleTrans', 42 | rot_range=[0, 0], 43 | scale_ratio_range=[1., 1.], 44 | translation_std=[0, 0, 0]), 45 | dict( 46 | type='RandomFlip3D', 47 | sync_2d=False, 48 | flip_ratio_bev_horizontal=0.5, 49 | ), 50 | dict(type='PointSample', num_points=20000), 51 | dict( 52 | type='DefaultFormatBundle3D', 53 | class_names=class_names, 54 | with_label=False), 55 | dict(type='Collect3D', keys=['points']) 56 | ]) 57 | ] 58 | # construct a pipeline for data and gt loading in show function 59 | # please keep its loading function consistent with test_pipeline (e.g. 
client) 60 | eval_pipeline = [ 61 | dict( 62 | type='LoadPointsFromFile', 63 | coord_type='DEPTH', 64 | shift_height=False, 65 | load_dim=6, 66 | use_dim=[0, 1, 2]), 67 | dict( 68 | type='DefaultFormatBundle3D', 69 | class_names=class_names, 70 | with_label=False), 71 | dict(type='Collect3D', keys=['points']) 72 | ] 73 | 74 | data = dict( 75 | samples_per_gpu=16, 76 | workers_per_gpu=4, 77 | train=dict( 78 | type='RepeatDataset', 79 | times=5, 80 | dataset=dict( 81 | type=dataset_type, 82 | data_root=data_root, 83 | ann_file=data_root + 'sunrgbd_infos_train.pkl', 84 | pipeline=train_pipeline, 85 | classes=class_names, 86 | filter_empty_gt=False, 87 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset 88 | # and box_type_3d='Depth' in sunrgbd and scannet dataset. 89 | box_type_3d='Depth')), 90 | val=dict( 91 | type=dataset_type, 92 | data_root=data_root, 93 | ann_file=data_root + 'sunrgbd_infos_val.pkl', 94 | pipeline=test_pipeline, 95 | classes=class_names, 96 | test_mode=True, 97 | box_type_3d='Depth'), 98 | test=dict( 99 | type=dataset_type, 100 | data_root=data_root, 101 | ann_file=data_root + 'sunrgbd_infos_val.pkl', 102 | pipeline=test_pipeline, 103 | classes=class_names, 104 | test_mode=True, 105 | box_type_3d='Depth')) 106 | 107 | evaluation = dict(pipeline=eval_pipeline) 108 | -------------------------------------------------------------------------------- /projects/configs/_base_/default_runtime.py: -------------------------------------------------------------------------------- 1 | checkpoint_config = dict(interval=1) 2 | # yapf:disable push 3 | # By default we use textlogger hook and tensorboard 4 | # For more loggers see 5 | # https://mmcv.readthedocs.io/en/latest/api.html#mmcv.runner.LoggerHook 6 | log_config = dict( 7 | interval=50, 8 | hooks=[ 9 | dict(type='TextLoggerHook'), 10 | dict(type='TensorboardLoggerHook') 11 | ]) 12 | # yapf:enable 13 | dist_params = dict(backend='nccl') 14 | log_level = 'INFO' 15 | work_dir = None 16 | load_from = None 17 | resume_from = None 18 | workflow = [('train', 1)] 19 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/3dssd.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='SSD3DNet', 3 | backbone=dict( 4 | type='PointNet2SAMSG', 5 | in_channels=4, 6 | num_points=(4096, 512, (256, 256)), 7 | radii=((0.2, 0.4, 0.8), (0.4, 0.8, 1.6), (1.6, 3.2, 4.8)), 8 | num_samples=((32, 32, 64), (32, 32, 64), (32, 32, 32)), 9 | sa_channels=(((16, 16, 32), (16, 16, 32), (32, 32, 64)), 10 | ((64, 64, 128), (64, 64, 128), (64, 96, 128)), 11 | ((128, 128, 256), (128, 192, 256), (128, 256, 256))), 12 | aggregation_channels=(64, 128, 256), 13 | fps_mods=(('D-FPS'), ('FS'), ('F-FPS', 'D-FPS')), 14 | fps_sample_range_lists=((-1), (-1), (512, -1)), 15 | norm_cfg=dict(type='BN2d', eps=1e-3, momentum=0.1), 16 | sa_cfg=dict( 17 | type='PointSAModuleMSG', 18 | pool_mod='max', 19 | use_xyz=True, 20 | normalize_xyz=False)), 21 | bbox_head=dict( 22 | type='SSD3DHead', 23 | in_channels=256, 24 | vote_module_cfg=dict( 25 | in_channels=256, 26 | num_points=256, 27 | gt_per_seed=1, 28 | conv_channels=(128, ), 29 | conv_cfg=dict(type='Conv1d'), 30 | norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1), 31 | with_res_feat=False, 32 | vote_xyz_range=(3.0, 3.0, 2.0)), 33 | vote_aggregation_cfg=dict( 34 | type='PointSAModuleMSG', 35 | num_point=256, 36 | radii=(4.8, 6.4), 37 | sample_nums=(16, 32), 38 | mlp_channels=((256, 256, 256, 512), (256, 256, 
512, 1024)), 39 | norm_cfg=dict(type='BN2d', eps=1e-3, momentum=0.1), 40 | use_xyz=True, 41 | normalize_xyz=False, 42 | bias=True), 43 | pred_layer_cfg=dict( 44 | in_channels=1536, 45 | shared_conv_channels=(512, 128), 46 | cls_conv_channels=(128, ), 47 | reg_conv_channels=(128, ), 48 | conv_cfg=dict(type='Conv1d'), 49 | norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1), 50 | bias=True), 51 | conv_cfg=dict(type='Conv1d'), 52 | norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1), 53 | objectness_loss=dict( 54 | type='CrossEntropyLoss', 55 | use_sigmoid=True, 56 | reduction='sum', 57 | loss_weight=1.0), 58 | center_loss=dict( 59 | type='SmoothL1Loss', reduction='sum', loss_weight=1.0), 60 | dir_class_loss=dict( 61 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 62 | dir_res_loss=dict( 63 | type='SmoothL1Loss', reduction='sum', loss_weight=1.0), 64 | size_res_loss=dict( 65 | type='SmoothL1Loss', reduction='sum', loss_weight=1.0), 66 | corner_loss=dict( 67 | type='SmoothL1Loss', reduction='sum', loss_weight=1.0), 68 | vote_loss=dict(type='SmoothL1Loss', reduction='sum', loss_weight=1.0)), 69 | # model training and testing settings 70 | train_cfg=dict( 71 | sample_mod='spec', pos_distance_thr=10.0, expand_dims_length=0.05), 72 | test_cfg=dict( 73 | nms_cfg=dict(type='nms', iou_thr=0.1), 74 | sample_mod='spec', 75 | score_thr=0.0, 76 | per_class_proposal=True, 77 | max_output_num=100)) 78 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/centerpoint_01voxel_second_secfpn_nus.py: -------------------------------------------------------------------------------- 1 | voxel_size = [0.1, 0.1, 0.2] 2 | model = dict( 3 | type='CenterPoint', 4 | pts_voxel_layer=dict( 5 | max_num_points=10, voxel_size=voxel_size, max_voxels=(90000, 120000)), 6 | pts_voxel_encoder=dict(type='HardSimpleVFE', num_features=5), 7 | pts_middle_encoder=dict( 8 | type='SparseEncoder', 9 | in_channels=5, 10 | sparse_shape=[41, 1024, 1024], 11 | output_channels=128, 12 | order=('conv', 'norm', 'act'), 13 | encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128, 14 | 128)), 15 | encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, [0, 1, 1]), (0, 0)), 16 | block_type='basicblock'), 17 | pts_backbone=dict( 18 | type='SECOND', 19 | in_channels=256, 20 | out_channels=[128, 256], 21 | layer_nums=[5, 5], 22 | layer_strides=[1, 2], 23 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), 24 | conv_cfg=dict(type='Conv2d', bias=False)), 25 | pts_neck=dict( 26 | type='SECONDFPN', 27 | in_channels=[128, 256], 28 | out_channels=[256, 256], 29 | upsample_strides=[1, 2], 30 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), 31 | upsample_cfg=dict(type='deconv', bias=False), 32 | use_conv_for_no_stride=True), 33 | pts_bbox_head=dict( 34 | type='CenterHead', 35 | in_channels=sum([256, 256]), 36 | tasks=[ 37 | dict(num_class=1, class_names=['car']), 38 | dict(num_class=2, class_names=['truck', 'construction_vehicle']), 39 | dict(num_class=2, class_names=['bus', 'trailer']), 40 | dict(num_class=1, class_names=['barrier']), 41 | dict(num_class=2, class_names=['motorcycle', 'bicycle']), 42 | dict(num_class=2, class_names=['pedestrian', 'traffic_cone']), 43 | ], 44 | common_heads=dict( 45 | reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)), 46 | share_conv_channel=64, 47 | bbox_coder=dict( 48 | type='CenterPointBBoxCoder', 49 | post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], 50 | max_num=500, 51 | score_threshold=0.1, 52 | out_size_factor=8, 53 | 
voxel_size=voxel_size[:2], 54 | code_size=9), 55 | separate_head=dict( 56 | type='SeparateHead', init_bias=-2.19, final_kernel=3), 57 | loss_cls=dict(type='GaussianFocalLoss', reduction='mean'), 58 | loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25), 59 | norm_bbox=True), 60 | # model training and testing settings 61 | train_cfg=dict( 62 | pts=dict( 63 | grid_size=[1024, 1024, 40], 64 | voxel_size=voxel_size, 65 | out_size_factor=8, 66 | dense_reg=1, 67 | gaussian_overlap=0.1, 68 | max_objs=500, 69 | min_radius=2, 70 | code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2])), 71 | test_cfg=dict( 72 | pts=dict( 73 | post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], 74 | max_per_img=500, 75 | max_pool_nms=False, 76 | min_radius=[4, 12, 10, 1, 0.85, 0.175], 77 | score_threshold=0.1, 78 | out_size_factor=8, 79 | voxel_size=voxel_size[:2], 80 | nms_type='rotate', 81 | pre_max_size=1000, 82 | post_max_size=83, 83 | nms_thr=0.2))) 84 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/centerpoint_02pillar_second_secfpn_nus.py: -------------------------------------------------------------------------------- 1 | voxel_size = [0.2, 0.2, 8] 2 | model = dict( 3 | type='CenterPoint', 4 | pts_voxel_layer=dict( 5 | max_num_points=20, voxel_size=voxel_size, max_voxels=(30000, 40000)), 6 | pts_voxel_encoder=dict( 7 | type='PillarFeatureNet', 8 | in_channels=5, 9 | feat_channels=[64], 10 | with_distance=False, 11 | voxel_size=(0.2, 0.2, 8), 12 | norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01), 13 | legacy=False), 14 | pts_middle_encoder=dict( 15 | type='PointPillarsScatter', in_channels=64, output_shape=(512, 512)), 16 | pts_backbone=dict( 17 | type='SECOND', 18 | in_channels=64, 19 | out_channels=[64, 128, 256], 20 | layer_nums=[3, 5, 5], 21 | layer_strides=[2, 2, 2], 22 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), 23 | conv_cfg=dict(type='Conv2d', bias=False)), 24 | pts_neck=dict( 25 | type='SECONDFPN', 26 | in_channels=[64, 128, 256], 27 | out_channels=[128, 128, 128], 28 | upsample_strides=[0.5, 1, 2], 29 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01), 30 | upsample_cfg=dict(type='deconv', bias=False), 31 | use_conv_for_no_stride=True), 32 | pts_bbox_head=dict( 33 | type='CenterHead', 34 | in_channels=sum([128, 128, 128]), 35 | tasks=[ 36 | dict(num_class=1, class_names=['car']), 37 | dict(num_class=2, class_names=['truck', 'construction_vehicle']), 38 | dict(num_class=2, class_names=['bus', 'trailer']), 39 | dict(num_class=1, class_names=['barrier']), 40 | dict(num_class=2, class_names=['motorcycle', 'bicycle']), 41 | dict(num_class=2, class_names=['pedestrian', 'traffic_cone']), 42 | ], 43 | common_heads=dict( 44 | reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)), 45 | share_conv_channel=64, 46 | bbox_coder=dict( 47 | type='CenterPointBBoxCoder', 48 | post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], 49 | max_num=500, 50 | score_threshold=0.1, 51 | out_size_factor=4, 52 | voxel_size=voxel_size[:2], 53 | code_size=9), 54 | separate_head=dict( 55 | type='SeparateHead', init_bias=-2.19, final_kernel=3), 56 | loss_cls=dict(type='GaussianFocalLoss', reduction='mean'), 57 | loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25), 58 | norm_bbox=True), 59 | # model training and testing settings 60 | train_cfg=dict( 61 | pts=dict( 62 | grid_size=[512, 512, 1], 63 | voxel_size=voxel_size, 64 | out_size_factor=4, 65 | dense_reg=1, 66 | gaussian_overlap=0.1, 67 
| max_objs=500, 68 | min_radius=2, 69 | code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2])), 70 | test_cfg=dict( 71 | pts=dict( 72 | post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0], 73 | max_per_img=500, 74 | max_pool_nms=False, 75 | min_radius=[4, 12, 10, 1, 0.85, 0.175], 76 | score_threshold=0.1, 77 | pc_range=[-51.2, -51.2], 78 | out_size_factor=4, 79 | voxel_size=voxel_size[:2], 80 | nms_type='rotate', 81 | pre_max_size=1000, 82 | post_max_size=83, 83 | nms_thr=0.2))) 84 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/fcos3d.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='FCOSMono3D', 3 | pretrained='open-mmlab://detectron2/resnet101_caffe', 4 | backbone=dict( 5 | type='ResNet', 6 | depth=101, 7 | num_stages=4, 8 | out_indices=(0, 1, 2, 3), 9 | frozen_stages=1, 10 | norm_cfg=dict(type='BN', requires_grad=False), 11 | norm_eval=True, 12 | style='caffe'), 13 | neck=dict( 14 | type='FPN', 15 | in_channels=[256, 512, 1024, 2048], 16 | out_channels=256, 17 | start_level=1, 18 | add_extra_convs='on_output', 19 | num_outs=5, 20 | relu_before_extra_convs=True), 21 | bbox_head=dict( 22 | type='FCOSMono3DHead', 23 | num_classes=10, 24 | in_channels=256, 25 | stacked_convs=2, 26 | feat_channels=256, 27 | use_direction_classifier=True, 28 | diff_rad_by_sin=True, 29 | pred_attrs=True, 30 | pred_velo=True, 31 | dir_offset=0.7854, # pi/4 32 | strides=[8, 16, 32, 64, 128], 33 | group_reg_dims=(2, 1, 3, 1, 2), # offset, depth, size, rot, velo 34 | cls_branch=(256, ), 35 | reg_branch=( 36 | (256, ), # offset 37 | (256, ), # depth 38 | (256, ), # size 39 | (256, ), # rot 40 | () # velo 41 | ), 42 | dir_branch=(256, ), 43 | attr_branch=(256, ), 44 | loss_cls=dict( 45 | type='FocalLoss', 46 | use_sigmoid=True, 47 | gamma=2.0, 48 | alpha=0.25, 49 | loss_weight=1.0), 50 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), 51 | loss_dir=dict( 52 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 53 | loss_attr=dict( 54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 55 | loss_centerness=dict( 56 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 57 | norm_on_bbox=True, 58 | centerness_on_reg=True, 59 | center_sampling=True, 60 | conv_bias=True, 61 | dcn_on_last_conv=True), 62 | train_cfg=dict( 63 | allowed_border=0, 64 | code_weight=[1.0, 1.0, 0.2, 1.0, 1.0, 1.0, 1.0, 0.05, 0.05], 65 | pos_weight=-1, 66 | debug=False), 67 | test_cfg=dict( 68 | use_rotate_nms=True, 69 | nms_across_levels=False, 70 | nms_pre=1000, 71 | nms_thr=0.8, 72 | score_thr=0.05, 73 | min_bbox_size=0, 74 | max_per_img=200)) 75 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/groupfree3d.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='GroupFree3DNet', 3 | backbone=dict( 4 | type='PointNet2SASSG', 5 | in_channels=3, 6 | num_points=(2048, 1024, 512, 256), 7 | radius=(0.2, 0.4, 0.8, 1.2), 8 | num_samples=(64, 32, 16, 16), 9 | sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256), 10 | (128, 128, 256)), 11 | fp_channels=((256, 256), (256, 288)), 12 | norm_cfg=dict(type='BN2d'), 13 | sa_cfg=dict( 14 | type='PointSAModule', 15 | pool_mod='max', 16 | use_xyz=True, 17 | normalize_xyz=True)), 18 | bbox_head=dict( 19 | type='GroupFree3DHead', 20 | in_channels=288, 21 | num_decoder_layers=6, 22 | 
num_proposal=256, 23 | transformerlayers=dict( 24 | type='BaseTransformerLayer', 25 | attn_cfgs=dict( 26 | type='GroupFree3DMHA', 27 | embed_dims=288, 28 | num_heads=8, 29 | attn_drop=0.1, 30 | dropout_layer=dict(type='Dropout', drop_prob=0.1)), 31 | ffn_cfgs=dict( 32 | embed_dims=288, 33 | feedforward_channels=2048, 34 | ffn_drop=0.1, 35 | act_cfg=dict(type='ReLU', inplace=True)), 36 | operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 'ffn', 37 | 'norm')), 38 | pred_layer_cfg=dict( 39 | in_channels=288, shared_conv_channels=(288, 288), bias=True), 40 | sampling_objectness_loss=dict( 41 | type='FocalLoss', 42 | use_sigmoid=True, 43 | gamma=2.0, 44 | alpha=0.25, 45 | loss_weight=8.0), 46 | objectness_loss=dict( 47 | type='FocalLoss', 48 | use_sigmoid=True, 49 | gamma=2.0, 50 | alpha=0.25, 51 | loss_weight=1.0), 52 | center_loss=dict( 53 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0), 54 | dir_class_loss=dict( 55 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 56 | dir_res_loss=dict( 57 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0), 58 | size_class_loss=dict( 59 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 60 | size_res_loss=dict( 61 | type='SmoothL1Loss', beta=1.0, reduction='sum', loss_weight=10.0), 62 | semantic_loss=dict( 63 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)), 64 | # model training and testing settings 65 | train_cfg=dict(sample_mod='kps'), 66 | test_cfg=dict( 67 | sample_mod='kps', 68 | nms_thr=0.25, 69 | score_thr=0.0, 70 | per_class_proposal=True, 71 | prediction_stages='last')) 72 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/hv_pointpillars_fpn_lyft.py: -------------------------------------------------------------------------------- 1 | _base_ = './hv_pointpillars_fpn_nus.py' 2 | 3 | # model settings (based on nuScenes model settings) 4 | # Voxel size for voxel encoder 5 | # Usually voxel size is changed consistently with the point cloud range 6 | # If point cloud range is modified, do remember to change all related 7 | # keys in the config. 8 | model = dict( 9 | pts_voxel_layer=dict( 10 | max_num_points=20, 11 | point_cloud_range=[-80, -80, -5, 80, 80, 3], 12 | max_voxels=(60000, 60000)), 13 | pts_voxel_encoder=dict( 14 | feat_channels=[64], point_cloud_range=[-80, -80, -5, 80, 80, 3]), 15 | pts_middle_encoder=dict(output_shape=[640, 640]), 16 | pts_bbox_head=dict( 17 | num_classes=9, 18 | anchor_generator=dict( 19 | ranges=[[-80, -80, -1.8, 80, 80, -1.8]], custom_values=[]), 20 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7)), 21 | # model training settings (based on nuScenes model settings) 22 | train_cfg=dict(pts=dict(code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]))) 23 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/hv_pointpillars_fpn_nus.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | # Voxel size for voxel encoder 3 | # Usually voxel size is changed consistently with the point cloud range 4 | # If point cloud range is modified, do remember to change all related 5 | # keys in the config. 
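The warning above is concrete arithmetic: the BEV grid that `PointPillarsScatter` writes into must equal the point-cloud extent divided by the voxel size, so changing the range without touching `output_shape` (or vice versa) silently misplaces pillars. A quick sanity check, assuming only the values used in this file (the helper is an illustration, not mmdet3d API):

def bev_grid_shape(point_cloud_range, voxel_size):
    """Derive the scatter output shape [ny, nx] from range and voxel size."""
    x_min, y_min, _, x_max, y_max, _ = point_cloud_range
    nx = round((x_max - x_min) / voxel_size[0])
    ny = round((y_max - y_min) / voxel_size[1])
    return [ny, nx]

# nuScenes settings below: (50 - -50) / 0.25 = 400 cells per axis,
# matching pts_middle_encoder's output_shape=[400, 400].
assert bev_grid_shape([-50, -50, -5, 50, 50, 3], [0.25, 0.25, 8]) == [400, 400]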
6 | voxel_size = [0.25, 0.25, 8] 7 | model = dict( 8 | type='MVXFasterRCNN', 9 | pts_voxel_layer=dict( 10 | max_num_points=64, 11 | point_cloud_range=[-50, -50, -5, 50, 50, 3], 12 | voxel_size=voxel_size, 13 | max_voxels=(30000, 40000)), 14 | pts_voxel_encoder=dict( 15 | type='HardVFE', 16 | in_channels=4, 17 | feat_channels=[64, 64], 18 | with_distance=False, 19 | voxel_size=voxel_size, 20 | with_cluster_center=True, 21 | with_voxel_center=True, 22 | point_cloud_range=[-50, -50, -5, 50, 50, 3], 23 | norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)), 24 | pts_middle_encoder=dict( 25 | type='PointPillarsScatter', in_channels=64, output_shape=[400, 400]), 26 | pts_backbone=dict( 27 | type='SECOND', 28 | in_channels=64, 29 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 30 | layer_nums=[3, 5, 5], 31 | layer_strides=[2, 2, 2], 32 | out_channels=[64, 128, 256]), 33 | pts_neck=dict( 34 | type='FPN', 35 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 36 | act_cfg=dict(type='ReLU'), 37 | in_channels=[64, 128, 256], 38 | out_channels=256, 39 | start_level=0, 40 | num_outs=3), 41 | pts_bbox_head=dict( 42 | type='Anchor3DHead', 43 | num_classes=10, 44 | in_channels=256, 45 | feat_channels=256, 46 | use_direction_classifier=True, 47 | anchor_generator=dict( 48 | type='AlignedAnchor3DRangeGenerator', 49 | ranges=[[-50, -50, -1.8, 50, 50, -1.8]], 50 | scales=[1, 2, 4], 51 | sizes=[ 52 | [0.8660, 2.5981, 1.], # 1.5/sqrt(3) 53 | [0.5774, 1.7321, 1.], # 1/sqrt(3) 54 | [1., 1., 1.], 55 | [0.4, 0.4, 1], 56 | ], 57 | custom_values=[0, 0], 58 | rotations=[0, 1.57], 59 | reshape_out=True), 60 | assigner_per_size=False, 61 | diff_rad_by_sin=True, 62 | dir_offset=0.7854, # pi/4 63 | dir_limit_offset=0, 64 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9), 65 | loss_cls=dict( 66 | type='FocalLoss', 67 | use_sigmoid=True, 68 | gamma=2.0, 69 | alpha=0.25, 70 | loss_weight=1.0), 71 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), 72 | loss_dir=dict( 73 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 74 | # model training and testing settings 75 | train_cfg=dict( 76 | pts=dict( 77 | assigner=dict( 78 | type='MaxIoUAssigner', 79 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 80 | pos_iou_thr=0.6, 81 | neg_iou_thr=0.3, 82 | min_pos_iou=0.3, 83 | ignore_iof_thr=-1), 84 | allowed_border=0, 85 | code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2], 86 | pos_weight=-1, 87 | debug=False)), 88 | test_cfg=dict( 89 | pts=dict( 90 | use_rotate_nms=True, 91 | nms_across_levels=False, 92 | nms_pre=1000, 93 | nms_thr=0.2, 94 | score_thr=0.05, 95 | min_bbox_size=0, 96 | max_num=500))) 97 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/hv_pointpillars_fpn_range100_lyft.py: -------------------------------------------------------------------------------- 1 | _base_ = './hv_pointpillars_fpn_nus.py' 2 | 3 | # model settings (based on nuScenes model settings) 4 | # Voxel size for voxel encoder 5 | # Usually voxel size is changed consistently with the point cloud range 6 | # If point cloud range is modified, do remember to change all related 7 | # keys in the config. 
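`_base_ = './hv_pointpillars_fpn_nus.py'` above means this file re-specifies only the keys that depend on the 100 m range; mmcv's Config merges child dicts into the base recursively, so sibling keys survive untouched. A rough sketch of those merge semantics, simplified from mmcv (the real implementation also honours `_delete_=True`, used in pointnet2_msg.py further down, to replace a dict wholesale instead of merging):

def merge_cfg(base, override):
    """Recursively merge an override dict into a base dict, mmcv-style."""
    out = dict(base)
    for key, val in override.items():
        if isinstance(val, dict) and isinstance(out.get(key), dict):
            out[key] = merge_cfg(out[key], val)  # dicts merge key by key
        else:
            out[key] = val                       # anything else replaces
    return out

base = dict(pts_middle_encoder=dict(
    type='PointPillarsScatter', in_channels=64, output_shape=[400, 400]))
override = dict(pts_middle_encoder=dict(output_shape=[800, 800]))
# (100 - -100) / 0.25 = 800, so only output_shape changes here;
# type and in_channels are inherited from the nuScenes base.
print(merge_cfg(base, override))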
8 | model = dict( 9 | pts_voxel_layer=dict( 10 | max_num_points=20, 11 | point_cloud_range=[-100, -100, -5, 100, 100, 3], 12 | max_voxels=(60000, 60000)), 13 | pts_voxel_encoder=dict( 14 | feat_channels=[64], point_cloud_range=[-100, -100, -5, 100, 100, 3]), 15 | pts_middle_encoder=dict(output_shape=[800, 800]), 16 | pts_bbox_head=dict( 17 | num_classes=9, 18 | anchor_generator=dict( 19 | ranges=[[-100, -100, -1.8, 100, 100, -1.8]], custom_values=[]), 20 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7)), 21 | # model training settings (based on nuScenes model settings) 22 | train_cfg=dict(pts=dict(code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]))) 23 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/hv_pointpillars_secfpn_kitti.py: -------------------------------------------------------------------------------- 1 | voxel_size = [0.16, 0.16, 4] 2 | 3 | model = dict( 4 | type='VoxelNet', 5 | voxel_layer=dict( 6 | max_num_points=32, # max_points_per_voxel 7 | point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1], 8 | voxel_size=voxel_size, 9 | max_voxels=(16000, 40000) # (training, testing) max_voxels 10 | ), 11 | voxel_encoder=dict( 12 | type='PillarFeatureNet', 13 | in_channels=4, 14 | feat_channels=[64], 15 | with_distance=False, 16 | voxel_size=voxel_size, 17 | point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1]), 18 | middle_encoder=dict( 19 | type='PointPillarsScatter', in_channels=64, output_shape=[496, 432]), 20 | backbone=dict( 21 | type='SECOND', 22 | in_channels=64, 23 | layer_nums=[3, 5, 5], 24 | layer_strides=[2, 2, 2], 25 | out_channels=[64, 128, 256]), 26 | neck=dict( 27 | type='SECONDFPN', 28 | in_channels=[64, 128, 256], 29 | upsample_strides=[1, 2, 4], 30 | out_channels=[128, 128, 128]), 31 | bbox_head=dict( 32 | type='Anchor3DHead', 33 | num_classes=3, 34 | in_channels=384, 35 | feat_channels=384, 36 | use_direction_classifier=True, 37 | anchor_generator=dict( 38 | type='Anchor3DRangeGenerator', 39 | ranges=[ 40 | [0, -39.68, -0.6, 70.4, 39.68, -0.6], 41 | [0, -39.68, -0.6, 70.4, 39.68, -0.6], 42 | [0, -39.68, -1.78, 70.4, 39.68, -1.78], 43 | ], 44 | sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]], 45 | rotations=[0, 1.57], 46 | reshape_out=False), 47 | diff_rad_by_sin=True, 48 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'), 49 | loss_cls=dict( 50 | type='FocalLoss', 51 | use_sigmoid=True, 52 | gamma=2.0, 53 | alpha=0.25, 54 | loss_weight=1.0), 55 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0), 56 | loss_dir=dict( 57 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 58 | # model training and testing settings 59 | train_cfg=dict( 60 | assigner=[ 61 | dict( # for Pedestrian 62 | type='MaxIoUAssigner', 63 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 64 | pos_iou_thr=0.5, 65 | neg_iou_thr=0.35, 66 | min_pos_iou=0.35, 67 | ignore_iof_thr=-1), 68 | dict( # for Cyclist 69 | type='MaxIoUAssigner', 70 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 71 | pos_iou_thr=0.5, 72 | neg_iou_thr=0.35, 73 | min_pos_iou=0.35, 74 | ignore_iof_thr=-1), 75 | dict( # for Car 76 | type='MaxIoUAssigner', 77 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 78 | pos_iou_thr=0.6, 79 | neg_iou_thr=0.45, 80 | min_pos_iou=0.45, 81 | ignore_iof_thr=-1), 82 | ], 83 | allowed_border=0, 84 | pos_weight=-1, 85 | debug=False), 86 | test_cfg=dict( 87 | use_rotate_nms=True, 88 | nms_across_levels=False, 89 | nms_thr=0.01, 90 | score_thr=0.1, 91 | min_bbox_size=0, 92 | 
nms_pre=100, 93 | max_num=50)) 94 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/hv_pointpillars_secfpn_waymo.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | # Voxel size for voxel encoder 3 | # Usually voxel size is changed consistently with the point cloud range 4 | # If point cloud range is modified, do remember to change all related 5 | # keys in the config. 6 | voxel_size = [0.32, 0.32, 6] 7 | model = dict( 8 | type='MVXFasterRCNN', 9 | pts_voxel_layer=dict( 10 | max_num_points=20, 11 | point_cloud_range=[-74.88, -74.88, -2, 74.88, 74.88, 4], 12 | voxel_size=voxel_size, 13 | max_voxels=(32000, 32000)), 14 | pts_voxel_encoder=dict( 15 | type='HardVFE', 16 | in_channels=5, 17 | feat_channels=[64], 18 | with_distance=False, 19 | voxel_size=voxel_size, 20 | with_cluster_center=True, 21 | with_voxel_center=True, 22 | point_cloud_range=[-74.88, -74.88, -2, 74.88, 74.88, 4], 23 | norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)), 24 | pts_middle_encoder=dict( 25 | type='PointPillarsScatter', in_channels=64, output_shape=[468, 468]), 26 | pts_backbone=dict( 27 | type='SECOND', 28 | in_channels=64, 29 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 30 | layer_nums=[3, 5, 5], 31 | layer_strides=[1, 2, 2], 32 | out_channels=[64, 128, 256]), 33 | pts_neck=dict( 34 | type='SECONDFPN', 35 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 36 | in_channels=[64, 128, 256], 37 | upsample_strides=[1, 2, 4], 38 | out_channels=[128, 128, 128]), 39 | pts_bbox_head=dict( 40 | type='Anchor3DHead', 41 | num_classes=3, 42 | in_channels=384, 43 | feat_channels=384, 44 | use_direction_classifier=True, 45 | anchor_generator=dict( 46 | type='AlignedAnchor3DRangeGenerator', 47 | ranges=[[-74.88, -74.88, -0.0345, 74.88, 74.88, -0.0345], 48 | [-74.88, -74.88, -0.1188, 74.88, 74.88, -0.1188], 49 | [-74.88, -74.88, 0, 74.88, 74.88, 0]], 50 | sizes=[ 51 | [2.08, 4.73, 1.77], # car 52 | [0.84, 1.81, 1.77], # cyclist 53 | [0.84, 0.91, 1.74] # pedestrian 54 | ], 55 | rotations=[0, 1.57], 56 | reshape_out=False), 57 | diff_rad_by_sin=True, 58 | dir_offset=0.7854, # pi/4 59 | dir_limit_offset=0, 60 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7), 61 | loss_cls=dict( 62 | type='FocalLoss', 63 | use_sigmoid=True, 64 | gamma=2.0, 65 | alpha=0.25, 66 | loss_weight=1.0), 67 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), 68 | loss_dir=dict( 69 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 70 | # model training and testing settings 71 | train_cfg=dict( 72 | pts=dict( 73 | assigner=[ 74 | dict( # car 75 | type='MaxIoUAssigner', 76 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 77 | pos_iou_thr=0.55, 78 | neg_iou_thr=0.4, 79 | min_pos_iou=0.4, 80 | ignore_iof_thr=-1), 81 | dict( # cyclist 82 | type='MaxIoUAssigner', 83 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 84 | pos_iou_thr=0.5, 85 | neg_iou_thr=0.3, 86 | min_pos_iou=0.3, 87 | ignore_iof_thr=-1), 88 | dict( # pedestrian 89 | type='MaxIoUAssigner', 90 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 91 | pos_iou_thr=0.5, 92 | neg_iou_thr=0.3, 93 | min_pos_iou=0.3, 94 | ignore_iof_thr=-1), 95 | ], 96 | allowed_border=0, 97 | code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 98 | pos_weight=-1, 99 | debug=False)), 100 | test_cfg=dict( 101 | pts=dict( 102 | use_rotate_nms=True, 103 | nms_across_levels=False, 104 | nms_pre=4096, 105 | 
nms_thr=0.25, 106 | score_thr=0.1, 107 | min_bbox_size=0, 108 | max_num=500))) 109 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/hv_second_secfpn_kitti.py: -------------------------------------------------------------------------------- 1 | voxel_size = [0.05, 0.05, 0.1] 2 | 3 | model = dict( 4 | type='VoxelNet', 5 | voxel_layer=dict( 6 | max_num_points=5, 7 | point_cloud_range=[0, -40, -3, 70.4, 40, 1], 8 | voxel_size=voxel_size, 9 | max_voxels=(16000, 40000)), 10 | voxel_encoder=dict(type='HardSimpleVFE'), 11 | middle_encoder=dict( 12 | type='SparseEncoder', 13 | in_channels=4, 14 | sparse_shape=[41, 1600, 1408], 15 | order=('conv', 'norm', 'act')), 16 | backbone=dict( 17 | type='SECOND', 18 | in_channels=256, 19 | layer_nums=[5, 5], 20 | layer_strides=[1, 2], 21 | out_channels=[128, 256]), 22 | neck=dict( 23 | type='SECONDFPN', 24 | in_channels=[128, 256], 25 | upsample_strides=[1, 2], 26 | out_channels=[256, 256]), 27 | bbox_head=dict( 28 | type='Anchor3DHead', 29 | num_classes=3, 30 | in_channels=512, 31 | feat_channels=512, 32 | use_direction_classifier=True, 33 | anchor_generator=dict( 34 | type='Anchor3DRangeGenerator', 35 | ranges=[ 36 | [0, -40.0, -0.6, 70.4, 40.0, -0.6], 37 | [0, -40.0, -0.6, 70.4, 40.0, -0.6], 38 | [0, -40.0, -1.78, 70.4, 40.0, -1.78], 39 | ], 40 | sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]], 41 | rotations=[0, 1.57], 42 | reshape_out=False), 43 | diff_rad_by_sin=True, 44 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'), 45 | loss_cls=dict( 46 | type='FocalLoss', 47 | use_sigmoid=True, 48 | gamma=2.0, 49 | alpha=0.25, 50 | loss_weight=1.0), 51 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0), 52 | loss_dir=dict( 53 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 54 | # model training and testing settings 55 | train_cfg=dict( 56 | assigner=[ 57 | dict( # for Pedestrian 58 | type='MaxIoUAssigner', 59 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 60 | pos_iou_thr=0.35, 61 | neg_iou_thr=0.2, 62 | min_pos_iou=0.2, 63 | ignore_iof_thr=-1), 64 | dict( # for Cyclist 65 | type='MaxIoUAssigner', 66 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 67 | pos_iou_thr=0.35, 68 | neg_iou_thr=0.2, 69 | min_pos_iou=0.2, 70 | ignore_iof_thr=-1), 71 | dict( # for Car 72 | type='MaxIoUAssigner', 73 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 74 | pos_iou_thr=0.6, 75 | neg_iou_thr=0.45, 76 | min_pos_iou=0.45, 77 | ignore_iof_thr=-1), 78 | ], 79 | allowed_border=0, 80 | pos_weight=-1, 81 | debug=False), 82 | test_cfg=dict( 83 | use_rotate_nms=True, 84 | nms_across_levels=False, 85 | nms_thr=0.01, 86 | score_thr=0.1, 87 | min_bbox_size=0, 88 | nms_pre=100, 89 | max_num=50)) 90 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/hv_second_secfpn_waymo.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | # Voxel size for voxel encoder 3 | # Usually voxel size is changed consistently with the point cloud range 4 | # If point cloud range is modified, do remember to change all related 5 | # keys in the config. 
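The same range/voxel coupling appears in the sparse 3D encoders as `sparse_shape`, the [D, H, W] voxel grid implied by the point-cloud range, with one extra slot along z. A sanity check against both SECOND configs here (the helper is illustrative; the +1 on z follows the convention visible in these configs):

def sparse_shape(pc_range, voxel_size):
    """[D, H, W] grid for SparseEncoder: extent / voxel size, +1 slot in z."""
    nx = round((pc_range[3] - pc_range[0]) / voxel_size[0])
    ny = round((pc_range[4] - pc_range[1]) / voxel_size[1])
    nz = round((pc_range[5] - pc_range[2]) / voxel_size[2])
    return [nz + 1, ny, nx]

# KITTI config above -> [41, 1600, 1408]; Waymo config below -> [61, 1280, 1920]
assert sparse_shape([0, -40, -3, 70.4, 40, 1], [0.05, 0.05, 0.1]) == [41, 1600, 1408]
assert sparse_shape([-76.8, -51.2, -2, 76.8, 51.2, 4], [0.08, 0.08, 0.1]) == [61, 1280, 1920]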
6 | voxel_size = [0.08, 0.08, 0.1] 7 | model = dict( 8 | type='VoxelNet', 9 | voxel_layer=dict( 10 | max_num_points=10, 11 | point_cloud_range=[-76.8, -51.2, -2, 76.8, 51.2, 4], 12 | voxel_size=voxel_size, 13 | max_voxels=(80000, 90000)), 14 | voxel_encoder=dict(type='HardSimpleVFE', num_features=5), 15 | middle_encoder=dict( 16 | type='SparseEncoder', 17 | in_channels=5, 18 | sparse_shape=[61, 1280, 1920], 19 | order=('conv', 'norm', 'act')), 20 | backbone=dict( 21 | type='SECOND', 22 | in_channels=384, 23 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 24 | layer_nums=[5, 5], 25 | layer_strides=[1, 2], 26 | out_channels=[128, 256]), 27 | neck=dict( 28 | type='SECONDFPN', 29 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01), 30 | in_channels=[128, 256], 31 | upsample_strides=[1, 2], 32 | out_channels=[256, 256]), 33 | bbox_head=dict( 34 | type='Anchor3DHead', 35 | num_classes=3, 36 | in_channels=512, 37 | feat_channels=512, 38 | use_direction_classifier=True, 39 | anchor_generator=dict( 40 | type='AlignedAnchor3DRangeGenerator', 41 | ranges=[[-76.8, -51.2, -0.0345, 76.8, 51.2, -0.0345], 42 | [-76.8, -51.2, 0, 76.8, 51.2, 0], 43 | [-76.8, -51.2, -0.1188, 76.8, 51.2, -0.1188]], 44 | sizes=[ 45 | [2.08, 4.73, 1.77], # car 46 | [0.84, 0.91, 1.74], # pedestrian 47 | [0.84, 1.81, 1.77] # cyclist 48 | ], 49 | rotations=[0, 1.57], 50 | reshape_out=False), 51 | diff_rad_by_sin=True, 52 | dir_offset=0.7854, # pi/4 53 | dir_limit_offset=0, 54 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7), 55 | loss_cls=dict( 56 | type='FocalLoss', 57 | use_sigmoid=True, 58 | gamma=2.0, 59 | alpha=0.25, 60 | loss_weight=1.0), 61 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0), 62 | loss_dir=dict( 63 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)), 64 | # model training and testing settings 65 | train_cfg=dict( 66 | assigner=[ 67 | dict( # car 68 | type='MaxIoUAssigner', 69 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 70 | pos_iou_thr=0.55, 71 | neg_iou_thr=0.4, 72 | min_pos_iou=0.4, 73 | ignore_iof_thr=-1), 74 | dict( # pedestrian 75 | type='MaxIoUAssigner', 76 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 77 | pos_iou_thr=0.5, 78 | neg_iou_thr=0.3, 79 | min_pos_iou=0.3, 80 | ignore_iof_thr=-1), 81 | dict( # cyclist 82 | type='MaxIoUAssigner', 83 | iou_calculator=dict(type='BboxOverlapsNearest3D'), 84 | pos_iou_thr=0.5, 85 | neg_iou_thr=0.3, 86 | min_pos_iou=0.3, 87 | ignore_iof_thr=-1) 88 | ], 89 | allowed_border=0, 90 | code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 91 | pos_weight=-1, 92 | debug=False), 93 | test_cfg=dict( 94 | use_rotate_nms=True, 95 | nms_across_levels=False, 96 | nms_pre=4096, 97 | nms_thr=0.25, 98 | score_thr=0.1, 99 | min_bbox_size=0, 100 | max_num=500)) 101 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/imvotenet_image.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='ImVoteNet', 3 | img_backbone=dict( 4 | type='ResNet', 5 | depth=50, 6 | num_stages=4, 7 | out_indices=(0, 1, 2, 3), 8 | frozen_stages=1, 9 | norm_cfg=dict(type='BN', requires_grad=False), 10 | norm_eval=True, 11 | style='caffe'), 12 | img_neck=dict( 13 | type='FPN', 14 | in_channels=[256, 512, 1024, 2048], 15 | out_channels=256, 16 | num_outs=5), 17 | img_rpn_head=dict( 18 | type='RPNHead', 19 | in_channels=256, 20 | feat_channels=256, 21 | anchor_generator=dict( 22 | type='AnchorGenerator', 23 | 
scales=[8], 24 | ratios=[0.5, 1.0, 2.0], 25 | strides=[4, 8, 16, 32, 64]), 26 | bbox_coder=dict( 27 | type='DeltaXYWHBBoxCoder', 28 | target_means=[.0, .0, .0, .0], 29 | target_stds=[1.0, 1.0, 1.0, 1.0]), 30 | loss_cls=dict( 31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 33 | img_roi_head=dict( 34 | type='StandardRoIHead', 35 | bbox_roi_extractor=dict( 36 | type='SingleRoIExtractor', 37 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 38 | out_channels=256, 39 | featmap_strides=[4, 8, 16, 32]), 40 | bbox_head=dict( 41 | type='Shared2FCBBoxHead', 42 | in_channels=256, 43 | fc_out_channels=1024, 44 | roi_feat_size=7, 45 | num_classes=10, 46 | bbox_coder=dict( 47 | type='DeltaXYWHBBoxCoder', 48 | target_means=[0., 0., 0., 0.], 49 | target_stds=[0.1, 0.1, 0.2, 0.2]), 50 | reg_class_agnostic=False, 51 | loss_cls=dict( 52 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 53 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))), 54 | 55 | # model training and testing settings 56 | train_cfg=dict( 57 | img_rpn=dict( 58 | assigner=dict( 59 | type='MaxIoUAssigner', 60 | pos_iou_thr=0.7, 61 | neg_iou_thr=0.3, 62 | min_pos_iou=0.3, 63 | match_low_quality=True, 64 | ignore_iof_thr=-1), 65 | sampler=dict( 66 | type='RandomSampler', 67 | num=256, 68 | pos_fraction=0.5, 69 | neg_pos_ub=-1, 70 | add_gt_as_proposals=False), 71 | allowed_border=-1, 72 | pos_weight=-1, 73 | debug=False), 74 | img_rpn_proposal=dict( 75 | nms_across_levels=False, 76 | nms_pre=2000, 77 | nms_post=1000, 78 | max_per_img=1000, 79 | nms=dict(type='nms', iou_threshold=0.7), 80 | min_bbox_size=0), 81 | img_rcnn=dict( 82 | assigner=dict( 83 | type='MaxIoUAssigner', 84 | pos_iou_thr=0.5, 85 | neg_iou_thr=0.5, 86 | min_pos_iou=0.5, 87 | match_low_quality=False, 88 | ignore_iof_thr=-1), 89 | sampler=dict( 90 | type='RandomSampler', 91 | num=512, 92 | pos_fraction=0.25, 93 | neg_pos_ub=-1, 94 | add_gt_as_proposals=True), 95 | pos_weight=-1, 96 | debug=False)), 97 | test_cfg=dict( 98 | img_rpn=dict( 99 | nms_across_levels=False, 100 | nms_pre=1000, 101 | nms_post=1000, 102 | max_per_img=1000, 103 | nms=dict(type='nms', iou_threshold=0.7), 104 | min_bbox_size=0), 105 | img_rcnn=dict( 106 | score_thr=0.05, 107 | nms=dict(type='nms', iou_threshold=0.5), 108 | max_per_img=100))) 109 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/mask_rcnn_r50_fpn.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='MaskRCNN', 4 | pretrained='torchvision://resnet50', 5 | backbone=dict( 6 | type='ResNet', 7 | depth=50, 8 | num_stages=4, 9 | out_indices=(0, 1, 2, 3), 10 | frozen_stages=1, 11 | norm_cfg=dict(type='BN', requires_grad=True), 12 | norm_eval=True, 13 | style='pytorch'), 14 | neck=dict( 15 | type='FPN', 16 | in_channels=[256, 512, 1024, 2048], 17 | out_channels=256, 18 | num_outs=5), 19 | rpn_head=dict( 20 | type='RPNHead', 21 | in_channels=256, 22 | feat_channels=256, 23 | anchor_generator=dict( 24 | type='AnchorGenerator', 25 | scales=[8], 26 | ratios=[0.5, 1.0, 2.0], 27 | strides=[4, 8, 16, 32, 64]), 28 | bbox_coder=dict( 29 | type='DeltaXYWHBBoxCoder', 30 | target_means=[.0, .0, .0, .0], 31 | target_stds=[1.0, 1.0, 1.0, 1.0]), 32 | loss_cls=dict( 33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 35 | roi_head=dict( 36 | 
type='StandardRoIHead', 37 | bbox_roi_extractor=dict( 38 | type='SingleRoIExtractor', 39 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), 40 | out_channels=256, 41 | featmap_strides=[4, 8, 16, 32]), 42 | bbox_head=dict( 43 | type='Shared2FCBBoxHead', 44 | in_channels=256, 45 | fc_out_channels=1024, 46 | roi_feat_size=7, 47 | num_classes=80, 48 | bbox_coder=dict( 49 | type='DeltaXYWHBBoxCoder', 50 | target_means=[0., 0., 0., 0.], 51 | target_stds=[0.1, 0.1, 0.2, 0.2]), 52 | reg_class_agnostic=False, 53 | loss_cls=dict( 54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), 55 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)), 56 | mask_roi_extractor=dict( 57 | type='SingleRoIExtractor', 58 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), 59 | out_channels=256, 60 | featmap_strides=[4, 8, 16, 32]), 61 | mask_head=dict( 62 | type='FCNMaskHead', 63 | num_convs=4, 64 | in_channels=256, 65 | conv_out_channels=256, 66 | num_classes=80, 67 | loss_mask=dict( 68 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), 69 | # model training and testing settings 70 | train_cfg=dict( 71 | rpn=dict( 72 | assigner=dict( 73 | type='MaxIoUAssigner', 74 | pos_iou_thr=0.7, 75 | neg_iou_thr=0.3, 76 | min_pos_iou=0.3, 77 | match_low_quality=True, 78 | ignore_iof_thr=-1), 79 | sampler=dict( 80 | type='RandomSampler', 81 | num=256, 82 | pos_fraction=0.5, 83 | neg_pos_ub=-1, 84 | add_gt_as_proposals=False), 85 | allowed_border=-1, 86 | pos_weight=-1, 87 | debug=False), 88 | rpn_proposal=dict( 89 | nms_across_levels=False, 90 | nms_pre=2000, 91 | nms_post=1000, 92 | max_num=1000, 93 | nms_thr=0.7, 94 | min_bbox_size=0), 95 | rcnn=dict( 96 | assigner=dict( 97 | type='MaxIoUAssigner', 98 | pos_iou_thr=0.5, 99 | neg_iou_thr=0.5, 100 | min_pos_iou=0.5, 101 | match_low_quality=True, 102 | ignore_iof_thr=-1), 103 | sampler=dict( 104 | type='RandomSampler', 105 | num=512, 106 | pos_fraction=0.25, 107 | neg_pos_ub=-1, 108 | add_gt_as_proposals=True), 109 | mask_size=28, 110 | pos_weight=-1, 111 | debug=False)), 112 | test_cfg=dict( 113 | rpn=dict( 114 | nms_across_levels=False, 115 | nms_pre=1000, 116 | nms_post=1000, 117 | max_num=1000, 118 | nms_thr=0.7, 119 | min_bbox_size=0), 120 | rcnn=dict( 121 | score_thr=0.05, 122 | nms=dict(type='nms', iou_threshold=0.5), 123 | max_per_img=100, 124 | mask_thr_binary=0.5))) 125 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/paconv_cuda_ssg.py: -------------------------------------------------------------------------------- 1 | _base_ = './paconv_ssg.py' 2 | 3 | model = dict( 4 | backbone=dict( 5 | sa_cfg=dict( 6 | type='PAConvCUDASAModule', 7 | scorenet_cfg=dict(mlp_channels=[8, 16, 16])))) 8 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/paconv_ssg.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='EncoderDecoder3D', 4 | backbone=dict( 5 | type='PointNet2SASSG', 6 | in_channels=9, # [xyz, rgb, normalized_xyz] 7 | num_points=(1024, 256, 64, 16), 8 | radius=(None, None, None, None), # use kNN instead of ball query 9 | num_samples=(32, 32, 32, 32), 10 | sa_channels=((32, 32, 64), (64, 64, 128), (128, 128, 256), (256, 256, 11 | 512)), 12 | fp_channels=(), 13 | norm_cfg=dict(type='BN2d', momentum=0.1), 14 | sa_cfg=dict( 15 | type='PAConvSAModule', 16 | pool_mod='max', 17 | use_xyz=True, 18 | normalize_xyz=False, 19 
| paconv_num_kernels=[16, 16, 16], 20 | paconv_kernel_input='w_neighbor', 21 | scorenet_input='w_neighbor_dist', 22 | scorenet_cfg=dict( 23 | mlp_channels=[16, 16, 16], 24 | score_norm='softmax', 25 | temp_factor=1.0, 26 | last_bn=False))), 27 | decode_head=dict( 28 | type='PAConvHead', 29 | # PAConv model's decoder takes skip connections from backbone 30 | # unlike PointNet++, it also concatenates input features in the last 31 | # level of the decoder, leading to `128 + 6` as the channel number 32 | fp_channels=((768, 256, 256), (384, 256, 256), (320, 256, 128), 33 | (128 + 6, 128, 128, 128)), 34 | channels=128, 35 | dropout_ratio=0.5, 36 | conv_cfg=dict(type='Conv1d'), 37 | norm_cfg=dict(type='BN1d'), 38 | act_cfg=dict(type='ReLU'), 39 | loss_decode=dict( 40 | type='CrossEntropyLoss', 41 | use_sigmoid=False, 42 | class_weight=None, # should be modified with dataset 43 | loss_weight=1.0)), 44 | # correlation loss to regularize PAConv's kernel weights 45 | loss_regularization=dict( 46 | type='PAConvRegularizationLoss', reduction='sum', loss_weight=10.0), 47 | # model training and testing settings 48 | train_cfg=dict(), 49 | test_cfg=dict(mode='slide')) 50 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/pointnet2_msg.py: -------------------------------------------------------------------------------- 1 | _base_ = './pointnet2_ssg.py' 2 | 3 | # model settings 4 | model = dict( 5 | backbone=dict( 6 | _delete_=True, 7 | type='PointNet2SAMSG', 8 | in_channels=6, # [xyz, rgb], should be modified with dataset 9 | num_points=(1024, 256, 64, 16), 10 | radii=((0.05, 0.1), (0.1, 0.2), (0.2, 0.4), (0.4, 0.8)), 11 | num_samples=((16, 32), (16, 32), (16, 32), (16, 32)), 12 | sa_channels=(((16, 16, 32), (32, 32, 64)), ((64, 64, 128), (64, 96, 13 | 128)), 14 | ((128, 196, 256), (128, 196, 256)), ((256, 256, 512), 15 | (256, 384, 512))), 16 | aggregation_channels=(None, None, None, None), 17 | fps_mods=(('D-FPS'), ('D-FPS'), ('D-FPS'), ('D-FPS')), 18 | fps_sample_range_lists=((-1), (-1), (-1), (-1)), 19 | dilated_group=(False, False, False, False), 20 | out_indices=(0, 1, 2, 3), 21 | sa_cfg=dict( 22 | type='PointSAModuleMSG', 23 | pool_mod='max', 24 | use_xyz=True, 25 | normalize_xyz=False)), 26 | decode_head=dict( 27 | fp_channels=((1536, 256, 256), (512, 256, 256), (352, 256, 128), 28 | (128, 128, 128, 128)))) 29 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/pointnet2_ssg.py: -------------------------------------------------------------------------------- 1 | # model settings 2 | model = dict( 3 | type='EncoderDecoder3D', 4 | backbone=dict( 5 | type='PointNet2SASSG', 6 | in_channels=6, # [xyz, rgb], should be modified with dataset 7 | num_points=(1024, 256, 64, 16), 8 | radius=(0.1, 0.2, 0.4, 0.8), 9 | num_samples=(32, 32, 32, 32), 10 | sa_channels=((32, 32, 64), (64, 64, 128), (128, 128, 256), (256, 256, 11 | 512)), 12 | fp_channels=(), 13 | norm_cfg=dict(type='BN2d'), 14 | sa_cfg=dict( 15 | type='PointSAModule', 16 | pool_mod='max', 17 | use_xyz=True, 18 | normalize_xyz=False)), 19 | decode_head=dict( 20 | type='PointNet2Head', 21 | fp_channels=((768, 256, 256), (384, 256, 256), (320, 256, 128), 22 | (128, 128, 128, 128)), 23 | channels=128, 24 | dropout_ratio=0.5, 25 | conv_cfg=dict(type='Conv1d'), 26 | norm_cfg=dict(type='BN1d'), 27 | act_cfg=dict(type='ReLU'), 28 | loss_decode=dict( 29 | type='CrossEntropyLoss', 30 | use_sigmoid=False, 31 | class_weight=None, # 
should be modified with dataset 32 | loss_weight=1.0)), 33 | # model training and testing settings 34 | train_cfg=dict(), 35 | test_cfg=dict(mode='slide')) 36 | -------------------------------------------------------------------------------- /projects/configs/_base_/models/votenet.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='VoteNet', 3 | backbone=dict( 4 | type='PointNet2SASSG', 5 | in_channels=4, 6 | num_points=(2048, 1024, 512, 256), 7 | radius=(0.2, 0.4, 0.8, 1.2), 8 | num_samples=(64, 32, 16, 16), 9 | sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256), 10 | (128, 128, 256)), 11 | fp_channels=((256, 256), (256, 256)), 12 | norm_cfg=dict(type='BN2d'), 13 | sa_cfg=dict( 14 | type='PointSAModule', 15 | pool_mod='max', 16 | use_xyz=True, 17 | normalize_xyz=True)), 18 | bbox_head=dict( 19 | type='VoteHead', 20 | vote_module_cfg=dict( 21 | in_channels=256, 22 | vote_per_seed=1, 23 | gt_per_seed=3, 24 | conv_channels=(256, 256), 25 | conv_cfg=dict(type='Conv1d'), 26 | norm_cfg=dict(type='BN1d'), 27 | norm_feats=True, 28 | vote_loss=dict( 29 | type='ChamferDistance', 30 | mode='l1', 31 | reduction='none', 32 | loss_dst_weight=10.0)), 33 | vote_aggregation_cfg=dict( 34 | type='PointSAModule', 35 | num_point=256, 36 | radius=0.3, 37 | num_sample=16, 38 | mlp_channels=[256, 128, 128, 128], 39 | use_xyz=True, 40 | normalize_xyz=True), 41 | pred_layer_cfg=dict( 42 | in_channels=128, shared_conv_channels=(128, 128), bias=True), 43 | conv_cfg=dict(type='Conv1d'), 44 | norm_cfg=dict(type='BN1d'), 45 | objectness_loss=dict( 46 | type='CrossEntropyLoss', 47 | class_weight=[0.2, 0.8], 48 | reduction='sum', 49 | loss_weight=5.0), 50 | center_loss=dict( 51 | type='ChamferDistance', 52 | mode='l2', 53 | reduction='sum', 54 | loss_src_weight=10.0, 55 | loss_dst_weight=10.0), 56 | dir_class_loss=dict( 57 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 58 | dir_res_loss=dict( 59 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0), 60 | size_class_loss=dict( 61 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0), 62 | size_res_loss=dict( 63 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0 / 3.0), 64 | semantic_loss=dict( 65 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)), 66 | # model training and testing settings 67 | train_cfg=dict( 68 | pos_distance_thr=0.3, neg_distance_thr=0.6, sample_mod='vote'), 69 | test_cfg=dict( 70 | sample_mod='seed', 71 | nms_thr=0.25, 72 | score_thr=0.05, 73 | per_class_proposal=True)) 74 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/cosine.py: -------------------------------------------------------------------------------- 1 | # This schedule is mainly used by models with dynamic voxelization 2 | # optimizer 3 | lr = 0.003 # max learning rate 4 | optimizer = dict( 5 | type='AdamW', 6 | lr=lr, 7 | betas=(0.95, 0.99), # the momentum is changed during training 8 | weight_decay=0.001) 9 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 10 | 11 | lr_config = dict( 12 | policy='CosineAnnealing', 13 | warmup='linear', 14 | warmup_iters=1000, 15 | warmup_ratio=1.0 / 10, 16 | min_lr_ratio=1e-5) 17 | 18 | momentum_config = None 19 | 20 | runner = dict(type='EpochBasedRunner', max_epochs=40) 21 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/cyclic_20e.py: 
-------------------------------------------------------------------------------- 1 | # For nuScenes dataset, we usually evaluate the model at the end of training. 2 | # Since the models are trained for 20 epochs by default, we set the evaluation 3 | # interval to 20. Please change the interval accordingly if you do not 4 | # use a default schedule. 5 | # optimizer 6 | # This schedule is mainly used by models on the nuScenes dataset 7 | optimizer = dict(type='AdamW', lr=1e-4, weight_decay=0.01) 8 | # max_norm=10 is better for SECOND 9 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 10 | lr_config = dict( 11 | policy='cyclic', 12 | target_ratio=(10, 1e-4), 13 | cyclic_times=1, 14 | step_ratio_up=0.4, 15 | ) 16 | momentum_config = dict( 17 | policy='cyclic', 18 | target_ratio=(0.85 / 0.95, 1), 19 | cyclic_times=1, 20 | step_ratio_up=0.4, 21 | ) 22 | 23 | # runtime settings 24 | runner = dict(type='EpochBasedRunner', max_epochs=20) 25 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/cyclic_40e.py: -------------------------------------------------------------------------------- 1 | # The schedule is usually used by models trained on KITTI dataset 2 | 3 | # The learning rate set in the cyclic schedule is the initial learning rate 4 | # rather than the max learning rate. Since the target_ratio is (10, 1e-4), 5 | # the learning rate will change from 0.0018 to 0.018, then go to 0.0018*1e-4 6 | lr = 0.0018 7 | # The optimizer follows the setting in SECOND.Pytorch, but here we use 8 | # the official AdamW optimizer implemented by PyTorch. 9 | optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01) 10 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 11 | # We use cyclic learning rate and momentum schedule following SECOND.Pytorch 12 | # https://github.com/traveller59/second.pytorch/blob/3aba19c9688274f75ebb5e576f65cfe54773c021/torchplus/train/learning_schedules_fastai.py#L69 # noqa 13 | # We implement them in mmcv, for more details, please refer to 14 | # https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/lr_updater.py#L327 # noqa 15 | # https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/momentum_updater.py#L130 # noqa 16 | lr_config = dict( 17 | policy='cyclic', 18 | target_ratio=(10, 1e-4), 19 | cyclic_times=1, 20 | step_ratio_up=0.4, 21 | ) 22 | momentum_config = dict( 23 | policy='cyclic', 24 | target_ratio=(0.85 / 0.95, 1), 25 | cyclic_times=1, 26 | step_ratio_up=0.4, 27 | ) 28 | # Although the max_epochs is 40, this schedule is usually used with 29 | # RepeatDataset with repeat ratio N, thus the actual max epoch 30 | # number could be Nx40 31 | runner = dict(type='EpochBasedRunner', max_epochs=40) 32 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/mmdet_schedule_1x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) 3 | optimizer_config = dict(grad_clip=None) 4 | # learning policy 5 | lr_config = dict( 6 | policy='step', 7 | warmup='linear', 8 | warmup_iters=500, 9 | warmup_ratio=0.001, 10 | step=[8, 11]) 11 | runner = dict(type='EpochBasedRunner', max_epochs=12) 12 | --------------------------------------------------------------------------------
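As a quick sanity check on the cyclic schedules above: the two entries of target_ratio scale the base learning rate up during the warm-up phase and down during the decay phase. A minimal sketch of that arithmetic (cyclic_lr_bounds is a hypothetical helper, not repo code; it only restates the comments from cyclic_40e.py):

def cyclic_lr_bounds(base_lr, target_ratio):
    # up phase: base_lr -> base_lr * up_ratio; decay phase ends near base_lr * down_ratio
    up_ratio, down_ratio = target_ratio
    peak_lr = base_lr * up_ratio      # 0.0018 -> 0.018
    final_lr = base_lr * down_ratio   # ends near 0.0018 * 1e-4
    return peak_lr, final_lr

print(cyclic_lr_bounds(0.0018, (10, 1e-4)))  # -> roughly (0.018, 1.8e-07)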
/projects/configs/_base_/schedules/schedule_2x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used by models on the nuScenes dataset 3 | optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.01) 4 | # max_norm=10 is better for SECOND 5 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 6 | lr_config = dict( 7 | policy='step', 8 | warmup='linear', 9 | warmup_iters=1000, 10 | warmup_ratio=1.0 / 1000, 11 | step=[20, 23]) 12 | momentum_config = None 13 | # runtime settings 14 | runner = dict(type='EpochBasedRunner', max_epochs=24) 15 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/schedule_3x.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used by models on indoor datasets, 3 | # e.g., VoteNet on SUNRGBD and ScanNet 4 | lr = 0.008 # max learning rate 5 | optimizer = dict(type='AdamW', lr=lr, weight_decay=0.01) 6 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) 7 | lr_config = dict(policy='step', warmup=None, step=[24, 32]) 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=36) 10 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/seg_cosine_150e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used on the S3DIS dataset for the segmentation task 3 | optimizer = dict(type='SGD', lr=0.2, weight_decay=0.0001, momentum=0.9) 4 | optimizer_config = dict(grad_clip=None) 5 | lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=0.002) 6 | momentum_config = None 7 | 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=150) 10 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/seg_cosine_200e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used on the ScanNet dataset for the segmentation task 3 | optimizer = dict(type='Adam', lr=0.001, weight_decay=0.01) 4 | optimizer_config = dict(grad_clip=None) 5 | lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5) 6 | momentum_config = None 7 | 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=200) 10 | -------------------------------------------------------------------------------- /projects/configs/_base_/schedules/seg_cosine_50e.py: -------------------------------------------------------------------------------- 1 | # optimizer 2 | # This schedule is mainly used on the S3DIS dataset for the segmentation task 3 | optimizer = dict(type='Adam', lr=0.001, weight_decay=0.001) 4 | optimizer_config = dict(grad_clip=None) 5 | lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5) 6 | momentum_config = None 7 | 8 | # runtime settings 9 | runner = dict(type='EpochBasedRunner', max_epochs=50) 10 | -------------------------------------------------------------------------------- /projects/configs/datasets/custom_lyft-3d.py: -------------------------------------------------------------------------------- 1 | # If point cloud range is changed, the models should also change their point 2 | # cloud range accordingly 3 | point_cloud_range = [-80, -80, -5, 80, 80, 3] 4 | # For Lyft we usually do 9-class detection 5 | class_names = [ 6
| 'car', 'truck', 'bus', 'emergency_vehicle', 'other_vehicle', 'motorcycle', 7 | 'bicycle', 'pedestrian', 'animal' 8 | ] 9 | dataset_type = 'CustomLyftDataset' 10 | data_root = 'data/lyft/' 11 | # Input modality for the Lyft dataset; this is consistent with the submission 12 | # format, which requires the information in input_modality. 13 | input_modality = dict( 14 | use_lidar=True, 15 | use_camera=False, 16 | use_radar=False, 17 | use_map=False, 18 | use_external=True) 19 | file_client_args = dict(backend='disk') 20 | # Uncomment the following if using ceph or other file clients. 21 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient 22 | # for more details. 23 | # file_client_args = dict( 24 | # backend='petrel', 25 | # path_mapping=dict({ 26 | # './data/lyft/': 's3://lyft/lyft/', 27 | # 'data/lyft/': 's3://lyft/lyft/' 28 | # })) 29 | train_pipeline = [ 30 | dict( 31 | type='LoadPointsFromFile', 32 | coord_type='LIDAR', 33 | load_dim=5, 34 | use_dim=5, 35 | file_client_args=file_client_args), 36 | dict( 37 | type='LoadPointsFromMultiSweeps', 38 | sweeps_num=10, 39 | file_client_args=file_client_args), 40 | dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True), 41 | dict( 42 | type='GlobalRotScaleTrans', 43 | rot_range=[-0.3925, 0.3925], 44 | scale_ratio_range=[0.95, 1.05], 45 | translation_std=[0, 0, 0]), 46 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5), 47 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), 48 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), 49 | dict(type='PointShuffle'), 50 | dict(type='DefaultFormatBundle3D', class_names=class_names), 51 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) 52 | ] 53 | test_pipeline = [ 54 | dict( 55 | type='LoadPointsFromFile', 56 | coord_type='LIDAR', 57 | load_dim=5, 58 | use_dim=5, 59 | file_client_args=file_client_args), 60 | dict( 61 | type='LoadPointsFromMultiSweeps', 62 | sweeps_num=10, 63 | file_client_args=file_client_args), 64 | dict( 65 | type='MultiScaleFlipAug3D', 66 | img_scale=(1333, 800), 67 | pts_scale_ratio=1, 68 | flip=False, 69 | transforms=[ 70 | dict( 71 | type='GlobalRotScaleTrans', 72 | rot_range=[0, 0], 73 | scale_ratio_range=[1., 1.], 74 | translation_std=[0, 0, 0]), 75 | dict(type='RandomFlip3D'), 76 | dict( 77 | type='PointsRangeFilter', point_cloud_range=point_cloud_range), 78 | dict( 79 | type='DefaultFormatBundle3D', 80 | class_names=class_names, 81 | with_label=False), 82 | dict(type='Collect3D', keys=['points']) 83 | ]) 84 | ] 85 | # construct a pipeline for data and gt loading in show function 86 | # please keep its loading function consistent with test_pipeline (e.g.
client) 87 | eval_pipeline = [ 88 | dict( 89 | type='LoadPointsFromFile', 90 | coord_type='LIDAR', 91 | load_dim=5, 92 | use_dim=5, 93 | file_client_args=file_client_args), 94 | dict( 95 | type='LoadPointsFromMultiSweeps', 96 | sweeps_num=10, 97 | file_client_args=file_client_args), 98 | dict( 99 | type='DefaultFormatBundle3D', 100 | class_names=class_names, 101 | with_label=False), 102 | dict(type='Collect3D', keys=['points']) 103 | ] 104 | 105 | data = dict( 106 | samples_per_gpu=2, 107 | workers_per_gpu=2, 108 | train=dict( 109 | type=dataset_type, 110 | data_root=data_root, 111 | ann_file=data_root + 'lyft_infos_train.pkl', 112 | pipeline=train_pipeline, 113 | classes=class_names, 114 | modality=input_modality, 115 | test_mode=False), 116 | val=dict( 117 | type=dataset_type, 118 | data_root=data_root, 119 | ann_file=data_root + 'lyft_infos_val.pkl', 120 | pipeline=test_pipeline, 121 | classes=class_names, 122 | modality=input_modality, 123 | test_mode=True), 124 | test=dict( 125 | type=dataset_type, 126 | data_root=data_root, 127 | ann_file=data_root + 'lyft_infos_val.pkl', 128 | pipeline=test_pipeline, 129 | classes=class_names, 130 | modality=input_modality, 131 | test_mode=True)) 132 | # For the Lyft dataset, we usually evaluate the model at the end of training. 133 | # Since the models are trained for 24 epochs by default, we set the evaluation 134 | # interval to 24. Please change the interval accordingly if you do not 135 | # use a default schedule. 136 | evaluation = dict(interval=24, pipeline=eval_pipeline) -------------------------------------------------------------------------------- /projects/configs/datasets/custom_waymo-3d.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | # D5 in the config name means the whole dataset is divided into 5 folds 3 | # We only use one fold for efficient experiments 4 | dataset_type = 'CustomWaymoDataset' 5 | data_root = 'data/waymo/kitti_format/' 6 | file_client_args = dict(backend='disk') 7 | # Uncomment the following if using ceph or other file clients. 8 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient 9 | # for more details.
10 | # file_client_args = dict( 11 | # backend='petrel', path_mapping=dict(data='s3://waymo_data/')) 12 | 13 | img_norm_cfg = dict( 14 | mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False) 15 | class_names = ['Car', 'Pedestrian', 'Cyclist'] 16 | point_cloud_range = [-74.88, -74.88, -2, 74.88, 74.88, 4] 17 | input_modality = dict(use_lidar=False, use_camera=True) 18 | db_sampler = dict( 19 | data_root=data_root, 20 | info_path=data_root + 'waymo_dbinfos_train.pkl', 21 | rate=1.0, 22 | prepare=dict( 23 | filter_by_difficulty=[-1], 24 | filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)), 25 | classes=class_names, 26 | sample_groups=dict(Car=15, Pedestrian=10, Cyclist=10), 27 | points_loader=dict( 28 | type='LoadPointsFromFile', 29 | coord_type='LIDAR', 30 | load_dim=5, 31 | use_dim=[0, 1, 2, 3, 4], 32 | file_client_args=file_client_args)) 33 | 34 | 35 | 36 | train_pipeline = [ 37 | dict(type='LoadMultiViewImageFromFiles', to_float32=True), 38 | dict(type='PhotoMetricDistortionMultiViewImage'), 39 | dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True, with_attr_label=False), 40 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), 41 | dict(type='ObjectNameFilter', classes=class_names), 42 | dict(type='NormalizeMultiviewImage', **img_norm_cfg), 43 | dict(type='PadMultiViewImage', size_divisor=32), 44 | dict(type='DefaultFormatBundle3D', class_names=class_names), 45 | dict(type='CustomCollect3D', keys=['gt_bboxes_3d', 'gt_labels_3d', 'img']) 46 | ] 47 | 48 | 49 | test_pipeline = [ 50 | dict(type='LoadMultiViewImageFromFiles', to_float32=True), 51 | dict(type='NormalizeMultiviewImage', **img_norm_cfg), 52 | dict(type='PadMultiViewImage', size_divisor=32), 53 | dict( 54 | type='MultiScaleFlipAug3D', 55 | img_scale=(1920, 1280), 56 | pts_scale_ratio=1, 57 | flip=False, 58 | transforms=[ 59 | dict( 60 | type='DefaultFormatBundle3D', 61 | class_names=class_names, 62 | with_label=False), 63 | dict(type='CustomCollect3D', keys=['img']) 64 | ]) 65 | ] 66 | 67 | 68 | # construct a pipeline for data and gt loading in show function 69 | # please keep its loading function consistent with test_pipeline (e.g. client) 70 | 71 | data = dict( 72 | samples_per_gpu=2, 73 | workers_per_gpu=4, 74 | train=dict( 75 | type='RepeatDataset', 76 | times=2, 77 | dataset=dict( 78 | type=dataset_type, 79 | data_root=data_root, 80 | ann_file=data_root + 'waymo_infos_train.pkl', 81 | split='training', 82 | pipeline=train_pipeline, 83 | modality=input_modality, 84 | classes=class_names, 85 | test_mode=False, 86 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset 87 | # and box_type_3d='Depth' in sunrgbd and scannet dataset. 
88 | box_type_3d='LiDAR', 89 | # load one frame every five frames 90 | load_interval=5)), 91 | val=dict( 92 | type=dataset_type, 93 | data_root=data_root, 94 | ann_file=data_root + 'waymo_infos_val.pkl', 95 | split='training', 96 | pipeline=test_pipeline, 97 | modality=input_modality, 98 | classes=class_names, 99 | test_mode=True, 100 | box_type_3d='LiDAR'), 101 | test=dict( 102 | type=dataset_type, 103 | data_root=data_root, 104 | ann_file=data_root + 'waymo_infos_val.pkl', 105 | split='training', 106 | pipeline=test_pipeline, 107 | modality=input_modality, 108 | classes=class_names, 109 | test_mode=True, 110 | box_type_3d='LiDAR')) 111 | 112 | evaluation = dict(interval=24, pipeline=test_pipeline) -------------------------------------------------------------------------------- /projects/configs/label_mapping/nuscenes.yaml: -------------------------------------------------------------------------------- 1 | labels: 2 | 0: 'noise' 3 | 1: 'animal' 4 | 2: 'human.pedestrian.adult' 5 | 3: 'human.pedestrian.child' 6 | 4: 'human.pedestrian.construction_worker' 7 | 5: 'human.pedestrian.personal_mobility' 8 | 6: 'human.pedestrian.police_officer' 9 | 7: 'human.pedestrian.stroller' 10 | 8: 'human.pedestrian.wheelchair' 11 | 9: 'movable_object.barrier' 12 | 10: 'movable_object.debris' 13 | 11: 'movable_object.pushable_pullable' 14 | 12: 'movable_object.trafficcone' 15 | 13: 'static_object.bicycle_rack' 16 | 14: 'vehicle.bicycle' 17 | 15: 'vehicle.bus.bendy' 18 | 16: 'vehicle.bus.rigid' 19 | 17: 'vehicle.car' 20 | 18: 'vehicle.construction' 21 | 19: 'vehicle.emergency.ambulance' 22 | 20: 'vehicle.emergency.police' 23 | 21: 'vehicle.motorcycle' 24 | 22: 'vehicle.trailer' 25 | 23: 'vehicle.truck' 26 | 24: 'flat.driveable_surface' 27 | 25: 'flat.other' 28 | 26: 'flat.sidewalk' 29 | 27: 'flat.terrain' 30 | 28: 'static.manmade' 31 | 29: 'static.other' 32 | 30: 'static.vegetation' 33 | 31: 'vehicle.ego' 34 | labels_16: 35 | 0: 'noise' 36 | 1: 'barrier' 37 | 2: 'bicycle' 38 | 3: 'bus' 39 | 4: 'car' 40 | 5: 'construction_vehicle' 41 | 6: 'motorcycle' 42 | 7: 'pedestrian' 43 | 8: 'traffic_cone' 44 | 9: 'trailer' 45 | 10: 'truck' 46 | 11: 'driveable_surface' 47 | 12: 'other_flat' 48 | 13: 'sidewalk' 49 | 14: 'terrain' 50 | 15: 'manmade' 51 | 16: 'vegetation' 52 | learning_map: 53 | 1: 0 54 | 5: 0 55 | 7: 0 56 | 8: 0 57 | 10: 0 58 | 11: 0 59 | 13: 0 60 | 19: 0 61 | 20: 0 62 | 0: 0 63 | 29: 0 64 | 31: 0 65 | 9: 1 66 | 14: 2 67 | 15: 3 68 | 16: 3 69 | 17: 4 70 | 18: 5 71 | 21: 6 72 | 2: 7 73 | 3: 7 74 | 4: 7 75 | 6: 7 76 | 12: 8 77 | 22: 9 78 | 23: 10 79 | 24: 11 80 | 25: 12 81 | 26: 13 82 | 27: 14 83 | 28: 15 84 | 30: 16 -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/__init__.py: -------------------------------------------------------------------------------- 1 | from .core.bbox.assigners.hungarian_assigner_3d import HungarianAssigner3D 2 | from .core.bbox.coders.nms_free_coder import NMSFreeCoder 3 | from .core.bbox.match_costs import BBox3DL1Cost 4 | from .core.evaluation.eval_hooks import CustomDistEvalHook 5 | from .datasets.pipelines import ( 6 | PhotoMetricDistortionMultiViewImage, PadMultiViewImage, 7 | NormalizeMultiviewImage, CustomCollect3D) 8 | from .models.backbones.vovnet import VoVNet 9 | from .models.utils import * 10 | from .models.opt.adamw import AdamW2 11 | from .models.losses import Lovasz3DLoss 12 | from .bevformer import * 13 | -------------------------------------------------------------------------------- 
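The learning_map in nuscenes.yaml above collapses the 32 raw nuScenes categories into the 16 training classes of labels_16. A minimal sketch of how such a mapping is typically applied to per-point labels (illustrative only; the variable names and the exact loading code are not from this repo's tooling, though the YAML path is):

import numpy as np
import yaml

with open('projects/configs/label_mapping/nuscenes.yaml') as f:
    mapping = yaml.safe_load(f)

# Dense lookup table: raw label id (0-31) -> training class id (0-16).
learning_map = mapping['learning_map']
lut = np.zeros(max(learning_map) + 1, dtype=np.int64)
for raw_id, train_id in learning_map.items():
    lut[raw_id] = train_id

raw = np.array([17, 24, 30, 2])  # car, driveable_surface, vegetation, adult
print(lut[raw])                  # [ 4 11 16  7]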
/projects/mmdet3d_plugin/bevformer/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .dense_heads import * 3 | from .detectors import * 4 | from .modules import * 5 | from .runner import * 6 | from .hooks import * 7 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/apis/__init__.py: -------------------------------------------------------------------------------- 1 | from .train import custom_train_model 2 | from .mmdet_train import custom_train_detector 3 | # from .test import custom_multi_gpu_test 4 | from .test_dense import custom_multi_gpu_test -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/apis/train.py: -------------------------------------------------------------------------------- 1 | # --------------------------------------------- 2 | # Copyright (c) OpenMMLab. All rights reserved. 3 | # --------------------------------------------- 4 | # Modified by Zhiqi Li 5 | # --------------------------------------------- 6 | 7 | from .mmdet_train import custom_train_detector 8 | from mmseg.apis import train_segmentor 9 | from mmdet.apis import train_detector 10 | 11 | def custom_train_model(model, 12 | dataset, 13 | cfg, 14 | distributed=False, 15 | validate=False, 16 | timestamp=None, 17 | eval_model=None, 18 | meta=None): 19 | """A function wrapper for launching model training according to cfg. 20 | 21 | Because we need a different eval_hook in the runner, this wrapper should be 22 | deprecated in the future. 23 | """ 24 | if cfg.model.type in ['EncoderDecoder3D']: 25 | assert False 26 | else: 27 | custom_train_detector( 28 | model, 29 | dataset, 30 | cfg, 31 | distributed=distributed, 32 | validate=validate, 33 | timestamp=timestamp, 34 | eval_model=eval_model, 35 | meta=meta) 36 | 37 | 38 | def train_model(model, 39 | dataset, 40 | cfg, 41 | distributed=False, 42 | validate=False, 43 | timestamp=None, 44 | meta=None): 45 | """A function wrapper for launching model training according to cfg. 46 | 47 | Because we need a different eval_hook in the runner, this wrapper should be 48 | deprecated in the future.
49 | """ 50 | if cfg.model.type in ['EncoderDecoder3D']: 51 | train_segmentor( 52 | model, 53 | dataset, 54 | cfg, 55 | distributed=distributed, 56 | validate=validate, 57 | timestamp=timestamp, 58 | meta=meta) 59 | else: 60 | train_detector( 61 | model, 62 | dataset, 63 | cfg, 64 | distributed=distributed, 65 | validate=validate, 66 | timestamp=timestamp, 67 | meta=meta) 68 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/dense_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .occ_head import * 2 | from .depth_head import * 3 | from .seg_head import * -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | from .octree_occ import OctreeOcc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/hooks/__init__.py: -------------------------------------------------------------------------------- 1 | from .custom_hooks import TransferWeight -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/hooks/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/4DVLab/OctreeOcc/54afdd64b210fa45a0b1e1f255e4f88739b4e5a8/projects/mmdet3d_plugin/bevformer/hooks/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/hooks/__pycache__/custom_hooks.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/4DVLab/OctreeOcc/54afdd64b210fa45a0b1e1f255e4f88739b4e5a8/projects/mmdet3d_plugin/bevformer/hooks/__pycache__/custom_hooks.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/hooks/custom_hooks.py: -------------------------------------------------------------------------------- 1 | from mmcv.runner.hooks.hook import HOOKS, Hook 2 | from projects.mmdet3d_plugin.models.utils import run_time 3 | 4 | 5 | @HOOKS.register_module() 6 | class TransferWeight(Hook): 7 | 8 | def __init__(self, every_n_inters=1): 9 | self.every_n_inters=every_n_inters 10 | 11 | def after_train_iter(self, runner): 12 | if self.every_n_inner_iters(runner, self.every_n_inters): 13 | runner.eval_model.load_state_dict(runner.model.state_dict()) 14 | 15 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .spatial_cross_attention import SpatialCrossAttention, MSDeformableAttention3D 2 | from .occ_encoder import OctreeOccupancyEncoder, OctreeOccupancyLayer 3 | from .decoder import DetectionTransformerDecoder 4 | from .occ_spatial_attention import OccSpatialAttention 5 | from .occ_mlp_decoder import MLP_Decoder, OctreeDecoder 6 | from .octree_transformer import OctreeOccTransformer 7 | from .deformable_self_attention_3D_custom import OctreeSelfAttention3D, DeformSelfAttention3DCustom -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/runner/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .epoch_based_runner import EpochBasedRunner_video -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/runner/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/4DVLab/OctreeOcc/54afdd64b210fa45a0b1e1f255e4f88739b4e5a8/projects/mmdet3d_plugin/bevformer/runner/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/runner/__pycache__/epoch_based_runner.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/4DVLab/OctreeOcc/54afdd64b210fa45a0b1e1f255e4f88739b4e5a8/projects/mmdet3d_plugin/bevformer/runner/__pycache__/epoch_based_runner.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/bevformer/runner/epoch_based_runner.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | # --------------------------------------------- 3 | # Modified by Zhiqi Li 4 | # --------------------------------------------- 5 | 6 | import os.path as osp 7 | import torch 8 | import mmcv 9 | from mmcv.runner.base_runner import BaseRunner 10 | from mmcv.runner.epoch_based_runner import EpochBasedRunner 11 | from mmcv.runner.builder import RUNNERS 12 | from mmcv.runner.checkpoint import save_checkpoint 13 | from mmcv.runner.utils import get_host_info 14 | from pprint import pprint 15 | from mmcv.parallel.data_container import DataContainer 16 | 17 | 18 | @RUNNERS.register_module() 19 | class EpochBasedRunner_video(EpochBasedRunner): 20 | 21 | ''' 22 | # basic logic 23 | 24 | input_sequence = [a, b, c] # given a sequence of samples 25 | 26 | prev_bev = None 27 | for each in input_sequence[:-1]: 28 | prev_bev = eval_model(each, prev_bev) # inference only. 29 | 30 | model(input_sequence[-1], prev_bev) # train the last sample.
31 | ''' 32 | 33 | def __init__(self, 34 | model, 35 | eval_model=None, 36 | batch_processor=None, 37 | optimizer=None, 38 | work_dir=None, 39 | logger=None, 40 | meta=None, 41 | keys=['gt_bboxes_3d', 'gt_labels_3d', 'img'], 42 | max_iters=None, 43 | max_epochs=None): 44 | super().__init__(model, 45 | batch_processor, 46 | optimizer, 47 | work_dir, 48 | logger, 49 | meta, 50 | max_iters, 51 | max_epochs) 52 | keys.append('img_metas') 53 | self.keys = keys 54 | self.eval_model = eval_model 55 | self.eval_model.eval() 56 | 57 | def run_iter(self, data_batch, train_mode, **kwargs): 58 | if self.batch_processor is not None: 59 | assert False 60 | # outputs = self.batch_processor( 61 | # self.model, data_batch, train_mode=train_mode, **kwargs) 62 | elif train_mode: 63 | 64 | num_samples = data_batch['img'].data[0].size(1) 65 | data_list = [] 66 | prev_bev = None 67 | for i in range(num_samples): 68 | data = {} 69 | for key in self.keys: 70 | if key not in ['img_metas', 'img', 'points']: 71 | data[key] = data_batch[key] 72 | else: 73 | if key == 'img': 74 | data['img'] = DataContainer(data=[data_batch['img'].data[0][:, i]], cpu_only=data_batch['img'].cpu_only, stack=True) 75 | elif key == 'img_metas': 76 | data['img_metas'] = DataContainer(data=[[each[i] for each in data_batch['img_metas'].data[0]]], cpu_only=data_batch['img_metas'].cpu_only) 77 | else: 78 | assert False 79 | data_list.append(data) 80 | with torch.no_grad(): 81 | for i in range(num_samples-1): 82 | if data_list[i]['img_metas'].data[0][0]['prev_bev_exists']: 83 | data_list[i]['prev_bev'] = DataContainer(data=[prev_bev], cpu_only=False) 84 | prev_bev = self.eval_model.val_step(data_list[i], self.optimizer, **kwargs) 85 | if data_list[-1]['img_metas'].data[0][0]['prev_bev_exists']: 86 | data_list[-1]['prev_bev'] = DataContainer(data=[prev_bev], cpu_only=False) 87 | outputs = self.model.train_step(data_list[-1], self.optimizer, **kwargs) 88 | else: 89 | assert False 90 | # outputs = self.model.val_step(data_batch, self.optimizer, **kwargs) 91 | 92 | if not isinstance(outputs, dict): 93 | raise TypeError('"batch_processor()" or "model.train_step()"' 94 | 'and "model.val_step()" must return a dict') 95 | if 'log_vars' in outputs: 96 | self.log_buffer.update(outputs['log_vars'], outputs['num_samples']) 97 | self.outputs = outputs -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/__pycache__/util.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/4DVLab/OctreeOcc/54afdd64b210fa45a0b1e1f255e4f88739b4e5a8/projects/mmdet3d_plugin/core/bbox/__pycache__/util.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/assigners/__init__.py: -------------------------------------------------------------------------------- 1 | from .hungarian_assigner_3d import HungarianAssigner3D 2 | 3 | __all__ = ['HungarianAssigner3D'] 4 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/4DVLab/OctreeOcc/54afdd64b210fa45a0b1e1f255e4f88739b4e5a8/projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- 
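Distilled from EpochBasedRunner_video.run_iter above, the temporal training loop reduces to the following sketch (illustrative only, stripped of the DataContainer bookkeeping; eval_model is the frozen copy kept in sync by the TransferWeight hook, model is the trained network):

import torch

def run_clip(model, eval_model, frames):
    """frames: per-timestep inputs, oldest first; only the last frame is trained."""
    prev_bev = None
    with torch.no_grad():
        for frame in frames[:-1]:          # history frames: inference only
            prev_bev = eval_model(frame, prev_bev)
    return model(frames[-1], prev_bev)     # one training step on the last frame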
/projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/hungarian_assigner_3d.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/4DVLab/OctreeOcc/54afdd64b210fa45a0b1e1f255e4f88739b4e5a8/projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/hungarian_assigner_3d.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/coders/__init__.py: -------------------------------------------------------------------------------- 1 | from .nms_free_coder import NMSFreeCoder 2 | 3 | __all__ = ['NMSFreeCoder'] 4 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/coders/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/4DVLab/OctreeOcc/54afdd64b210fa45a0b1e1f255e4f88739b4e5a8/projects/mmdet3d_plugin/core/bbox/coders/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/coders/__pycache__/nms_free_coder.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/4DVLab/OctreeOcc/54afdd64b210fa45a0b1e1f255e4f88739b4e5a8/projects/mmdet3d_plugin/core/bbox/coders/__pycache__/nms_free_coder.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/coders/nms_free_coder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from mmdet.core.bbox import BaseBBoxCoder 4 | from mmdet.core.bbox.builder import BBOX_CODERS 5 | from projects.mmdet3d_plugin.core.bbox.util import denormalize_bbox 6 | import numpy as np 7 | 8 | 9 | @BBOX_CODERS.register_module() 10 | class NMSFreeCoder(BaseBBoxCoder): 11 | """Bbox coder for NMS-free detector. 12 | Args: 13 | pc_range (list[float]): Range of point cloud. 14 | post_center_range (list[float]): Limit of the center. 15 | Default: None. 16 | max_num (int): Max number to be kept. Default: 100. 17 | score_threshold (float): Threshold to filter boxes based on score. 18 | Default: None. 19 | code_size (int): Code size of bboxes. Default: 9 20 | """ 21 | 22 | def __init__(self, 23 | pc_range, 24 | voxel_size=None, 25 | post_center_range=None, 26 | max_num=100, 27 | score_threshold=None, 28 | num_classes=10): 29 | self.pc_range = pc_range 30 | self.voxel_size = voxel_size 31 | self.post_center_range = post_center_range 32 | self.max_num = max_num 33 | self.score_threshold = score_threshold 34 | self.num_classes = num_classes 35 | 36 | def encode(self): 37 | 38 | pass 39 | 40 | def decode_single(self, cls_scores, bbox_preds): 41 | """Decode bboxes. 42 | Args: 43 | cls_scores (Tensor): Outputs from the classification head, \ 44 | shape [num_query, cls_out_channels]. Note \ 45 | cls_out_channels should include background. 46 | bbox_preds (Tensor): Outputs from the regression \ 47 | head with normalized coordinate format (cx, cy, w, l, cz, h, rot_sine, rot_cosine, vx, vy). \ 48 | Shape [num_query, 9]. 49 | Returns: 50 | list[dict]: Decoded boxes.
51 | """ 52 | max_num = self.max_num 53 | 54 | cls_scores = cls_scores.sigmoid() 55 | scores, indexs = cls_scores.view(-1).topk(max_num) 56 | labels = indexs % self.num_classes 57 | bbox_index = indexs // self.num_classes 58 | bbox_preds = bbox_preds[bbox_index] 59 | 60 | final_box_preds = denormalize_bbox(bbox_preds, self.pc_range) 61 | final_scores = scores 62 | final_preds = labels 63 | 64 | # use score threshold 65 | if self.score_threshold is not None: 66 | thresh_mask = final_scores > self.score_threshold 67 | tmp_score = self.score_threshold 68 | while thresh_mask.sum() == 0: 69 | tmp_score *= 0.9 70 | if tmp_score < 0.01: 71 | thresh_mask = final_scores > -1 72 | break 73 | thresh_mask = final_scores >= tmp_score 74 | 75 | if self.post_center_range is not None: 76 | self.post_center_range = torch.tensor( 77 | self.post_center_range, device=scores.device) 78 | mask = (final_box_preds[..., :3] >= 79 | self.post_center_range[:3]).all(1) 80 | mask &= (final_box_preds[..., :3] <= 81 | self.post_center_range[3:]).all(1) 82 | 83 | if self.score_threshold: 84 | mask &= thresh_mask 85 | 86 | boxes3d = final_box_preds[mask] 87 | scores = final_scores[mask] 88 | 89 | labels = final_preds[mask] 90 | predictions_dict = { 91 | 'bboxes': boxes3d, 92 | 'scores': scores, 93 | 'labels': labels 94 | } 95 | 96 | else: 97 | raise NotImplementedError( 98 | 'Need to reorganize output as a batch, only ' 99 | 'support post_center_range is not None for now!') 100 | return predictions_dict 101 | 102 | def decode(self, preds_dicts): 103 | """Decode bboxes. 104 | Args: 105 | all_cls_scores (Tensor): Outputs from the classification head, \ 106 | shape [nb_dec, bs, num_query, cls_out_channels]. Note \ 107 | cls_out_channels should includes background. 108 | all_bbox_preds (Tensor): Sigmoid outputs from the regression \ 109 | head with normalized coordinate format (cx, cy, w, l, cz, h, rot_sine, rot_cosine, vx, vy). \ 110 | Shape [nb_dec, bs, num_query, 9]. 111 | Returns: 112 | list[dict]: Decoded boxes. 
113 | """ 114 | all_cls_scores = preds_dicts['all_cls_scores'][-1] 115 | all_bbox_preds = preds_dicts['all_bbox_preds'][-1] 116 | 117 | batch_size = all_cls_scores.size()[0] 118 | predictions_list = [] 119 | for i in range(batch_size): 120 | predictions_list.append(self.decode_single(all_cls_scores[i], all_bbox_preds[i])) 121 | return predictions_list 122 | 123 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/match_costs/__init__.py: -------------------------------------------------------------------------------- 1 | from mmdet.core.bbox.match_costs import build_match_cost 2 | from .match_cost import BBox3DL1Cost, SmoothL1Cost 3 | 4 | __all__ = ['build_match_cost', 'BBox3DL1Cost','SmoothL1Cost'] -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/match_costs/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/4DVLab/OctreeOcc/54afdd64b210fa45a0b1e1f255e4f88739b4e5a8/projects/mmdet3d_plugin/core/bbox/match_costs/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/match_costs/__pycache__/match_cost.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/4DVLab/OctreeOcc/54afdd64b210fa45a0b1e1f255e4f88739b4e5a8/projects/mmdet3d_plugin/core/bbox/match_costs/__pycache__/match_cost.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/match_costs/match_cost.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from mmdet.core.bbox.match_costs.builder import MATCH_COST 3 | import mmcv 4 | 5 | 6 | @MATCH_COST.register_module() 7 | class BBox3DL1Cost(object): 8 | """BBox3DL1Cost. 9 | Args: 10 | weight (int | float, optional): loss_weight 11 | """ 12 | 13 | def __init__(self, weight=1.): 14 | self.weight = weight 15 | 16 | def __call__(self, bbox_pred, gt_bboxes): 17 | """ 18 | Args: 19 | bbox_pred (Tensor): Predicted boxes with normalized coordinates 20 | (cx, cy, w, h), which are all in range [0, 1]. Shape 21 | [num_query, 4]. 22 | gt_bboxes (Tensor): Ground truth boxes with normalized 23 | coordinates (x1, y1, x2, y2). Shape [num_gt, 4]. 24 | Returns: 25 | torch.Tensor: bbox_cost value with weight 26 | """ 27 | bbox_cost = torch.cdist(bbox_pred, gt_bboxes, p=1) 28 | return bbox_cost * self.weight 29 | 30 | @mmcv.jit(derivate=True, coderize=True) 31 | #@weighted_loss 32 | def smooth_l1_loss(pred, target, beta=1.0): 33 | """Smooth L1 loss. 34 | Args: 35 | pred (torch.Tensor): The prediction. 36 | target (torch.Tensor): The learning target of the prediction. 37 | beta (float, optional): The threshold in the piecewise function. 38 | Defaults to 1.0. 39 | Returns: 40 | torch.Tensor: Calculated loss 41 | """ 42 | assert beta > 0 43 | if target.numel() == 0: 44 | return pred.sum() * 0 45 | 46 | # assert pred.size() == target.size() 47 | diff = torch.abs(pred - target) 48 | loss = torch.where(diff < beta, 0.5 * diff * diff / beta, 49 | diff - 0.5 * beta) 50 | return loss.sum(-1) 51 | 52 | 53 | @MATCH_COST.register_module() 54 | class SmoothL1Cost(object): 55 | """SmoothL1Cost. 
56 | Args: 57 | weight (int | float, optional): loss weight 58 | 59 | Examples: 60 | >>> from projects.mmdet3d_plugin.core.bbox.match_costs.match_cost import SmoothL1Cost 61 | >>> import torch 62 | >>> self = SmoothL1Cost() 63 | >>> bboxes = torch.FloatTensor([[1, 1, 2, 2], [2, 2, 3, 4]]) 64 | >>> gt_bboxes = torch.FloatTensor([[0, 0, 2, 4], [1, 2, 3, 4]]) 65 | >>> self(bboxes, gt_bboxes) 66 | tensor([[2.5000, 2.5000], 67 | [3.5000, 0.5000]]) 68 | """ 69 | 70 | def __init__(self, weight=1.): 71 | self.weight = weight 72 | 73 | def __call__(self, input, target): 74 | """ 75 | Args: 76 | input (Tensor): Predicted boxes with unnormalized coordinates 77 | (x1, y1, x2, y2). Shape [num_query, 4]. 78 | target (Tensor): Ground truth boxes with unnormalized 79 | coordinates (x1, y1, x2, y2). Shape [num_gt, 4]. 80 | 81 | Returns: 82 | torch.Tensor: smooth L1 cost value with weight 83 | """ 84 | N1, C = input.shape 85 | N2, C = target.shape 86 | input = input.contiguous().view(N1, C)[:, None, :] 87 | target = target.contiguous().view(N2, C)[None, :, :] 88 | cost = smooth_l1_loss(input, target) 89 | 90 | return cost * self.weight -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/bbox/util.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def normalize_bbox(bboxes, pc_range): 5 | 6 | cx = bboxes[..., 0:1] 7 | cy = bboxes[..., 1:2] 8 | cz = bboxes[..., 2:3] 9 | w = bboxes[..., 3:4].log() 10 | l = bboxes[..., 4:5].log() 11 | h = bboxes[..., 5:6].log() 12 | 13 | rot = bboxes[..., 6:7] 14 | if bboxes.size(-1) > 7: 15 | vx = bboxes[..., 7:8] 16 | vy = bboxes[..., 8:9] 17 | normalized_bboxes = torch.cat( 18 | (cx, cy, w, l, cz, h, rot.sin(), rot.cos(), vx, vy), dim=-1 19 | ) 20 | else: 21 | normalized_bboxes = torch.cat( 22 | (cx, cy, w, l, cz, h, rot.sin(), rot.cos()), dim=-1 23 | ) 24 | return normalized_bboxes 25 | 26 | def denormalize_bbox(normalized_bboxes, pc_range): 27 | # rotation 28 | rot_sine = normalized_bboxes[..., 6:7] 29 | 30 | rot_cosine = normalized_bboxes[..., 7:8] 31 | rot = torch.atan2(rot_sine, rot_cosine) 32 | 33 | # center in the bev 34 | cx = normalized_bboxes[..., 0:1] 35 | cy = normalized_bboxes[..., 1:2] 36 | cz = normalized_bboxes[..., 4:5] 37 | 38 | # size 39 | w = normalized_bboxes[..., 2:3] 40 | l = normalized_bboxes[..., 3:4] 41 | h = normalized_bboxes[..., 5:6] 42 | 43 | w = w.exp() 44 | l = l.exp() 45 | h = h.exp() 46 | if normalized_bboxes.size(-1) > 8: 47 | # velocity 48 | vx = normalized_bboxes[:, 8:9] 49 | vy = normalized_bboxes[:, 9:10] 50 | denormalized_bboxes = torch.cat([cx, cy, cz, w, l, h, rot, vx, vy], dim=-1) 51 | else: 52 | denormalized_bboxes = torch.cat([cx, cy, cz, w, l, h, rot], dim=-1) 53 | return denormalized_bboxes -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .eval_hooks import CustomDistEvalHook -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/evaluation/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/4DVLab/OctreeOcc/54afdd64b210fa45a0b1e1f255e4f88739b4e5a8/projects/mmdet3d_plugin/core/evaluation/__pycache__/__init__.cpython-37.pyc --------------------------------------------------------------------------------
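A quick round-trip check for normalize_bbox/denormalize_bbox above (a sketch, not repo code): box sizes pass through log/exp and yaw through sin/cos followed by atan2, so a 9-DoF box with yaw in (-pi, pi] should survive the cycle up to floating-point error. Note that neither function actually reads pc_range:

import torch
from projects.mmdet3d_plugin.core.bbox.util import normalize_bbox, denormalize_bbox

# (cx, cy, cz, w, l, h, rot, vx, vy)
box = torch.tensor([[12.3, -4.5, 0.8, 1.9, 4.6, 1.7, 0.4, 2.0, -1.0]])
encoded = normalize_bbox(box, None)        # 10-dim: log sizes, rot -> (sin, cos)
decoded = denormalize_bbox(encoded, None)  # back to the 9-dim box
assert torch.allclose(box, decoded, atol=1e-5)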
/projects/mmdet3d_plugin/core/evaluation/__pycache__/eval_hooks.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/4DVLab/OctreeOcc/54afdd64b210fa45a0b1e1f255e4f88739b4e5a8/projects/mmdet3d_plugin/core/evaluation/__pycache__/eval_hooks.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/core/evaluation/eval_hooks.py: -------------------------------------------------------------------------------- 1 | 2 | # Note: Considering that MMCV's EvalHook updated its interface in V1.3.16, 3 | # in order to avoid strong version dependency, we did not directly 4 | # inherit EvalHook but BaseDistEvalHook. 5 | 6 | import bisect 7 | import os.path as osp 8 | 9 | import mmcv 10 | import torch.distributed as dist 11 | from mmcv.runner import DistEvalHook as BaseDistEvalHook 12 | from mmcv.runner import EvalHook as BaseEvalHook 13 | from torch.nn.modules.batchnorm import _BatchNorm 14 | from mmdet.core.evaluation.eval_hooks import DistEvalHook 15 | 16 | 17 | def _calc_dynamic_intervals(start_interval, dynamic_interval_list): 18 | assert mmcv.is_list_of(dynamic_interval_list, tuple) 19 | 20 | dynamic_milestones = [0] 21 | dynamic_milestones.extend( 22 | [dynamic_interval[0] for dynamic_interval in dynamic_interval_list]) 23 | dynamic_intervals = [start_interval] 24 | dynamic_intervals.extend( 25 | [dynamic_interval[1] for dynamic_interval in dynamic_interval_list]) 26 | return dynamic_milestones, dynamic_intervals 27 | 28 | 29 | class CustomDistEvalHook(BaseDistEvalHook): 30 | 31 | def __init__(self, *args, dynamic_intervals=None, **kwargs): 32 | super(CustomDistEvalHook, self).__init__(*args, **kwargs) 33 | self.use_dynamic_intervals = dynamic_intervals is not None 34 | if self.use_dynamic_intervals: 35 | self.dynamic_milestones, self.dynamic_intervals = \ 36 | _calc_dynamic_intervals(self.interval, dynamic_intervals) 37 | 38 | def _decide_interval(self, runner): 39 | if self.use_dynamic_intervals: 40 | progress = runner.epoch if self.by_epoch else runner.iter 41 | step = bisect.bisect(self.dynamic_milestones, (progress + 1)) 42 | # Dynamically modify the evaluation interval 43 | self.interval = self.dynamic_intervals[step - 1] 44 | 45 | def before_train_epoch(self, runner): 46 | """Evaluate the model only at the start of training by epoch.""" 47 | self._decide_interval(runner) 48 | super().before_train_epoch(runner) 49 | 50 | def before_train_iter(self, runner): 51 | self._decide_interval(runner) 52 | super().before_train_iter(runner) 53 | 54 | def _do_evaluate(self, runner): 55 | """perform evaluation and save ckpt.""" 56 | # Synchronization of BatchNorm's buffer (running_mean 57 | # and running_var) is not supported in the DDP of pytorch, 58 | # which may cause the inconsistent performance of models in 59 | # different ranks, so we broadcast BatchNorm's buffers 60 | # of rank 0 to other ranks to avoid this. 
61 | if self.broadcast_bn_buffer: 62 | model = runner.model 63 | for name, module in model.named_modules(): 64 | if isinstance(module, 65 | _BatchNorm) and module.track_running_stats: 66 | dist.broadcast(module.running_var, 0) 67 | dist.broadcast(module.running_mean, 0) 68 | 69 | if not self._should_evaluate(runner): 70 | return 71 | 72 | tmpdir = self.tmpdir 73 | if tmpdir is None: 74 | tmpdir = osp.join(runner.work_dir, '.eval_hook') 75 | 76 | from projects.mmdet3d_plugin.bevformer.apis.test_dense import custom_multi_gpu_test # to avoid a circular import 77 | 78 | results = custom_multi_gpu_test( 79 | runner.model, 80 | self.dataloader, 81 | tmpdir=tmpdir, 82 | gpu_collect=self.gpu_collect) 83 | if runner.rank == 0: 84 | print('\n') 85 | runner.log_buffer.output['eval_iter_num'] = len(self.dataloader) 86 | 87 | # key_score = self.evaluate(runner, results) 88 | self.dataloader.dataset.evaluate_miou(results, 89 | runner=runner) 90 | 91 | # if self.save_best: 92 | # self._save_ckpt(runner, key_score) 93 | 94 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .nuscenes_dataset import CustomNuScenesDataset 2 | from .nuscenes_dataset_lidarseg import LidarSegNuScenesDataset 3 | from .nuscenes_occ import NuSceneOcc 4 | from .nuscenes_dataset_occ import NuScenesOcc 5 | from .builder import custom_build_dataset 6 | 7 | __all__ = [ 8 | 'CustomNuScenesDataset','LidarSegNuScenesDataset' 9 | ] 10 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | from .transform_3d import ( 2 | PadMultiViewImage, NormalizeMultiviewImage,ResizeCropFlipImage,RandomMultiScaleImageMultiViewImage, 3 | PhotoMetricDistortionMultiViewImage, CustomCollect3D, RandomScaleImageMultiViewImage) 4 | from .formating import CustomDefaultFormatBundle3D 5 | from .loading import LoadDenseLabel, LoadMultiViewDepthFromFiles, LoadSegPriorFromFile 6 | __all__ = [ 7 | 'PadMultiViewImage', 'NormalizeMultiviewImage', 'ResizeCropFlipImage','RandomMultiScaleImageMultiViewImage','LoadDenseLabel', 8 | 'PhotoMetricDistortionMultiViewImage', 'CustomDefaultFormatBundle3D', 'CustomCollect3D', 'RandomScaleImageMultiViewImage', 9 | 'LoadMultiViewDepthFromFiles', 'LoadSegPriorFromFile' 10 | ] -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/pipelines/compose.py: -------------------------------------------------------------------------------- 1 | import collections 2 | 3 | from mmcv.utils import build_from_cfg 4 | 5 | from mmdet.datasets.builder import PIPELINES 6 | 7 | @PIPELINES.register_module() 8 | class CustomCompose: 9 | """Compose multiple transforms sequentially. 10 | Args: 11 | transforms (Sequence[dict | callable]): Sequence of transform objects or 12 | config dicts to be composed.
13 | """ 14 | def __init__(self, transforms): 15 | assert isinstance(transforms, collections.abc.Sequence) 16 | self.transforms = [] 17 | for transform in transforms: 18 | if isinstance(transform, dict): 19 | transform = build_from_cfg(transform, PIPELINES) 20 | self.transforms.append(transform) 21 | elif callable(transform): 22 | self.transforms.append(transform) 23 | else: 24 | raise TypeError('transform must be callable or a dict') 25 | 26 | def __call__(self, data, seed=0): 27 | """Call function to apply transforms sequentially. 28 | Args: 29 | data (dict): A result dict contains the data to transform. 30 | Returns: 31 | dict: Transformed data. 32 | """ 33 | 34 | for t in self.transforms: 35 | 36 | if hasattr(t, 'seed'): 37 | data = t(data, seed=seed) 38 | else: 39 | data = t(data) 40 | 41 | if data is None: 42 | return None 43 | return data 44 | 45 | def __repr__(self): 46 | format_string = self.__class__.__name__ + '(' 47 | for t in self.transforms: 48 | str_ = t.__repr__() 49 | if 'Compose(' in str_: 50 | str_ = str_.replace('\n', '\n ') 51 | format_string += '\n' 52 | format_string += f' {str_}' 53 | format_string += '\n)' 54 | return format_string -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/pipelines/formating.py: -------------------------------------------------------------------------------- 1 | 2 | # Copyright (c) OpenMMLab. All rights reserved. 3 | import numpy as np 4 | from mmcv.parallel import DataContainer as DC 5 | 6 | from mmdet3d.core.bbox import BaseInstance3DBoxes 7 | from mmdet3d.core.points import BasePoints 8 | from mmdet.datasets.builder import PIPELINES 9 | from mmdet.datasets.pipelines import to_tensor 10 | from mmdet3d.datasets.pipelines import DefaultFormatBundle3D 11 | 12 | @PIPELINES.register_module() 13 | class CustomDefaultFormatBundle3D(DefaultFormatBundle3D): 14 | """Default formatting bundle. 15 | It simplifies the pipeline of formatting common fields for voxels, 16 | including "proposals", "gt_bboxes", "gt_labels", "gt_masks" and 17 | "gt_semantic_seg". 18 | These fields are formatted as follows. 19 | - img: (1)transpose, (2)to tensor, (3)to DataContainer (stack=True) 20 | - proposals: (1)to tensor, (2)to DataContainer 21 | - gt_bboxes: (1)to tensor, (2)to DataContainer 22 | - gt_bboxes_ignore: (1)to tensor, (2)to DataContainer 23 | - gt_labels: (1)to tensor, (2)to DataContainer 24 | """ 25 | 26 | def __call__(self, results): 27 | """Call function to transform and format common fields in results. 28 | Args: 29 | results (dict): Result dict contains the data to convert. 30 | Returns: 31 | dict: The result dict contains the data that is formatted with 32 | default bundle. 
33 | """ 34 | # Format 3D data 35 | results = super(CustomDefaultFormatBundle3D, self).__call__(results) 36 | if "gt_map_masks" in results.keys(): 37 | results['gt_map_masks'] = DC( 38 | to_tensor(results['gt_map_masks']), stack=True) 39 | if "dpt" in results.keys(): 40 | dpts = results['dpt'] 41 | dpts = np.ascontiguousarray(np.stack(dpts, axis=0))[:,None,:,:] 42 | results['dpt'] = DC(to_tensor(dpts), stack=True) 43 | if "seg_gt" in results.keys(): 44 | seg_gt = results['seg_gt'] 45 | seg_gt = np.ascontiguousarray(np.stack(seg_gt, axis=0)) 46 | results['seg_gt'] = DC(to_tensor(seg_gt), stack=True) 47 | return results 48 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from .group_sampler import DistributedGroupSampler 2 | from .distributed_sampler import DistributedSampler 3 | from .sampler import SAMPLER, build_sampler 4 | 5 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/samplers/distributed_sampler.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | from torch.utils.data import DistributedSampler as _DistributedSampler 5 | from .sampler import SAMPLER 6 | 7 | 8 | @SAMPLER.register_module() 9 | class DistributedSampler(_DistributedSampler): 10 | 11 | def __init__(self, 12 | dataset=None, 13 | num_replicas=None, 14 | rank=None, 15 | shuffle=True, 16 | seed=0): 17 | super().__init__( 18 | dataset, num_replicas=num_replicas, rank=rank, shuffle=shuffle) 19 | # for the compatibility from PyTorch 1.3+ 20 | self.seed = seed if seed is not None else 0 21 | 22 | def __iter__(self): 23 | # deterministically shuffle based on epoch 24 | if self.shuffle: 25 | assert False 26 | else: 27 | indices = torch.arange(len(self.dataset)).tolist() 28 | 29 | # add extra samples to make it evenly divisible 30 | # in case that indices is shorter than half of total_size 31 | indices = (indices * 32 | math.ceil(self.total_size / len(indices)))[:self.total_size] 33 | assert len(indices) == self.total_size 34 | 35 | # subsample 36 | per_replicas = self.total_size//self.num_replicas 37 | # indices = indices[self.rank:self.total_size:self.num_replicas] 38 | indices = indices[self.rank*per_replicas:(self.rank+1)*per_replicas] 39 | assert len(indices) == self.num_samples 40 | 41 | return iter(indices) 42 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/samplers/group_sampler.py: -------------------------------------------------------------------------------- 1 | 2 | # Copyright (c) OpenMMLab. All rights reserved. 3 | import math 4 | 5 | import numpy as np 6 | import torch 7 | from mmcv.runner import get_dist_info 8 | from torch.utils.data import Sampler 9 | from .sampler import SAMPLER 10 | import random 11 | from IPython import embed 12 | 13 | 14 | @SAMPLER.register_module() 15 | class DistributedGroupSampler(Sampler): 16 | """Sampler that restricts data loading to a subset of the dataset. 17 | It is especially useful in conjunction with 18 | :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each 19 | process can pass a DistributedSampler instance as a DataLoader sampler, 20 | and load a subset of the original dataset that is exclusive to it. 21 | .. note:: 22 | Dataset is assumed to be of constant size. 
23 | Arguments: 24 | dataset: Dataset used for sampling. 25 | num_replicas (optional): Number of processes participating in 26 | distributed training. 27 | rank (optional): Rank of the current process within num_replicas. 28 | seed (int, optional): random seed used to shuffle the sampler if 29 | ``shuffle=True``. This number should be identical across all 30 | processes in the distributed group. Default: 0. 31 | """ 32 | 33 | def __init__(self, 34 | dataset, 35 | samples_per_gpu=1, 36 | num_replicas=None, 37 | rank=None, 38 | seed=0): 39 | _rank, _num_replicas = get_dist_info() 40 | if num_replicas is None: 41 | num_replicas = _num_replicas 42 | if rank is None: 43 | rank = _rank 44 | self.dataset = dataset 45 | self.samples_per_gpu = samples_per_gpu 46 | self.num_replicas = num_replicas 47 | self.rank = rank 48 | self.epoch = 0 49 | self.seed = seed if seed is not None else 0 50 | 51 | assert hasattr(self.dataset, 'flag') 52 | self.flag = self.dataset.flag 53 | self.group_sizes = np.bincount(self.flag) 54 | 55 | self.num_samples = 0 56 | for i, j in enumerate(self.group_sizes): 57 | self.num_samples += int( 58 | math.ceil(self.group_sizes[i] * 1.0 / self.samples_per_gpu / 59 | self.num_replicas)) * self.samples_per_gpu 60 | self.total_size = self.num_samples * self.num_replicas 61 | 62 | def __iter__(self): 63 | # deterministically shuffle based on epoch 64 | g = torch.Generator() 65 | g.manual_seed(self.epoch + self.seed) 66 | 67 | indices = [] 68 | for i, size in enumerate(self.group_sizes): 69 | if size > 0: 70 | indice = np.where(self.flag == i)[0] 71 | assert len(indice) == size 72 | # add .numpy() to avoid bug when selecting indice in parrots. 73 | # TODO: check whether torch.randperm() can be replaced by 74 | # numpy.random.permutation(). 75 | indice = indice[list( 76 | torch.randperm(int(size), generator=g).numpy())].tolist() 77 | extra = int( 78 | math.ceil( 79 | size * 1.0 / self.samples_per_gpu / self.num_replicas) 80 | ) * self.samples_per_gpu * self.num_replicas - len(indice) 81 | # pad indice 82 | tmp = indice.copy() 83 | for _ in range(extra // size): 84 | indice.extend(tmp) 85 | indice.extend(tmp[:extra % size]) 86 | indices.extend(indice) 87 | 88 | assert len(indices) == self.total_size 89 | 90 | indices = [ 91 | indices[j] for i in list( 92 | torch.randperm( 93 | len(indices) // self.samples_per_gpu, generator=g)) 94 | for j in range(i * self.samples_per_gpu, (i + 1) * 95 | self.samples_per_gpu) 96 | ] 97 | 98 | # subsample 99 | offset = self.num_samples * self.rank 100 | indices = indices[offset:offset + self.num_samples] 101 | assert len(indices) == self.num_samples 102 | 103 | return iter(indices) 104 | 105 | def __len__(self): 106 | return self.num_samples 107 | 108 | def set_epoch(self, epoch): 109 | self.epoch = epoch 110 | 111 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/datasets/samplers/sampler.py: -------------------------------------------------------------------------------- 1 | from mmcv.utils.registry import Registry, build_from_cfg 2 | 3 | SAMPLER = Registry('sampler') 4 | 5 | 6 | def build_sampler(cfg, default_args): 7 | return build_from_cfg(cfg, SAMPLER, default_args) 8 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .vovnet import VoVNet 2 | from .internv2_impl16 import InternV2Impl16 3 | from .sam_modeling import 
ImageEncoderViT 4 | 5 | __all__ = ['VoVNet', "InternV2Impl16", "ImageEncoderViT"] -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/backbones/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/4DVLab/OctreeOcc/54afdd64b210fa45a0b1e1f255e4f88739b4e5a8/projects/mmdet3d_plugin/models/backbones/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/backbones/__pycache__/internv2_impl16.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/4DVLab/OctreeOcc/54afdd64b210fa45a0b1e1f255e4f88739b4e5a8/projects/mmdet3d_plugin/models/backbones/__pycache__/internv2_impl16.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/backbones/__pycache__/vovnet.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/4DVLab/OctreeOcc/54afdd64b210fa45a0b1e1f255e4f88739b4e5a8/projects/mmdet3d_plugin/models/backbones/__pycache__/vovnet.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/backbones/sam_modeling/__init__.py: -------------------------------------------------------------------------------- 1 | from .image_encoder import ImageEncoderViT -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/backbones/sam_modeling/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/4DVLab/OctreeOcc/54afdd64b210fa45a0b1e1f255e4f88739b4e5a8/projects/mmdet3d_plugin/models/backbones/sam_modeling/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/backbones/sam_modeling/__pycache__/common.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/4DVLab/OctreeOcc/54afdd64b210fa45a0b1e1f255e4f88739b4e5a8/projects/mmdet3d_plugin/models/backbones/sam_modeling/__pycache__/common.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/backbones/sam_modeling/__pycache__/image_encoder.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/4DVLab/OctreeOcc/54afdd64b210fa45a0b1e1f255e4f88739b4e5a8/projects/mmdet3d_plugin/models/backbones/sam_modeling/__pycache__/image_encoder.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/backbones/sam_modeling/common.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | import torch 8 | import torch.nn as nn 9 | 10 | from typing import Type 11 | 12 | 13 | class MLPBlock(nn.Module): 14 | def __init__( 15 | self, 16 | embedding_dim: int, 17 | mlp_dim: int, 18 | act: Type[nn.Module] = nn.GELU, 19 | ) -> None: 20 | super().__init__() 21 | self.lin1 = nn.Linear(embedding_dim, mlp_dim) 22 | self.lin2 = nn.Linear(mlp_dim, embedding_dim) 23 | self.act = act() 24 | 25 | def forward(self, x: torch.Tensor) -> torch.Tensor: 26 | return self.lin2(self.act(self.lin1(x))) 27 | 28 | 29 | # From https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/batch_norm.py # noqa 30 | # Itself from https://github.com/facebookresearch/ConvNeXt/blob/d1fa8f6fef0a165b27399986cc2bdacc92777e40/models/convnext.py#L119 # noqa 31 | class LayerNorm2d(nn.Module): 32 | def __init__(self, num_channels: int, eps: float = 1e-6) -> None: 33 | super().__init__() 34 | self.weight = nn.Parameter(torch.ones(num_channels)) 35 | self.bias = nn.Parameter(torch.zeros(num_channels)) 36 | self.eps = eps 37 | 38 | def forward(self, x: torch.Tensor) -> torch.Tensor: 39 | u = x.mean(1, keepdim=True) 40 | s = (x - u).pow(2).mean(1, keepdim=True) 41 | x = (x - u) / torch.sqrt(s + self.eps) 42 | x = self.weight[:, None, None] * x + self.bias[:, None, None] 43 | return x -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/hooks/__init__.py: -------------------------------------------------------------------------------- 1 | from .hooks import GradChecker -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/hooks/hooks.py: -------------------------------------------------------------------------------- 1 | from mmcv.runner.hooks.hook import HOOKS, Hook 2 | from projects.mmdet3d_plugin.models.utils import run_time 3 | 4 | 5 | @HOOKS.register_module() 6 | class GradChecker(Hook): 7 | 8 | def after_train_iter(self, runner): 9 | for key, val in runner.model.named_parameters(): 10 | if val.grad is None and val.requires_grad: 11 | print('WARNING: the parameters of {key} received no gradient and were not used!'.format(key=key)) 12 | 13 | 14 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .lovasz_softmax import * 2 | from .dice_loss import * 3 | from .nusc_param import * 4 | from .semkitti import * 5 | from .focal_loss import CustomFocalLoss 6 | from .lovasz_losses import Lovasz3DLoss -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/losses/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/4DVLab/OctreeOcc/54afdd64b210fa45a0b1e1f255e4f88739b4e5a8/projects/mmdet3d_plugin/models/losses/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/losses/__pycache__/dice_loss.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/4DVLab/OctreeOcc/54afdd64b210fa45a0b1e1f255e4f88739b4e5a8/projects/mmdet3d_plugin/models/losses/__pycache__/dice_loss.cpython-37.pyc --------------------------------------------------------------------------------
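Because GradChecker (hooks.py above) registers itself with mmcv's HOOKS registry via @HOOKS.register_module(), it can be switched on from a training config rather than constructed by hand. A minimal sketch, assuming a standard mmcv custom_hooks entry; this snippet is illustrative and not taken from this repo's configs:

# Hypothetical config fragment: with the plugin imported, the mmcv runner
# builds GradChecker from this entry and invokes after_train_iter after every
# training iteration, printing any trainable parameter that received no gradient.
custom_hooks = [
    dict(type='GradChecker', priority='HIGHEST'),
]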
/projects/mmdet3d_plugin/models/losses/__pycache__/focal_loss.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/4DVLab/OctreeOcc/54afdd64b210fa45a0b1e1f255e4f88739b4e5a8/projects/mmdet3d_plugin/models/losses/__pycache__/focal_loss.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/losses/__pycache__/lovasz_losses.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/4DVLab/OctreeOcc/54afdd64b210fa45a0b1e1f255e4f88739b4e5a8/projects/mmdet3d_plugin/models/losses/__pycache__/lovasz_losses.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/losses/__pycache__/lovasz_softmax.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/4DVLab/OctreeOcc/54afdd64b210fa45a0b1e1f255e4f88739b4e5a8/projects/mmdet3d_plugin/models/losses/__pycache__/lovasz_softmax.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/losses/__pycache__/nusc_param.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/4DVLab/OctreeOcc/54afdd64b210fa45a0b1e1f255e4f88739b4e5a8/projects/mmdet3d_plugin/models/losses/__pycache__/nusc_param.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/losses/__pycache__/semkitti.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/4DVLab/OctreeOcc/54afdd64b210fa45a0b1e1f255e4f88739b4e5a8/projects/mmdet3d_plugin/models/losses/__pycache__/semkitti.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/opt/__init__.py: -------------------------------------------------------------------------------- 1 | from .adamw import AdamW2 -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/opt/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/4DVLab/OctreeOcc/54afdd64b210fa45a0b1e1f255e4f88739b4e5a8/projects/mmdet3d_plugin/models/opt/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/opt/__pycache__/adamw.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/4DVLab/OctreeOcc/54afdd64b210fa45a0b1e1f255e4f88739b4e5a8/projects/mmdet3d_plugin/models/opt/__pycache__/adamw.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .bricks import run_time 3 | from .grid_mask import GridMask 4 | from .position_embedding import RelPositionEmbedding,Learned3DPositionalEncoding 5 | from .visual import save_tensor 6 | from .bev_visualize import heatmap -------------------------------------------------------------------------------- 
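The run_time helper re-exported in utils/__init__.py above (and defined in bricks.py below) wraps a function with CUDA-synchronized wall-clock timing and prints a running per-call average. A minimal usage sketch, assuming a CUDA-capable machine; dummy_forward is an illustrative stand-in, not a function from this repo:

import torch
from projects.mmdet3d_plugin.models.utils import run_time

@run_time('demo')
def dummy_forward(x):
    # stand-in for the model component being profiled
    return x @ x

if torch.cuda.is_available():
    x = torch.randn(512, 512, device='cuda')
    dummy_forward(x)  # prints the average runtime of 'demo : dummy_forward'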
/projects/mmdet3d_plugin/models/utils/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/4DVLab/OctreeOcc/54afdd64b210fa45a0b1e1f255e4f88739b4e5a8/projects/mmdet3d_plugin/models/utils/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/__pycache__/bev_visualize.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/4DVLab/OctreeOcc/54afdd64b210fa45a0b1e1f255e4f88739b4e5a8/projects/mmdet3d_plugin/models/utils/__pycache__/bev_visualize.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/__pycache__/bricks.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/4DVLab/OctreeOcc/54afdd64b210fa45a0b1e1f255e4f88739b4e5a8/projects/mmdet3d_plugin/models/utils/__pycache__/bricks.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/__pycache__/grid_mask.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/4DVLab/OctreeOcc/54afdd64b210fa45a0b1e1f255e4f88739b4e5a8/projects/mmdet3d_plugin/models/utils/__pycache__/grid_mask.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/__pycache__/position_embedding.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/4DVLab/OctreeOcc/54afdd64b210fa45a0b1e1f255e4f88739b4e5a8/projects/mmdet3d_plugin/models/utils/__pycache__/position_embedding.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/__pycache__/visual.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/4DVLab/OctreeOcc/54afdd64b210fa45a0b1e1f255e4f88739b4e5a8/projects/mmdet3d_plugin/models/utils/__pycache__/visual.cpython-37.pyc -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/bev_visualize.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | def heatmap(bev_embed, bev_h, bev_w, name): 5 | """ 6 | bev_embed: [bev_h*bev_w, 1, 256], reshaped below to [256, bev_h, bev_w] 7 | """ 8 | bev_feat = bev_embed.squeeze(1).permute(1, 0).view(256, bev_h, bev_w) 9 | 10 | indx = bev_feat.detach().cpu().numpy() 11 | heatmap = np.linalg.norm(indx, ord=2, axis=0) 12 | heatmap = (heatmap - np.min(heatmap)) / (np.max(heatmap) - np.min(heatmap)) 13 | 14 | heatmap = np.uint8(255 * heatmap) # 255, not 256: the normalized maximum would overflow uint8 15 | heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET) 16 | 17 | path = '/home/yuqi_wang/code/Occupancy/work_dirs/visualize/heatmap_'+name+'.png' 18 | cv2.imwrite(path, heatmap) -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/bricks.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import time 3 | from collections import defaultdict 4 | import torch 5 | time_maps = defaultdict(lambda :0.)
6 | count_maps = defaultdict(lambda :0.) 7 | def run_time(name): 8 | def middle(fn): 9 | def wrapper(*args, **kwargs): 10 | torch.cuda.synchronize() 11 | start = time.time() 12 | res = fn(*args, **kwargs) 13 | torch.cuda.synchronize() 14 | time_maps['%s : %s' % (name, fn.__name__)] += time.time() - start 15 | count_maps['%s : %s' % (name, fn.__name__)] += 1 16 | print("%s : %s takes %f s on average" % (name, fn.__name__, time_maps['%s : %s' % (name, fn.__name__)] / count_maps['%s : %s' % (name, fn.__name__)])) 17 | return res 18 | return wrapper 19 | return middle 20 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/occupied_prob.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/4DVLab/OctreeOcc/54afdd64b210fa45a0b1e1f255e4f88739b4e5a8/projects/mmdet3d_plugin/models/utils/occupied_prob.npz -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/occupied_prob_with_l3.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/4DVLab/OctreeOcc/54afdd64b210fa45a0b1e1f255e4f88739b4e5a8/projects/mmdet3d_plugin/models/utils/occupied_prob_with_l3.npz -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/table.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/4DVLab/OctreeOcc/54afdd64b210fa45a0b1e1f255e4f88739b4e5a8/projects/mmdet3d_plugin/models/utils/table.npz -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/models/utils/visual.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torchvision.utils import make_grid 3 | import torchvision 4 | import matplotlib.pyplot as plt 5 | import cv2 6 | 7 | 8 | def convert_color(img_path): 9 | plt.figure() 10 | img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE) 11 | plt.imsave(img_path, img, cmap=plt.get_cmap('viridis')) 12 | plt.close() 13 | 14 | 15 | def save_tensor(tensor, path, pad_value=254.0): 16 | print('save_tensor', path) 17 | tensor = tensor.to(torch.float).detach().cpu() 18 | if tensor.type() == 'torch.BoolTensor': 19 | tensor = tensor*255 20 | if len(tensor.shape) == 3: 21 | tensor = tensor.unsqueeze(1) 22 | tensor = make_grid(tensor, pad_value=pad_value, normalize=False).permute(1, 2, 0).numpy().copy() 23 | torchvision.utils.save_image(torch.tensor(tensor).permute(2, 0, 1), path) 24 | convert_color(path) 25 | -------------------------------------------------------------------------------- /projects/mmdet3d_plugin/ops/src/octree_ops_cuda.cu: -------------------------------------------------------------------------------- 1 | #include <torch/extension.h> 2 | #include <ATen/ATen.h> 3 | #include <ATen/cuda/CUDAContext.h> 4 | #include <cuda.h> 5 | #include <cuda_runtime.h> 6 | #include <vector> 7 | 8 | // Keep the child offsets in device constant memory to avoid repeated host-to-device transfers 9 | __constant__ int CHILD_OFFSETS[8][3] = { 10 | {0, 0, 0}, {0, 0, 1}, {0, 1, 0}, {1, 0, 0}, 11 | {1, 1, 1}, {0, 1, 1}, {1, 1, 0}, {1, 0, 1} 12 | }; 13 | 14 | // Efficient CUDA kernel that expands every active voxel to all eight children in one pass 15 | __global__ void octree_mask_kernel( 16 | const bool* __restrict__ input_mask, 17 | bool* __restrict__ output_mask, 18 | const int B, const int H, const int W, const int D, 19 | const int out_H, const int out_W, const int out_D) { 20 | 21 | // Flat element index handled by this thread 22 | const int index = blockIdx.x * blockDim.x + threadIdx.x; 23 | const int total_elements = B * H *
W * D; 24 | 25 | if (index >= total_elements) return; 26 | 27 | // Recover the 4D index into the input 28 | const int d = index % D; 29 | const int w = (index / D) % W; 30 | const int h = (index / (D * W)) % H; 31 | const int b = index / (D * W * H); 32 | 33 | // Read the input occupancy value 34 | const int input_idx = ((b * H + h) * W + w) * D + d; 35 | const bool is_active = input_mask[input_idx]; 36 | 37 | // If the current voxel is active, set all 8 of its children 38 | if (is_active) { 39 | // Base index in the output 40 | const int h_out = h * 2; 41 | const int w_out = w * 2; 42 | const int d_out = d * 2; 43 | 44 | // Compute the indices of the 8 children and set them in one unrolled loop 45 | #pragma unroll 46 | for (int i = 0; i < 8; ++i) { 47 | const int out_h = h_out + CHILD_OFFSETS[i][0]; 48 | const int out_w = w_out + CHILD_OFFSETS[i][1]; 49 | const int out_d = d_out + CHILD_OFFSETS[i][2]; 50 | 51 | // Flat output index 52 | const int output_idx = ((b * out_H + out_h) * out_W + out_w) * out_D + out_d; 53 | output_mask[output_idx] = true; 54 | } 55 | } 56 | } 57 | 58 | void octree_mask_l1_to_l2_forward_cuda( 59 | const at::Tensor& octree_l1, 60 | at::Tensor& mask_l2) { 61 | 62 | // Tensor sizes 63 | const int B = octree_l1.size(0); 64 | const int H = octree_l1.size(1); 65 | const int W = octree_l1.size(2); 66 | const int D = octree_l1.size(3); 67 | 68 | // Output sizes 69 | const int out_H = H * 2; 70 | const int out_W = W * 2; 71 | const int out_D = D * 2; 72 | 73 | const int total_elements = B * H * W * D; 74 | const int threads = 256; 75 | const int blocks = (total_elements + threads - 1) / threads; 76 | 77 | // Run on the current CUDA stream 78 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 79 | 80 | // Launch the kernel 81 | octree_mask_kernel<<<blocks, threads, 0, stream>>>( 82 | octree_l1.data_ptr<bool>(), 83 | mask_l2.data_ptr<bool>(), 84 | B, H, W, D, 85 | out_H, out_W, out_D 86 | ); 87 | } 88 | 89 | void octree_mask_l2_to_l3_forward_cuda( 90 | const at::Tensor& octree_l2, 91 | at::Tensor& mask_l3) { 92 | 93 | // Tensor sizes 94 | const int B = octree_l2.size(0); 95 | const int H = octree_l2.size(1); 96 | const int W = octree_l2.size(2); 97 | const int D = octree_l2.size(3); 98 | 99 | // Output sizes 100 | const int out_H = H * 2; 101 | const int out_W = W * 2; 102 | const int out_D = D * 2; 103 | 104 | const int total_elements = B * H * W * D; 105 | const int threads = 256; 106 | const int blocks = (total_elements + threads - 1) / threads; 107 | 108 | // Run on the current CUDA stream 109 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 110 | 111 | // Launch the kernel 112 | octree_mask_kernel<<<blocks, threads, 0, stream>>>( 113 | octree_l2.data_ptr<bool>(), 114 | mask_l3.data_ptr<bool>(), 115 | B, H, W, D, 116 | out_H, out_W, out_D 117 | ); 118 | } -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='octree_ops', 6 | ext_modules=[ 7 | CUDAExtension( 8 | name='octree_ops', 9 | sources=[ 10 | 'ops/octree_ops.cpp', 11 | 'ops/octree_ops_cuda.cu', 12 | ], 13 | extra_compile_args={ 14 | 'cxx': ['-O3'], 15 | 'nvcc': ['-O3'] 16 | } 17 | ), 18 | ], 19 | cmdclass={ 20 | 'build_ext': BuildExtension 21 | } 22 | ) -------------------------------------------------------------------------------- /tools/data_converter/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | -------------------------------------------------------------------------------- /tools/data_converter/lyft_data_fixer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import numpy as np 4 | import os 5 | 6 | 7 | def fix_lyft(root_folder='./data/lyft', version='v1.01'): 8 | # refer to https://www.kaggle.com/c/3d-object-detection-for-autonomous-vehicles/discussion/110000 # noqa 9 | lidar_path = 'lidar/host-a011_lidar1_1233090652702363606.bin' 10 | root_folder = os.path.join(root_folder, f'{version}-train') 11 | lidar_path = os.path.join(root_folder, lidar_path) 12 | assert os.path.isfile(lidar_path), f'Please download the complete Lyft ' \ 13 | f'dataset and make sure {lidar_path} is present.' 14 | points = np.fromfile(lidar_path, dtype=np.float32, count=-1) 15 | try: 16 | points.reshape([-1, 5]) 17 | print(f'This fix is not required for version {version}.') 18 | except ValueError: 19 | new_points = np.array(list(points) + [100.0, 1.0], dtype='float32') 20 | new_points.tofile(lidar_path) 21 | print(f'Appended 100.0 and 1.0 to the end of {lidar_path}.') 22 | 23 | 24 | parser = argparse.ArgumentParser(description='Lyft dataset fixer arg parser') 25 | parser.add_argument( 26 | '--root-folder', 27 | type=str, 28 | default='./data/lyft', 29 | help='specify the root path of Lyft dataset') 30 | parser.add_argument( 31 | '--version', 32 | type=str, 33 | default='v1.01', 34 | help='specify Lyft dataset version') 35 | args = parser.parse_args() 36 | 37 | if __name__ == '__main__': 38 | fix_lyft(root_folder=args.root_folder, version=args.version) 39 | -------------------------------------------------------------------------------- /tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | PORT=${PORT:-29503} 7 | 8 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 9 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 10 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} --eval bbox 11 | -------------------------------------------------------------------------------- /tools/dist_test_dense.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | PORT=${PORT:-29504} 7 | 8 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 9 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 10 | $(dirname "$0")/test_dense.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} --eval bbox 11 | -------------------------------------------------------------------------------- /tools/dist_test_occ.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | PORT=${PORT:-29503} 7 | 8 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 9 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 10 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} --occupancy 11 | -------------------------------------------------------------------------------- /tools/dist_test_seg.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | PORT=${PORT:-29503} 7 | 8 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 
9 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 10 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} --out 'seg_result.pkl' 11 | -------------------------------------------------------------------------------- /tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # 3 | CONFIG=$1 4 | GPUS=$2 5 | NNODES=${NNODES:-1} 6 | NODE_RANK=${NODE_RANK:-0} 7 | PORT=${PORT:-29502} 8 | MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} 9 | 10 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 11 | python -m torch.distributed.launch \ 12 | --nnodes=$NNODES \ 13 | --node_rank=$NODE_RANK \ 14 | --master_addr=$MASTER_ADDR \ 15 | --nproc_per_node=$GPUS \ 16 | --master_port=$PORT \ 17 | $(dirname "$0")/train.py \ 18 | $CONFIG \ 19 | --seed 0 \ 20 | --launcher pytorch ${@:3} --deterministic 2>&1 | tee output.log -------------------------------------------------------------------------------- /tools/eval_metrics/lidar_seg.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def fast_hist(pred, label, n): 4 | k = (label >= 0) & (label < n) 5 | bin_count = np.bincount( 6 | n * label[k].astype(int) + pred[k], minlength=n ** 2) 7 | return bin_count[:n ** 2].reshape(n, n) 8 | 9 | def per_class_iu(hist): 10 | return np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist)) 11 | 12 | def compute_iou(seg_result, n=17): 13 | hist_list = [] 14 | for seg_i in seg_result: 15 | pred = seg_i['lidar_pred'] 16 | label = seg_i['lidar_label'] 17 | assert pred.shape[0] == label.shape[0] 18 | hist = fast_hist(pred, label, n) 19 | hist_list.append(hist) 20 | iou = per_class_iu(sum(hist_list)) 21 | return iou -------------------------------------------------------------------------------- /tools/eval_metrics/lidar_seg_convert.py: -------------------------------------------------------------------------------- 1 | import json 2 | import numpy as np 3 | import pickle 4 | from nuscenes import NuScenes 5 | import os 6 | 7 | def load_pkl(path): 8 | with open(path, 'rb') as f: 9 | info = pickle.load(f) 10 | return info 11 | 12 | def main(): 13 | nuscenes_root = '/data/yuqi_wang/nuscenes' 14 | pred_lidarseg_path = '/root/workspace/Occupancy/seg_result.pkl' 15 | output_dir = '/root/workspace/Occupancy/work_dirs/lidar_seg_r101' 16 | os.makedirs(output_dir, exist_ok=True) 17 | 18 | val_path = os.path.join(nuscenes_root, 'nuscenes_infos_temporal_val.pkl') 19 | nusc_seg = NuScenes(version='v1.0-trainval', dataroot=nuscenes_root, verbose=True) 20 | 21 | val_info = load_pkl(val_path) 22 | pred_info = load_pkl(pred_lidarseg_path) 23 | 24 | pred_seg = {} 25 | for p in pred_info: 26 | pred_seg[p['token']] = p['lidar_pred'] 27 | 28 | for vi in range(len(val_info['infos'])): 29 | vif = val_info['infos'][vi] 30 | lidar_sd_token = nusc_seg.get('sample', vif['token'])['data']['LIDAR_TOP'] 31 | save_name = lidar_sd_token + '_lidarseg.bin' 32 | save_path = os.path.join(output_dir, save_name) 33 | pred = pred_seg[vif['token']].astype('uint8') 34 | assert pred.size > 0 35 | pred.tofile(save_path) 36 | if vi % 500 == 0: 37 | print(vi) 38 | 39 | 40 | if __name__ == '__main__': 41 | main() -------------------------------------------------------------------------------- /tools/fp16/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | PORT=${PORT:-28508} 6 | 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 8 |
python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 9 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} --deterministic 10 | -------------------------------------------------------------------------------- /tools/learning_map._nus.yaml: -------------------------------------------------------------------------------- 1 | labels: 2 | 0: 'noise' 3 | 1: 'animal' 4 | 2: 'human.pedestrian.adult' 5 | 3: 'human.pedestrian.child' 6 | 4: 'human.pedestrian.construction_worker' 7 | 5: 'human.pedestrian.personal_mobility' 8 | 6: 'human.pedestrian.police_officer' 9 | 7: 'human.pedestrian.stroller' 10 | 8: 'human.pedestrian.wheelchair' 11 | 9: 'movable_object.barrier' 12 | 10: 'movable_object.debris' 13 | 11: 'movable_object.pushable_pullable' 14 | 12: 'movable_object.trafficcone' 15 | 13: 'static_object.bicycle_rack' 16 | 14: 'vehicle.bicycle' 17 | 15: 'vehicle.bus.bendy' 18 | 16: 'vehicle.bus.rigid' 19 | 17: 'vehicle.car' 20 | 18: 'vehicle.construction' 21 | 19: 'vehicle.emergency.ambulance' 22 | 20: 'vehicle.emergency.police' 23 | 21: 'vehicle.motorcycle' 24 | 22: 'vehicle.trailer' 25 | 23: 'vehicle.truck' 26 | 24: 'flat.driveable_surface' 27 | 25: 'flat.other' 28 | 26: 'flat.sidewalk' 29 | 27: 'flat.terrain' 30 | 28: 'static.manmade' 31 | 29: 'static.other' 32 | 30: 'static.vegetation' 33 | 31: 'vehicle.ego' 34 | labels_16: 35 | 0: 'noise' 36 | 1: 'barrier' 37 | 2: 'bicycle' 38 | 3: 'bus' 39 | 4: 'car' 40 | 5: 'construction_vehicle' 41 | 6: 'motorcycle' 42 | 7: 'pedestrian' 43 | 8: 'traffic_cone' 44 | 9: 'trailer' 45 | 10: 'truck' 46 | 11: 'driveable_surface' 47 | 12: 'other_flat' 48 | 13: 'sidewalk' 49 | 14: 'terrain' 50 | 15: 'manmade' 51 | 16: 'vegetation' 52 | learning_map: 53 | 1: 0 54 | 5: 0 55 | 7: 0 56 | 8: 0 57 | 10: 0 58 | 11: 0 59 | 13: 0 60 | 19: 0 61 | 20: 0 62 | 0: 0 63 | 29: 0 64 | 31: 0 65 | 9: 1 66 | 14: 2 67 | 15: 3 68 | 16: 3 69 | 17: 4 70 | 18: 5 71 | 21: 6 72 | 2: 7 73 | 3: 7 74 | 4: 7 75 | 6: 7 76 | 12: 8 77 | 22: 9 78 | 23: 10 79 | 24: 11 80 | 25: 12 81 | 26: 13 82 | 27: 14 83 | 28: 15 84 | 30: 16 -------------------------------------------------------------------------------- /tools/misc/fuse_conv_bn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | import argparse 3 | import torch 4 | from mmcv.runner import save_checkpoint 5 | from torch import nn as nn 6 | 7 | from mmdet3d.apis import init_model 8 | 9 | 10 | def fuse_conv_bn(conv, bn): 11 | """During inference, the functionality of batch norm layers is turned off and 12 | only the per-channel mean and variance are used, which makes it possible to 13 | fuse them into the preceding conv layers to save computation and simplify 14 | the network structure.""" 15 | conv_w = conv.weight 16 | conv_b = conv.bias if conv.bias is not None else torch.zeros_like( 17 | bn.running_mean) 18 | 19 | factor = bn.weight / torch.sqrt(bn.running_var + bn.eps) 20 | conv.weight = nn.Parameter(conv_w * 21 | factor.reshape([conv.out_channels, 1, 1, 1])) 22 | conv.bias = nn.Parameter((conv_b - bn.running_mean) * factor + bn.bias) 23 | return conv 24 | 25 | 26 | def fuse_module(m): 27 | last_conv = None 28 | last_conv_name = None 29 | 30 | for name, child in m.named_children(): 31 | if isinstance(child, (nn.BatchNorm2d, nn.SyncBatchNorm)): 32 | if last_conv is None: # only fuse BN that is after Conv 33 | continue 34 | fused_conv = fuse_conv_bn(last_conv, child) 35 | m._modules[last_conv_name] = fused_conv 36 | # To reduce changes, set BN as Identity instead of deleting it. 37 | m._modules[name] = nn.Identity() 38 | last_conv = None 39 | elif isinstance(child, nn.Conv2d): 40 | last_conv = child 41 | last_conv_name = name 42 | else: 43 | fuse_module(child) 44 | return m 45 | 46 | 47 | def parse_args(): 48 | parser = argparse.ArgumentParser( 49 | description='fuse Conv and BN layers in a model') 50 | parser.add_argument('config', help='config file path') 51 | parser.add_argument('checkpoint', help='checkpoint file path') 52 | parser.add_argument('out', help='output path of the converted model') 53 | args = parser.parse_args() 54 | return args 55 | 56 | 57 | def main(): 58 | args = parse_args() 59 | # build the model from a config file and a checkpoint file 60 | model = init_model(args.config, args.checkpoint) 61 | # fuse conv and bn layers of the model 62 | fused_model = fuse_module(model) 63 | save_checkpoint(fused_model, args.out) 64 | 65 | 66 | if __name__ == '__main__': 67 | main() 68 | -------------------------------------------------------------------------------- /tools/misc/print_config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | from mmcv import Config, DictAction 4 | 5 | 6 | def parse_args(): 7 | parser = argparse.ArgumentParser(description='Print the whole config') 8 | parser.add_argument('config', help='config file path') 9 | parser.add_argument( 10 | '--options', nargs='+', action=DictAction, help='arguments in dict') 11 | args = parser.parse_args() 12 | 13 | return args 14 | 15 | 16 | def main(): 17 | args = parse_args() 18 | 19 | cfg = Config.fromfile(args.config) 20 | if args.options is not None: 21 | cfg.merge_from_dict(args.options) 22 | print(f'Config:\n{cfg.pretty_text}') 23 | 24 | 25 | if __name__ == '__main__': 26 | main() 27 | -------------------------------------------------------------------------------- /tools/misc/visualize_results.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import argparse 3 | import mmcv 4 | from mmcv import Config 5 | 6 | from mmdet3d.datasets import build_dataset 7 | 8 | 9 | def parse_args(): 10 | parser = argparse.ArgumentParser( 11 | description='MMDet3D visualize the results') 12 | parser.add_argument('config', help='test config file path') 13 | parser.add_argument('--result', help='results file in pickle format') 14 | parser.add_argument( 15 | '--show-dir', help='directory where visualized results will be saved') 16 | args = parser.parse_args() 17 | 18 | return args 19 | 20 | 21 | def main(): 22 | args = parse_args() 23 | 24 | if args.result is not None and \ 25 | not args.result.endswith(('.pkl', '.pickle')): 26 | raise ValueError('The results file must be a pkl file.') 27 | 28 | cfg = Config.fromfile(args.config) 29 | cfg.data.test.test_mode = True 30 | 31 | # build the dataset 32 | dataset = build_dataset(cfg.data.test) 33 | results = mmcv.load(args.result) 34 | 35 | if getattr(dataset, 'show', None) is not None: 36 | # data loading pipeline for showing 37 | eval_pipeline = cfg.get('eval_pipeline', {}) 38 | if eval_pipeline: 39 | dataset.show(results, args.show_dir, pipeline=eval_pipeline) 40 | else: 41 | dataset.show(results, args.show_dir) # use default pipeline 42 | else: 43 | raise NotImplementedError( 44 | 'Show is not implemented for dataset {}!'.format( 45 | type(dataset).__name__)) 46 | 47 | 48 | if __name__ == '__main__': 49 | main() 50 | -------------------------------------------------------------------------------- /tools/model_converters/publish_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import argparse 3 | import subprocess 4 | import torch 5 | 6 | 7 | def parse_args(): 8 | parser = argparse.ArgumentParser( 9 | description='Process a checkpoint to be published') 10 | parser.add_argument('in_file', help='input checkpoint filename') 11 | parser.add_argument('out_file', help='output checkpoint filename') 12 | args = parser.parse_args() 13 | return args 14 | 15 | 16 | def process_checkpoint(in_file, out_file): 17 | checkpoint = torch.load(in_file, map_location='cpu') 18 | # remove optimizer for smaller file size 19 | if 'optimizer' in checkpoint: 20 | del checkpoint['optimizer'] 21 | # if it is necessary to remove some sensitive data in checkpoint['meta'], 22 | # add the code here. 23 | torch.save(checkpoint, out_file) 24 | sha = subprocess.check_output(['sha256sum', out_file]).decode() 25 | # strip the '.pth' suffix by slicing; str.rstrip('.pth') would strip a 26 | # trailing character set and mangle names such as 'depth.pth' 27 | stem = out_file[:-4] if out_file.endswith('.pth') else out_file 28 | final_file = stem + '-{}.pth'.format(sha[:8]) 29 | subprocess.Popen(['mv', out_file, final_file]) 30 | 31 | 32 | def main(): 33 | args = parse_args() 34 | process_checkpoint(args.in_file, args.out_file) 35 | 36 | 37 | if __name__ == '__main__': 38 | main() 39 | -------------------------------------------------------------------------------- /tools/model_converters/regnet2mmdet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import argparse 3 | import torch 4 | from collections import OrderedDict 5 | 6 | 7 | def convert_stem(model_key, model_weight, state_dict, converted_names): 8 | new_key = model_key.replace('stem.conv', 'conv1') 9 | new_key = new_key.replace('stem.bn', 'bn1') 10 | state_dict[new_key] = model_weight 11 | converted_names.add(model_key) 12 | print(f'Convert {model_key} to {new_key}') 13 | 14 | 15 | def convert_head(model_key, model_weight, state_dict, converted_names): 16 | new_key = model_key.replace('head.fc', 'fc') 17 | state_dict[new_key] = model_weight 18 | converted_names.add(model_key) 19 | print(f'Convert {model_key} to {new_key}') 20 | 21 | 22 | def convert_reslayer(model_key, model_weight, state_dict, converted_names): 23 | split_keys = model_key.split('.') 24 | layer, block, module = split_keys[:3] 25 | block_id = int(block[1:]) 26 | layer_name = f'layer{int(layer[1:])}' 27 | block_name = f'{block_id - 1}' 28 | 29 | if block_id == 1 and module == 'bn': 30 | new_key = f'{layer_name}.{block_name}.downsample.1.{split_keys[-1]}' 31 | elif block_id == 1 and module == 'proj': 32 | new_key = f'{layer_name}.{block_name}.downsample.0.{split_keys[-1]}' 33 | elif module == 'f': 34 | if split_keys[3] == 'a_bn': 35 | module_name = 'bn1' 36 | elif split_keys[3] == 'b_bn': 37 | module_name = 'bn2' 38 | elif split_keys[3] == 'c_bn': 39 | module_name = 'bn3' 40 | elif split_keys[3] == 'a': 41 | module_name = 'conv1' 42 | elif split_keys[3] == 'b': 43 | module_name = 'conv2' 44 | elif split_keys[3] == 'c': 45 | module_name = 'conv3' 46 | new_key = f'{layer_name}.{block_name}.{module_name}.{split_keys[-1]}' 47 | else: 48 | raise ValueError(f'Unsupported conversion of key {model_key}') 49 | print(f'Convert {model_key} to {new_key}') 50 | state_dict[new_key] = model_weight 51 | converted_names.add(model_key) 52 | 53 | 54 | def convert(src, dst): 55 | """Convert keys in pycls pretrained RegNet models to mmdet style.""" 56 | # load caffe model 57 | regnet_model = torch.load(src) 58 | blobs = regnet_model['model_state'] 59 | # convert to pytorch style 60 | state_dict = OrderedDict() 61 | converted_names = set() 62 | for key, weight in blobs.items(): 63 | if 'stem' in key: 64 | convert_stem(key, weight, state_dict, converted_names) 65 | elif 'head' in key: 66 | convert_head(key, weight, state_dict, converted_names) 67 | elif key.startswith('s'): 68 | convert_reslayer(key, weight, state_dict, converted_names) 69 | 70 | # check if all layers are converted 71 | for key in blobs: 72 | if key not in converted_names: 73 | print(f'not converted: {key}') 74 | # save checkpoint 75 | checkpoint = dict() 76 | checkpoint['state_dict'] = state_dict 77 | torch.save(checkpoint, dst) 78 | 79 | 80 | def main(): 81 | parser = argparse.ArgumentParser(description='Convert model keys') 82 | parser.add_argument('src', help='src detectron model path') 83 | parser.add_argument('dst', help='save path') 84 | args = parser.parse_args() 85 | convert(args.src, args.dst) 86 | 87 | 88 | if __name__ == '__main__': 89 | main() 90 | --------------------------------------------------------------------------------
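As a quick sanity check of the key conversion in regnet2mmdet.py above, convert_stem can be exercised on a toy state dict. The tensors below are dummy placeholders rather than weights from a real pycls checkpoint, and the import assumes the snippet is run from inside tools/model_converters so that regnet2mmdet.py is importable:

import torch
from collections import OrderedDict
from regnet2mmdet import convert_stem  # assumes cwd is tools/model_converters

# toy pycls-style stem keys with dummy weights
blobs = OrderedDict([('stem.conv.weight', torch.zeros(32, 3, 3, 3)),
                     ('stem.bn.weight', torch.zeros(32))])
state_dict, converted_names = OrderedDict(), set()
for key, weight in blobs.items():
    convert_stem(key, weight, state_dict, converted_names)
print(list(state_dict))  # ['conv1.weight', 'bn1.weight']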