├── v2xvit ├── __init__.py ├── loss │ ├── __init__.py │ ├── voxel_net_loss.py │ └── pixor_loss.py ├── models │ ├── __init__.py │ ├── fuse_modules │ │ ├── __init__.py │ │ ├── f_cooper_fuse.py │ │ ├── fuse_utils.py │ │ ├── self_attn.py │ │ └── mswin.py │ ├── sub_modules │ │ ├── __init__.py │ │ ├── f_cooper_fuse.py │ │ ├── naive_compress.py │ │ ├── point_pillar_scatter.py │ │ ├── downsample_conv.py │ │ ├── fuse_utils.py │ │ ├── split_attn.py │ │ ├── auto_encoder.py │ │ ├── self_attn.py │ │ ├── psm_mask.py │ │ ├── base_transformer.py │ │ └── mswin.py │ ├── point_pillar_intermediate.py │ ├── point_pillar.py │ ├── point_pillar_fcooper.py │ ├── point_pillar_opv2v.py │ ├── point_pillar_single.py │ ├── point_pillar_cobevt.py │ ├── point_pillar_v2vnet.py │ ├── point_pillar_when2com.py │ └── point_pillar_transformer.py ├── tools │ ├── __init__.py │ ├── loop_inference.py │ ├── debug_utils.py │ └── inference_utils.py ├── utils │ ├── __init__.py │ ├── setup.py │ ├── transformation_utils.py │ ├── pose_utils.py │ ├── eval_utils.py │ ├── common_utils.py │ └── box_overlaps.pyx ├── data_utils │ ├── __init__.py │ ├── augmentor │ │ ├── __init__.py │ │ ├── augment_utils.py │ │ └── data_augmentor.py │ ├── datasets │ │ ├── .intermediate_fusion_dataset.py.swp │ │ └── __init__.py │ ├── post_processor │ │ └── __init__.py │ └── pre_processor │ │ ├── __init__.py │ │ ├── base_preprocessor.py │ │ ├── bev_preprocessor.py │ │ ├── voxel_preprocessor.py │ │ └── sp_voxel_preprocessor.py ├── hypes_yaml │ ├── __init__.py │ ├── visualization.yaml │ ├── point_pillar_early_fusion.yaml │ ├── point_pillar_late_fusion.yaml │ ├── point_pillar_cobevt.yaml │ ├── point_pillar_fcooper.yaml │ ├── point_pillar_v2vnet.yaml │ ├── point_pillar_when2com.yaml │ ├── point_pillar_single.yaml │ ├── point_pillar_opv2v.yaml │ ├── point_pillar_where2comm_ori.yaml │ ├── point_pillar_where2comm_ori_multi.yaml │ ├── point_pillar_where2comm_ori_single.yaml │ ├── where2comm_transformer_multiscale_resnet.yaml │ └── point_pillar_v2xvit.yaml ├── visualization │ ├── __init__.py │ ├── simple_plot3d │ │ └── __init__.py │ ├── vis_data_sequence.py │ └── simple_vis.py └── version.py ├── images └── Overview.png ├── requirements.txt ├── setup.py ├── LICENSE ├── Env.yaml ├── .gitignore ├── docs ├── data_annotation_tutorial.md └── data_intro.md └── README.md /v2xvit/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /v2xvit/loss/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /v2xvit/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /v2xvit/tools/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /v2xvit/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /v2xvit/data_utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /v2xvit/hypes_yaml/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /v2xvit/visualization/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /v2xvit/models/fuse_modules/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /v2xvit/models/sub_modules/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /v2xvit/data_utils/augmentor/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /v2xvit/data_utils/datasets/.intermediate_fusion_dataset.py.swp: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /images/Overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmgu0212/FeaCo/HEAD/images/Overview.png -------------------------------------------------------------------------------- /v2xvit/version.py: -------------------------------------------------------------------------------- 1 | """Specifies the current version number of v2xvit.""" 2 | 3 | __version__ = "0.1.0" 4 | -------------------------------------------------------------------------------- /v2xvit/visualization/simple_plot3d/__init__.py: -------------------------------------------------------------------------------- 1 | from .canvas_3d import Canvas_3D 2 | from .canvas_bev import Canvas_BEV 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib 2 | numpy 3 | open3d 4 | opencv-python 5 | cython 6 | tensorboardX 7 | shapely 8 | einops 9 | 10 | -------------------------------------------------------------------------------- /v2xvit/tools/loop_inference.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | for index in range(15,40,2): 4 | cmd = f"CUDA_VISIBLE_DEVICES=1 python /home/gaojing/zjy/v2x-vit/v2xvit/tools/inference.py --eval_epoch {index} " 5 | print(f"Running command: {cmd}") 6 | os.system(cmd) -------------------------------------------------------------------------------- /v2xvit/utils/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from Cython.Build import cythonize 3 | import numpy 4 | setup( 5 | name='box overlaps', 6 | ext_modules=cythonize('v2xvit/utils/box_overlaps.pyx'), 7 | include_dirs=[numpy.get_include()] 8 | ) -------------------------------------------------------------------------------- /v2xvit/data_utils/post_processor/__init__.py: -------------------------------------------------------------------------------- 1 | from v2xvit.data_utils.post_processor.voxel_postprocessor import VoxelPostprocessor 2 | from v2xvit.data_utils.post_processor.bev_postprocessor import BevPostprocessor 3 | 4 | __all__ = { 5 | 'VoxelPostprocessor': VoxelPostprocessor, 6 | 
'BevPostprocessor': BevPostprocessor, 7 | } 8 | 9 | 10 | def build_postprocessor(anchor_cfg, train): 11 | process_method_name = anchor_cfg['core_method'] 12 | assert process_method_name in ['VoxelPostprocessor', 'BevPostprocessor'] 13 | anchor_generator = __all__[process_method_name]( 14 | anchor_params=anchor_cfg, 15 | train=train 16 | ) 17 | 18 | return anchor_generator 19 | -------------------------------------------------------------------------------- /v2xvit/models/sub_modules/f_cooper_fuse.py: -------------------------------------------------------------------------------- 1 | """ 2 | Implementation of F-cooper maxout fusing. 3 | """ 4 | import torch 5 | import torch.nn as nn 6 | 7 | 8 | class SpatialFusion(nn.Module): 9 | def __init__(self): 10 | super(SpatialFusion, self).__init__() 11 | 12 | def regroup(self, x, record_len): 13 | cum_sum_len = torch.cumsum(record_len, dim=0) 14 | split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) 15 | return split_x 16 | 17 | def forward(self, x, record_len): 18 | # x: B, C, H, W, split x:[(B1, C, W, H), (B2, C, W, H)] 19 | split_x = self.regroup(x, record_len) 20 | out = [] 21 | 22 | for xx in split_x: 23 | xx = torch.max(xx, dim=0, keepdim=True)[0] 24 | out.append(xx) 25 | return torch.cat(out, dim=0) -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Author: Runsheng Xu 3 | # License: TDG-Attribution-NonCommercial-NoDistrib 4 | 5 | 6 | from os.path import dirname, realpath 7 | from setuptools import setup, find_packages, Distribution 8 | from v2xvit.version import __version__ 9 | 10 | 11 | def _read_requirements_file(): 12 | """Return the elements in requirements.txt.""" 13 | req_file_path = '%s/requirements.txt' % dirname(realpath(__file__)) 14 | with open(req_file_path) as f: 15 | return [line.strip() for line in f] 16 | 17 | 18 | setup( 19 | name='V2XViT', 20 | version=__version__, 21 | packages=find_packages(), 22 | url='https://github.com/ucla-mobility/OpenCDA.git', 23 | license='MIT', 24 | author='Runsheng Xu, Hao Xiang, Zhengzhong Tu', 25 | author_email='rxx3386@ucla.edu', 26 | description='An opensource pytorch framework for autonomous driving ' 27 | 'cooperative detection', 28 | long_description=open("README.md").read(), 29 | install_requires=_read_requirements_file(), 30 | ) 31 | -------------------------------------------------------------------------------- /v2xvit/models/sub_modules/naive_compress.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class NaiveCompressor(nn.Module): 6 | def __init__(self, input_dim, compress_raito): 7 | super().__init__() 8 | self.encoder = nn.Sequential( 9 | nn.Conv2d(input_dim, input_dim//compress_raito, kernel_size=3, 10 | stride=1, padding=1), 11 | nn.BatchNorm2d(input_dim//compress_raito, eps=1e-3, momentum=0.01), 12 | nn.ReLU() 13 | ) 14 | self.decoder = nn.Sequential( 15 | nn.Conv2d(input_dim//compress_raito, input_dim, kernel_size=3, 16 | stride=1, padding=1), 17 | nn.BatchNorm2d(input_dim, eps=1e-3, momentum=0.01), 18 | nn.ReLU(), 19 | nn.Conv2d(input_dim, input_dim, kernel_size=3, stride=1, padding=1), 20 | nn.BatchNorm2d(input_dim, eps=1e-3, 21 | momentum=0.01), 22 | nn.ReLU() 23 | ) 24 | 25 | def forward(self, x): 26 | x = self.encoder(x) 27 | x = self.decoder(x) 28 | 29 | return x 
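Note: the compressor above is a plain autoencoder-style bottleneck — the encoder shrinks the channel dimension by the `compress_raito` factor (spelled as in the source) and the decoder restores it, so the output tensor has the same shape as the input. A minimal usage sketch, assuming PyTorch is installed and the `NaiveCompressor` class above is importable; the channel count and ratio below are illustrative values, not taken from any config in this repository:

import torch

compressor = NaiveCompressor(input_dim=256, compress_raito=4)
features = torch.randn(2, 256, 100, 352)       # B, C, H, W feature map (illustrative sizes)
restored = compressor(features)                # encoder squeezes channels to 256 // 4 = 64,
assert restored.shape == features.shape        # decoder brings them back to 256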
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Runsheng Xu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /v2xvit/data_utils/pre_processor/__init__.py: -------------------------------------------------------------------------------- 1 | from v2xvit.data_utils.pre_processor.base_preprocessor import BasePreprocessor 2 | from v2xvit.data_utils.pre_processor.voxel_preprocessor import VoxelPreprocessor 3 | from v2xvit.data_utils.pre_processor.bev_preprocessor import BevPreprocessor 4 | from v2xvit.data_utils.pre_processor.sp_voxel_preprocessor import SpVoxelPreprocessor 5 | 6 | __all__ = { 7 | 'BasePreprocessor': BasePreprocessor, 8 | 'VoxelPreprocessor': VoxelPreprocessor, 9 | 'BevPreprocessor': BevPreprocessor, 10 | 'SpVoxelPreprocessor': SpVoxelPreprocessor 11 | } 12 | 13 | 14 | def build_preprocessor(preprocess_cfg, train): 15 | process_method_name = preprocess_cfg['core_method'] 16 | error_message = f"{process_method_name} is not found. 
" \ 17 | f"Please add your processor file's name in opencood/" \ 18 | f"data_utils/processor/init.py" 19 | assert process_method_name in ['BasePreprocessor', 'VoxelPreprocessor', 20 | 'BevPreprocessor', 'SpVoxelPreprocessor'], \ 21 | error_message 22 | 23 | processor = __all__[process_method_name]( 24 | preprocess_params=preprocess_cfg, 25 | train=train 26 | ) 27 | 28 | return processor 29 | -------------------------------------------------------------------------------- /v2xvit/data_utils/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from v2xvit.data_utils.datasets.late_fusion_dataset import LateFusionDataset 2 | from v2xvit.data_utils.datasets.early_fusion_dataset import EarlyFusionDataset 3 | from v2xvit.data_utils.datasets.intermediate_fusion_dataset import IntermediateFusionDataset 4 | 5 | __all__ = { 6 | 'LateFusionDataset': LateFusionDataset, 7 | 'EarlyFusionDataset': EarlyFusionDataset, 8 | 'IntermediateFusionDataset': IntermediateFusionDataset 9 | } 10 | 11 | # the final range for evaluation 12 | GT_RANGE = [-140, -40, -3, 140, 40, 1] 13 | # The communication range for cavs 14 | COM_RANGE = 70 15 | 16 | 17 | def build_dataset(dataset_cfg, visualize=False, train=True): 18 | dataset_name = dataset_cfg['fusion']['core_method'] 19 | error_message = f"{dataset_name} is not found. " \ 20 | f"Please add your processor file's name in opencood/" \ 21 | f"data_utils/datasets/init.py" 22 | assert dataset_name in ['LateFusionDataset', 'EarlyFusionDataset', 23 | 'IntermediateFusionDataset'], error_message 24 | 25 | dataset = __all__[dataset_name]( 26 | params=dataset_cfg, 27 | visualize=visualize, 28 | train=train 29 | ) 30 | 31 | return dataset 32 | -------------------------------------------------------------------------------- /Env.yaml: -------------------------------------------------------------------------------- 1 | usage: conda-env [-h] {attach,create,export,list,remove,upload,update} ... 2 | 3 | positional arguments: 4 | {attach,create,export,list,remove,upload,update} 5 | attach WARNING: This command is deprecated in conda 4.4 and 6 | scheduled for removal in conda 4.5. Embeds information 7 | describing your conda environment into the notebook 8 | metadata 9 | create Create an environment based on an environment file 10 | export Export a given environment 11 | list List the Conda environments 12 | remove Remove an environment 13 | upload WARNING: This command is deprecated in conda 4.4 and 14 | scheduled for removal in conda 4.5. Upload an 15 | environment to anaconda.org 16 | update Update the current environment based on environment 17 | file 18 | 19 | optional arguments: 20 | -h, --help Show this help message and exit. 
21 | 22 | conda commands available from other packages: 23 | build 24 | convert 25 | develop 26 | env 27 | index 28 | inspect 29 | metapackage 30 | render 31 | server 32 | skeleton 33 | -------------------------------------------------------------------------------- /v2xvit/visualization/vis_data_sequence.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Author: Runsheng Xu 3 | # License: TDG-Attribution-NonCommercial-NoDistrib 4 | 5 | 6 | import os 7 | import argparse 8 | from torch.utils.data import DataLoader 9 | 10 | from v2xvit.hypes_yaml.yaml_utils import load_yaml 11 | from v2xvit.visualization import vis_utils 12 | from v2xvit.data_utils.datasets.early_fusion_vis_dataset import \ 13 | EarlyFusionVisDataset 14 | 15 | 16 | def vis_parser(): 17 | parser = argparse.ArgumentParser(description="data visualization") 18 | parser.add_argument('--color_mode', type=str, default="intensity", 19 | help='lidar color rendering mode, e.g. intensity,' 20 | 'z-value or constant.') 21 | opt = parser.parse_args() 22 | return opt 23 | 24 | 25 | if __name__ == '__main__': 26 | current_path = os.path.dirname(os.path.realpath(__file__)) 27 | params = load_yaml(os.path.join(current_path, 28 | '../hypes_yaml/visualization.yaml')) 29 | 30 | opencda_dataset = EarlyFusionVisDataset(params, visualize=True, 31 | train=False) 32 | data_loader = DataLoader(opencda_dataset, batch_size=1, num_workers=8, 33 | collate_fn=opencda_dataset.collate_batch_train, 34 | shuffle=False, 35 | pin_memory=False) 36 | 37 | opt = vis_parser() 38 | vis_utils.visualize_sequence_dataloader(data_loader, 39 | params['postprocess']['order'], 40 | color_mode=opt.color_mode) 41 | -------------------------------------------------------------------------------- /v2xvit/hypes_yaml/visualization.yaml: -------------------------------------------------------------------------------- 1 | # this yaml is only for visualization 2 | name: visualization 3 | 4 | yaml_parser: "load_voxel_params" 5 | root_dir: 'v2xset/train' 6 | validate_dir: 'v2xset/validate' 7 | 8 | train_params: 9 | batch_size: &batch_size 4 10 | epoches: 100 11 | eval_freq: 1 12 | save_freq: 1 13 | 14 | fusion: 15 | core_method: 'EarlyFusionDataset' # LateFusionDataset, EarlyFusionDataset, IntermediateFusionDataset supported 16 | args: [] 17 | 18 | # preprocess-related 19 | preprocess: 20 | # options: BasePreprocessor, VoxelPreprocessor, BevPreprocessor 21 | core_method: 'SpVoxelPreprocessor' 22 | args: 23 | voxel_size: &voxel_size [0.4, 0.4, 0.4] 24 | max_points_per_voxel: &T 32 25 | max_voxel_train: 36000 26 | max_voxel_test: 70000 27 | # lidar range for each individual cav. 28 | cav_lidar_range: &cav_lidar [-140.8, -40, -3, 140.8, 40, 1] 29 | 30 | data_augment: 31 | - NAME: random_world_flip 32 | ALONG_AXIS_LIST: [ 'x' ] 33 | 34 | - NAME: random_world_rotation 35 | WORLD_ROT_ANGLE: [ -0.78539816, 0.78539816 ] 36 | 37 | - NAME: random_world_scaling 38 | WORLD_SCALE_RANGE: [ 0.95, 1.05 ] 39 | 40 | # anchor box related 41 | postprocess: 42 | core_method: 'VoxelPostprocessor' # VoxelPostprocessor, BevPostprocessor supported 43 | anchor_args: 44 | cav_lidar_range: *cav_lidar 45 | l: 3.9 46 | w: 1.6 47 | h: 1.56 48 | r: [0, 90] 49 | num: &achor_num 2 50 | target_args: 51 | pos_threshold: 0.6 52 | neg_threshold: 0.45 53 | score_threshold: 0.96 54 | order: 'hwl' # hwl or lwh 55 | max_num: 100 # maximum number of objects in a single frame. 
use this number to make sure different frames has the same dimension in the same batch 56 | nms_thresh: 0.15 -------------------------------------------------------------------------------- /v2xvit/models/fuse_modules/f_cooper_fuse.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Author: Runsheng Xu 3 | # License: TDG-Attribution-NonCommercial-NoDistrib 4 | 5 | 6 | """ 7 | Implementation of F-cooper maxout fusing. 8 | """ 9 | import torch 10 | import torch.nn as nn 11 | import math 12 | from collections import OrderedDict 13 | 14 | class SpatialFusion(nn.Module): 15 | def __init__(self): 16 | super(SpatialFusion, self).__init__() 17 | # self.conv1 = nn.Conv3d(in_channels=2, out_channels=1, kernel_size=(3,3,3), stride=1, padding=1, groups=1) 18 | self.conv1 = nn.Sequential( 19 | OrderedDict( 20 | [ 21 | ('conv', nn.Conv3d(2, 1, kernel_size=(3,3,3),stride=1, padding=1, groups=1)), 22 | ('activation', nn.ReLU()), 23 | ] 24 | ) 25 | ) 26 | def regroup(self, x, record_len): 27 | cum_sum_len = torch.cumsum(record_len, dim=0) 28 | split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) 29 | return split_x 30 | 31 | def forward(self, x, record_len): 32 | # x: B, C, H, W, split x:[(B1, C, W, H), (B2, C, W, H)] 33 | split_x = self.regroup(x, record_len) 34 | out = [] 35 | 36 | for xx in split_x: 37 | xx_max = torch.max(xx, dim=0, keepdim=True)[0] 38 | xx_avg = torch.mean(xx, dim=0, keepdim=True) 39 | F_Sp = torch.cat((xx_max,xx_avg),dim = 0).unsqueeze(0) 40 | # F_Sp = F_Sp.permute(0,2,1,3,4) 41 | # print(F_Sp.shape) 42 | # exit() 43 | # conv = nn.Conv3d(2, 1, kernel_size=(3,3,3), stride=1, padding=1) 44 | xx = self.conv1(F_Sp)[0] 45 | # print(aa.shape) 46 | # exit() 47 | out.append(xx) 48 | return torch.cat(out, dim=0) -------------------------------------------------------------------------------- /v2xvit/models/sub_modules/point_pillar_scatter.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class PointPillarScatter(nn.Module): 6 | def __init__(self, model_cfg): 7 | super().__init__() 8 | 9 | self.model_cfg = model_cfg 10 | self.num_bev_features = self.model_cfg['num_features'] 11 | self.nx, self.ny, self.nz = model_cfg['grid_size'] 12 | assert self.nz == 1 13 | 14 | def forward(self, batch_dict): 15 | pillar_features, coords = batch_dict['pillar_features'], batch_dict[ 16 | 'voxel_coords'] 17 | batch_spatial_features = [] 18 | batch_size = coords[:, 0].max().int().item() + 1 19 | 20 | for batch_idx in range(batch_size): 21 | spatial_feature = torch.zeros( 22 | self.num_bev_features, 23 | self.nz * self.nx * self.ny, 24 | dtype=pillar_features.dtype, 25 | device=pillar_features.device) 26 | 27 | batch_mask = coords[:, 0] == batch_idx 28 | this_coords = coords[batch_mask, :] 29 | 30 | indices = this_coords[:, 1] + \ 31 | this_coords[:, 2] * self.nx + \ 32 | this_coords[:, 3] 33 | indices = indices.type(torch.long) 34 | 35 | pillars = pillar_features[batch_mask, :] 36 | pillars = pillars.t() 37 | spatial_feature[:, indices] = pillars 38 | batch_spatial_features.append(spatial_feature) 39 | 40 | batch_spatial_features = \ 41 | torch.stack(batch_spatial_features, 0) 42 | batch_spatial_features = \ 43 | batch_spatial_features.view(batch_size, self.num_bev_features * 44 | self.nz, self.ny, self.nx) 45 | batch_dict['spatial_features'] = batch_spatial_features 46 | 47 | return batch_dict 48 | 49 | 
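Note: PointPillarScatter above converts the sparse per-pillar features into a dense BEV pseudo-image — each pillar's grid coordinates are flattened into a single column index (with nz == 1 the formula z + y * nx + x reduces to y * nx + x), and the pillar's feature vector is written into that column before the canvas is reshaped to (C, ny, nx). A toy sketch of that index math, assuming the OpenPCDet-style (batch_idx, z, y, x) coordinate order and using made-up grid sizes rather than values from any repository config:

import torch

nx, ny, nz = 4, 3, 1                               # illustrative grid, not from a config
num_features = 2
coords = torch.tensor([[0, 0, 1, 2],               # (batch_idx, z, y, x) for pillar 0
                       [0, 0, 2, 3]])              # and pillar 1
pillar_features = torch.tensor([[1.0, 1.0],
                                [2.0, 2.0]])       # one feature row per pillar

canvas = torch.zeros(num_features, nz * ny * nx)
indices = (coords[:, 1] + coords[:, 2] * nx + coords[:, 3]).long()
canvas[:, indices] = pillar_features.t()           # scatter each pillar's feature column
bev = canvas.view(num_features * nz, ny, nx)       # pillar 0 lands at (y=1, x=2), pillar 1 at (y=2, x=3)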
-------------------------------------------------------------------------------- /v2xvit/models/sub_modules/downsample_conv.py: -------------------------------------------------------------------------------- 1 | """ 2 | Class used to downsample features by 3*3 conv 3 | """ 4 | 5 | import torch 6 | import torch.nn as nn 7 | 8 | 9 | class DoubleConv(nn.Module): 10 | """ 11 | Double convoltuion 12 | Args: 13 | in_channels: input channel num 14 | out_channels: output channel num 15 | """ 16 | 17 | def __init__(self, in_channels, out_channels, kernel_size, 18 | stride, padding): 19 | super().__init__() 20 | self.double_conv = nn.Sequential( 21 | nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, 22 | stride=stride, padding=padding), 23 | nn.ReLU(inplace=True), 24 | nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1), 25 | nn.ReLU(inplace=True) 26 | ) 27 | 28 | def forward(self, x): 29 | return self.double_conv(x) 30 | 31 | 32 | class DownsampleConv(nn.Module): 33 | def __init__(self, config): 34 | super(DownsampleConv, self).__init__() 35 | self.layers = nn.ModuleList([]) 36 | input_dim = config['input_dim'] 37 | 38 | for (ksize, dim, stride, padding) in zip(config['kernal_size'], 39 | config['dim'], 40 | config['stride'], 41 | config['padding']): 42 | self.layers.append(DoubleConv(input_dim, 43 | dim, 44 | kernel_size=ksize, 45 | stride=stride, 46 | padding=padding)) 47 | input_dim = dim 48 | 49 | def forward(self, x): 50 | for i in range(len(self.layers)): 51 | x = self.layers[i](x) 52 | return x -------------------------------------------------------------------------------- /v2xvit/models/sub_modules/fuse_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | from einops import rearrange 5 | from v2xvit.utils.common_utils import torch_tensor_to_numpy 6 | 7 | 8 | def regroup(dense_feature, record_len, max_len): 9 | """ 10 | Regroup the data based on the record_len. 11 | 12 | Parameters 13 | ---------- 14 | dense_feature : torch.Tensor 15 | N, C, H, W 16 | record_len : list 17 | [sample1_len, sample2_len, ...] 
18 | max_len : int 19 | Maximum cav number 20 | 21 | Returns 22 | ------- 23 | regroup_feature : torch.Tensor 24 | B, L, C, H, W 25 | """ 26 | cum_sum_len = list(np.cumsum(torch_tensor_to_numpy(record_len))) 27 | split_features = torch.tensor_split(dense_feature, 28 | cum_sum_len[:-1]) 29 | regroup_features = [] 30 | mask = [] 31 | 32 | for split_feature in split_features: 33 | # M, C, H, W 34 | feature_shape = split_feature.shape 35 | 36 | # the maximum M is 5 as most 5 cavs 37 | padding_len = max_len - feature_shape[0] 38 | mask.append([1] * feature_shape[0] + [0] * padding_len) 39 | 40 | padding_tensor = torch.zeros(padding_len, feature_shape[1], 41 | feature_shape[2], feature_shape[3]) 42 | padding_tensor = padding_tensor.to(split_feature.device) 43 | 44 | split_feature = torch.cat([split_feature, padding_tensor], 45 | dim=0) 46 | 47 | # 1, 5C, H, W 48 | split_feature = split_feature.view(-1, 49 | feature_shape[2], 50 | feature_shape[3]).unsqueeze(0) 51 | regroup_features.append(split_feature) 52 | 53 | # B, 5C, H, W 54 | regroup_features = torch.cat(regroup_features, dim=0) 55 | # B, L, C, H, W 56 | regroup_features = rearrange(regroup_features, 57 | 'b (l c) h w -> b l c h w', 58 | l=max_len) 59 | mask = torch.from_numpy(np.array(mask)).to(regroup_features.device) 60 | 61 | return regroup_features, mask 62 | -------------------------------------------------------------------------------- /v2xvit/models/sub_modules/split_attn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class RadixSoftmax(nn.Module): 7 | def __init__(self, radix, cardinality): 8 | super(RadixSoftmax, self).__init__() 9 | self.radix = radix 10 | self.cardinality = cardinality 11 | 12 | def forward(self, x): 13 | # x: (B, L, 1, 1, 3C) 14 | batch = x.size(0) 15 | cav_num = x.size(1) 16 | 17 | if self.radix > 1: 18 | # x: (B, L, 1, 3, C) 19 | x = x.view(batch, 20 | cav_num, 21 | self.cardinality, self.radix, -1) 22 | x = F.softmax(x, dim=3) 23 | # B, 3LC 24 | x = x.reshape(batch, -1) 25 | else: 26 | x = torch.sigmoid(x) 27 | return x 28 | 29 | 30 | class SplitAttn(nn.Module): 31 | def __init__(self, input_dim): 32 | super(SplitAttn, self).__init__() 33 | self.input_dim = input_dim 34 | 35 | self.fc1 = nn.Linear(input_dim, input_dim, bias=False) 36 | self.bn1 = nn.LayerNorm(input_dim) 37 | self.act1 = nn.ReLU() 38 | self.fc2 = nn.Linear(input_dim, input_dim * 3, bias=False) 39 | 40 | self.rsoftmax = RadixSoftmax(3, 1) 41 | 42 | def forward(self, window_list): 43 | # window list: [(B, L, H, W, C) * 3] 44 | assert len(window_list) == 3, 'only 3 windows are supported' 45 | 46 | sw, mw, bw = window_list[0], window_list[1], window_list[2] 47 | B, L = sw.shape[0], sw.shape[1] 48 | 49 | # global average pooling, B, L, H, W, C 50 | x_gap = sw + mw + bw 51 | # B, L, 1, 1, C 52 | x_gap = x_gap.mean((2, 3), keepdim=True) 53 | x_gap = self.act1(self.bn1(self.fc1(x_gap))) 54 | # B, L, 1, 1, 3C 55 | x_attn = self.fc2(x_gap) 56 | # B L 1 1 3C 57 | x_attn = self.rsoftmax(x_attn).view(B, L, 1, 1, -1) 58 | 59 | out = sw * x_attn[:, :, :, :, 0:self.input_dim] + \ 60 | mw * x_attn[:, :, :, :, self.input_dim:2*self.input_dim] +\ 61 | bw * x_attn[:, :, :, :, self.input_dim*2:] 62 | 63 | return out 64 | -------------------------------------------------------------------------------- /v2xvit/models/point_pillar_intermediate.py: -------------------------------------------------------------------------------- 1 | # -*- 
coding: utf-8 -*- 2 | # Author: Runsheng Xu 3 | # License: TDG-Attribution-NonCommercial-NoDistrib 4 | 5 | 6 | import torch 7 | import torch.nn as nn 8 | 9 | 10 | from v2xvit.models.sub_modules.pillar_vfe import PillarVFE 11 | from v2xvit.models.sub_modules.point_pillar_scatter import PointPillarScatter 12 | from v2xvit.models.sub_modules.att_bev_backbone import AttBEVBackbone 13 | 14 | 15 | class PointPillarIntermediate(nn.Module): 16 | def __init__(self, args): 17 | super(PointPillarIntermediate, self).__init__() 18 | 19 | # PIllar VFE 20 | self.pillar_vfe = PillarVFE(args['pillar_vfe'], 21 | num_point_features=4, 22 | voxel_size=args['voxel_size'], 23 | point_cloud_range=args['lidar_range']) 24 | 25 | self.scatter = PointPillarScatter(args['point_pillar_scatter']) 26 | self.backbone = AttBEVBackbone(args['base_bev_backbone'], 64) 27 | 28 | self.cls_head = nn.Conv2d(128 * 3, args['anchor_number'], 29 | kernel_size=1) 30 | self.reg_head = nn.Conv2d(128 * 3, 7 * args['anchor_num'], 31 | kernel_size=1) 32 | 33 | def forward(self, data_dict): 34 | 35 | voxel_features = data_dict['processed_lidar']['voxel_features'] 36 | voxel_coords = data_dict['processed_lidar']['voxel_coords'] 37 | voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] 38 | record_len = data_dict['record_len'] 39 | 40 | batch_dict = {'voxel_features': voxel_features, 41 | 'voxel_coords': voxel_coords, 42 | 'voxel_num_points': voxel_num_points, 43 | 'record_len': record_len} 44 | 45 | batch_dict = self.pillar_vfe(batch_dict) 46 | batch_dict = self.scatter(batch_dict) 47 | batch_dict = self.backbone(batch_dict) 48 | 49 | spatial_features_2d = batch_dict['spatial_features_2d'] 50 | 51 | psm = self.cls_head(spatial_features_2d) 52 | rm = self.reg_head(spatial_features_2d) 53 | 54 | output_dict = {'psm': psm, 55 | 'rm': rm} 56 | 57 | return output_dict -------------------------------------------------------------------------------- /v2xvit/models/fuse_modules/fuse_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Author: Runsheng Xu 3 | # License: TDG-Attribution-NonCommercial-NoDistrib 4 | 5 | 6 | import torch 7 | import numpy as np 8 | 9 | from einops import rearrange 10 | from v2xvit.utils.common_utils import torch_tensor_to_numpy 11 | 12 | 13 | def regroup(dense_feature, record_len, max_len): 14 | """ 15 | Regroup the data based on the record_len. 16 | 17 | Parameters 18 | ---------- 19 | dense_feature : torch.Tensor 20 | N, C, H, W 21 | record_len : list 22 | [sample1_len, sample2_len, ...] 
23 | max_len : int 24 | Maximum cav number 25 | 26 | Returns 27 | ------- 28 | regroup_feature : torch.Tensor 29 | B, L, C, H, W 30 | """ 31 | cum_sum_len = list(np.cumsum(torch_tensor_to_numpy(record_len))) 32 | split_features = torch.tensor_split(dense_feature, 33 | cum_sum_len[:-1]) 34 | regroup_features = [] 35 | mask = [] 36 | 37 | for split_feature in split_features: 38 | # M, C, H, W 39 | feature_shape = split_feature.shape 40 | 41 | # the maximum M is 5 as most 5 cavs 42 | padding_len = max_len - feature_shape[0] 43 | mask.append([1] * feature_shape[0] + [0] * padding_len) 44 | 45 | padding_tensor = torch.zeros(padding_len, feature_shape[1], 46 | feature_shape[2], feature_shape[3]) 47 | padding_tensor = padding_tensor.to(split_feature.device) 48 | 49 | split_feature = torch.cat([split_feature, padding_tensor], 50 | dim=0) 51 | 52 | # 1, 5C, H, W 53 | split_feature = split_feature.view(-1, 54 | feature_shape[2], 55 | feature_shape[3]).unsqueeze(0) 56 | regroup_features.append(split_feature) 57 | 58 | # B, 5C, H, W 59 | regroup_features = torch.cat(regroup_features, dim=0) 60 | # B, L, C, H, W 61 | regroup_features = rearrange(regroup_features, 62 | 'b (l c) h w -> b l c h w', 63 | l=max_len) 64 | mask = torch.from_numpy(np.array(mask)).to(regroup_features.device) 65 | 66 | return regroup_features, mask 67 | -------------------------------------------------------------------------------- /v2xvit/data_utils/pre_processor/base_preprocessor.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from v2xvit.utils import pcd_utils 4 | 5 | 6 | class BasePreprocessor(object): 7 | """ 8 | Basic Lidar pre-processor. 9 | 10 | Parameters 11 | ---------- 12 | preprocess_params : dict 13 | The dictionary containing all parameters of the preprocessing. 14 | 15 | train : bool 16 | Train or test mode. 17 | """ 18 | 19 | def __init__(self, preprocess_params, train): 20 | self.params = preprocess_params 21 | self.train = train 22 | 23 | def preprocess(self, pcd_np): 24 | """ 25 | Preprocess the lidar points by simple sampling. 26 | 27 | Parameters 28 | ---------- 29 | pcd_np : np.ndarray 30 | The raw lidar. 31 | 32 | Returns 33 | ------- 34 | data_dict : the output dictionary. 35 | """ 36 | data_dict = {} 37 | sample_num = self.params['args']['sample_num'] 38 | 39 | pcd_np = pcd_utils.downsample_lidar(pcd_np, sample_num) 40 | data_dict['downsample_lidar'] = pcd_np 41 | 42 | return data_dict 43 | 44 | def project_points_to_bev_map(self, points, ratio=0.1): 45 | """ 46 | Project points to BEV occupancy map with default ratio=0.1. 47 | 48 | Parameters 49 | ---------- 50 | points : np.ndarray 51 | (N, 3) / (N, 4) 52 | 53 | ratio : float 54 | Discretization parameters. Default is 0.1. 55 | 56 | Returns 57 | ------- 58 | bev_map : np.ndarray 59 | BEV occupancy map including projected points with shape 60 | (img_row, img_col). 
61 | 62 | """ 63 | L1, W1, H1, L2, W2, H2 = self.params["cav_lidar_range"] 64 | img_row = int((L2 - L1) / ratio) 65 | img_col = int((W2 - W1) / ratio) 66 | bev_map = np.zeros((img_row, img_col)) 67 | bev_origin = np.array([L1, W1, H1]).reshape(1, -1) 68 | # (N, 3) 69 | indices = ((points[:, :3] - bev_origin) / ratio).astype(int) 70 | mask = np.logical_and(indices[:, 0] > 0, indices[:, 0] < img_row) 71 | mask = np.logical_and(mask, np.logical_and(indices[:, 1] > 0, 72 | indices[:, 1] < img_col)) 73 | indices = indices[mask, :] 74 | bev_map[indices[:, 0], indices[:, 1]] = 1 75 | return bev_map 76 | -------------------------------------------------------------------------------- /v2xvit/models/sub_modules/auto_encoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class AutoEncoder(nn.Module): 6 | def __init__(self, feature_num, layer_num): 7 | super().__init__() 8 | self.feature_num = feature_num 9 | self.feature_stride = 2 10 | 11 | self.encoder = nn.ModuleList() 12 | self.decoder = nn.ModuleList() 13 | 14 | for i in range(layer_num): 15 | cur_layers = [ 16 | nn.ZeroPad2d(1), 17 | nn.Conv2d( 18 | feature_num, feature_num, kernel_size=3, 19 | stride=2, padding=0, bias=False 20 | ), 21 | nn.BatchNorm2d(feature_num, eps=1e-3, momentum=0.01), 22 | nn.ReLU()] 23 | 24 | cur_layers.extend([ 25 | nn.Conv2d(feature_num, feature_num // self.feature_stride, 26 | kernel_size=3, padding=1, bias=False), 27 | nn.BatchNorm2d(feature_num // self.feature_stride, 28 | eps=1e-3, momentum=0.01), 29 | nn.ReLU() 30 | ]) 31 | 32 | self.encoder.append(nn.Sequential(*cur_layers)) 33 | feature_num = feature_num // self.feature_stride 34 | 35 | feature_num = self.feature_num 36 | for i in range(layer_num): 37 | cur_layers = [nn.Sequential( 38 | nn.ConvTranspose2d( 39 | feature_num // 2, feature_num, 40 | kernel_size=2, 41 | stride=2, bias=False 42 | ), 43 | nn.BatchNorm2d(feature_num, 44 | eps=1e-3, momentum=0.01), 45 | nn.ReLU() 46 | )] 47 | 48 | cur_layers.extend([nn.Sequential( 49 | nn.Conv2d( 50 | feature_num, feature_num, kernel_size=3, 51 | stride=1, bias=False, padding=1 52 | ), 53 | nn.BatchNorm2d(feature_num, eps=1e-3, 54 | momentum=0.01), 55 | nn.ReLU() 56 | )]) 57 | self.decoder.append(nn.Sequential(*cur_layers)) 58 | feature_num //= 2 59 | 60 | def forward(self, x): 61 | for i in range(len(self.encoder)): 62 | x = self.encoder[i](x) 63 | 64 | for i in range(len(self.decoder)-1, -1, -1): 65 | x = self.decoder[i](x) 66 | 67 | return x -------------------------------------------------------------------------------- /v2xvit/models/sub_modules/self_attn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | 7 | class ScaledDotProductAttention(nn.Module): 8 | """ 9 | Scaled Dot-Product Attention proposed in "Attention Is All You Need" 10 | Compute the dot products of the query with all keys, divide each by sqrt(dim), 11 | and apply a softmax function to obtain the weights on the values 12 | Args: dim, mask 13 | dim (int): dimention of attention 14 | mask (torch.Tensor): tensor containing indices to be masked 15 | Inputs: query, key, value, mask 16 | - **query** (batch, q_len, d_model): tensor containing projection vector for decoder. 17 | - **key** (batch, k_len, d_model): tensor containing projection vector for encoder. 
18 | - **value** (batch, v_len, d_model): tensor containing features of the encoded input sequence. 19 | - **mask** (-): tensor containing indices to be masked 20 | Returns: context, attn 21 | - **context**: tensor containing the context vector from attention mechanism. 22 | - **attn**: tensor containing the attention (alignment) from the encoder outputs. 23 | """ 24 | 25 | def __init__(self, dim): 26 | super(ScaledDotProductAttention, self).__init__() 27 | self.sqrt_dim = np.sqrt(dim) 28 | 29 | def forward(self, query, key, value): 30 | score = torch.bmm(query, key.transpose(1, 2)) / self.sqrt_dim 31 | attn = F.softmax(score, -1) 32 | context = torch.bmm(attn, value) 33 | return context 34 | 35 | 36 | class AttFusion(nn.Module): 37 | def __init__(self, feature_dim): 38 | super(AttFusion, self).__init__() 39 | self.att = ScaledDotProductAttention(feature_dim) 40 | 41 | def forward(self, x, record_len): 42 | split_x = self.regroup(x, record_len) 43 | batch_size = len(record_len) 44 | C, W, H = split_x[0].shape[1:] 45 | out = [] 46 | for xx in split_x: 47 | cav_num = xx.shape[0] 48 | xx = xx.view(cav_num, C, -1).permute(2, 0, 1) 49 | h = self.att(xx, xx, xx) 50 | h = h.permute(1, 2, 0).view(cav_num, C, W, H)[0, ...].unsqueeze(0) 51 | out.append(h) 52 | return torch.cat(out, dim=0) 53 | 54 | def regroup(self, x, record_len): 55 | cum_sum_len = torch.cumsum(record_len, dim=0) 56 | split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) 57 | return split_x 58 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | logs/ 131 | *.c 132 | *.so 133 | .idea 134 | opv2x 135 | .DS_Store 136 | -------------------------------------------------------------------------------- /v2xvit/models/point_pillar.py: -------------------------------------------------------------------------------- 1 | """ 2 | Vanilla pointpillar for early and late fusion. 3 | """ 4 | import torch.nn as nn 5 | 6 | from v2xvit.models.sub_modules.pillar_vfe import PillarVFE 7 | from v2xvit.models.sub_modules.point_pillar_scatter import PointPillarScatter 8 | from v2xvit.models.sub_modules.base_bev_backbone import BaseBEVBackbone 9 | from v2xvit.models.sub_modules.downsample_conv import DownsampleConv 10 | 11 | 12 | class PointPillar(nn.Module): 13 | def __init__(self, args): 14 | super(PointPillar, self).__init__() 15 | 16 | # PIllar VFE 17 | self.pillar_vfe = PillarVFE(args['pillar_vfe'], 18 | num_point_features=4, 19 | voxel_size=args['voxel_size'], 20 | point_cloud_range=args['lidar_range']) 21 | self.scatter = PointPillarScatter(args['point_pillar_scatter']) 22 | self.backbone = BaseBEVBackbone(args['base_bev_backbone'], 64) 23 | # used to downsample the feature map for efficient computation 24 | self.shrink_flag = False 25 | if 'shrink_header' in args: 26 | self.shrink_flag = True 27 | self.shrink_conv = DownsampleConv(args['shrink_header']) 28 | 29 | self.cls_head = nn.Conv2d(args['cls_head_dim'], args['anchor_number'], 30 | kernel_size=1) 31 | self.reg_head = nn.Conv2d(args['cls_head_dim'], 32 | 7 * args['anchor_number'], 33 | kernel_size=1) 34 | 35 | def forward(self, data_dict): 36 | 37 | voxel_features = data_dict['processed_lidar']['voxel_features'] 38 | voxel_coords = data_dict['processed_lidar']['voxel_coords'] 39 | voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] 40 | 41 | batch_dict = {'voxel_features': voxel_features, 42 | 'voxel_coords': voxel_coords, 43 | 'voxel_num_points': voxel_num_points} 44 | 45 | batch_dict = self.pillar_vfe(batch_dict) 46 | batch_dict = self.scatter(batch_dict) 47 | batch_dict = self.backbone(batch_dict) 48 | 49 | spatial_features_2d = batch_dict['spatial_features_2d'] 50 | if self.shrink_flag: 51 | spatial_features_2d = self.shrink_conv(spatial_features_2d) 52 | 53 | psm = self.cls_head(spatial_features_2d) 54 | rm = self.reg_head(spatial_features_2d) 55 | 56 | output_dict = {'psm': psm, 57 | 'rm': rm} 58 | 59 | return output_dict -------------------------------------------------------------------------------- /v2xvit/models/fuse_modules/self_attn.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Author: Hao Xiang , Runsheng Xu 3 | # License: TDG-Attribution-NonCommercial-NoDistrib 4 | 5 | 6 | import numpy as np 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | 11 | 12 | class ScaledDotProductAttention(nn.Module): 13 | """ 14 | Scaled Dot-Product Attention proposed in "Attention Is All You Need" 
15 | Compute the dot products of the query with all keys, divide each by sqrt(dim), 16 | and apply a softmax function to obtain the weights on the values 17 | Args: dim, mask 18 | dim (int): dimention of attention 19 | mask (torch.Tensor): tensor containing indices to be masked 20 | Inputs: query, key, value, mask 21 | - **query** (batch, q_len, d_model): tensor containing projection 22 | vector for decoder. 23 | - **key** (batch, k_len, d_model): tensor containing projection 24 | vector for encoder. 25 | - **value** (batch, v_len, d_model): tensor containing features of the 26 | encoded input sequence. 27 | - **mask** (-): tensor containing indices to be masked 28 | Returns: context, attn 29 | - **context**: tensor containing the context vector from 30 | attention mechanism. 31 | - **attn**: tensor containing the attention (alignment) from the 32 | encoder outputs. 33 | """ 34 | 35 | def __init__(self, dim): 36 | super(ScaledDotProductAttention, self).__init__() 37 | self.sqrt_dim = np.sqrt(dim) 38 | 39 | def forward(self, query, key, value): 40 | score = torch.bmm(query, key.transpose(1, 2)) / self.sqrt_dim 41 | attn = F.softmax(score, -1) 42 | context = torch.bmm(attn, value) 43 | return context 44 | 45 | 46 | class AttFusion(nn.Module): 47 | def __init__(self, feature_dim): 48 | super(AttFusion, self).__init__() 49 | self.att = ScaledDotProductAttention(feature_dim) 50 | 51 | def forward(self, x, record_len): 52 | split_x = self.regroup(x, record_len) 53 | batch_size = len(record_len) 54 | C, W, H = split_x[0].shape[1:] 55 | out = [] 56 | for xx in split_x: 57 | cav_num = xx.shape[0] 58 | xx = xx.view(cav_num, C, -1).permute(2, 0, 1) 59 | h = self.att(xx, xx, xx) 60 | h = h.permute(1, 2, 0).view(cav_num, C, W, H)[0, ...].unsqueeze(0) 61 | out.append(h) 62 | return torch.cat(out, dim=0) 63 | 64 | def regroup(self, x, record_len): 65 | cum_sum_len = torch.cumsum(record_len, dim=0) 66 | split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) 67 | return split_x 68 | -------------------------------------------------------------------------------- /v2xvit/tools/debug_utils.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import torch 4 | from torch.utils.data import DataLoader 5 | 6 | import v2xvit.hypes_yaml.yaml_utils as yaml_utils 7 | from v2xvit.tools import train_utils 8 | from v2xvit.data_utils.datasets import build_dataset 9 | from v2xvit.visualization import vis_utils 10 | 11 | 12 | def test_parser(): 13 | parser = argparse.ArgumentParser(description="synthetic data generation") 14 | parser.add_argument('--model_dir', type=str, required=True, 15 | help='Continued training path') 16 | parser.add_argument('--fusion_method', type=str, default='late', 17 | help='late, early or intermediate') 18 | opt = parser.parse_args() 19 | return opt 20 | 21 | 22 | def test_bev_post_processing(): 23 | opt = test_parser() 24 | assert opt.fusion_method in ['late', 'early', 'intermediate'] 25 | 26 | hypes = yaml_utils.load_yaml(None, opt) 27 | 28 | print('Dataset Building') 29 | opencood_dataset = build_dataset(hypes, visualize=True, train=False) 30 | data_loader = DataLoader(opencood_dataset, 31 | batch_size=1, 32 | num_workers=0, 33 | collate_fn=opencood_dataset.collate_batch_test, 34 | shuffle=False, 35 | pin_memory=False, 36 | drop_last=False) 37 | 38 | print('Creating Model') 39 | model = train_utils.create_model(hypes) 40 | # we assume gpu is necessary 41 | if torch.cuda.is_available(): 42 | model.cuda() 43 | device = 
torch.device('cuda' if torch.cuda.is_available() else 'cpu') 44 | 45 | print('Loading Model from checkpoint') 46 | saved_path = opt.model_dir 47 | _, model = train_utils.load_saved_model(saved_path, model) 48 | model.eval() 49 | for i, batch_data in enumerate(data_loader): 50 | batch_data = train_utils.to_device(batch_data, device) 51 | label_map = batch_data["ego"]["label_dict"]["label_map"] 52 | output_dict = { 53 | "cls": label_map[:, 0, :, :], 54 | "reg": label_map[:, 1:, :, :] 55 | } 56 | gt_box_tensor, _ = opencood_dataset.post_processor.post_process_debug( 57 | batch_data["ego"], output_dict) 58 | vis_utils.visualize_single_sample_output_bev(gt_box_tensor, 59 | batch_data['ego'][ 60 | 'origin_lidar'].squeeze( 61 | 0), 62 | opencood_dataset) 63 | 64 | 65 | if __name__ == '__main__': 66 | test_bev_post_processing() 67 | -------------------------------------------------------------------------------- /docs/data_annotation_tutorial.md: -------------------------------------------------------------------------------- 1 | ## Data Annotation Introduction 2 | 3 | --- 4 | We save all groundtruth annotations per agent per timestamp in the yaml files. For instance, 5 | `2021_08_24_21_29_28/4805/000069.yaml` refers to the data annotations with the perspective of te 6 | agent 4805 at timestamp 69 in the scenario database `2021_08_24_21_29_28`. Here we go through an example: 7 | 8 | ```yaml 9 | camera0: # parameters for frontal camera 10 | cords: # the x,y,z,roll,yaw,pitch under CARLA map coordinate 11 | - 141.35067749023438 12 | - -388.642578125 13 | - 1.0410505533218384 14 | - 0.07589337974786758 15 | - 174.18048095703125 16 | - 0.20690691471099854 17 | extrinsic: # extrinsic matrix from camera to LiDAR 18 | - - 0.9999999999999999 19 | - -5.1230071481984265e-18 20 | - 9.322129061605055e-20 21 | - -2.999993025731527 22 | - - -2.5011383190939924e-18 23 | - 1.0 24 | - 1.1458579204685086e-19 25 | - -3.934422863949294e-06 26 | - - 2.7713237218713775e-20 27 | - 3.7310309839064755e-20 28 | - 1.0 29 | - 0.8999999040861146 30 | - - 0.0 31 | - 0.0 32 | - 0.0 33 | - 1.0 34 | intrinsic: # camera intrinsic matrix 35 | - - 335.639852470912 36 | - 0.0 37 | - 400.0 38 | - - 0.0 39 | - 335.639852470912 40 | - 300.0 41 | - - 0.0 42 | - 0.0 43 | - 1.0 44 | camera1: ... # params of right rear camera 45 | camera2: ... # params of left rear camera 46 | canera3: ... # params of back camera 47 | ego_speed: 18.13 # agent's current speed, km/h 48 | lidar_pose: # LiDAR pose under CARLA map coordinate system 49 | - 144.33 50 | - -388.94 51 | - 1.93 52 | - 0.078 53 | - 174.18 54 | - 0.21 55 | plan_trajectory: # agent's planning trajectory 56 | - - 140. 57 | - -388 58 | - 87 59 | predicted_ego_pos: # agent's localization (x,y,z,roll,yaw,pitch) gained from GPS 60 | - 143.78 61 | - -388.94 62 | - 0.036 63 | - 0.080 64 | - -185.95 65 | - 0.18 66 | true_ego_pos: # agent's true localization 67 | - 143.83 68 | - -388.89 69 | - 0.032 70 | - 0.075 71 | - 174.18 72 | - 0.21 73 | vehicles: # the surrounding vehicles that have at least one LiDAR point hit from the agent 74 | 4796: # the id of the vehicle (i.e. 
object) 75 | angle: # roll, yaw, pitch under CARLA map coordinate system 76 | - 0.096 77 | - -177.86 78 | - 0.197 79 | center: # the relative position from bounding box center to the frontal axis of this vehicle 80 | - 0.0004 81 | - 0.0005 82 | - 0.71 83 | extent: # half length, width and height of the vehicle in meter 84 | - 2.45 85 | - 1.06 86 | - 0.75 87 | location: # x, y ,z position of the center in the frontal axis of the vehicle under CARLA map coordinate system 88 | - 158.55 89 | - -385.75 90 | - 0.032 91 | speed: 19.47 # vehicle's speed 92 | 4880: ... 93 | ``` 94 | 95 | -------------------------------------------------------------------------------- /v2xvit/data_utils/augmentor/augment_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from v2xvit.utils import common_utils 4 | 5 | 6 | def random_flip_along_x(gt_boxes, points): 7 | """ 8 | Args: 9 | gt_boxes: (N, 7 + C), [x, y, z, dx, dy, dz, heading, [vx], [vy]] 10 | points: (M, 3 + C) 11 | Returns: 12 | """ 13 | enable = np.random.choice([False, True], replace=False, p=[0.5, 0.5]) 14 | if enable: 15 | gt_boxes[:, 1] = -gt_boxes[:, 1] 16 | gt_boxes[:, 6] = -gt_boxes[:, 6] 17 | points[:, 1] = -points[:, 1] 18 | 19 | if gt_boxes.shape[1] > 7: 20 | gt_boxes[:, 8] = -gt_boxes[:, 8] 21 | 22 | return gt_boxes, points 23 | 24 | 25 | def random_flip_along_y(gt_boxes, points): 26 | """ 27 | Args: 28 | gt_boxes: (N, 7 + C), [x, y, z, dx, dy, dz, heading, [vx], [vy]] 29 | points: (M, 3 + C) 30 | Returns: 31 | """ 32 | enable = np.random.choice([False, True], replace=False, p=[0.5, 0.5]) 33 | if enable: 34 | gt_boxes[:, 0] = -gt_boxes[:, 0] 35 | gt_boxes[:, 6] = -(gt_boxes[:, 6] + np.pi) 36 | points[:, 0] = -points[:, 0] 37 | 38 | if gt_boxes.shape[1] > 7: 39 | gt_boxes[:, 7] = -gt_boxes[:, 7] 40 | 41 | return gt_boxes, points 42 | 43 | 44 | def global_rotation(gt_boxes, points, rot_range): 45 | """ 46 | Args: 47 | gt_boxes: (N, 7 + C), [x, y, z, dx, dy, dz, heading, [vx], [vy]] 48 | points: (M, 3 + C), 49 | rot_range: [min, max] 50 | Returns: 51 | """ 52 | noise_rotation = np.random.uniform(rot_range[0], 53 | rot_range[1]) 54 | points = common_utils.rotate_points_along_z(points[np.newaxis, :, :], 55 | np.array([noise_rotation]))[0] 56 | 57 | gt_boxes[:, 0:3] = \ 58 | common_utils.rotate_points_along_z(gt_boxes[np.newaxis, :, 0:3], 59 | np.array([noise_rotation]))[0] 60 | gt_boxes[:, 6] += noise_rotation 61 | 62 | if gt_boxes.shape[1] > 7: 63 | gt_boxes[:, 7:9] = common_utils.rotate_points_along_z( 64 | np.hstack((gt_boxes[:, 7:9], np.zeros((gt_boxes.shape[0], 1))))[ 65 | np.newaxis, :, :], 66 | np.array([noise_rotation]))[0][:, 0:2] 67 | 68 | return gt_boxes, points 69 | 70 | 71 | def global_scaling(gt_boxes, points, scale_range): 72 | """ 73 | Args: 74 | gt_boxes: (N, 7), [x, y, z, dx, dy, dz, heading] 75 | points: (M, 3 + C), 76 | scale_range: [min, max] 77 | Returns: 78 | """ 79 | if scale_range[1] - scale_range[0] < 1e-3: 80 | return gt_boxes, points 81 | noise_scale = np.random.uniform(scale_range[0], scale_range[1]) 82 | points[:, :3] *= noise_scale 83 | gt_boxes[:, :6] *= noise_scale 84 | 85 | return gt_boxes, points 86 | -------------------------------------------------------------------------------- /v2xvit/utils/transformation_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Transformation utils 3 | """ 4 | 5 | import numpy as np 6 | 7 | 8 | def x_to_world(pose): 9 | """ 10 | The transformation matrix from 
x-coordinate system to carla world system 11 | 12 | Parameters 13 | ---------- 14 | pose : list 15 | [x, y, z, roll, yaw, pitch] 16 | 17 | Returns 18 | ------- 19 | matrix : np.ndarray 20 | The transformation matrix. 21 | """ 22 | x, y, z, roll, yaw, pitch = pose[:] 23 | 24 | # used for rotation matrix 25 | c_y = np.cos(np.radians(yaw)) 26 | s_y = np.sin(np.radians(yaw)) 27 | c_r = np.cos(np.radians(roll)) 28 | s_r = np.sin(np.radians(roll)) 29 | c_p = np.cos(np.radians(pitch)) 30 | s_p = np.sin(np.radians(pitch)) 31 | 32 | matrix = np.identity(4) 33 | # translation matrix 34 | matrix[0, 3] = x 35 | matrix[1, 3] = y 36 | matrix[2, 3] = z 37 | 38 | # rotation matrix 39 | matrix[0, 0] = c_p * c_y 40 | matrix[0, 1] = c_y * s_p * s_r - s_y * c_r 41 | matrix[0, 2] = -c_y * s_p * c_r - s_y * s_r 42 | matrix[1, 0] = s_y * c_p 43 | matrix[1, 1] = s_y * s_p * s_r + c_y * c_r 44 | matrix[1, 2] = -s_y * s_p * c_r + c_y * s_r 45 | matrix[2, 0] = s_p 46 | matrix[2, 1] = -c_p * s_r 47 | matrix[2, 2] = c_p * c_r 48 | 49 | return matrix 50 | 51 | 52 | def x1_to_x2(x1, x2): 53 | """ 54 | Transformation matrix from x1 to x2. 55 | 56 | Parameters 57 | ---------- 58 | x1 : list 59 | The pose of x1 under world coordinates. 60 | x2 : list 61 | The pose of x2 under world coordinates. 62 | 63 | Returns 64 | ------- 65 | transformation_matrix : np.ndarray 66 | The transformation matrix. 67 | 68 | """ 69 | x1_to_world = x_to_world(x1) 70 | x2_to_world = x_to_world(x2) 71 | world_to_x2 = np.linalg.inv(x2_to_world) 72 | 73 | transformation_matrix = np.dot(world_to_x2, x1_to_world) 74 | return transformation_matrix 75 | 76 | 77 | def dist_to_continuous(p_dist, displacement_dist, res, downsample_rate): 78 | """ 79 | Convert points discretized format to continuous space for BEV representation. 80 | Parameters 81 | ---------- 82 | p_dist : numpy.array 83 | Points in discretized coorindates. 84 | 85 | displacement_dist : numpy.array 86 | Discretized coordinates of bottom left origin. 87 | 88 | res : float 89 | Discretization resolution. 90 | 91 | downsample_rate : int 92 | Dowmsamping rate. 93 | 94 | Returns 95 | ------- 96 | p_continuous : numpy.array 97 | Points in continuous coorindates. 98 | 99 | """ 100 | p_dist = np.copy(p_dist) 101 | p_dist = p_dist + displacement_dist 102 | p_continuous = p_dist * res * downsample_rate 103 | return p_continuous 104 | -------------------------------------------------------------------------------- /v2xvit/tools/inference_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import OrderedDict 3 | 4 | import numpy as np 5 | import torch 6 | 7 | from v2xvit.utils.common_utils import torch_tensor_to_numpy 8 | 9 | 10 | def inference_late_fusion(batch_data, model, dataset): 11 | """ 12 | Model inference for late fusion. 13 | 14 | Parameters 15 | ---------- 16 | batch_data : dict 17 | model : opencood.object 18 | dataset : opencood.LateFusionDataset 19 | 20 | Returns 21 | ------- 22 | pred_box_tensor : torch.Tensor 23 | The tensor of prediction bounding box after NMS. 24 | gt_box_tensor : torch.Tensor 25 | The tensor of gt bounding box. 
26 | """ 27 | output_dict = OrderedDict() 28 | 29 | for cav_id, cav_content in batch_data.items(): 30 | output_dict[cav_id] = model(cav_content) 31 | 32 | pred_box_tensor, pred_score, gt_box_tensor = \ 33 | dataset.post_process(batch_data, 34 | output_dict) 35 | 36 | return pred_box_tensor, pred_score, gt_box_tensor 37 | 38 | 39 | def inference_early_fusion(batch_data, model, dataset): 40 | """ 41 | Model inference for early fusion. 42 | 43 | Parameters 44 | ---------- 45 | batch_data : dict 46 | model : opencood.object 47 | dataset : opencood.EarlyFusionDataset 48 | 49 | Returns 50 | ------- 51 | pred_box_tensor : torch.Tensor 52 | The tensor of prediction bounding box after NMS. 53 | gt_box_tensor : torch.Tensor 54 | The tensor of gt bounding box. 55 | """ 56 | output_dict = OrderedDict() 57 | cav_content = batch_data['ego'] 58 | 59 | output_dict['ego'] = model(cav_content) 60 | 61 | pred_box_tensor, pred_score, gt_box_tensor = \ 62 | dataset.post_process(batch_data, 63 | output_dict) 64 | 65 | return pred_box_tensor, pred_score, gt_box_tensor 66 | 67 | 68 | def inference_intermediate_fusion(batch_data, model, dataset): 69 | """ 70 | Model inference for early fusion. 71 | 72 | Parameters 73 | ---------- 74 | batch_data : dict 75 | model : opencood.object 76 | dataset : opencood.EarlyFusionDataset 77 | 78 | Returns 79 | ------- 80 | pred_box_tensor : torch.Tensor 81 | The tensor of prediction bounding box after NMS. 82 | gt_box_tensor : torch.Tensor 83 | The tensor of gt bounding box. 84 | """ 85 | return inference_early_fusion(batch_data, model, dataset) 86 | 87 | 88 | def save_prediction_gt(pred_tensor, gt_tensor, pcd, timestamp, save_path): 89 | """ 90 | Save prediction and gt tensor to txt file. 91 | """ 92 | pred_np = torch_tensor_to_numpy(pred_tensor) 93 | gt_np = torch_tensor_to_numpy(gt_tensor) 94 | pcd_np = torch_tensor_to_numpy(pcd) 95 | 96 | np.save(os.path.join(save_path, '%04d_pcd.npy' % timestamp), pcd_np) 97 | np.save(os.path.join(save_path, '%04d_pred.npy' % timestamp), pred_np) 98 | np.save(os.path.join(save_path, '%04d_gt.npy' % timestamp), gt_np) 99 | -------------------------------------------------------------------------------- /v2xvit/hypes_yaml/point_pillar_early_fusion.yaml: -------------------------------------------------------------------------------- 1 | name: point_pillar_early_fusion 2 | root_dir: '/data/opv2v/train' 3 | validate_dir: '/data/opv2v/validate' 4 | yaml_parser: "load_point_pillar_params" 5 | 6 | wild_setting: 7 | async: false 8 | async_overhead: 60 9 | seed: 20 10 | loc_err: false 11 | xyz_std: 0.2 12 | ryp_std: 0.2 13 | 14 | train_params: 15 | batch_size: &batch_size 4 16 | epoches: 22 17 | eval_freq: 1 18 | save_freq: 1 19 | 20 | fusion: 21 | core_method: 'EarlyFusionDataset' # LateFusionDataset, EarlyFusionDataset, IntermediateFusionDataset supported 22 | args: [] 23 | 24 | # preprocess-related 25 | preprocess: 26 | # options: BasePreprocessor, VoxelPreprocessor, BevPreprocessor 27 | core_method: 'SpVoxelPreprocessor' 28 | args: 29 | voxel_size: &voxel_size [0.4, 0.4, 4] 30 | max_points_per_voxel: 32 31 | max_voxel_train: 32000 32 | max_voxel_test: 70000 33 | # lidar range for each individual cav. 
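# Format: [x_min, y_min, z_min, x_max, y_max, z_max] in meters ("xyzxyz minmax", as spelled out in point_pillar_when2com.yaml).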
34 | cav_lidar_range: &cav_lidar [-140.8, -40, -3, 140.8, 40, 1] 35 | 36 | data_augment: 37 | - NAME: random_world_flip 38 | ALONG_AXIS_LIST: [ 'x' ] 39 | 40 | - NAME: random_world_rotation 41 | WORLD_ROT_ANGLE: [ -0.78539816, 0.78539816 ] 42 | 43 | - NAME: random_world_scaling 44 | WORLD_SCALE_RANGE: [ 0.95, 1.05 ] 45 | 46 | # anchor box related 47 | postprocess: 48 | core_method: 'VoxelPostprocessor' # VoxelPostprocessor, BevPostprocessor supported 49 | anchor_args: 50 | cav_lidar_range: *cav_lidar 51 | l: 3.9 52 | w: 1.6 53 | h: 1.56 54 | r: [0, 90] 55 | num: &achor_num 2 56 | feature_stride: 4 57 | target_args: 58 | pos_threshold: 0.6 59 | neg_threshold: 0.45 60 | score_threshold: 0.20 61 | order: 'hwl' # hwl or lwh 62 | max_num: 100 # maximum number of objects in a single frame. use this number to make sure different frames has the same dimension in the same batch 63 | nms_thresh: 0.15 64 | 65 | # model related 66 | model: 67 | core_method: point_pillar 68 | args: 69 | voxel_size: *voxel_size 70 | lidar_range: *cav_lidar 71 | anchor_number: *achor_num 72 | pillar_vfe: 73 | use_norm: true 74 | with_distance: false 75 | use_absolute_xyz: true 76 | num_filters: [64] 77 | point_pillar_scatter: 78 | num_features: 64 79 | 80 | base_bev_backbone: 81 | layer_nums: [3, 5, 8] 82 | layer_strides: [2, 2, 2] 83 | num_filters: [64, 128, 256] 84 | upsample_strides: [1, 2, 4] 85 | num_upsample_filter: [128, 128, 128] 86 | 87 | shrink_header: 88 | kernal_size: [ 3 ] 89 | stride: [ 2 ] 90 | padding: [ 1 ] 91 | dim: [ 256 ] 92 | input_dim: 384 # 128 * 3 93 | 94 | cls_head_dim: 256 95 | 96 | anchor_num: *achor_num 97 | 98 | loss: 99 | core_method: point_pillar_loss 100 | args: 101 | cls_weight: 1.0 102 | reg: 2.0 103 | 104 | optimizer: 105 | core_method: Adam 106 | lr: 0.002 107 | args: 108 | eps: 1e-10 109 | weight_decay: 1e-4 110 | 111 | lr_scheduler: 112 | core_method: multistep #step, multistep and Exponential support 113 | gamma: 0.1 114 | step_size: [20, 30] 115 | 116 | -------------------------------------------------------------------------------- /docs/data_intro.md: -------------------------------------------------------------------------------- 1 | ## Data Introduction 2 | 3 | --- 4 | 5 | V2XSet data is structured as following: 6 | 7 | ```sh 8 | V2XSet 9 | ├── train # data for training 10 | │ ├── 2021_08_22_21_41_24 # scenario folder 11 | │ ├── data_protocol.yaml # the simulation parameters used to collect the data in Carla 12 | │ └── -1 # The infra's id 13 | │ └── 00000.pcd - 00700.pcd # the point clouds data from timestamp 0 to 700 14 | │ ├── 00000.yaml - 00700.yaml # corresponding metadata for each timestamp 15 | │ ├── 00000_camera0.png - 00700_camera0.png # frontal camera images 16 | │ ├── 00000_camera1.png - 00700_camera1.png # right rear camera images 17 | │ ├── 00000_camera2.png - 00700_camera2.png # left rear camera images 18 | │ └── 00000_camera3.png - 00700_camera3.png # back camera images 19 | | └── 112 # The connected vehicle id 20 | ├── validate 21 | ├── test 22 | ``` 23 | 24 | ### 1. Data Split 25 | OPV2V dataset can be divided into 4 different folders: `train`, `validation`, `test` 26 | - `train`: contains all training data 27 | - `validate`: used for validation during training 28 | - `test`: test set 29 | 30 | ### 2. Scenario Database 31 | V2XSet has 58 scenarios in total, where each of them contains data stream from different agents across different timestamps. 32 | Each scenario is named by the time it was gathered, e.g., `2021_08_22_21_41_24`. 33 | 34 | ### 3. 
Agent Contents 35 | Under each scenario folder, the data of every intelligent agent~(i.e. infrastructure or connected automated vehicle) appearing in the current scenario is saved in different folders. Each folder is named by the agent's unique id, e.g., 1732. A negative id means infrastructure. 36 | 37 | In each agent folder, data across different timestamps will be saved. Those timestamps are represented by five digits integers 38 | as the prefix of the filenames (e.g., 00700.pcd). There are three types of files inside the agent folders: LiDAR point clouds (`.pcd` files), camera images (`.png` files), and metadata (`.yaml` files). 39 | 40 | #### 3.1 Lidar point cloud 41 | The LiDAR data is saved with Open3d package and has a postfix ".pcd" in the name. 42 | 43 | #### 3.2 Camera images 44 | Each CAV and Infra is equipped with 4 RGB cameras to capture the 360 degree of view of the surrounding scene.`camera0`, `camera1`, `camera2`, and `camera3` represent the front, right rear, left rear, and back cameras respectively. 45 | 46 | #### 3.3 Data Annotation 47 | All the metadata is saved in yaml files. It records the following important information at the current timestamp: 48 | - **ego information**: Current ego pose with and without GPS noise under Carla world coordinates, ego speed in km/h, the LiDAR pose, and future planning trajectories. 49 | - **calibration**: The intrinsic matrix and extrinsic matrix from each camera to the LiDAR sensor. 50 | - **objects annotation**: The pose and velocity of each surrounding human driving vehicle that has at least one point hit by the agent's LiDAR sensor. See [data annotation section](data_annotation_tutorial.md) for more details. 51 | 52 | ### 4. Data Collection Protocol 53 | Besides agent contents, every scenario database also has a yaml file named `data_protocol.yaml`. 54 | This yaml file records the simulation configuration to collect the current scenario. 55 | 56 | -------------------------------------------------------------------------------- /v2xvit/hypes_yaml/point_pillar_late_fusion.yaml: -------------------------------------------------------------------------------- 1 | name: point_pillar_late_fusion 2 | root_dir: '/data/opv2v/train' 3 | validate_dir: '/data/opv2v/validate' 4 | yaml_parser: "load_point_pillar_params" 5 | 6 | wild_setting: 7 | async: false 8 | async_overhead: 100 9 | seed: 20 10 | loc_err: false 11 | xyz_std: 0.2 12 | ryp_std: 0.2 13 | 14 | noise_setting: 15 | add_noise: false 16 | args: 17 | pos_std: 0 18 | rot_std: 0 19 | pos_mean: 0 20 | rot_mean: 0 21 | 22 | 23 | train_params: 24 | batch_size: &batch_size 8 25 | epoches: 25 26 | eval_freq: 1 27 | save_freq: 1 28 | 29 | fusion: 30 | core_method: 'LateFusionDataset' # LateFusionDataset, EarlyFusionDataset, IntermediateFusionDataset supported 31 | args: [] 32 | 33 | # preprocess-related 34 | preprocess: 35 | # options: BasePreprocessor, VoxelPreprocessor, BevPreprocessor 36 | core_method: 'SpVoxelPreprocessor' 37 | args: 38 | voxel_size: &voxel_size [0.4, 0.4, 4] 39 | max_points_per_voxel: 32 40 | max_voxel_train: 16000 41 | max_voxel_test: 40000 42 | # lidar range for each individual cav. 
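# Note: late fusion runs the detector on each CAV independently (see inference_late_fusion), which is presumably why this config uses a +/-70.4 m x-range rather than the +/-140.8 m of the cooperative-fusion configs.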
43 | cav_lidar_range: &cav_lidar [-70.4, -40, -3, 70.4, 40, 1] 44 | 45 | data_augment: 46 | - NAME: random_world_flip 47 | ALONG_AXIS_LIST: [ 'x' ] 48 | 49 | - NAME: random_world_rotation 50 | WORLD_ROT_ANGLE: [ -0.78539816, 0.78539816 ] 51 | 52 | - NAME: random_world_scaling 53 | WORLD_SCALE_RANGE: [ 0.95, 1.05 ] 54 | 55 | # anchor box related 56 | postprocess: 57 | core_method: 'VoxelPostprocessor' # VoxelPostprocessor, BevPostprocessor supported 58 | anchor_args: 59 | cav_lidar_range: *cav_lidar 60 | l: 3.9 61 | w: 1.6 62 | h: 1.56 63 | r: [0, 90] 64 | feature_stride: 4 65 | num: &achor_num 2 66 | target_args: 67 | pos_threshold: 0.6 68 | neg_threshold: 0.45 69 | score_threshold: 0.20 70 | order: 'hwl' # hwl or lwh 71 | max_num: 100 # maximum number of objects in a single frame. use this number to make sure different frames has the same dimension in the same batch 72 | nms_thresh: 0.15 73 | 74 | # model related 75 | model: 76 | core_method: point_pillar 77 | args: 78 | voxel_size: *voxel_size 79 | lidar_range: *cav_lidar 80 | anchor_number: *achor_num 81 | pillar_vfe: 82 | use_norm: true 83 | with_distance: false 84 | use_absolute_xyz: true 85 | num_filters: [64] 86 | point_pillar_scatter: 87 | num_features: 64 88 | 89 | base_bev_backbone: 90 | layer_nums: [3, 5, 8] 91 | layer_strides: [2, 2, 2] 92 | num_filters: [64, 128, 256] 93 | upsample_strides: [1, 2, 4] 94 | num_upsample_filter: [128, 128, 128] 95 | 96 | shrink_header: 97 | kernal_size: [ 3 ] 98 | stride: [ 2 ] 99 | padding: [ 1 ] 100 | dim: [ 256 ] 101 | input_dim: 384 # 128 * 3 102 | 103 | cls_head_dim: 256 104 | 105 | anchor_num: *achor_num 106 | 107 | loss: 108 | core_method: point_pillar_loss 109 | args: 110 | cls_weight: 1.0 111 | reg: 2.0 112 | 113 | optimizer: 114 | core_method: Adam 115 | lr: 0.002 116 | args: 117 | eps: 1e-10 118 | weight_decay: 1e-4 119 | 120 | lr_scheduler: 121 | core_method: multistep #step, multistep and Exponential support 122 | gamma: 0.1 123 | step_size: [20, 30] 124 | 125 | -------------------------------------------------------------------------------- /v2xvit/loss/voxel_net_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class VoxelNetLoss(nn.Module): 7 | def __init__(self, args): 8 | super(VoxelNetLoss, self).__init__() 9 | self.smoothl1loss = nn.SmoothL1Loss(size_average=False) 10 | self.alpha = args['alpha'] 11 | self.beta = args['beta'] 12 | self.reg_coe = args['reg'] 13 | self.loss_dict = {} 14 | 15 | def forward(self, output_dict, target_dict): 16 | """ 17 | Parameters 18 | ---------- 19 | output_dict : dict 20 | target_dict : dict 21 | """ 22 | rm = output_dict['rm'] 23 | psm = output_dict['psm'] 24 | 25 | pos_equal_one = target_dict['pos_equal_one'] 26 | neg_equal_one = target_dict['neg_equal_one'] 27 | targets = target_dict['targets'] 28 | 29 | p_pos = F.sigmoid(psm.permute(0, 2, 3, 1)) 30 | rm = rm.permute(0, 2, 3, 1).contiguous() 31 | rm = rm.view(rm.size(0), rm.size(1), rm.size(2), -1, 7) 32 | targets = targets.view(targets.size(0), targets.size(1), 33 | targets.size(2), -1, 7) 34 | pos_equal_one_for_reg = pos_equal_one.unsqueeze( 35 | pos_equal_one.dim()).expand(-1, -1, -1, -1, 7) 36 | 37 | rm_pos = rm * pos_equal_one_for_reg 38 | targets_pos = targets * pos_equal_one_for_reg 39 | 40 | cls_pos_loss = -pos_equal_one * torch.log(p_pos + 1e-6) 41 | cls_pos_loss = cls_pos_loss.sum() / (pos_equal_one.sum() + 1e-6) 42 | 43 | cls_neg_loss = -neg_equal_one * 
torch.log(1 - p_pos + 1e-6) 44 | cls_neg_loss = cls_neg_loss.sum() / (neg_equal_one.sum() + 1e-6) 45 | 46 | reg_loss = self.smoothl1loss(rm_pos, targets_pos) 47 | reg_loss = reg_loss / (pos_equal_one.sum() + 1e-6) 48 | conf_loss = self.alpha * cls_pos_loss + self.beta * cls_neg_loss 49 | 50 | total_loss = self.reg_coe * reg_loss + conf_loss 51 | 52 | self.loss_dict.update({'total_loss': total_loss, 53 | 'reg_loss': reg_loss, 54 | 'conf_loss': conf_loss}) 55 | 56 | return total_loss 57 | 58 | def logging(self, epoch, batch_id, batch_len, writer): 59 | """ 60 | Print out the loss function for current iteration. 61 | 62 | Parameters 63 | ---------- 64 | epoch : int 65 | Current epoch for training. 66 | batch_id : int 67 | The current batch. 68 | batch_len : int 69 | Total batch length in one iteration of training, 70 | writer : SummaryWriter 71 | Used to visualize on tensorboard 72 | """ 73 | total_loss = self.loss_dict['total_loss'] 74 | reg_loss = self.loss_dict['reg_loss'] 75 | conf_loss = self.loss_dict['conf_loss'] 76 | 77 | print("[epoch %d][%d/%d], || Loss: %.4f || Conf Loss: %.4f" 78 | " || Loc Loss: %.4f" % ( 79 | epoch, batch_id + 1, batch_len, 80 | total_loss.item(), conf_loss.item(), reg_loss.item())) 81 | 82 | writer.add_scalar('Regression_loss', reg_loss.item(), 83 | epoch*batch_len + batch_id) 84 | writer.add_scalar('Confidence_loss', conf_loss.item(), 85 | epoch*batch_len + batch_id) 86 | -------------------------------------------------------------------------------- /v2xvit/hypes_yaml/point_pillar_cobevt.yaml: -------------------------------------------------------------------------------- 1 | name: corpbevtlidar 2 | root_dir: '/data/opv2v/train' 3 | validate_dir: '/data/opv2v/validate' 4 | 5 | wild_setting: 6 | async: false 7 | async_overhead: 100 8 | seed: 20 9 | loc_err: false 10 | xyz_std: 0.2 11 | ryp_std: 0.2 12 | data_size: 1.06 # Mb!! 13 | transmission_speed: 27 # Mbps!! 14 | backbone_delay: 10 # ms 15 | 16 | yaml_parser: "load_point_pillar_params" 17 | train_params: 18 | batch_size: &batch_size 2 19 | epoches: &epoches 90 20 | eval_freq: 2 21 | save_freq: 1 22 | max_cav: &max_cav 5 23 | 24 | fusion: 25 | core_method: 'IntermediateFusionDataset' # LateFusionDataset, EarlyFusionDataset, IntermediateFusionDataset supported 26 | args: 27 | cur_ego_pose_flag: true 28 | 29 | # preprocess-related 30 | preprocess: 31 | # options: BasePreprocessor, VoxelPreprocessor, BevPreprocessor 32 | core_method: 'SpVoxelPreprocessor' 33 | args: 34 | voxel_size: &voxel_size [0.4, 0.4, 4] 35 | max_points_per_voxel: 32 36 | max_voxel_train: 32000 37 | max_voxel_test: 70000 38 | # lidar range for each individual cav. 39 | cav_lidar_range: &cav_lidar [-140.8, -38.4, -3, 140.8, 38.4, 1] 40 | 41 | data_augment: 42 | - NAME: random_world_flip 43 | ALONG_AXIS_LIST: [ 'x' ] 44 | 45 | - NAME: random_world_rotation 46 | WORLD_ROT_ANGLE: [ -0.78539816, 0.78539816 ] 47 | 48 | - NAME: random_world_scaling 49 | WORLD_SCALE_RANGE: [ 0.95, 1.05 ] 50 | 51 | # anchor box related 52 | postprocess: 53 | core_method: 'VoxelPostprocessor' # VoxelPostprocessor, BevPostprocessor supported 54 | anchor_args: 55 | cav_lidar_range: *cav_lidar 56 | l: 3.9 57 | w: 1.6 58 | h: 1.56 59 | r: [0, 90] 60 | feature_stride: 4 61 | num: &achor_num 2 62 | target_args: 63 | pos_threshold: 0.6 64 | neg_threshold: 0.45 65 | score_threshold: 0.20 66 | order: 'hwl' # hwl or lwh 67 | max_num: 100 # maximum number of objects in a single frame. 
use this number to make sure different frames has the same dimension in the same batch 68 | nms_thresh: 0.15 69 | 70 | # model related 71 | model: 72 | core_method: point_pillar_cobevt 73 | args: 74 | voxel_size: *voxel_size 75 | lidar_range: *cav_lidar 76 | anchor_number: *achor_num 77 | max_cav: *max_cav 78 | compression: 0 # compression rate 79 | backbone_fix: false 80 | 81 | pillar_vfe: 82 | use_norm: true 83 | with_distance: false 84 | use_absolute_xyz: true 85 | num_filters: [64] 86 | point_pillar_scatter: 87 | num_features: 64 88 | 89 | base_bev_backbone: 90 | layer_nums: [3, 5, 8] 91 | layer_strides: [2, 2, 2] 92 | num_filters: [64, 128, 256] 93 | upsample_strides: [1, 2, 4] 94 | num_upsample_filter: [128, 128, 128] 95 | shrink_header: 96 | kernal_size: [3] 97 | stride: [2] 98 | padding: [1] 99 | dim: [256] 100 | input_dim: 384 # 128 * 3 101 | 102 | fax_fusion: 103 | input_dim: 256 104 | mlp_dim: 256 105 | agent_size: *max_cav 106 | window_size: 4 107 | dim_head: 32 108 | drop_out: 0.1 109 | depth: 3 110 | mask: true 111 | 112 | 113 | # add decoder later 114 | 115 | loss: 116 | core_method: point_pillar_loss 117 | args: 118 | cls_weight: 1.0 119 | reg: 2.0 120 | 121 | optimizer: 122 | core_method: Adam 123 | lr: 0.001 124 | args: 125 | eps: 1e-10 126 | weight_decay: 1e-4 127 | 128 | lr_scheduler: 129 | core_method: multistep #step, multistep and Exponential support 130 | gamma: 0.1 131 | step_size: [15, 50] 132 | 133 | -------------------------------------------------------------------------------- /v2xvit/hypes_yaml/point_pillar_fcooper.yaml: -------------------------------------------------------------------------------- 1 | name: point_pillar_fcooper 2 | root_dir: 'v2xset/train' 3 | validate_dir: 'v2xset/validate' 4 | wild_setting: 5 | async: false 6 | async_overhead: 100 7 | seed: 20 8 | loc_err: false 9 | xyz_std: 0.2 10 | ryp_std: 0.2 11 | data_size: 1.06 # Mb!! 12 | transmission_speed: 27 # Mbps!! 13 | backbone_delay: 10 # ms 14 | 15 | yaml_parser: "load_point_pillar_params" 16 | train_params: 17 | batch_size: &batch_size 4 18 | epoches: 60 19 | eval_freq: 1 20 | save_freq: 1 21 | max_cav: &max_cav 5 22 | 23 | fusion: 24 | core_method: 'IntermediateFusionDataset' # LateFusionDataset, EarlyFusionDataset, IntermediateFusionDataset supported 25 | args: 26 | cur_ego_pose_flag: True 27 | # when the cur_ego_pose_flag is set to True, there is no time gap 28 | # between the time when the LiDAR data is captured by connected 29 | # agents and when the extracted features are received by 30 | # the ego vehicle, which is equal to implement STCM. When set to False, 31 | # STCM has to be used. 32 | 33 | 34 | # preprocess-related 35 | preprocess: 36 | # options: BasePreprocessor, VoxelPreprocessor, BevPreprocessor 37 | core_method: 'SpVoxelPreprocessor' 38 | args: 39 | voxel_size: &voxel_size [0.4, 0.4, 4] 40 | max_points_per_voxel: 32 41 | max_voxel_train: 32000 42 | max_voxel_test: 70000 43 | # lidar range for each individual cav. 
44 | cav_lidar_range: &cav_lidar [-140.8, -38.4, -3, 140.8, 38.4, 1] 45 | 46 | data_augment: 47 | - NAME: random_world_flip 48 | ALONG_AXIS_LIST: [ 'x' ] 49 | 50 | - NAME: random_world_rotation 51 | WORLD_ROT_ANGLE: [ -0.78539816, 0.78539816 ] 52 | 53 | - NAME: random_world_scaling 54 | WORLD_SCALE_RANGE: [ 0.95, 1.05 ] 55 | 56 | # anchor box related 57 | postprocess: 58 | core_method: 'VoxelPostprocessor' # VoxelPostprocessor, BevPostprocessor supported 59 | anchor_args: 60 | cav_lidar_range: *cav_lidar 61 | l: 3.9 62 | w: 1.6 63 | h: 1.56 64 | r: [0, 90] 65 | feature_stride: 4 66 | num: &achor_num 2 67 | target_args: 68 | pos_threshold: 0.6 69 | neg_threshold: 0.45 70 | score_threshold: 0.20 71 | order: 'hwl' # hwl or lwh 72 | max_num: 100 # maximum number of objects in a single frame. use this number to make sure different frames has the same dimension in the same batch 73 | nms_thresh: 0.15 74 | 75 | # model related 76 | model: 77 | core_method: point_pillar_fcooper 78 | args: 79 | voxel_size: *voxel_size 80 | lidar_range: *cav_lidar 81 | anchor_number: *achor_num 82 | max_cav: *max_cav 83 | compression: 0 # compression rate 84 | backbone_fix: false 85 | 86 | pillar_vfe: 87 | use_norm: true 88 | with_distance: false 89 | use_absolute_xyz: true 90 | num_filters: [64] 91 | point_pillar_scatter: 92 | num_features: 64 93 | 94 | base_bev_backbone: 95 | layer_nums: [3, 5, 8] 96 | layer_strides: [2, 2, 2] 97 | num_filters: [64, 128, 256] 98 | upsample_strides: [1, 2, 4] 99 | num_upsample_filter: [128, 128, 128] 100 | shrink_header: 101 | kernal_size: [3] 102 | stride: [2] 103 | padding: [1] 104 | dim: [256] 105 | input_dim: 384 # 128 * 3 106 | 107 | # add decoder later 108 | 109 | loss: 110 | core_method: point_pillar_loss 111 | args: 112 | cls_weight: 1.0 113 | reg: 2.0 114 | 115 | optimizer: 116 | core_method: Adam 117 | lr: 0.001 118 | args: 119 | eps: 1e-10 120 | weight_decay: 1e-4 121 | 122 | lr_scheduler: 123 | core_method: multistep #step, multistep and Exponential support 124 | gamma: 0.1 125 | step_size: [15, 50] 126 | 127 | -------------------------------------------------------------------------------- /v2xvit/models/sub_modules/psm_mask.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | 6 | class Communication(nn.Module): 7 | def __init__(self, args): 8 | super(Communication, self).__init__() 9 | 10 | self.smooth = False 11 | self.thre = args['thre'] 12 | # if 'gaussian_smooth' in args: 13 | # # Gaussian Smooth 14 | # self.smooth = True 15 | # kernel_size = args['gaussian_smooth']['k_size'] 16 | # c_sigma = args['gaussian_smooth']['c_sigma'] 17 | # self.gaussian_filter = nn.Conv2d(1, 1, kernel_size=kernel_size, 18 | # stride=1, 19 | # padding=(kernel_size - 1) // 2) 20 | # self.init_gaussian_filter(kernel_size, c_sigma) 21 | # self.gaussian_filter.requires_grad = False 22 | 23 | def init_gaussian_filter(self, k_size=5, sigma=1): 24 | def _gen_gaussian_kernel(k_size=5, sigma=1): 25 | center = k_size // 2 26 | x, y = np.mgrid[0 - center: k_size - center, 27 | 0 - center: k_size - center] 28 | g = 1 / (2 * np.pi * sigma) * np.exp( 29 | -(np.square(x) + np.square(y)) / (2 * np.square(sigma))) 30 | return g 31 | 32 | gaussian_kernel = _gen_gaussian_kernel(k_size, sigma) 33 | self.gaussian_filter.weight.data = torch.Tensor(gaussian_kernel).to( 34 | self.gaussian_filter.weight.device).unsqueeze(0).unsqueeze(0) 35 | 
self.gaussian_filter.bias.data.zero_() 36 | 37 | def forward(self, batch_confidence_maps, record_len): 38 | # batch_confidence_maps:[(L1, H, W), (L2, H, W), ...] 39 | # pairwise_t_matrix: (B,L,L,2,3) 40 | # thre: threshold of objectiveness 41 | # a_ji = (1 - q_i)*q_ji 42 | B = len(record_len) 43 | _, _, H, W = batch_confidence_maps[0].shape 44 | 45 | communication_masks = [] 46 | communication_rates = [] 47 | batch_communication_maps = [] 48 | for b in range(B): 49 | # number of valid agent 50 | N = record_len[b] 51 | 52 | # 在通道方向取max 53 | ori_communication_maps = \ 54 | batch_confidence_maps[b].sigmoid().max(dim=1)[0].unsqueeze(1) # dim1=2 represents the confidence of two anchors 55 | 56 | if self.smooth: 57 | communication_maps = self.gaussian_filter(ori_communication_maps) 58 | else: 59 | communication_maps = ori_communication_maps 60 | 61 | ones_mask = torch.ones_like(communication_maps).to(communication_maps.device) 62 | zeros_mask = torch.zeros_like(communication_maps).to(communication_maps.device) 63 | communication_mask = torch.where(communication_maps > self.thre,ones_mask, zeros_mask) 64 | # 符合thre的部分占有的比例 65 | communication_rate = communication_mask[0].sum()/(H * W) 66 | 67 | ones_mask = torch.ones_like(communication_mask).to( 68 | communication_mask.device) 69 | # communication_mask_nodiag[::2] = ones_mask[::2] 70 | 71 | communication_masks.append(communication_mask) 72 | communication_rates.append(communication_rate) 73 | batch_communication_maps.append( 74 | ori_communication_maps * communication_mask) 75 | communication_rates = sum(communication_rates) / B 76 | # communication_masks = torch.cat(communication_masks, dim=0) 77 | return batch_communication_maps, communication_masks, 78 | 79 | 80 | # def save_mask_0(mask, i, cnt): 81 | # plt.imsave('/data2/gjm/tmp/pi/'+str(cnt)+'_'+str(i)+'.png', mask) -------------------------------------------------------------------------------- /v2xvit/hypes_yaml/point_pillar_v2vnet.yaml: -------------------------------------------------------------------------------- 1 | name: point_pillar_v2vnet 2 | root_dir: 'v2xset/train' 3 | validate_dir: 'v2xset/validate' 4 | 5 | wild_setting: 6 | async: false 7 | async_overhead: 100 8 | seed: 20 9 | loc_err: false 10 | xyz_std: 0.2 11 | ryp_std: 0.2 12 | data_size: 1.06 # Mb!! 13 | transmission_speed: 27 # Mbps!! 14 | backbone_delay: 10 # ms 15 | 16 | yaml_parser: "load_point_pillar_params" 17 | train_params: 18 | batch_size: &batch_size 4 19 | epoches: 60 20 | eval_freq: 1 21 | save_freq: 1 22 | max_cav: &max_cav 5 23 | 24 | fusion: 25 | core_method: 'IntermediateFusionDataset' # LateFusionDataset, EarlyFusionDataset, IntermediateFusionDataset supported 26 | args: 27 | cur_ego_pose_flag: True 28 | 29 | # preprocess-related 30 | preprocess: 31 | # options: BasePreprocessor, VoxelPreprocessor, BevPreprocessor 32 | core_method: 'SpVoxelPreprocessor' 33 | args: 34 | voxel_size: &voxel_size [0.4, 0.4, 4] 35 | max_points_per_voxel: 32 36 | max_voxel_train: 32000 37 | max_voxel_test: 70000 38 | # lidar range for each individual cav. 
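# With the 0.4 m voxel_size above, the range below gives a 704 x 192 BEV grid ((2*140.8)/0.4 by (2*38.4)/0.4); after the 4x downsample in v2vfusion this matches the conv_gru H: 48, W: 176 set further down.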
39 | cav_lidar_range: &cav_lidar [-140.8, -38.4, -3, 140.8, 38.4, 1] 40 | 41 | data_augment: 42 | - NAME: random_world_flip 43 | ALONG_AXIS_LIST: [ 'x' ] 44 | 45 | - NAME: random_world_rotation 46 | WORLD_ROT_ANGLE: [ -0.78539816, 0.78539816 ] 47 | 48 | - NAME: random_world_scaling 49 | WORLD_SCALE_RANGE: [ 0.95, 1.05 ] 50 | 51 | # anchor box related 52 | postprocess: 53 | core_method: 'VoxelPostprocessor' # VoxelPostprocessor, BevPostprocessor supported 54 | anchor_args: 55 | cav_lidar_range: *cav_lidar 56 | l: 3.9 57 | w: 1.6 58 | h: 1.56 59 | r: [0, 90] 60 | feature_stride: 4 61 | num: &achor_num 2 62 | target_args: 63 | pos_threshold: 0.6 64 | neg_threshold: 0.45 65 | score_threshold: 0.20 66 | order: 'hwl' # hwl or lwh 67 | max_num: 100 # maximum number of objects in a single frame. use this number to make sure different frames has the same dimension in the same batch 68 | nms_thresh: 0.15 69 | 70 | # model related 71 | model: 72 | core_method: point_pillar_v2vnet 73 | args: 74 | voxel_size: *voxel_size 75 | lidar_range: *cav_lidar 76 | anchor_number: *achor_num 77 | max_cav: *max_cav 78 | compression: 0 # compression rate 79 | backbone_fix: false 80 | 81 | pillar_vfe: 82 | use_norm: true 83 | with_distance: false 84 | use_absolute_xyz: true 85 | num_filters: [64] 86 | point_pillar_scatter: 87 | num_features: 64 88 | 89 | base_bev_backbone: 90 | layer_nums: [3, 5, 8] 91 | layer_strides: [2, 2, 2] 92 | num_filters: [64, 128, 256] 93 | upsample_strides: [1, 2, 4] 94 | num_upsample_filter: [128, 128, 128] 95 | shrink_header: 96 | kernal_size: [3] 97 | stride: [2] 98 | padding: [1] 99 | dim: [256] 100 | input_dim: 384 # 128 * 3 101 | 102 | v2vfusion: 103 | use_temporal_encoding: true 104 | voxel_size: *voxel_size 105 | downsample_rate: 4 106 | num_iteration: 3 107 | in_channels: 256 108 | gru_flag: false 109 | agg_operator: "avg" # max or avg 110 | conv_gru: 111 | H: 48 112 | W: 176 113 | num_layers: 1 114 | kernel_size: [[3,3]] 115 | 116 | 117 | # add decoder later 118 | 119 | loss: 120 | core_method: point_pillar_loss 121 | args: 122 | cls_weight: 1.0 123 | reg: 2.0 124 | 125 | optimizer: 126 | core_method: Adam 127 | lr: 0.001 128 | args: 129 | eps: 1e-10 130 | weight_decay: 1e-4 131 | 132 | lr_scheduler: 133 | core_method: multistep #step, multistep and Exponential support 134 | gamma: 0.1 135 | step_size: [15, 50] 136 | 137 | -------------------------------------------------------------------------------- /v2xvit/hypes_yaml/point_pillar_when2com.yaml: -------------------------------------------------------------------------------- 1 | name: opv2v_when2comm 2 | root_dir: '/data/opv2v/train' 3 | validate_dir: '/data/opv2v/validate' 4 | 5 | wild_setting: 6 | async: false 7 | async_overhead: 100 8 | seed: 20 9 | loc_err: True 10 | xyz_std: 0.2 11 | ryp_std: 0.2 12 | # data_size: 1.06 # Mb!! 13 | # transmission_speed: 27 # Mbps!! 
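# (when async is enabled, data_size / transmission_speed plus backbone_delay roughly models the simulated feature-transmission latency; all three stay commented out in this config)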
14 | # backbone_delay: 10 # ms 15 | 16 | noise_setting: 17 | add_noise: false 18 | args: 19 | pos_std: 0 20 | rot_std: 0 21 | pos_mean: 0 22 | rot_mean: 0 23 | 24 | yaml_parser: "load_point_pillar_params" 25 | train_params: 26 | batch_size: &batch_size 1 27 | epoches: 60 28 | eval_freq: 60 29 | save_freq: 2 30 | max_cav: &max_cav 5 31 | 32 | 33 | fusion: 34 | core_method: 'IntermediateFusionDataset' # LateFusionDataset, EarlyFusionDataset, IntermediateFusionDataset supported 35 | args: 36 | cur_ego_pose_flag: true 37 | 38 | # preprocess-related 39 | preprocess: 40 | # options: BasePreprocessor, VoxelPreprocessor, BevPreprocessor 41 | core_method: 'SpVoxelPreprocessor' 42 | args: 43 | voxel_size: &voxel_size [0.4, 0.4, 4] 44 | max_points_per_voxel: 32 45 | max_voxel_train: 32000 46 | max_voxel_test: 70000 47 | # lidar range for each individual cav. Format: xyzxyz minmax 48 | cav_lidar_range: &cav_lidar [-140.8, -40, -3, 140.8, 40, 1] 49 | 50 | data_augment: 51 | - NAME: random_world_flip 52 | ALONG_AXIS_LIST: [ 'x' ] 53 | 54 | - NAME: random_world_rotation 55 | WORLD_ROT_ANGLE: [ -0.78539816, 0.78539816 ] 56 | 57 | - NAME: random_world_scaling 58 | WORLD_SCALE_RANGE: [ 0.95, 1.05 ] 59 | 60 | # anchor box related 61 | postprocess: 62 | core_method: 'VoxelPostprocessor' # VoxelPostprocessor, BevPostprocessor supported 63 | gt_range: *cav_lidar 64 | anchor_args: 65 | cav_lidar_range: *cav_lidar 66 | l: 3.9 67 | w: 1.6 68 | h: 1.56 69 | r: [0, 90] 70 | feature_stride: 2 71 | num: &achor_num 2 72 | target_args: 73 | pos_threshold: 0.6 74 | neg_threshold: 0.45 75 | score_threshold: 0.2 76 | order: 'hwl' # hwl or lwh 77 | max_num: 100 # maximum number of objects in a single frame. use this number to make sure different frames has the same dimension in the same batch 78 | nms_thresh: 0.15 79 | 80 | # model related 81 | 82 | model: 83 | core_method: point_pillar_when2com 84 | args: 85 | voxel_size: *voxel_size 86 | lidar_range: *cav_lidar 87 | anchor_number: *achor_num 88 | max_cav: *max_cav 89 | compression: 0 # compression rate 90 | backbone_fix: false 91 | 92 | pillar_vfe: 93 | use_norm: true 94 | with_distance: false 95 | use_absolute_xyz: true 96 | num_filters: [64] 97 | point_pillar_scatter: 98 | num_features: 64 99 | 100 | base_bev_backbone: 101 | layer_nums: [3, 5, 8] 102 | layer_strides: [2, 2, 2] 103 | num_filters: [64, 128, 256] 104 | upsample_strides: [1, 2, 4] 105 | num_upsample_filter: [128, 128, 128] 106 | shrink_header: 107 | kernal_size: [ 3 ] 108 | stride: [ 1 ] 109 | padding: [ 1 ] 110 | dim: [ 256 ] 111 | input_dim: 384 # 128 * 3 112 | 113 | v2vfusion: 114 | voxel_size: *voxel_size 115 | downsample_rate: 2 116 | in_channels: 256 117 | H: 100 118 | W: 352 119 | query_size: 32 120 | key_size: 256 121 | mode: 'softmax' 122 | 123 | loss: 124 | core_method: point_pillar_loss 125 | args: 126 | cls_weight: 1.0 127 | reg: 2.0 128 | 129 | optimizer: 130 | core_method: Adam 131 | lr: 0.001 132 | args: 133 | eps: 1e-10 134 | weight_decay: 1e-4 135 | 136 | lr_scheduler: 137 | core_method: multistep #step, multistep and Exponential support 138 | gamma: 0.1 139 | step_size: [10, 30, 50] 140 | -------------------------------------------------------------------------------- /v2xvit/hypes_yaml/point_pillar_single.yaml: -------------------------------------------------------------------------------- 1 | name: point_pillar_single 2 | root_dir: '/data/opv2v/train' 3 | validate_dir: '/data/opv2v/validate' 4 | wild_setting: 5 | async: false 6 | async_overhead: 100 7 | seed: 20 8 | loc_err: false 9 | 
xyz_std: 0.2 10 | ryp_std: 0.2 11 | data_size: 1.06 # Mb!! 12 | transmission_speed: 27 # Mbps!! 13 | backbone_delay: 10 # ms 14 | 15 | noise_setting: 16 | add_noise: false 17 | args: 18 | pos_std: 1 19 | rot_std: 0 20 | pos_mean: 0 21 | rot_mean: 0 22 | 23 | yaml_parser: "load_point_pillar_params" 24 | train_params: 25 | batch_size: &batch_size 4 26 | epoches: 60 27 | eval_freq: 1 28 | save_freq: 1 29 | max_cav: &max_cav 5 30 | 31 | fusion: 32 | core_method: 'IntermediateFusionDataset' # LateFusionDataset, EarlyFusionDataset, IntermediateFusionDataset supported 33 | args: 34 | cur_ego_pose_flag: True 35 | # when the cur_ego_pose_flag is set to True, there is no time gap 36 | # between the time when the LiDAR data is captured by connected 37 | # agents and when the extracted features are received by 38 | # the ego vehicle, which is equal to implement STCM. When set to False, 39 | # STCM has to be used. 40 | 41 | 42 | # preprocess-related 43 | preprocess: 44 | # options: BasePreprocessor, VoxelPreprocessor, BevPreprocessor 45 | core_method: 'SpVoxelPreprocessor' 46 | args: 47 | voxel_size: &voxel_size [0.4, 0.4, 4] 48 | max_points_per_voxel: 32 49 | max_voxel_train: 32000 50 | max_voxel_test: 70000 51 | # lidar range for each individual cav. 52 | cav_lidar_range: &cav_lidar [-140.8, -38.4, -3, 140.8, 38.4, 1] 53 | 54 | data_augment: 55 | - NAME: random_world_flip 56 | ALONG_AXIS_LIST: [ 'x' ] 57 | 58 | - NAME: random_world_rotation 59 | WORLD_ROT_ANGLE: [ -0.78539816, 0.78539816 ] 60 | 61 | - NAME: random_world_scaling 62 | WORLD_SCALE_RANGE: [ 0.95, 1.05 ] 63 | 64 | # anchor box related 65 | postprocess: 66 | core_method: 'VoxelPostprocessor' # VoxelPostprocessor, BevPostprocessor supported 67 | anchor_args: 68 | cav_lidar_range: *cav_lidar 69 | l: 3.9 70 | w: 1.6 71 | h: 1.56 72 | r: [0, 90] 73 | feature_stride: 4 74 | num: &achor_num 2 75 | target_args: 76 | pos_threshold: 0.6 77 | neg_threshold: 0.45 78 | score_threshold: 0.20 79 | order: 'hwl' # hwl or lwh 80 | max_num: 100 # maximum number of objects in a single frame. 
use this number to make sure different frames has the same dimension in the same batch 81 | nms_thresh: 0.15 82 | 83 | # model related 84 | model: 85 | core_method: point_pillar_single 86 | args: 87 | voxel_size: *voxel_size 88 | lidar_range: *cav_lidar 89 | anchor_number: *achor_num 90 | max_cav: *max_cav 91 | compression: 0 # compression rate 92 | backbone_fix: false 93 | 94 | pillar_vfe: 95 | use_norm: true 96 | with_distance: false 97 | use_absolute_xyz: true 98 | num_filters: [64] 99 | point_pillar_scatter: 100 | num_features: 64 101 | 102 | base_bev_backbone: 103 | layer_nums: [3, 5, 8] 104 | layer_strides: [2, 2, 2] 105 | num_filters: [64, 128, 256] 106 | upsample_strides: [1, 2, 4] 107 | num_upsample_filter: [128, 128, 128] 108 | shrink_header: 109 | kernal_size: [3] 110 | stride: [2] 111 | padding: [1] 112 | dim: [256] 113 | input_dim: 384 # 128 * 3 114 | 115 | # add decoder later 116 | 117 | loss: 118 | core_method: point_pillar_loss 119 | args: 120 | cls_weight: 1.0 121 | reg: 2.0 122 | 123 | optimizer: 124 | core_method: Adam 125 | lr: 0.001 126 | args: 127 | eps: 1e-10 128 | weight_decay: 1e-4 129 | 130 | lr_scheduler: 131 | core_method: multistep #step, multistep and Exponential support 132 | gamma: 0.1 133 | step_size: [15, 50] 134 | 135 | -------------------------------------------------------------------------------- /v2xvit/loss/pixor_loss.py: -------------------------------------------------------------------------------- 1 | from functools import reduce 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | 8 | class PixorLoss(nn.Module): 9 | def __init__(self, args): 10 | super(PixorLoss, self).__init__() 11 | self.alpha = args["alpha"] 12 | self.beta = args["beta"] 13 | self.loss_dict = {} 14 | 15 | def forward(self, output_dict, target_dict): 16 | """ 17 | Compute loss for pixor network 18 | Parameters 19 | ---------- 20 | output_dict : dict 21 | The dictionary that contains the output. 22 | 23 | target_dict : dict 24 | The dictionary that contains the target. 25 | 26 | Returns 27 | ------- 28 | total_loss : torch.Tensor 29 | Total loss. 30 | 31 | """ 32 | targets = target_dict["label_map"] 33 | cls_preds, loc_preds = output_dict["cls"], output_dict["reg"] 34 | 35 | cls_targets, loc_targets = targets.split([1, 6], dim=1) 36 | pos_count = cls_targets.sum() 37 | neg_count = (cls_targets == 0).sum() 38 | w1, w2 = neg_count / (pos_count + neg_count), pos_count / ( 39 | pos_count + neg_count) 40 | weights = torch.ones_like(cls_preds.reshape(-1)) 41 | weights[cls_targets.reshape(-1) == 1] = w1 42 | weights[cls_targets.reshape(-1) == 0] = w2 43 | # cls_targets = cls_targets.float() 44 | # cls_loss = F.binary_cross_entropy_with_logits(input=cls_preds.reshape(-1), target=cls_targets.reshape(-1), weight=weights, 45 | # reduction='mean') 46 | cls_loss = F.binary_cross_entropy_with_logits( 47 | input=cls_preds, target=cls_targets, 48 | reduction='mean') 49 | pos_pixels = cls_targets.sum() 50 | 51 | loc_loss = F.smooth_l1_loss(cls_targets * loc_preds, 52 | cls_targets * loc_targets, 53 | reduction='sum') 54 | loc_loss = loc_loss / pos_pixels if pos_pixels > 0 else loc_loss 55 | 56 | total_loss = self.alpha * cls_loss + self.beta * loc_loss 57 | 58 | self.loss_dict.update({'total_loss': total_loss, 59 | 'reg_loss': loc_loss, 60 | 'cls_loss': cls_loss}) 61 | 62 | return total_loss 63 | 64 | def logging(self, epoch, batch_id, batch_len, writer): 65 | """ 66 | Print out the loss function for current iteration. 
67 | 68 | Parameters 69 | ---------- 70 | epoch : int 71 | Current epoch for training. 72 | batch_id : int 73 | The current batch. 74 | batch_len : int 75 | Total batch length in one iteration of training, 76 | writer : SummaryWriter 77 | Used to visualize on tensorboard 78 | """ 79 | total_loss = self.loss_dict['total_loss'] 80 | reg_loss = self.loss_dict['reg_loss'] 81 | cls_loss = self.loss_dict['cls_loss'] 82 | 83 | print("[epoch %d][%d/%d], || Loss: %.4f || cls Loss: %.4f" 84 | " || reg Loss: %.4f" % ( 85 | epoch, batch_id + 1, batch_len, 86 | total_loss.item(), cls_loss.item(), reg_loss.item())) 87 | 88 | writer.add_scalar('Regression_loss', reg_loss.item(), 89 | epoch * batch_len + batch_id) 90 | writer.add_scalar('Confidence_loss', cls_loss.item(), 91 | epoch * batch_len + batch_id) 92 | 93 | 94 | def test(): 95 | torch.manual_seed(0) 96 | loss = PixorLoss(None) 97 | pred = torch.sigmoid(torch.randn(1, 7, 2, 3)) 98 | label = torch.zeros(1, 7, 2, 3) 99 | loss = loss(pred, label) 100 | print(loss) 101 | 102 | 103 | if __name__ == "__main__": 104 | test() 105 | -------------------------------------------------------------------------------- /v2xvit/hypes_yaml/point_pillar_opv2v.yaml: -------------------------------------------------------------------------------- 1 | name: point_pillar_opv2v 2 | #root_dir: '/home/runshengxu/project/Cooperative_perception/opencood/tmp' 3 | root_dir: '/data/opv2v/train' 4 | validate_dir: '/data/opv2v/validate' 5 | 6 | wild_setting: 7 | async: false 8 | async_overhead: 100 9 | seed: 20 10 | loc_err: false 11 | xyz_std: 0.2 12 | ryp_std: 0.2 13 | data_size: 1.06 # Mb!! 14 | transmission_speed: 27 # Mbps!! 15 | backbone_delay: 10 # ms 16 | 17 | noise_setting: 18 | add_noise: True 19 | args: 20 | pos_std: 0 21 | rot_std: 0 22 | pos_mean: 0 23 | rot_mean: 0 24 | 25 | yaml_parser: "load_point_pillar_params" 26 | train_params: 27 | batch_size: &batch_size 2 28 | epoches: 60 29 | eval_freq: 1 30 | save_freq: 1 31 | max_cav: &max_cav 5 32 | 33 | fusion: 34 | core_method: 'IntermediateFusionDataset' # LateFusionDataset, EarlyFusionDataset, IntermediateFusionDataset supported 35 | args: 36 | cur_ego_pose_flag: True 37 | # when the cur_ego_pose_flag is set to True, there is no time gap 38 | # between the time when the LiDAR data is captured by connected 39 | # agents and when the extracted features are received by 40 | # the ego vehicle, which is equal to implement STCM. When set to False, 41 | # STCM has to be used. 42 | 43 | 44 | # preprocess-related 45 | preprocess: 46 | # options: BasePreprocessor, VoxelPreprocessor, BevPreprocessor 47 | core_method: 'SpVoxelPreprocessor' 48 | args: 49 | voxel_size: &voxel_size [0.4, 0.4, 4] 50 | max_points_per_voxel: 32 51 | max_voxel_train: 32000 52 | max_voxel_test: 70000 53 | # lidar range for each individual cav. 
54 | cav_lidar_range: &cav_lidar [-140.8, -38.4, -3, 140.8, 38.4, 1] 55 | 56 | data_augment: 57 | - NAME: random_world_flip 58 | ALONG_AXIS_LIST: [ 'x' ] 59 | 60 | - NAME: random_world_rotation 61 | WORLD_ROT_ANGLE: [ -0.78539816, 0.78539816 ] 62 | 63 | - NAME: random_world_scaling 64 | WORLD_SCALE_RANGE: [ 0.95, 1.05 ] 65 | 66 | # anchor box related 67 | postprocess: 68 | core_method: 'VoxelPostprocessor' # VoxelPostprocessor, BevPostprocessor supported 69 | anchor_args: 70 | cav_lidar_range: *cav_lidar 71 | l: 3.9 72 | w: 1.6 73 | h: 1.56 74 | r: [0, 90] 75 | feature_stride: 4 76 | num: &achor_num 2 77 | target_args: 78 | pos_threshold: 0.6 79 | neg_threshold: 0.45 80 | score_threshold: 0.20 81 | order: 'hwl' # hwl or lwh 82 | max_num: 100 # maximum number of objects in a single frame. use this number to make sure different frames has the same dimension in the same batch 83 | nms_thresh: 0.15 84 | 85 | # model related 86 | model: 87 | core_method: point_pillar_opv2v 88 | args: 89 | voxel_size: *voxel_size 90 | lidar_range: *cav_lidar 91 | anchor_number: *achor_num 92 | max_cav: *max_cav 93 | compression: 32 # compression rate 94 | backbone_fix: false 95 | 96 | pillar_vfe: 97 | use_norm: true 98 | with_distance: false 99 | use_absolute_xyz: true 100 | num_filters: [64] 101 | point_pillar_scatter: 102 | num_features: 64 103 | 104 | base_bev_backbone: 105 | layer_nums: [3, 5, 8] 106 | layer_strides: [2, 2, 2] 107 | num_filters: [64, 128, 256] 108 | upsample_strides: [1, 2, 4] 109 | num_upsample_filter: [128, 128, 128] 110 | shrink_header: 111 | kernal_size: [3] 112 | stride: [2] 113 | padding: [1] 114 | dim: [256] 115 | input_dim: 384 # 128 * 3 116 | 117 | # add decoder later 118 | 119 | loss: 120 | core_method: point_pillar_loss 121 | args: 122 | cls_weight: 1.0 123 | reg: 2.0 124 | 125 | optimizer: 126 | core_method: Adam 127 | lr: 0.001 128 | args: 129 | eps: 1e-10 130 | weight_decay: 1e-4 131 | 132 | lr_scheduler: 133 | core_method: multistep #step, multistep and Exponential support 134 | gamma: 0.1 135 | step_size: [15, 50] 136 | 137 | -------------------------------------------------------------------------------- /v2xvit/hypes_yaml/point_pillar_where2comm_ori.yaml: -------------------------------------------------------------------------------- 1 | name: point_pillar_where2comm 2 | root_dir: '/data/opv2v/train' 3 | validate_dir: '/data/opv2v/validate' 4 | 5 | 6 | wild_setting: 7 | seed: 20 8 | async: False 9 | async_mode: 'sim' 10 | async_overhead: 100 11 | loc_err: False 12 | xyz_std: 0.2 13 | ryp_std: 0.2 14 | data_size: 1.06 # Mb 15 | transmission_speed: 27 # Mbps 16 | backbone_delay: 10 # ms 17 | 18 | yaml_parser: 'load_point_pillar_params' 19 | train_params: 20 | batch_size: &batch_size 4 21 | epoches: &epoches 50 22 | eval_freq: 1 23 | save_freq: 1 24 | max_cav: &max_cav 5 25 | 26 | fusion: 27 | core_method: 'IntermediateFusionDataset' # LateFusionDataset, EarlyFusionDataset, IntermediateFusionDataset supported 28 | args: 29 | cur_ego_pose_flag: true 30 | 31 | # Preprocess-related 32 | preprocess: 33 | # Options: BasePreprocessor, VoxelPreprocessor, BevPreprocessor 34 | core_method: 'SpVoxelPreprocessor' 35 | args: 36 | voxel_size: &voxel_size [ 0.4, 0.4, 4 ] 37 | max_points_per_voxel: 32 38 | max_voxel_train: 32000 39 | max_voxel_test: 70000 40 | # LiDAR range for each individual CAV 41 | cav_lidar_range: &cav_lidar [-140.8, -40, -3, 140.8, 40, 1] # [-140.8, -38.4, -5, 140.8, 38.4, 3] 42 | 43 | data_augment: 44 | - NAME: random_world_flip 45 | ALONG_AXIS_LIST: [ 'x' ] 46 | - 
NAME: random_world_rotation 47 | WORLD_ROT_ANGLE: [ -0.78539816, 0.78539816 ] 48 | - NAME: random_world_scaling 49 | WORLD_SCALE_RANGE: [ 0.95, 1.05 ] 50 | 51 | # Anchor box related 52 | postprocess: 53 | core_method: 'VoxelPostprocessor' # VoxelPostprocessor, BevPostprocessor supported 54 | anchor_args: 55 | cav_lidar_range: *cav_lidar 56 | l: 3.9 57 | w: 1.6 58 | h: 1.56 59 | r: [ 0, 90 ] 60 | num: &achor_num 2 61 | feature_stride: 4 62 | target_args: 63 | pos_threshold: 0.6 64 | neg_threshold: 0.45 65 | score_threshold: 0.2 66 | order: 'hwl' # hwl or lwh 67 | max_num: 100 # Maximum number of objects in a single frame. Use this number to make sure different frames has the same dimension in the same batch 68 | nms_thresh: 0.15 69 | 70 | # Model related 71 | model: 72 | core_method: point_pillar_where2comm_ori # point_pillar_where2comm_ori 73 | args: 74 | voxel_size: *voxel_size 75 | lidar_range: *cav_lidar 76 | anchor_number: *achor_num 77 | head_dim: 256 78 | max_cav: *max_cav 79 | compression: 0 # Compression rate 80 | backbone_fix: False 81 | pillar_vfe: 82 | use_norm: True 83 | with_distance: False 84 | use_absolute_xyz: True 85 | num_filters: [ 64 ] 86 | point_pillar_scatter: 87 | num_features: 64 88 | base_bev_backbone: 89 | layer_nums: &layer_nums [3, 4, 5] # [3, 5, 8] 90 | layer_strides: [ 2, 2, 2 ] 91 | num_filters: &num_filters [ 64, 128, 256 ] 92 | upsample_strides: [ 1, 2, 4 ] 93 | num_upsample_filter: [ 128, 128, 128 ] 94 | shrink_header: 95 | kernal_size: [ 3 ] 96 | stride: [ 2 ] 97 | padding: [ 1 ] 98 | dim: [ 256 ] 99 | input_dim: 384 # 128 * 3 100 | where2comm_fusion: 101 | fully: False 102 | voxel_size: *voxel_size 103 | downsample_rate: 4 104 | in_channels: 256 105 | multi_scale: True 106 | layer_nums: *layer_nums 107 | num_filters: *num_filters 108 | communication: 109 | round: 1 110 | threshold: 0.01 111 | gaussian_smooth: 112 | k_size: 5 113 | c_sigma: 1.0 114 | 115 | loss: 116 | core_method: point_pillar_loss 117 | args: 118 | cls_weight: 1.0 119 | reg: 2.0 120 | 121 | optimizer: 122 | core_method: Adam 123 | lr: 2e-4 124 | args: 125 | eps: 1e-10 126 | weight_decay: 1e-2 127 | 128 | lr_scheduler: 129 | # core_method: cosineannealwarm # step, multistep, exponential and cosineannealwarm support 130 | # epoches: *epoches 131 | # warmup_lr: 2e-5 132 | # warmup_epoches: 10 133 | # lr_min: 5e-6 134 | core_method: multistep #step, multistep and Exponential support 135 | gamma: 0.1 136 | step_size: [10, 20] -------------------------------------------------------------------------------- /v2xvit/hypes_yaml/point_pillar_where2comm_ori_multi.yaml: -------------------------------------------------------------------------------- 1 | name: point_pillar_where2comm 2 | root_dir: '/data/opv2v/train' 3 | validate_dir: '/data/opv2v/validate' 4 | 5 | 6 | wild_setting: 7 | seed: 20 8 | async: False 9 | async_mode: 'sim' 10 | async_overhead: 100 11 | loc_err: False 12 | xyz_std: 0.2 13 | ryp_std: 0.2 14 | data_size: 1.06 # Mb 15 | transmission_speed: 27 # Mbps 16 | backbone_delay: 10 # ms 17 | 18 | yaml_parser: 'load_point_pillar_params' 19 | train_params: 20 | batch_size: &batch_size 4 21 | epoches: &epoches 30 22 | eval_freq: 1 23 | save_freq: 1 24 | max_cav: &max_cav 5 25 | 26 | fusion: 27 | core_method: 'IntermediateFusionDataset' # LateFusionDataset, EarlyFusionDataset, IntermediateFusionDataset supported 28 | args: 29 | cur_ego_pose_flag: true 30 | 31 | # Preprocess-related 32 | preprocess: 33 | # Options: BasePreprocessor, VoxelPreprocessor, BevPreprocessor 34 | core_method: 
'SpVoxelPreprocessor' 35 | args: 36 | voxel_size: &voxel_size [ 0.4, 0.4, 4 ] 37 | max_points_per_voxel: 32 38 | max_voxel_train: 32000 39 | max_voxel_test: 70000 40 | # LiDAR range for each individual CAV 41 | cav_lidar_range: &cav_lidar [-140.8, -40, -3, 140.8, 40, 1] # [-140.8, -38.4, -5, 140.8, 38.4, 3] 42 | 43 | data_augment: 44 | - NAME: random_world_flip 45 | ALONG_AXIS_LIST: [ 'x' ] 46 | - NAME: random_world_rotation 47 | WORLD_ROT_ANGLE: [ -0.78539816, 0.78539816 ] 48 | - NAME: random_world_scaling 49 | WORLD_SCALE_RANGE: [ 0.95, 1.05 ] 50 | 51 | # Anchor box related 52 | postprocess: 53 | core_method: 'VoxelPostprocessor' # VoxelPostprocessor, BevPostprocessor supported 54 | anchor_args: 55 | cav_lidar_range: *cav_lidar 56 | l: 3.9 57 | w: 1.6 58 | h: 1.56 59 | r: [ 0, 90 ] 60 | num: &achor_num 2 61 | feature_stride: 4 62 | target_args: 63 | pos_threshold: 0.6 64 | neg_threshold: 0.45 65 | score_threshold: 0.2 66 | order: 'hwl' # hwl or lwh 67 | max_num: 100 # Maximum number of objects in a single frame. Use this number to make sure different frames has the same dimension in the same batch 68 | nms_thresh: 0.15 69 | 70 | # Model related 71 | model: 72 | core_method: point_pillar_where2comm_ori # point_pillar_where2comm_ori 73 | args: 74 | voxel_size: *voxel_size 75 | lidar_range: *cav_lidar 76 | anchor_number: *achor_num 77 | head_dim: 256 78 | max_cav: *max_cav 79 | compression: 0 # Compression rate 80 | backbone_fix: False 81 | pillar_vfe: 82 | use_norm: True 83 | with_distance: False 84 | use_absolute_xyz: True 85 | num_filters: [ 64 ] 86 | point_pillar_scatter: 87 | num_features: 64 88 | base_bev_backbone: 89 | layer_nums: &layer_nums [3, 4, 5] # [3, 5, 8] 90 | layer_strides: [ 2, 2, 2 ] 91 | num_filters: &num_filters [ 64, 128, 256 ] 92 | upsample_strides: [ 1, 2, 4 ] 93 | num_upsample_filter: [ 128, 128, 128 ] 94 | shrink_header: 95 | kernal_size: [ 3 ] 96 | stride: [ 2 ] 97 | padding: [ 1 ] 98 | dim: [ 256 ] 99 | input_dim: 384 # 128 * 3 100 | where2comm_fusion: 101 | fully: False 102 | voxel_size: *voxel_size 103 | downsample_rate: 4 104 | in_channels: 256 105 | multi_scale: True 106 | layer_nums: *layer_nums 107 | num_filters: *num_filters 108 | communication: 109 | round: 1 110 | threshold: 0.01 111 | gaussian_smooth: 112 | k_size: 5 113 | c_sigma: 1.0 114 | 115 | loss: 116 | core_method: point_pillar_loss 117 | args: 118 | cls_weight: 1.0 119 | reg: 2.0 120 | 121 | optimizer: 122 | core_method: Adam 123 | lr: 2e-4 124 | args: 125 | eps: 1e-10 126 | weight_decay: 1e-2 127 | 128 | lr_scheduler: 129 | # core_method: cosineannealwarm # step, multistep, exponential and cosineannealwarm support 130 | # epoches: *epoches 131 | # warmup_lr: 2e-5 132 | # warmup_epoches: 10 133 | # lr_min: 5e-6 134 | core_method: multistep #step, multistep and Exponential support 135 | gamma: 0.1 136 | step_size: [10, 20] -------------------------------------------------------------------------------- /v2xvit/hypes_yaml/point_pillar_where2comm_ori_single.yaml: -------------------------------------------------------------------------------- 1 | name: point_pillar_where2comm 2 | root_dir: '/data/opv2v/train' 3 | validate_dir: '/data/opv2v/validate' 4 | 5 | 6 | wild_setting: 7 | seed: 20 8 | async: False 9 | async_mode: 'sim' 10 | async_overhead: 100 11 | loc_err: False 12 | xyz_std: 0.2 13 | ryp_std: 0.2 14 | data_size: 1.06 # Mb 15 | transmission_speed: 27 # Mbps 16 | backbone_delay: 10 # ms 17 | 18 | yaml_parser: 'load_point_pillar_params' 19 | train_params: 20 | batch_size: &batch_size 4 21 | 
epoches: &epoches 30 22 | eval_freq: 1 23 | save_freq: 1 24 | max_cav: &max_cav 5 25 | 26 | fusion: 27 | core_method: 'IntermediateFusionDataset' # LateFusionDataset, EarlyFusionDataset, IntermediateFusionDataset supported 28 | args: 29 | cur_ego_pose_flag: true 30 | 31 | # Preprocess-related 32 | preprocess: 33 | # Options: BasePreprocessor, VoxelPreprocessor, BevPreprocessor 34 | core_method: 'SpVoxelPreprocessor' 35 | args: 36 | voxel_size: &voxel_size [ 0.4, 0.4, 4 ] 37 | max_points_per_voxel: 32 38 | max_voxel_train: 32000 39 | max_voxel_test: 70000 40 | # LiDAR range for each individual CAV 41 | cav_lidar_range: &cav_lidar [-140.8, -40, -3, 140.8, 40, 1] # [-140.8, -38.4, -5, 140.8, 38.4, 3] 42 | 43 | data_augment: 44 | - NAME: random_world_flip 45 | ALONG_AXIS_LIST: [ 'x' ] 46 | - NAME: random_world_rotation 47 | WORLD_ROT_ANGLE: [ -0.78539816, 0.78539816 ] 48 | - NAME: random_world_scaling 49 | WORLD_SCALE_RANGE: [ 0.95, 1.05 ] 50 | 51 | # Anchor box related 52 | postprocess: 53 | core_method: 'VoxelPostprocessor' # VoxelPostprocessor, BevPostprocessor supported 54 | anchor_args: 55 | cav_lidar_range: *cav_lidar 56 | l: 3.9 57 | w: 1.6 58 | h: 1.56 59 | r: [ 0, 90 ] 60 | num: &achor_num 2 61 | feature_stride: 4 62 | target_args: 63 | pos_threshold: 0.6 64 | neg_threshold: 0.45 65 | score_threshold: 0.2 66 | order: 'hwl' # hwl or lwh 67 | max_num: 100 # Maximum number of objects in a single frame. Use this number to make sure different frames has the same dimension in the same batch 68 | nms_thresh: 0.15 69 | 70 | # Model related 71 | model: 72 | core_method: point_pillar_where2comm_ori # point_pillar_where2comm_ori 73 | args: 74 | voxel_size: *voxel_size 75 | lidar_range: *cav_lidar 76 | anchor_number: *achor_num 77 | head_dim: 256 78 | max_cav: *max_cav 79 | compression: 0 # Compression rate 80 | backbone_fix: False 81 | pillar_vfe: 82 | use_norm: True 83 | with_distance: False 84 | use_absolute_xyz: True 85 | num_filters: [ 64 ] 86 | point_pillar_scatter: 87 | num_features: 64 88 | base_bev_backbone: 89 | layer_nums: &layer_nums [3, 4, 5] # [3, 5, 8] 90 | layer_strides: [ 2, 2, 2 ] 91 | num_filters: &num_filters [ 64, 128, 256 ] 92 | upsample_strides: [ 1, 2, 4 ] 93 | num_upsample_filter: [ 128, 128, 128 ] 94 | shrink_header: 95 | kernal_size: [ 3 ] 96 | stride: [ 2 ] 97 | padding: [ 1 ] 98 | dim: [ 256 ] 99 | input_dim: 384 # 128 * 3 100 | where2comm_fusion: 101 | fully: False 102 | voxel_size: *voxel_size 103 | downsample_rate: 4 104 | in_channels: 256 105 | multi_scale: False # True 106 | layer_nums: *layer_nums 107 | num_filters: *num_filters 108 | communication: 109 | round: 1 110 | threshold: 0.01 111 | gaussian_smooth: 112 | k_size: 5 113 | c_sigma: 1.0 114 | 115 | loss: 116 | core_method: point_pillar_loss 117 | args: 118 | cls_weight: 1.0 119 | reg: 2.0 120 | 121 | optimizer: 122 | core_method: Adam 123 | lr: 2e-4 124 | args: 125 | eps: 1e-10 126 | weight_decay: 1e-2 127 | 128 | lr_scheduler: 129 | # core_method: cosineannealwarm # step, multistep, exponential and cosineannealwarm support 130 | # epoches: *epoches 131 | # warmup_lr: 2e-5 132 | # warmup_epoches: 10 133 | # lr_min: 5e-6 134 | core_method: multistep #step, multistep and Exponential support 135 | gamma: 0.1 136 | step_size: [10, 20] -------------------------------------------------------------------------------- /v2xvit/utils/pose_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.distributions as dist 4 | 5 | 
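# A minimal illustration of the expected `noise_setting` argument, mirroring the
# `noise_setting` block in the hypes_yaml configs (the values are examples only):
# noise_setting = {
#     'add_noise': True,
#     'args': {'pos_std': 0.2, 'rot_std': 0.2, 'pos_mean': 0, 'rot_mean': 0}
# }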
def add_noise_data_dict(data_dict, noise_setting): 6 | """ Update the base data dict. 7 | We retrieve lidar_pose and add_noise to it. 8 | And set a clean pose. 9 | """ 10 | # path = 'pose_noise.txt' 11 | # file = open(path,'w+') 12 | if noise_setting['add_noise']: 13 | for cav_id, cav_content in data_dict.items(): 14 | cav_content['params']['lidar_pose_clean'] = cav_content['params']['lidar_pose'] # 6 dof pose 15 | if cav_content['ego']: 16 | continue 17 | pose_noise = generate_noise( 18 | noise_setting['args']['pos_std'], 19 | noise_setting['args']['rot_std'], 20 | noise_setting['args']['pos_mean'], 21 | noise_setting['args']['rot_mean'] 22 | ) 23 | cav_content['params']['lidar_pose'] = cav_content['params']['lidar_pose'] + pose_noise 24 | # print(pose_noise) 25 | # file.write(str(pose_noise[0])+' ') 26 | else: 27 | for cav_id, cav_content in data_dict.items(): 28 | cav_content['params']['lidar_pose_clean'] = cav_content['params']['lidar_pose'] # 6 dof pose 29 | 30 | # file.write('\n') 31 | # file.close() 32 | return data_dict 33 | 34 | def generate_noise(pos_std, rot_std, pos_mean=0, rot_mean=0): 35 | """ Add localization error to the 6dof pose 36 | Noise includes position (x,y) and rotation (yaw). 37 | We use gaussian distribution to generate noise. 38 | 39 | Args: 40 | 41 | pos_std : float 42 | std of gaussian dist, in meter 43 | 44 | rot_std : float 45 | std of gaussian dist, in degree 46 | 47 | pos_mean : float 48 | mean of gaussian dist, in meter 49 | 50 | rot_mean : float 51 | mean of gaussian dist, in degree 52 | 53 | Returns: 54 | pose_noise: np.ndarray, [6,] 55 | [x, y, z, roll, yaw, pitch] 56 | """ 57 | 58 | xy = np.random.normal(pos_mean, pos_std, size=(2)) 59 | yaw = np.random.normal(rot_mean, rot_std, size=(1)) 60 | 61 | pose_noise = np.array([xy[0], xy[1], 0, 0, yaw[0], 0]) 62 | # pose_noise = np.array([xy[0], xy[1], 0, 0, 0, yaw[0]]) 63 | # print(pose_noise) 64 | return pose_noise 65 | 66 | 67 | 68 | def generate_noise_torch(pose, pos_std, rot_std, pos_mean=0, rot_mean=0): 69 | """ only used for v2vnet robust. 70 | rotation noise is sampled from von_mises distribution 71 | 72 | Args: 73 | pose : Tensor, [N. 
6] 74 | including [x, y, z, roll, yaw, pitch] 75 | 76 | pos_std : float 77 | std of gaussian dist, in meter 78 | 79 | rot_std : float 80 | std of gaussian dist, in degree 81 | 82 | pos_mean : float 83 | mean of gaussian dist, in meter 84 | 85 | rot_mean : float 86 | mean of gaussian dist, in degree 87 | 88 | Returns: 89 | pose_noisy: Tensor, [N, 6] 90 | noisy pose 91 | """ 92 | 93 | N = pose.shape[0] 94 | noise = torch.zeros_like(pose, device=pose.device) 95 | concentration = (180 / (np.pi * rot_std)) ** 2 96 | 97 | noise[:, :2] = torch.normal(pos_mean, pos_std, size=(N, 2), device=pose.device) 98 | noise[:, 4] = dist.von_mises.VonMises(loc=rot_mean, concentration=concentration).sample((N,)).to(noise.device) 99 | 100 | 101 | return noise 102 | 103 | 104 | def remove_z_axis(T): 105 | """ remove rotation/translation related to z-axis 106 | Args: 107 | T: np.ndarray 108 | [4, 4] 109 | Returns: 110 | T: np.ndarray 111 | [4, 4] 112 | """ 113 | T[2,3] = 0 # z-trans 114 | T[0,2] = 0 115 | T[1,2] = 0 116 | T[2,0] = 0 117 | T[2,1] = 0 118 | T[2,2] = 1 119 | 120 | return T -------------------------------------------------------------------------------- /v2xvit/data_utils/pre_processor/bev_preprocessor.py: -------------------------------------------------------------------------------- 1 | """ 2 | Convert lidar to bev 3 | """ 4 | 5 | import numpy as np 6 | import torch 7 | from v2xvit.data_utils.pre_processor.base_preprocessor import \ 8 | BasePreprocessor 9 | 10 | class BevPreprocessor(BasePreprocessor): 11 | def __init__(self, preprocess_params, train): 12 | super(BevPreprocessor, self).__init__(preprocess_params, train) 13 | self.lidar_range = self.params['cav_lidar_range'] 14 | self.geometry_param = preprocess_params["geometry_param"] 15 | 16 | def preprocess(self, pcd_raw): 17 | """ 18 | Preprocess the lidar points to BEV representations. 19 | 20 | Parameters 21 | ---------- 22 | pcd_raw : np.ndarray 23 | The raw lidar. 24 | 25 | Returns 26 | ------- 27 | data_dict : the structured output dictionary. 28 | """ 29 | bev = np.zeros(self.geometry_param['input_shape'], dtype=np.float32) 30 | intensity_map_count = np.zeros((bev.shape[0], bev.shape[1]), dtype=np.int) 31 | bev_origin = np.array( 32 | [self.geometry_param["L1"], self.geometry_param["W1"], 33 | self.geometry_param["H1"]]).reshape(1, -1) 34 | 35 | indices = ((pcd_raw[:, :3] - bev_origin) / self.geometry_param[ 36 | "res"]).astype(int) 37 | ## bev[indices[:, 0], indices[:, 1], indices[:, 2]] = 1 38 | # np.add.at(bev, (indices[:, 0], indices[:, 1], indices[:, 2]), 1) 39 | # bev[indices[:, 0], indices[:, 1], -1] += pcd_raw[:, 3] 40 | # intensity_map_count[indices[:, 0], indices[:, 1]] += 1 41 | 42 | for i in range(indices.shape[0]): 43 | bev[indices[i, 0], indices[i, 1], indices[i, 2]] = 1 44 | bev[indices[i, 0], indices[i, 1], -1] += pcd_raw[i, 3] 45 | intensity_map_count[indices[i, 0], indices[i, 1]] += 1 46 | divide_mask = intensity_map_count!=0 47 | bev[divide_mask, -1] = np.divide(bev[divide_mask, -1], intensity_map_count[divide_mask]) 48 | 49 | data_dict = { 50 | "bev_input": np.transpose(bev, (2, 0, 1)) 51 | } 52 | return data_dict 53 | 54 | @staticmethod 55 | def collate_batch_list(batch): 56 | """ 57 | Customized pytorch data loader collate function. 58 | 59 | Parameters 60 | ---------- 61 | batch : list 62 | List of dictionary. Each dictionary represent a single frame. 63 | 64 | Returns 65 | ------- 66 | processed_batch : dict 67 | Updated lidar batch. 68 | """ 69 | bev_input_list = [ 70 | x["bev_input"][np.newaxis, ...] 
for x in batch 71 | ] 72 | processed_batch = { 73 | "bev_input": torch.from_numpy( 74 | np.concatenate(bev_input_list, axis=0)) 75 | } 76 | return processed_batch 77 | @staticmethod 78 | def collate_batch_dict(batch): 79 | """ 80 | Customized pytorch data loader collate function. 81 | 82 | Parameters 83 | ---------- 84 | batch : dict 85 | Dict of list. Each element represents a CAV. 86 | 87 | Returns 88 | ------- 89 | processed_batch : dict 90 | Updated lidar batch. 91 | """ 92 | bev_input_list = [ 93 | x[np.newaxis, ...] for x in batch["bev_input"] 94 | ] 95 | processed_batch = { 96 | "bev_input": torch.from_numpy( 97 | np.concatenate(bev_input_list, axis=0)) 98 | } 99 | return processed_batch 100 | 101 | def collate_batch(self, batch): 102 | """ 103 | Customized pytorch data loader collate function. 104 | 105 | Parameters 106 | ---------- 107 | batch : list / dict 108 | Batched data. 109 | Returns 110 | ------- 111 | processed_batch : dict 112 | Updated lidar batch. 113 | """ 114 | if isinstance(batch, list): 115 | return self.collate_batch_list(batch) 116 | elif isinstance(batch, dict): 117 | return self.collate_batch_dict(batch) 118 | else: 119 | raise NotImplemented 120 | 121 | -------------------------------------------------------------------------------- /v2xvit/data_utils/augmentor/data_augmentor.py: -------------------------------------------------------------------------------- 1 | """ 2 | Class for data augmentation 3 | """ 4 | from functools import partial 5 | 6 | from v2xvit.data_utils.augmentor import augment_utils 7 | 8 | 9 | class DataAugmentor(object): 10 | """ 11 | Data Augmentor. 12 | 13 | Parameters 14 | ---------- 15 | augment_config : list 16 | A list of augmentation configuration. 17 | 18 | Attributes 19 | ---------- 20 | data_augmentor_queue : list 21 | The list of data augmented functions. 
22 | """ 23 | 24 | def __init__(self, augment_config, train=True): 25 | self.data_augmentor_queue = [] 26 | self.train = train 27 | 28 | for cur_cfg in augment_config: 29 | cur_augmentor = getattr(self, cur_cfg['NAME'])(config=cur_cfg) 30 | self.data_augmentor_queue.append(cur_augmentor) 31 | 32 | def random_world_flip(self, data_dict=None, config=None): 33 | if data_dict is None: 34 | return partial(self.random_world_flip, config=config) 35 | 36 | gt_boxes, gt_mask, points = data_dict['object_bbx_center'], \ 37 | data_dict['object_bbx_mask'], \ 38 | data_dict['lidar_np'] 39 | gt_boxes_valid = gt_boxes[gt_mask == 1] 40 | 41 | for cur_axis in config['ALONG_AXIS_LIST']: 42 | assert cur_axis in ['x', 'y'] 43 | gt_boxes_valid, points = getattr(augment_utils, 44 | 'random_flip_along_%s' % cur_axis)( 45 | gt_boxes_valid, points, 46 | ) 47 | 48 | gt_boxes[:gt_boxes_valid.shape[0], :] = gt_boxes_valid 49 | 50 | data_dict['object_bbx_center'] = gt_boxes 51 | data_dict['object_bbx_mask'] = gt_mask 52 | data_dict['lidar_np'] = points 53 | 54 | return data_dict 55 | 56 | def random_world_rotation(self, data_dict=None, config=None): 57 | if data_dict is None: 58 | return partial(self.random_world_rotation, config=config) 59 | 60 | rot_range = config['WORLD_ROT_ANGLE'] 61 | if not isinstance(rot_range, list): 62 | rot_range = [-rot_range, rot_range] 63 | 64 | gt_boxes, gt_mask, points = data_dict['object_bbx_center'], \ 65 | data_dict['object_bbx_mask'], \ 66 | data_dict['lidar_np'] 67 | gt_boxes_valid = gt_boxes[gt_mask == 1] 68 | gt_boxes_valid, points = augment_utils.global_rotation( 69 | gt_boxes_valid, points, rot_range=rot_range 70 | ) 71 | gt_boxes[:gt_boxes_valid.shape[0], :] = gt_boxes_valid 72 | 73 | data_dict['object_bbx_center'] = gt_boxes 74 | data_dict['object_bbx_mask'] = gt_mask 75 | data_dict['lidar_np'] = points 76 | 77 | return data_dict 78 | 79 | def random_world_scaling(self, data_dict=None, config=None): 80 | if data_dict is None: 81 | return partial(self.random_world_scaling, config=config) 82 | 83 | gt_boxes, gt_mask, points = data_dict['object_bbx_center'], \ 84 | data_dict['object_bbx_mask'], \ 85 | data_dict['lidar_np'] 86 | gt_boxes_valid = gt_boxes[gt_mask == 1] 87 | 88 | gt_boxes_valid, points = augment_utils.global_scaling( 89 | gt_boxes_valid, points, config['WORLD_SCALE_RANGE'] 90 | ) 91 | gt_boxes[:gt_boxes_valid.shape[0], :] = gt_boxes_valid 92 | 93 | data_dict['object_bbx_center'] = gt_boxes 94 | data_dict['object_bbx_mask'] = gt_mask 95 | data_dict['lidar_np'] = points 96 | 97 | return data_dict 98 | 99 | def forward(self, data_dict): 100 | """ 101 | Args: 102 | data_dict: 103 | points: (N, 3 + C_in) 104 | gt_boxes: optional, (N, 7) [x, y, z, dx, dy, dz, heading] 105 | gt_names: optional, (N), string 106 | ... 
107 | 108 | Returns: 109 | """ 110 | if self.train: 111 | for cur_augmentor in self.data_augmentor_queue: 112 | data_dict = cur_augmentor(data_dict=data_dict) 113 | 114 | return data_dict 115 | -------------------------------------------------------------------------------- /v2xvit/models/point_pillar_fcooper.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from v2xvit.models.sub_modules.pillar_vfe import PillarVFE 4 | from v2xvit.models.sub_modules.point_pillar_scatter import PointPillarScatter 5 | from v2xvit.models.sub_modules.base_bev_backbone import BaseBEVBackbone 6 | from v2xvit.models.sub_modules.downsample_conv import DownsampleConv 7 | from v2xvit.models.sub_modules.naive_compress import NaiveCompressor 8 | from v2xvit.models.sub_modules.f_cooper_fuse import SpatialFusion 9 | 10 | 11 | class PointPillarFCooper(nn.Module): 12 | def __init__(self, args): 13 | super(PointPillarFCooper, self).__init__() 14 | 15 | self.max_cav = args['max_cav'] 16 | # PIllar VFE 17 | self.pillar_vfe = PillarVFE(args['pillar_vfe'], 18 | num_point_features=4, 19 | voxel_size=args['voxel_size'], 20 | point_cloud_range=args['lidar_range']) 21 | self.scatter = PointPillarScatter(args['point_pillar_scatter']) 22 | self.backbone = BaseBEVBackbone(args['base_bev_backbone'], 64) 23 | # used to downsample the feature map for efficient computation 24 | self.shrink_flag = False 25 | if 'shrink_header' in args: 26 | self.shrink_flag = True 27 | self.shrink_conv = DownsampleConv(args['shrink_header']) 28 | self.compression = False 29 | 30 | if args['compression'] > 0: 31 | self.compression = True 32 | self.naive_compressor = NaiveCompressor(256, args['compression']) 33 | 34 | self.fusion_net = SpatialFusion() 35 | 36 | self.cls_head = nn.Conv2d(128 * 2, args['anchor_number'], 37 | kernel_size=1) 38 | self.reg_head = nn.Conv2d(128 * 2, 7 * args['anchor_number'], 39 | kernel_size=1) 40 | 41 | if args['backbone_fix']: 42 | self.backbone_fix() 43 | 44 | def backbone_fix(self): 45 | """ 46 | Fix the parameters of backbone during finetune on timedelay。 47 | """ 48 | for p in self.pillar_vfe.parameters(): 49 | p.requires_grad = False 50 | 51 | for p in self.scatter.parameters(): 52 | p.requires_grad = False 53 | 54 | for p in self.backbone.parameters(): 55 | p.requires_grad = False 56 | 57 | if self.compression: 58 | for p in self.naive_compressor.parameters(): 59 | p.requires_grad = False 60 | if self.shrink_flag: 61 | for p in self.shrink_conv.parameters(): 62 | p.requires_grad = False 63 | 64 | for p in self.cls_head.parameters(): 65 | p.requires_grad = False 66 | for p in self.reg_head.parameters(): 67 | p.requires_grad = False 68 | 69 | def forward(self, data_dict): 70 | voxel_features = data_dict['processed_lidar']['voxel_features'] 71 | voxel_coords = data_dict['processed_lidar']['voxel_coords'] 72 | voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] 73 | record_len = data_dict['record_len'] 74 | spatial_correction_matrix = data_dict['spatial_correction_matrix'] 75 | 76 | batch_dict = {'voxel_features': voxel_features, 77 | 'voxel_coords': voxel_coords, 78 | 'voxel_num_points': voxel_num_points, 79 | 'record_len': record_len} 80 | # n, 4 -> n, c 81 | batch_dict = self.pillar_vfe(batch_dict) 82 | # n, c -> N, C, H, W 83 | batch_dict = self.scatter(batch_dict) 84 | batch_dict = self.backbone(batch_dict) 85 | 86 | spatial_features_2d = batch_dict['spatial_features_2d'] 87 | # downsample feature to reduce memory 88 | if 
self.shrink_flag: 89 | spatial_features_2d = self.shrink_conv(spatial_features_2d) 90 | # compressor 91 | if self.compression: 92 | spatial_features_2d = self.naive_compressor(spatial_features_2d) 93 | 94 | fused_feature = self.fusion_net(spatial_features_2d, record_len) 95 | 96 | psm = self.cls_head(fused_feature) 97 | rm = self.reg_head(fused_feature) 98 | 99 | output_dict = {'psm': psm, 100 | 'rm': rm} 101 | 102 | return output_dict 103 | -------------------------------------------------------------------------------- /v2xvit/models/sub_modules/base_transformer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | from einops import rearrange 5 | 6 | 7 | class PreNormResidual(nn.Module): 8 | def __init__(self, dim, fn): 9 | super().__init__() 10 | self.norm = nn.LayerNorm(dim) 11 | self.fn = fn 12 | 13 | def forward(self, x, **kwargs): 14 | return self.fn(self.norm(x), **kwargs) + x 15 | 16 | 17 | class PreNorm(nn.Module): 18 | def __init__(self, dim, fn): 19 | super().__init__() 20 | self.norm = nn.LayerNorm(dim) 21 | self.fn = fn 22 | 23 | def forward(self, x, **kwargs): 24 | return self.fn(self.norm(x), **kwargs) 25 | 26 | 27 | class FeedForward(nn.Module): 28 | def __init__(self, dim, hidden_dim, dropout=0.): 29 | super().__init__() 30 | self.net = nn.Sequential( 31 | nn.Linear(dim, hidden_dim), 32 | nn.GELU(), 33 | nn.Dropout(dropout), 34 | nn.Linear(hidden_dim, dim), 35 | nn.Dropout(dropout) 36 | ) 37 | 38 | def forward(self, x): 39 | return self.net(x) 40 | 41 | 42 | class CavAttention(nn.Module): 43 | """ 44 | Vanilla CAV attention. 45 | """ 46 | def __init__(self, dim, heads, dim_head=64, dropout=0.1): 47 | super().__init__() 48 | inner_dim = heads * dim_head 49 | 50 | self.heads = heads 51 | self.scale = dim_head ** -0.5 52 | 53 | self.attend = nn.Softmax(dim=-1) 54 | self.to_qkv = nn.Linear(dim, inner_dim * 3, bias=False) 55 | 56 | self.to_out = nn.Sequential( 57 | nn.Linear(inner_dim, dim), 58 | nn.Dropout(dropout) 59 | ) 60 | 61 | def forward(self, x, mask, prior_encoding): 62 | # x: (B, L, H, W, C) -> (B, H, W, L, C) 63 | # mask: (B, L) 64 | x = x.permute(0, 2, 3, 1, 4) 65 | # mask: (B, 1, H, W, L, 1) 66 | mask = mask.unsqueeze(1) 67 | 68 | # qkv: [(B, H, W, L, C_inner) *3] 69 | qkv = self.to_qkv(x).chunk(3, dim=-1) 70 | # q: (B, M, H, W, L, C) 71 | q, k, v = map(lambda t: rearrange(t, 'b h w l (m c) -> b m h w l c', 72 | m=self.heads), qkv) 73 | 74 | # attention, (B, M, H, W, L, L) 75 | att_map = torch.einsum('b m h w i c, b m h w j c -> b m h w i j', 76 | q, k) * self.scale 77 | # add mask 78 | att_map = att_map.masked_fill(mask == 0, -float('inf')) 79 | # softmax 80 | att_map = self.attend(att_map) 81 | 82 | # out:(B, M, H, W, L, C_head) 83 | out = torch.einsum('b m h w i j, b m h w j c -> b m h w i c', att_map, 84 | v) 85 | out = rearrange(out, 'b m h w l c -> b h w l (m c)', 86 | m=self.heads) 87 | out = self.to_out(out) 88 | # (B L H W C) 89 | out = out.permute(0, 3, 1, 2, 4) 90 | return out 91 | 92 | 93 | class BaseEncoder(nn.Module): 94 | def __init__(self, dim, depth, heads, dim_head, mlp_dim, dropout=0.): 95 | super().__init__() 96 | self.layers = nn.ModuleList([]) 97 | for _ in range(depth): 98 | self.layers.append(nn.ModuleList([ 99 | PreNorm(dim, CavAttention(dim, 100 | heads=heads, 101 | dim_head=dim_head, 102 | dropout=dropout)), 103 | PreNorm(dim, FeedForward(dim, mlp_dim, dropout=dropout)) 104 | ])) 105 | 106 | def forward(self, x, mask): 107 | for attn, ff in self.layers: 108 
| x = attn(x, mask=mask) + x 109 | x = ff(x) + x 110 | return x 111 | 112 | 113 | class BaseTransformer(nn.Module): 114 | def __init__(self, args): 115 | super().__init__() 116 | 117 | dim = args['dim'] 118 | depth = args['depth'] 119 | heads = args['heads'] 120 | dim_head = args['dim_head'] 121 | mlp_dim = args['mlp_dim'] 122 | dropout = args['dropout'] 123 | max_cav = args['max_cav'] 124 | 125 | self.encoder = BaseEncoder(dim, depth, heads, dim_head, mlp_dim, 126 | dropout) 127 | 128 | def forward(self, x, mask): 129 | # B, L, H, W, C 130 | output = self.encoder(x, mask) 131 | # B, H, W, C 132 | output = output[:, 0] 133 | 134 | return output -------------------------------------------------------------------------------- /v2xvit/models/point_pillar_opv2v.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from v2xvit.models.sub_modules.pillar_vfe import PillarVFE 4 | from v2xvit.models.sub_modules.point_pillar_scatter import PointPillarScatter 5 | from v2xvit.models.sub_modules.base_bev_backbone import BaseBEVBackbone 6 | from v2xvit.models.sub_modules.downsample_conv import DownsampleConv 7 | from v2xvit.models.sub_modules.naive_compress import NaiveCompressor 8 | from v2xvit.models.sub_modules.self_attn import AttFusion 9 | 10 | 11 | class PointPillarOPV2V(nn.Module): 12 | def __init__(self, args): 13 | super(PointPillarOPV2V, self).__init__() 14 | 15 | self.max_cav = 5 16 | # PIllar VFE 17 | self.pillar_vfe = PillarVFE(args['pillar_vfe'], 18 | num_point_features=4, 19 | voxel_size=args['voxel_size'], 20 | point_cloud_range=args['lidar_range']) 21 | self.scatter = PointPillarScatter(args['point_pillar_scatter']) 22 | self.backbone = BaseBEVBackbone(args['base_bev_backbone'], 64) 23 | # used to downsample the feature map for efficient computation 24 | self.shrink_flag = False 25 | if 'shrink_header' in args: 26 | self.shrink_flag = True 27 | self.shrink_conv = DownsampleConv(args['shrink_header']) 28 | self.compression = False 29 | 30 | if args['compression'] > 0: 31 | self.compression = True 32 | self.naive_compressor = NaiveCompressor(256, args['compression']) 33 | 34 | self.fusion_net = AttFusion(256) 35 | 36 | self.cls_head = nn.Conv2d(192 * 2, args['anchor_number'], 37 | kernel_size=1) 38 | self.reg_head = nn.Conv2d(192 * 2, 7 * args['anchor_number'], 39 | kernel_size=1) 40 | 41 | if args['backbone_fix']: 42 | self.backbone_fix() 43 | 44 | def backbone_fix(self): 45 | """ 46 | Fix the parameters of backbone during finetune on timedelay。 47 | """ 48 | for p in self.pillar_vfe.parameters(): 49 | p.requires_grad = False 50 | 51 | for p in self.scatter.parameters(): 52 | p.requires_grad = False 53 | 54 | for p in self.backbone.parameters(): 55 | p.requires_grad = False 56 | 57 | if self.compression: 58 | for p in self.naive_compressor.parameters(): 59 | p.requires_grad = False 60 | if self.shrink_flag: 61 | for p in self.shrink_conv.parameters(): 62 | p.requires_grad = False 63 | 64 | for p in self.cls_head.parameters(): 65 | p.requires_grad = False 66 | for p in self.reg_head.parameters(): 67 | p.requires_grad = False 68 | 69 | def forward(self, data_dict): 70 | voxel_features = data_dict['processed_lidar']['voxel_features'] 71 | voxel_coords = data_dict['processed_lidar']['voxel_coords'] 72 | voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] 73 | record_len = data_dict['record_len'] 74 | spatial_correction_matrix = data_dict['spatial_correction_matrix'] 75 | 76 | # B, max_cav, 3(dt dv infra), 1, 1 
77 | prior_encoding =\ 78 | data_dict['prior_encoding'].unsqueeze(-1).unsqueeze(-1) 79 | 80 | batch_dict = {'voxel_features': voxel_features, 81 | 'voxel_coords': voxel_coords, 82 | 'voxel_num_points': voxel_num_points, 83 | 'record_len': record_len} 84 | # n, 4 -> n, c 85 | batch_dict = self.pillar_vfe(batch_dict) 86 | # n, c -> N, C, H, W 87 | batch_dict = self.scatter(batch_dict) 88 | batch_dict = self.backbone(batch_dict) 89 | 90 | spatial_features_2d = batch_dict['spatial_features_2d'] 91 | # downsample feature to reduce memory 92 | if self.shrink_flag: 93 | spatial_features_2d = self.shrink_conv(spatial_features_2d) 94 | # compressor 95 | if self.compression: 96 | spatial_features_2d = self.naive_compressor(spatial_features_2d) 97 | 98 | fused_feature = self.fusion_net(spatial_features_2d, record_len) 99 | 100 | psm = self.cls_head(fused_feature) 101 | rm = self.reg_head(fused_feature) 102 | 103 | output_dict = {'psm': psm, 104 | 'rm': rm} 105 | 106 | return output_dict 107 | -------------------------------------------------------------------------------- /v2xvit/hypes_yaml/where2comm_transformer_multiscale_resnet.yaml: -------------------------------------------------------------------------------- 1 | name: opv2v_where2comm_transformer_multiscale_resnet 2 | 3 | root_dir: '/data/gjm/OPV2V/train' # '/data2/gjm/v2xset/train' 4 | validate_dir: '/data/gjm/OPV2V/validate' # '/data2/gjm/v2xset/validate' 5 | 6 | wild_setting: 7 | async: False 8 | async_overhead: 100 9 | seed: 20 10 | loc_err: false 11 | xyz_std: 0.2 12 | ryp_std: 0.2 13 | data_size: 1.06 # Mb!! 14 | transmission_speed: 27 # Mbps!! 15 | backbone_delay: 10 # ms 16 | 17 | noise_setting: 18 | add_noise: false 19 | args: 20 | pos_std: 1 21 | rot_std: 0 22 | pos_mean: 0 23 | rot_mean: 0 24 | 25 | yaml_parser: "load_point_pillar_params" 26 | train_params: 27 | batch_size: &batch_size 4 28 | epoches: 100 29 | eval_freq: 2 30 | save_freq: 2 31 | max_cav: &max_cav 5 32 | 33 | 34 | fusion: 35 | core_method: 'IntermediateFusionDataset' # LateFusionDataset, EarlyFusionDataset, IntermediateFusionDataset supported 36 | args: 37 | cur_ego_pose_flag: true 38 | # preprocess-related 39 | preprocess: 40 | # options: BasePreprocessor, VoxelPreprocessor, BevPreprocessor 41 | core_method: 'SpVoxelPreprocessor' 42 | args: 43 | voxel_size: &voxel_size [0.4, 0.4, 4] 44 | max_points_per_voxel: 32 45 | max_voxel_train: 32000 46 | max_voxel_test: 70000 47 | # lidar range for each individual cav. Format: xyzxyz minmax 48 | cav_lidar_range: &cav_lidar [-140.8, -40, -3, 140.8, 40, 1] 49 | 50 | data_augment: 51 | - NAME: random_world_flip 52 | ALONG_AXIS_LIST: [ 'x' ] 53 | 54 | - NAME: random_world_rotation 55 | WORLD_ROT_ANGLE: [ -0.78539816, 0.78539816 ] 56 | 57 | - NAME: random_world_scaling 58 | WORLD_SCALE_RANGE: [ 0.95, 1.05 ] 59 | 60 | # anchor box related 61 | postprocess: 62 | core_method: 'VoxelPostprocessor' # VoxelPostprocessor, BevPostprocessor supported 63 | gt_range: *cav_lidar 64 | anchor_args: 65 | cav_lidar_range: *cav_lidar 66 | l: 3.9 67 | w: 1.6 68 | h: 1.56 69 | r: [0, 90] 70 | feature_stride: 2 71 | num: &achor_num 2 72 | target_args: 73 | pos_threshold: 0.6 74 | neg_threshold: 0.45 75 | score_threshold: 0.25 76 | order: 'hwl' # hwl or lwh 77 | max_num: 100 # maximum number of objects in a single frame. 
use this number to make sure different frames has the same dimension in the same batch 78 | nms_thresh: 0.15 79 | 80 | # model related 81 | model: 82 | core_method: point_pillar_where2comm 83 | 84 | args: 85 | communication: 86 | thre: 0.01 87 | gaussian_smooth: 88 | k_size: 5 89 | c_sigma: 1.0 90 | voxel_size: *voxel_size 91 | lidar_range: *cav_lidar 92 | anchor_number: *achor_num 93 | max_cav: *max_cav 94 | compression: 0 # compression rate 95 | backbone_fix: false 96 | 97 | pillar_vfe: 98 | use_norm: true 99 | with_distance: false 100 | use_absolute_xyz: true 101 | num_filters: [64] 102 | point_pillar_scatter: 103 | num_features: 64 104 | 105 | base_bev_backbone: 106 | resnet: True 107 | layer_nums: &layer_nums [3, 4, 5] 108 | layer_strides: [2, 2, 2] 109 | num_filters: &num_filters [64, 128, 256] 110 | upsample_strides: [1, 2, 4] 111 | num_upsample_filter: [128, 128, 128] 112 | compression: 0 113 | voxel_size: *voxel_size 114 | shrink_header: 115 | kernal_size: [ 3 ] 116 | stride: [ 1 ] 117 | padding: [ 1 ] 118 | dim: [ 256 ] 119 | input_dim: 384 # 128 * 3 120 | 121 | # dcn: 122 | # in_channels: [384] 123 | # out_channels: [256] 124 | # stride: [1] 125 | # padding: [1] 126 | # kernel_size : [3] 127 | # n_blocks: 1 128 | 129 | fusion_args: 130 | voxel_size: *voxel_size 131 | downsample_rate: 1 132 | in_channels: 256 133 | n_head: 8 134 | dropout_rate: 0 135 | only_attention: true 136 | agg_operator: 137 | # mode: 'SF' 138 | mode: 'ATTEN' 139 | # mode: 'Transformer' 140 | feature_dim: 256 141 | n_head: 8 142 | with_spe: false 143 | with_scm: false 144 | multi_scale: true 145 | layer_nums: *layer_nums 146 | num_filters: *num_filters 147 | batch_size: *batch_size 148 | 149 | loss: 150 | core_method: point_pillar_loss 151 | args: 152 | cls_weight: 1.0 153 | reg: 2.0 154 | 155 | optimizer: 156 | core_method: Adam 157 | lr: 0.001 158 | args: 159 | eps: 1e-10 160 | weight_decay: 1e-4 161 | 162 | lr_scheduler: 163 | core_method: multistep #step, multistep and Exponential support 164 | gamma: 0.1 165 | step_size: [10, 20] 166 | 167 | -------------------------------------------------------------------------------- /v2xvit/models/point_pillar_single.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from v2xvit.models.sub_modules.pillar_vfe import PillarVFE 4 | from v2xvit.models.sub_modules.point_pillar_scatter import PointPillarScatter 5 | from v2xvit.models.sub_modules.base_bev_backbone import BaseBEVBackbone 6 | from v2xvit.models.sub_modules.downsample_conv import DownsampleConv 7 | from v2xvit.models.sub_modules.naive_compress import NaiveCompressor 8 | from v2xvit.models.sub_modules.f_cooper_fuse import SpatialFusion 9 | import torch 10 | 11 | 12 | 13 | class PointPillarSingle(nn.Module): 14 | def __init__(self, args): 15 | super(PointPillarSingle, self).__init__() 16 | 17 | self.max_cav = args['max_cav'] 18 | # PIllar VFE 19 | self.pillar_vfe = PillarVFE(args['pillar_vfe'], 20 | num_point_features=4, 21 | voxel_size=args['voxel_size'], 22 | point_cloud_range=args['lidar_range']) 23 | self.scatter = PointPillarScatter(args['point_pillar_scatter']) 24 | self.backbone = BaseBEVBackbone(args['base_bev_backbone'], 64) 25 | # used to downsample the feature map for efficient computation 26 | self.shrink_flag = False 27 | if 'shrink_header' in args: 28 | self.shrink_flag = True 29 | self.shrink_conv = DownsampleConv(args['shrink_header']) 30 | self.compression = False 31 | 32 | if args['compression'] > 0: 33 | self.compression = 
True 34 | self.naive_compressor = NaiveCompressor(256, args['compression']) 35 | 36 | self.fusion_net = SpatialFusion() 37 | 38 | self.cls_head = nn.Conv2d(128 * 2, args['anchor_number'], 39 | kernel_size=1) 40 | self.reg_head = nn.Conv2d(128 * 2, 7 * args['anchor_number'], 41 | kernel_size=1) 42 | 43 | if args['backbone_fix']: 44 | self.backbone_fix() 45 | 46 | def backbone_fix(self): 47 | """ 48 | Fix the parameters of backbone during finetune on timedelay。 49 | """ 50 | for p in self.pillar_vfe.parameters(): 51 | p.requires_grad = False 52 | 53 | for p in self.scatter.parameters(): 54 | p.requires_grad = False 55 | 56 | for p in self.backbone.parameters(): 57 | p.requires_grad = False 58 | 59 | if self.compression: 60 | for p in self.naive_compressor.parameters(): 61 | p.requires_grad = False 62 | if self.shrink_flag: 63 | for p in self.shrink_conv.parameters(): 64 | p.requires_grad = False 65 | 66 | for p in self.cls_head.parameters(): 67 | p.requires_grad = False 68 | for p in self.reg_head.parameters(): 69 | p.requires_grad = False 70 | 71 | def regroup(self, x, record_len): 72 | cum_sum_len = torch.cumsum(record_len, dim=0) 73 | split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) 74 | return split_x 75 | 76 | def forward(self, data_dict): 77 | voxel_features = data_dict['processed_lidar']['voxel_features'] 78 | voxel_coords = data_dict['processed_lidar']['voxel_coords'] 79 | voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] 80 | record_len = data_dict['record_len'] 81 | spatial_correction_matrix = data_dict['spatial_correction_matrix'] 82 | 83 | batch_dict = {'voxel_features': voxel_features, 84 | 'voxel_coords': voxel_coords, 85 | 'voxel_num_points': voxel_num_points, 86 | 'record_len': record_len} 87 | # n, 4 -> n, c 88 | batch_dict = self.pillar_vfe(batch_dict) 89 | # n, c -> N, C, H, W 90 | batch_dict = self.scatter(batch_dict) 91 | batch_dict = self.backbone(batch_dict) 92 | 93 | spatial_features_2d = batch_dict['spatial_features_2d'] 94 | # print(spatial_features_2d.shape) 95 | # downsample feature to reduce memory 96 | if self.shrink_flag: 97 | spatial_features_2d = self.shrink_conv(spatial_features_2d) 98 | # # compressor 99 | if self.compression: 100 | spatial_features_2d = self.naive_compressor(spatial_features_2d) 101 | 102 | 103 | split_x = self.regroup(spatial_features_2d, record_len) 104 | out = [] 105 | 106 | for xx in split_x: 107 | out.append(xx[0].unsqueeze(0)) 108 | fused_feature = torch.cat(out, dim=0) 109 | # print(fused_feature.shape) 110 | # exit() 111 | psm = self.cls_head(fused_feature) 112 | rm = self.reg_head(fused_feature) 113 | 114 | output_dict = {'psm': psm, 115 | 'rm': rm} 116 | 117 | return output_dict 118 | -------------------------------------------------------------------------------- /v2xvit/utils/eval_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | import torch 5 | 6 | from v2xvit.utils import common_utils 7 | from v2xvit.hypes_yaml import yaml_utils 8 | 9 | 10 | def voc_ap(rec, prec): 11 | """ 12 | VOC 2010 Average Precision. 
13 | """ 14 | rec.insert(0, 0.0) 15 | rec.append(1.0) 16 | mrec = rec[:] 17 | 18 | prec.insert(0, 0.0) 19 | prec.append(0.0) 20 | mpre = prec[:] 21 | 22 | for i in range(len(mpre) - 2, -1, -1): 23 | mpre[i] = max(mpre[i], mpre[i + 1]) 24 | 25 | i_list = [] 26 | for i in range(1, len(mrec)): 27 | if mrec[i] != mrec[i - 1]: 28 | i_list.append(i) 29 | 30 | ap = 0.0 31 | for i in i_list: 32 | ap += ((mrec[i] - mrec[i - 1]) * mpre[i]) 33 | return ap, mrec, mpre 34 | 35 | 36 | def caluclate_tp_fp(det_boxes, det_score, gt_boxes, result_stat, iou_thresh): 37 | """ 38 | Calculate the true positive and false positive numbers of the current 39 | frames. 40 | 41 | Parameters 42 | ---------- 43 | det_boxes : torch.Tensor 44 | The detection bounding box, shape (N, 8, 3) or (N, 4, 2). 45 | det_score :torch.Tensor 46 | The confidence score for each preditect bounding box. 47 | gt_boxes : torch.Tensor 48 | The groundtruth bounding box. 49 | result_stat: dict 50 | A dictionary contains fp, tp and gt number. 51 | iou_thresh : float 52 | The iou thresh. 53 | """ 54 | # fp, tp and gt in the current frame 55 | fp = [] 56 | tp = [] 57 | gt = gt_boxes.shape[0] 58 | if det_boxes is not None: 59 | # convert bounding boxes to numpy array 60 | det_boxes = common_utils.torch_tensor_to_numpy(det_boxes) 61 | det_score = common_utils.torch_tensor_to_numpy(det_score) 62 | gt_boxes = common_utils.torch_tensor_to_numpy(gt_boxes) 63 | 64 | # sort the prediction bounding box by score 65 | score_order_descend = np.argsort(-det_score) 66 | det_polygon_list = list(common_utils.convert_format(det_boxes)) 67 | gt_polygon_list = list(common_utils.convert_format(gt_boxes)) 68 | 69 | # match prediction and gt bounding box 70 | for i in range(score_order_descend.shape[0]): 71 | det_polygon = det_polygon_list[score_order_descend[i]] 72 | ious = common_utils.compute_iou(det_polygon, gt_polygon_list) 73 | 74 | if len(gt_polygon_list) == 0 or np.max(ious) < iou_thresh: 75 | fp.append(1) 76 | tp.append(0) 77 | continue 78 | 79 | fp.append(0) 80 | tp.append(1) 81 | 82 | gt_index = np.argmax(ious) 83 | gt_polygon_list.pop(gt_index) 84 | 85 | result_stat[iou_thresh]['fp'] += fp 86 | result_stat[iou_thresh]['tp'] += tp 87 | result_stat[iou_thresh]['gt'] += gt 88 | 89 | 90 | def calculate_ap(result_stat, iou): 91 | """ 92 | Calculate the average precision and recall, and save them into a txt. 93 | 94 | Parameters 95 | ---------- 96 | result_stat : dict 97 | A dictionary contains fp, tp and gt number. 
98 | iou : float 99 | """ 100 | iou_5 = result_stat[iou] 101 | 102 | fp = iou_5['fp'] 103 | tp = iou_5['tp'] 104 | assert len(fp) == len(tp) 105 | 106 | gt_total = iou_5['gt'] 107 | 108 | cumsum = 0 109 | for idx, val in enumerate(fp): 110 | fp[idx] += cumsum 111 | cumsum += val 112 | 113 | cumsum = 0 114 | for idx, val in enumerate(tp): 115 | tp[idx] += cumsum 116 | cumsum += val 117 | 118 | rec = tp[:] 119 | for idx, val in enumerate(tp): 120 | rec[idx] = float(tp[idx]) / gt_total 121 | 122 | prec = tp[:] 123 | for idx, val in enumerate(tp): 124 | prec[idx] = float(tp[idx]) / (fp[idx] + tp[idx]) 125 | 126 | ap, mrec, mprec = voc_ap(rec[:], prec[:]) 127 | 128 | return ap, mrec, mprec 129 | 130 | 131 | def eval_final_results(result_stat, save_path): 132 | dump_dict = {} 133 | 134 | ap_30, mrec_30, mpre_30 = calculate_ap(result_stat, 0.30) 135 | ap_50, mrec_50, mpre_50 = calculate_ap(result_stat, 0.50) 136 | ap_70, mrec_70, mpre_70 = calculate_ap(result_stat, 0.70) 137 | 138 | dump_dict.update({'ap30': ap_30, 139 | 'ap_50': ap_50, 140 | 'ap_70': ap_70, 141 | 'mpre_50': mpre_50, 142 | 'mrec_50': mrec_50, 143 | 'mpre_70': mpre_70, 144 | 'mrec_70': mrec_70, 145 | }) 146 | yaml_utils.save_yaml(dump_dict, os.path.join(save_path, 'eval.yaml')) 147 | 148 | print('The Average Precision at IOU 0.3 is %.2f, ' 149 | 'The Average Precision at IOU 0.5 is %.2f, ' 150 | 'The Average Precision at IOU 0.7 is %.2f' % (ap_30, ap_50, ap_70)) 151 | return ap_30, ap_50, ap_70 152 | -------------------------------------------------------------------------------- /v2xvit/models/sub_modules/mswin.py: -------------------------------------------------------------------------------- 1 | """ 2 | Multi-scale window transformer 3 | """ 4 | import torch 5 | import torch.nn as nn 6 | import numpy as np 7 | 8 | from einops import rearrange 9 | from v2xvit.models.sub_modules.split_attn import SplitAttn 10 | 11 | 12 | def get_relative_distances(window_size): 13 | indices = torch.tensor(np.array( 14 | [[x, y] for x in range(window_size) for y in range(window_size)])) 15 | distances = indices[None, :, :] - indices[:, None, :] 16 | return distances 17 | 18 | 19 | class BaseWindowAttention(nn.Module): 20 | def __init__(self, dim, heads, dim_head, drop_out, window_size, 21 | relative_pos_embedding): 22 | super().__init__() 23 | inner_dim = dim_head * heads 24 | 25 | self.heads = heads 26 | self.scale = dim_head ** -0.5 27 | self.window_size = window_size 28 | self.relative_pos_embedding = relative_pos_embedding 29 | 30 | self.to_qkv = nn.Linear(dim, inner_dim * 3, bias=False) 31 | 32 | if self.relative_pos_embedding: 33 | self.relative_indices = get_relative_distances(window_size) + \ 34 | window_size - 1 35 | self.pos_embedding = nn.Parameter(torch.randn(2 * window_size - 1, 36 | 2 * window_size - 1)) 37 | else: 38 | self.pos_embedding = nn.Parameter(torch.randn(window_size ** 2, 39 | window_size ** 2)) 40 | 41 | self.to_out = nn.Sequential( 42 | nn.Linear(inner_dim, dim), 43 | nn.Dropout(drop_out) 44 | ) 45 | 46 | def forward(self, x): 47 | b, l, h, w, c, m = *x.shape, self.heads 48 | 49 | qkv = self.to_qkv(x).chunk(3, dim=-1) 50 | new_h = h // self.window_size 51 | new_w = w // self.window_size 52 | 53 | # q : (b, l, m, new_h*new_w, window_size^2, c_head) 54 | q, k, v = map( 55 | lambda t: rearrange(t, 56 | 'b l (new_h w_h) (new_w w_w) (m c) -> b l m (new_h new_w) (w_h w_w) c', 57 | m=m, w_h=self.window_size, 58 | w_w=self.window_size), qkv) 59 | # b l m h window_size window_size 60 | dots = torch.einsum('b l m h i c, b l m h j 
c -> b l m h i j', 61 | q, k, ) * self.scale 62 | # consider prior knowledge of the local window 63 | if self.relative_pos_embedding: 64 | dots += self.pos_embedding[self.relative_indices[:, :, 0], 65 | self.relative_indices[:, :, 1]] 66 | else: 67 | dots += self.pos_embedding 68 | 69 | attn = dots.softmax(dim=-1) 70 | 71 | out = torch.einsum('b l m h i j, b l m h j c -> b l m h i c', attn, v) 72 | # b l h w c 73 | out = rearrange(out, 74 | 'b l m (new_h new_w) (w_h w_w) c -> b l (new_h w_h) (new_w w_w) (m c)', 75 | m=self.heads, w_h=self.window_size, 76 | w_w=self.window_size, 77 | new_w=new_w, new_h=new_h) 78 | out = self.to_out(out) 79 | 80 | return out 81 | 82 | 83 | class PyramidWindowAttention(nn.Module): 84 | def __init__(self, dim, heads, dim_heads, drop_out, window_size, 85 | relative_pos_embedding, fuse_method='naive'): 86 | super().__init__() 87 | 88 | assert isinstance(window_size, list) 89 | assert isinstance(heads, list) 90 | assert isinstance(dim_heads, list) 91 | assert len(dim_heads) == len(heads) 92 | 93 | self.pwmsa = nn.ModuleList([]) 94 | 95 | for (head, dim_head, ws) in zip(heads, dim_heads, window_size): 96 | self.pwmsa.append(BaseWindowAttention(dim, 97 | head, 98 | dim_head, 99 | drop_out, 100 | ws, 101 | relative_pos_embedding)) 102 | self.fuse_mehod = fuse_method 103 | if fuse_method == 'split_attn': 104 | self.split_attn = SplitAttn(256) 105 | 106 | def forward(self, x): 107 | output = None 108 | # naive fusion will just sum up all window attention output and do a 109 | # mean 110 | if self.fuse_mehod == 'naive': 111 | for wmsa in self.pwmsa: 112 | output = wmsa(x) if output is None else output + wmsa(x) 113 | return output / len(self.pwmsa) 114 | 115 | elif self.fuse_mehod == 'split_attn': 116 | window_list = [] 117 | for wmsa in self.pwmsa: 118 | window_list.append(wmsa(x)) 119 | return self.split_attn(window_list) -------------------------------------------------------------------------------- /v2xvit/models/fuse_modules/mswin.py: -------------------------------------------------------------------------------- 1 | """ 2 | Multi-scale window transformer 3 | """ 4 | import torch 5 | import torch.nn as nn 6 | import numpy as np 7 | 8 | from einops import rearrange 9 | from opencood.models.sub_modules.split_attn import SplitAttn 10 | 11 | 12 | def get_relative_distances(window_size): 13 | indices = torch.tensor(np.array( 14 | [[x, y] for x in range(window_size) for y in range(window_size)])) 15 | distances = indices[None, :, :] - indices[:, None, :] 16 | return distances 17 | 18 | 19 | class BaseWindowAttention(nn.Module): 20 | def __init__(self, dim, heads, dim_head, drop_out, window_size, 21 | relative_pos_embedding): 22 | super().__init__() 23 | inner_dim = dim_head * heads 24 | 25 | self.heads = heads 26 | self.scale = dim_head ** -0.5 27 | self.window_size = window_size 28 | self.relative_pos_embedding = relative_pos_embedding 29 | 30 | self.to_qkv = nn.Linear(dim, inner_dim * 3, bias=False) 31 | 32 | if self.relative_pos_embedding: 33 | self.relative_indices = get_relative_distances(window_size) + \ 34 | window_size - 1 35 | self.pos_embedding = nn.Parameter(torch.randn(2 * window_size - 1, 36 | 2 * window_size - 1)) 37 | else: 38 | self.pos_embedding = nn.Parameter(torch.randn(window_size ** 2, 39 | window_size ** 2)) 40 | 41 | self.to_out = nn.Sequential( 42 | nn.Linear(inner_dim, dim), 43 | nn.Dropout(drop_out) 44 | ) 45 | 46 | def forward(self, x): 47 | b, l, h, w, c, m = *x.shape, self.heads 48 | 49 | qkv = self.to_qkv(x).chunk(3, dim=-1) 50 | new_h = 
h // self.window_size 51 | new_w = w // self.window_size 52 | 53 | # q : (b, l, m, new_h*new_w, window_size^2, c_head) 54 | q, k, v = map( 55 | lambda t: rearrange(t, 56 | 'b l (new_h w_h) (new_w w_w) (m c) -> b l m (new_h new_w) (w_h w_w) c', 57 | m=m, w_h=self.window_size, 58 | w_w=self.window_size), qkv) 59 | # b l m h window_size window_size 60 | dots = torch.einsum('b l m h i c, b l m h j c -> b l m h i j', 61 | q, k, ) * self.scale 62 | # consider prior knowledge of the local window 63 | if self.relative_pos_embedding: 64 | dots += self.pos_embedding[self.relative_indices[:, :, 0], 65 | self.relative_indices[:, :, 1]] 66 | else: 67 | dots += self.pos_embedding 68 | 69 | attn = dots.softmax(dim=-1) 70 | 71 | out = torch.einsum('b l m h i j, b l m h j c -> b l m h i c', attn, v) 72 | # b l h w c 73 | out = rearrange(out, 74 | 'b l m (new_h new_w) (w_h w_w) c -> b l (new_h w_h) (new_w w_w) (m c)', 75 | m=self.heads, w_h=self.window_size, 76 | w_w=self.window_size, 77 | new_w=new_w, new_h=new_h) 78 | out = self.to_out(out) 79 | 80 | return out 81 | 82 | 83 | class PyramidWindowAttention(nn.Module): 84 | def __init__(self, dim, heads, dim_heads, drop_out, window_size, 85 | relative_pos_embedding, fuse_method='naive'): 86 | super().__init__() 87 | 88 | assert isinstance(window_size, list) 89 | assert isinstance(heads, list) 90 | assert isinstance(dim_heads, list) 91 | assert len(dim_heads) == len(heads) 92 | 93 | self.pwmsa = nn.ModuleList([]) 94 | 95 | for (head, dim_head, ws) in zip(heads, dim_heads, window_size): 96 | self.pwmsa.append(BaseWindowAttention(dim, 97 | head, 98 | dim_head, 99 | drop_out, 100 | ws, 101 | relative_pos_embedding)) 102 | self.fuse_mehod = fuse_method 103 | if fuse_method == 'split_attn': 104 | self.split_attn = SplitAttn(256) 105 | 106 | def forward(self, x): 107 | output = None 108 | # naive fusion will just sum up all window attention output and do a 109 | # mean 110 | if self.fuse_mehod == 'naive': 111 | for wmsa in self.pwmsa: 112 | output = wmsa(x) if output is None else output + wmsa(x) 113 | return output / len(self.pwmsa) 114 | 115 | elif self.fuse_mehod == 'split_attn': 116 | window_list = [] 117 | for wmsa in self.pwmsa: 118 | window_list.append(wmsa(x)) 119 | return self.split_attn(window_list) -------------------------------------------------------------------------------- /v2xvit/hypes_yaml/point_pillar_v2xvit.yaml: -------------------------------------------------------------------------------- 1 | name: point_pillar_v2xvit 2 | root_dir: '/data/opv2v/train' 3 | validate_dir: '/data/opv2v/validate' 4 | 5 | wild_setting: 6 | async: false 7 | async_mode: 'sim' 8 | async_overhead: 100 9 | seed: 25 10 | loc_err: false 11 | xyz_std: 0.2 12 | ryp_std: 0.2 13 | data_size: 1.06 # Mb!! 14 | transmission_speed: 27 # Mbps!! 15 | backbone_delay: 10 # ms 16 | 17 | yaml_parser: "load_point_pillar_params" 18 | train_params: 19 | batch_size: &batch_size 2 20 | epoches: 60 21 | eval_freq: 1 22 | save_freq: 1 23 | max_cav: &max_cav 5 24 | 25 | fusion: 26 | core_method: 'IntermediateFusionDataset' # LateFusionDataset, EarlyFusionDataset, IntermediateFusionDataset supported 27 | args: 28 | cur_ego_pose_flag: False 29 | # when the cur_ego_pose_flag is set to True, there is no time gap 30 | # between the time when the LiDAR data is captured by connected 31 | # agents and when the extracted features are received by 32 | # the ego vehicle, which is equal to implement STCM. When set to False, 33 | # STCM has to be used. 
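# Illustrative sketch (an assumption, not part of the shipped config): the time gap and
# pose error that STCM compensates for are typically injected through the wild_setting
# block at the top of this file. A robustness-evaluation variant might look like the
# commented example below, assuming the async/loc_err switches behave as their names
# suggest.
# wild_setting:
#   async: true            # simulate feature transmission delay
#   async_mode: 'sim'
#   async_overhead: 100    # extra delay in ms
#   loc_err: true          # add localization noise to non-ego CAVs
#   xyz_std: 0.2           # position noise std, meters
#   ryp_std: 0.2           # rotation noise std, degrees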
34 | 35 | # preprocess-related 36 | preprocess: 37 | # options: BasePreprocessor, VoxelPreprocessor, BevPreprocessor 38 | core_method: 'SpVoxelPreprocessor' 39 | args: 40 | voxel_size: &voxel_size [0.4, 0.4, 4] 41 | max_points_per_voxel: 32 42 | max_voxel_train: 32000 43 | max_voxel_test: 70000 44 | # lidar range for each individual cav. 45 | cav_lidar_range: &cav_lidar [-140.8, -38.4, -3, 140.8, 38.4, 1] 46 | 47 | data_augment: 48 | - NAME: random_world_flip 49 | ALONG_AXIS_LIST: [ 'x' ] 50 | 51 | - NAME: random_world_rotation 52 | WORLD_ROT_ANGLE: [ -0.78539816, 0.78539816 ] 53 | 54 | - NAME: random_world_scaling 55 | WORLD_SCALE_RANGE: [ 0.95, 1.05 ] 56 | 57 | # anchor box related 58 | postprocess: 59 | core_method: 'VoxelPostprocessor' # VoxelPostprocessor, BevPostprocessor supported 60 | anchor_args: 61 | cav_lidar_range: *cav_lidar 62 | l: 3.9 63 | w: 1.6 64 | h: 1.56 65 | r: [0, 90] 66 | feature_stride: 4 67 | num: &achor_num 2 68 | target_args: 69 | pos_threshold: 0.6 70 | neg_threshold: 0.45 71 | score_threshold: 0.27 72 | order: 'hwl' # hwl or lwh 73 | max_num: 100 # maximum number of objects in a single frame. use this number to make sure different frames has the same dimension in the same batch 74 | nms_thresh: 0.15 75 | 76 | # model related 77 | model: 78 | core_method: point_pillar_transformer 79 | args: 80 | voxel_size: *voxel_size 81 | lidar_range: *cav_lidar 82 | anchor_number: *achor_num 83 | max_cav: *max_cav 84 | compression: 0 # compression rate 85 | backbone_fix: false 86 | 87 | pillar_vfe: 88 | use_norm: true 89 | with_distance: false 90 | use_absolute_xyz: true 91 | num_filters: [64] 92 | point_pillar_scatter: 93 | num_features: 64 94 | 95 | base_bev_backbone: 96 | layer_nums: [3, 5, 8] 97 | layer_strides: [2, 2, 2] 98 | num_filters: [64, 128, 256] 99 | upsample_strides: [1, 2, 4] 100 | num_upsample_filter: [128, 128, 128] 101 | shrink_header: 102 | kernal_size: [3] 103 | stride: [2] 104 | padding: [1] 105 | dim: [256] 106 | input_dim: 384 # 128 * 3 107 | 108 | transformer: 109 | encoder: &encoder 110 | # number of fusion blocks per encoder layer 111 | num_blocks: 1 112 | # number of encoder layers 113 | depth: 3 114 | use_roi_mask: true 115 | use_RTE: &use_RTE true 116 | RTE_ratio: &RTE_ratio 2 # 2 means the dt has 100ms interval while 1 means 50 ms interval 117 | # agent-wise attention 118 | cav_att_config: &cav_att_config 119 | dim: 256 120 | use_hetero: true 121 | use_RTE: *use_RTE 122 | RTE_ratio: *RTE_ratio 123 | heads: 8 124 | dim_head: 32 125 | dropout: 0.3 126 | # spatial-wise attention 127 | pwindow_att_config: &pwindow_att_config 128 | dim: 256 129 | heads: [16, 8, 4] 130 | dim_head: [16, 32, 64] 131 | dropout: 0.3 132 | window_size: [4, 8, 16] 133 | relative_pos_embedding: true 134 | fusion_method: 'split_attn' 135 | # feedforward condition 136 | feed_forward: &feed_forward 137 | mlp_dim: 256 138 | dropout: 0.3 139 | sttf: &sttf 140 | voxel_size: *voxel_size 141 | downsample_rate: 4 142 | 143 | # add decoder later 144 | 145 | loss: 146 | core_method: point_pillar_loss 147 | args: 148 | cls_weight: 1.0 149 | reg: 2.0 150 | 151 | optimizer: 152 | core_method: Adam 153 | lr: 0.001 154 | args: 155 | eps: 1e-10 156 | weight_decay: 1e-4 157 | 158 | lr_scheduler: 159 | core_method: multistep #step, multistep and Exponential support 160 | gamma: 0.1 161 | step_size: [15, 50] 162 | 163 | -------------------------------------------------------------------------------- /v2xvit/utils/common_utils.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | Common utilities 3 | """ 4 | 5 | import numpy as np 6 | import torch 7 | from shapely.geometry import Polygon 8 | 9 | 10 | def check_numpy_to_torch(x): 11 | if isinstance(x, np.ndarray): 12 | return torch.from_numpy(x).float(), True 13 | return x, False 14 | 15 | 16 | def check_contain_nan(x): 17 | if isinstance(x, dict): 18 | return any(check_contain_nan(v) for k, v in x.items()) 19 | if isinstance(x, list): 20 | return any(check_contain_nan(itm) for itm in x) 21 | if isinstance(x, int) or isinstance(x, float): 22 | return False 23 | if isinstance(x, np.ndarray): 24 | return np.any(np.isnan(x)) 25 | return torch.any(x.isnan()).detach().cpu().item() 26 | 27 | 28 | def rotate_points_along_z(points, angle): 29 | """ 30 | Args: 31 | points: (B, N, 3 + C) 32 | angle: (B), radians, angle along z-axis, angle increases x ==> y 33 | Returns: 34 | 35 | """ 36 | points, is_numpy = check_numpy_to_torch(points) 37 | angle, _ = check_numpy_to_torch(angle) 38 | 39 | cosa = torch.cos(angle) 40 | sina = torch.sin(angle) 41 | zeros = angle.new_zeros(points.shape[0]) 42 | ones = angle.new_ones(points.shape[0]) 43 | rot_matrix = torch.stack(( 44 | cosa, sina, zeros, 45 | -sina, cosa, zeros, 46 | zeros, zeros, ones 47 | ), dim=1).view(-1, 3, 3).float() 48 | points_rot = torch.matmul(points[:, :, 0:3].float(), rot_matrix) 49 | points_rot = torch.cat((points_rot, points[:, :, 3:]), dim=-1) 50 | return points_rot.numpy() if is_numpy else points_rot 51 | 52 | 53 | def rotate_points_along_z_2d(points, angle): 54 | """ 55 | Rorate the points along z-axis. 56 | Parameters 57 | ---------- 58 | points : torch.Tensor / np.ndarray 59 | (N, 2). 60 | angle : torch.Tensor / np.ndarray 61 | (N,) 62 | 63 | Returns 64 | ------- 65 | points_rot : torch.Tensor / np.ndarray 66 | Rorated points with shape (N, 2) 67 | 68 | """ 69 | points, is_numpy = check_numpy_to_torch(points) 70 | angle, _ = check_numpy_to_torch(angle) 71 | cosa = torch.cos(angle) 72 | sina = torch.sin(angle) 73 | # (N, 2, 2) 74 | rot_matrix = torch.stack((cosa, sina, -sina, cosa), dim=1).view(-1, 2, 75 | 2).float() 76 | points_rot = torch.einsum("ik, ikj->ij", points.float(), rot_matrix) 77 | return points_rot.numpy() if is_numpy else points_rot 78 | 79 | 80 | def remove_ego_from_objects(objects, ego_id): 81 | """ 82 | Avoid adding ego vehicle to the object dictionary. 83 | 84 | Parameters 85 | ---------- 86 | objects : dict 87 | The dictionary contained all objects. 88 | 89 | ego_id : int 90 | Ego id. 91 | """ 92 | if ego_id in objects: 93 | del objects[ego_id] 94 | 95 | 96 | def retrieve_ego_id(base_data_dict): 97 | """ 98 | Retrieve the ego vehicle id from sample(origin format). 99 | 100 | Parameters 101 | ---------- 102 | base_data_dict : dict 103 | Data sample in origin format. 104 | 105 | Returns 106 | ------- 107 | ego_id : str 108 | The id of ego vehicle. 109 | """ 110 | ego_id = None 111 | 112 | for cav_id, cav_content in base_data_dict.items(): 113 | if cav_content['ego']: 114 | ego_id = cav_id 115 | break 116 | return ego_id 117 | 118 | 119 | def compute_iou(box, boxes): 120 | """ 121 | Compute iou between box and boxes list 122 | Parameters 123 | ---------- 124 | box : shapely.geometry.Polygon 125 | Bounding box Polygon. 126 | 127 | boxes : list 128 | List of shapely.geometry.Polygon. 129 | 130 | Returns 131 | ------- 132 | iou : np.ndarray 133 | Array of iou between box and boxes. 
134 | 135 | """ 136 | # Calculate intersection areas 137 | iou = [box.intersection(b).area / box.union(b).area for b in boxes] 138 | 139 | return np.array(iou, dtype=np.float32) 140 | 141 | 142 | def convert_format(boxes_array): 143 | """ 144 | Convert boxes array to shapely.geometry.Polygon format. 145 | Parameters 146 | ---------- 147 | boxes_array : np.ndarray 148 | (N, 4, 2) or (N, 8, 3). 149 | 150 | Returns 151 | ------- 152 | list of converted shapely.geometry.Polygon object. 153 | 154 | """ 155 | polygons = [Polygon([(box[i, 0], box[i, 1]) for i in range(4)]) for box in 156 | boxes_array] 157 | return np.array(polygons) 158 | 159 | 160 | def torch_tensor_to_numpy(torch_tensor): 161 | """ 162 | Convert a torch tensor to numpy. 163 | 164 | Parameters 165 | ---------- 166 | torch_tensor : torch.Tensor 167 | 168 | Returns 169 | ------- 170 | A numpy array. 171 | """ 172 | return torch_tensor.numpy() if not torch_tensor.is_cuda else \ 173 | torch_tensor.cpu().detach().numpy() 174 | -------------------------------------------------------------------------------- /v2xvit/models/point_pillar_cobevt.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from einops import rearrange, repeat 4 | 5 | from v2xvit.models.sub_modules.pillar_vfe import PillarVFE 6 | from v2xvit.models.sub_modules.point_pillar_scatter import PointPillarScatter 7 | from v2xvit.models.sub_modules.base_bev_backbone import BaseBEVBackbone 8 | from v2xvit.models.sub_modules.downsample_conv import DownsampleConv 9 | from v2xvit.models.sub_modules.naive_compress import NaiveCompressor 10 | from v2xvit.models.fuse_modules.fuse_utils import regroup 11 | from v2xvit.models.fuse_modules.swap_fusion_modules import \ 12 | SwapFusionEncoder 13 | 14 | class PointPillarCoBEVT(nn.Module): 15 | def __init__(self, args): 16 | super(PointPillarCoBEVT, self).__init__() 17 | 18 | self.max_cav = args['max_cav'] 19 | # PIllar VFE 20 | self.pillar_vfe = PillarVFE(args['pillar_vfe'], 21 | num_point_features=4, 22 | voxel_size=args['voxel_size'], 23 | point_cloud_range=args['lidar_range']) 24 | self.scatter = PointPillarScatter(args['point_pillar_scatter']) 25 | self.backbone = BaseBEVBackbone(args['base_bev_backbone'], 64) 26 | # used to downsample the feature map for efficient computation 27 | self.shrink_flag = False 28 | if 'shrink_header' in args: 29 | self.shrink_flag = True 30 | self.shrink_conv = DownsampleConv(args['shrink_header']) 31 | self.compression = False 32 | 33 | if args['compression'] > 0: 34 | self.compression = True 35 | self.naive_compressor = NaiveCompressor(256, args['compression']) 36 | 37 | self.fusion_net = SwapFusionEncoder(args['fax_fusion']) 38 | 39 | self.cls_head = nn.Conv2d(128 * 2, args['anchor_number'], 40 | kernel_size=1) 41 | self.reg_head = nn.Conv2d(128 * 2, 7 * args['anchor_number'], 42 | kernel_size=1) 43 | 44 | if args['backbone_fix']: 45 | self.backbone_fix() 46 | 47 | def backbone_fix(self): 48 | """ 49 | Fix the parameters of backbone during finetune on timedelay。 50 | """ 51 | for p in self.pillar_vfe.parameters(): 52 | p.requires_grad = False 53 | 54 | for p in self.scatter.parameters(): 55 | p.requires_grad = False 56 | 57 | for p in self.backbone.parameters(): 58 | p.requires_grad = False 59 | 60 | if self.compression: 61 | for p in self.naive_compressor.parameters(): 62 | p.requires_grad = False 63 | if self.shrink_flag: 64 | for p in self.shrink_conv.parameters(): 65 | p.requires_grad = False 66 | 67 | for p in 
self.cls_head.parameters(): 68 | p.requires_grad = False 69 | for p in self.reg_head.parameters(): 70 | p.requires_grad = False 71 | 72 | def forward(self, data_dict): 73 | voxel_features = data_dict['processed_lidar']['voxel_features'] 74 | voxel_coords = data_dict['processed_lidar']['voxel_coords'] 75 | voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] 76 | record_len = data_dict['record_len'] 77 | spatial_correction_matrix = data_dict['spatial_correction_matrix'] 78 | 79 | batch_dict = {'voxel_features': voxel_features, 80 | 'voxel_coords': voxel_coords, 81 | 'voxel_num_points': voxel_num_points, 82 | 'record_len': record_len} 83 | # n, 4 -> n, c 84 | batch_dict = self.pillar_vfe(batch_dict) 85 | # n, c -> N, C, H, W 86 | batch_dict = self.scatter(batch_dict) 87 | batch_dict = self.backbone(batch_dict) 88 | 89 | spatial_features_2d = batch_dict['spatial_features_2d'] 90 | # downsample feature to reduce memory 91 | if self.shrink_flag: 92 | spatial_features_2d = self.shrink_conv(spatial_features_2d) 93 | # compressor 94 | if self.compression: 95 | spatial_features_2d = self.naive_compressor(spatial_features_2d) 96 | 97 | # N, C, H, W -> B, L, C, H, W 98 | regroup_feature, mask = regroup(spatial_features_2d, 99 | record_len, 100 | self.max_cav) 101 | com_mask = mask.unsqueeze(1).unsqueeze(2).unsqueeze(3) 102 | com_mask = repeat(com_mask, 103 | 'b h w c l -> b (h new_h) (w new_w) c l', 104 | new_h=regroup_feature.shape[3], 105 | new_w=regroup_feature.shape[4]) 106 | 107 | fused_feature = self.fusion_net(regroup_feature, com_mask) 108 | 109 | psm = self.cls_head(fused_feature) 110 | rm = self.reg_head(fused_feature) 111 | 112 | output_dict = {'psm': psm, 113 | 'rm': rm} 114 | 115 | return output_dict 116 | -------------------------------------------------------------------------------- /v2xvit/models/point_pillar_v2vnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from v2xvit.models.sub_modules.pillar_vfe import PillarVFE 5 | from v2xvit.models.sub_modules.point_pillar_scatter import PointPillarScatter 6 | from v2xvit.models.sub_modules.base_bev_backbone import BaseBEVBackbone 7 | from v2xvit.models.sub_modules.downsample_conv import DownsampleConv 8 | from v2xvit.models.sub_modules.naive_compress import NaiveCompressor 9 | from v2xvit.models.sub_modules.v2v_fuse import V2VNetFusion 10 | 11 | 12 | class PointPillarV2VNet(nn.Module): 13 | def __init__(self, args): 14 | super(PointPillarV2VNet, self).__init__() 15 | 16 | self.max_cav = args['max_cav'] 17 | # PIllar VFE 18 | self.pillar_vfe = PillarVFE(args['pillar_vfe'], 19 | num_point_features=4, 20 | voxel_size=args['voxel_size'], 21 | point_cloud_range=args['lidar_range']) 22 | self.scatter = PointPillarScatter(args['point_pillar_scatter']) 23 | self.backbone = BaseBEVBackbone(args['base_bev_backbone'], 64) 24 | # used to downsample the feature map for efficient computation 25 | self.shrink_flag = False 26 | if 'shrink_header' in args: 27 | self.shrink_flag = True 28 | self.shrink_conv = DownsampleConv(args['shrink_header']) 29 | self.compression = False 30 | 31 | if args['compression'] > 0: 32 | self.compression = True 33 | self.naive_compressor = NaiveCompressor(256, args['compression']) 34 | 35 | self.fusion_net = V2VNetFusion(args['v2vfusion']) 36 | 37 | self.cls_head = nn.Conv2d(128 * 2, args['anchor_number'], 38 | kernel_size=1) 39 | self.reg_head = nn.Conv2d(128 * 2, 7 * args['anchor_number'], 40 | kernel_size=1) 41 | 42 | if 
args['backbone_fix']: 43 | self.backbone_fix() 44 | 45 | def backbone_fix(self): 46 | """ 47 | Fix the parameters of backbone during finetune on timedelay。 48 | """ 49 | for p in self.pillar_vfe.parameters(): 50 | p.requires_grad = False 51 | 52 | for p in self.scatter.parameters(): 53 | p.requires_grad = False 54 | 55 | for p in self.backbone.parameters(): 56 | p.requires_grad = False 57 | 58 | if self.compression: 59 | for p in self.naive_compressor.parameters(): 60 | p.requires_grad = False 61 | if self.shrink_flag: 62 | for p in self.shrink_conv.parameters(): 63 | p.requires_grad = False 64 | 65 | for p in self.cls_head.parameters(): 66 | p.requires_grad = False 67 | for p in self.reg_head.parameters(): 68 | p.requires_grad = False 69 | 70 | def unpad_prior_encoding(self, x, record_len): 71 | # remove padded zeros to form tensor with shape (N, 3) 72 | # x: (B, L, 3); record_len: (B) 73 | B = x.shape[0] 74 | out = [] 75 | for i in range(B): 76 | # (valid_len, 3) 77 | out.append(x[i, :record_len[i], :]) 78 | out = torch.cat(out, dim=0) 79 | # (N, 3) 80 | return out 81 | 82 | def forward(self, data_dict): 83 | voxel_features = data_dict['processed_lidar']['voxel_features'] 84 | voxel_coords = data_dict['processed_lidar']['voxel_coords'] 85 | voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] 86 | record_len = data_dict['record_len'] 87 | spatial_correction_matrix = data_dict['spatial_correction_matrix'] 88 | pairwise_t_matrix = data_dict['pairwise_t_matrix'] 89 | prior_encoding = data_dict['prior_encoding'] 90 | prior_encoding = self.unpad_prior_encoding(prior_encoding, record_len) 91 | 92 | batch_dict = {'voxel_features': voxel_features, 93 | 'voxel_coords': voxel_coords, 94 | 'voxel_num_points': voxel_num_points, 95 | 'record_len': record_len} 96 | # n, 4 -> n, c 97 | batch_dict = self.pillar_vfe(batch_dict) 98 | # n, c -> N, C, H, W 99 | batch_dict = self.scatter(batch_dict) 100 | batch_dict = self.backbone(batch_dict) 101 | 102 | spatial_features_2d = batch_dict['spatial_features_2d'] 103 | # downsample feature to reduce memory 104 | if self.shrink_flag: 105 | spatial_features_2d = self.shrink_conv(spatial_features_2d) 106 | # compressor 107 | if self.compression: 108 | spatial_features_2d = self.naive_compressor(spatial_features_2d) 109 | fused_feature = self.fusion_net(spatial_features_2d, 110 | record_len, 111 | pairwise_t_matrix, 112 | prior_encoding) 113 | 114 | psm = self.cls_head(fused_feature) 115 | rm = self.reg_head(fused_feature) 116 | 117 | output_dict = {'psm': psm, 118 | 'rm': rm} 119 | 120 | return output_dict 121 | -------------------------------------------------------------------------------- /v2xvit/utils/box_overlaps.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | from cython.parallel import prange, parallel 11 | 12 | 13 | DTYPE = np.float32 14 | ctypedef float DTYPE_t 15 | 16 | 17 | def bbox_overlaps( 18 | np.ndarray[DTYPE_t, ndim=2] boxes, 19 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 20 | """ 21 | Parameters 22 | ---------- 23 | boxes: (N, 4) ndarray of float 24 | query_boxes: (K, 4) ndarray of float 25 | Returns 26 | ------- 27 | overlaps: (N, K) ndarray of overlap between boxes and 
query_boxes 28 | """ 29 | cdef unsigned int N = boxes.shape[0] 30 | cdef unsigned int K = query_boxes.shape[0] 31 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 32 | cdef DTYPE_t iw, ih, box_area 33 | cdef DTYPE_t ua 34 | cdef unsigned int k, n 35 | for k in range(K): 36 | box_area = ( 37 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 38 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 39 | ) 40 | for n in range(N): 41 | iw = ( 42 | min(boxes[n, 2], query_boxes[k, 2]) - 43 | max(boxes[n, 0], query_boxes[k, 0]) + 1 44 | ) 45 | if iw > 0: 46 | ih = ( 47 | min(boxes[n, 3], query_boxes[k, 3]) - 48 | max(boxes[n, 1], query_boxes[k, 1]) + 1 49 | ) 50 | if ih > 0: 51 | ua = float( 52 | (boxes[n, 2] - boxes[n, 0] + 1) * 53 | (boxes[n, 3] - boxes[n, 1] + 1) + 54 | box_area - iw * ih 55 | ) 56 | overlaps[n, k] = iw * ih / ua 57 | return overlaps 58 | 59 | def bbox_intersections( 60 | np.ndarray[DTYPE_t, ndim=2] boxes, 61 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 62 | """ 63 | For each query box compute the intersection ratio covered by boxes 64 | ---------- 65 | Parameters 66 | ---------- 67 | boxes: (N, 4) ndarray of float 68 | query_boxes: (K, 4) ndarray of float 69 | Returns 70 | ------- 71 | overlaps: (N, K) ndarray of intersec between boxes and query_boxes 72 | """ 73 | cdef unsigned int N = boxes.shape[0] 74 | cdef unsigned int K = query_boxes.shape[0] 75 | cdef np.ndarray[DTYPE_t, ndim=2] intersec = np.zeros((N, K), dtype=DTYPE) 76 | cdef DTYPE_t iw, ih, box_area 77 | cdef DTYPE_t ua 78 | cdef unsigned int k, n 79 | for k in range(K): 80 | box_area = ( 81 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 82 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 83 | ) 84 | for n in range(N): 85 | iw = ( 86 | min(boxes[n, 2], query_boxes[k, 2]) - 87 | max(boxes[n, 0], query_boxes[k, 0]) + 1 88 | ) 89 | if iw > 0: 90 | ih = ( 91 | min(boxes[n, 3], query_boxes[k, 3]) - 92 | max(boxes[n, 1], query_boxes[k, 1]) + 1 93 | ) 94 | if ih > 0: 95 | intersec[n, k] = iw * ih / box_area 96 | return intersec 97 | 98 | # Compute bounding box voting 99 | def box_vote( 100 | np.ndarray[float, ndim=2] dets_NMS, 101 | np.ndarray[float, ndim=2] dets_all): 102 | cdef np.ndarray[float, ndim=2] dets_voted = np.zeros((dets_NMS.shape[0], dets_NMS.shape[1]), dtype=np.float32) 103 | cdef unsigned int N = dets_NMS.shape[0] 104 | cdef unsigned int M = dets_all.shape[0] 105 | 106 | cdef np.ndarray[float, ndim=1] det 107 | cdef np.ndarray[float, ndim=1] acc_box 108 | cdef float acc_score 109 | 110 | cdef np.ndarray[float, ndim=1] det2 111 | cdef float bi0, bi1, bit2, bi3 112 | cdef float iw, ih, ua 113 | 114 | cdef float thresh=0.5 115 | 116 | for i in range(N): 117 | det = dets_NMS[i, :] 118 | acc_box = np.zeros((4), dtype=np.float32) 119 | acc_score = 0.0 120 | 121 | for m in range(M): 122 | det2 = dets_all[m, :] 123 | 124 | bi0 = max(det[0], det2[0]) 125 | bi1 = max(det[1], det2[1]) 126 | bi2 = min(det[2], det2[2]) 127 | bi3 = min(det[3], det2[3]) 128 | 129 | iw = bi2 - bi0 + 1 130 | ih = bi3 - bi1 + 1 131 | 132 | if not (iw > 0 and ih > 0): 133 | continue 134 | 135 | ua = (det[2] - det[0] + 1) * (det[3] - det[1] + 1) + (det2[2] - det2[0] + 1) * (det2[3] - det2[1] + 1) - iw * ih 136 | ov = iw * ih / ua 137 | 138 | if (ov < thresh): 139 | continue 140 | 141 | acc_box += det2[4] * det2[0:4] 142 | acc_score += det2[4] 143 | 144 | dets_voted[i][0:4] = acc_box / acc_score 145 | dets_voted[i][4] = det[4] # Keep the original score 146 | 147 | return dets_voted 148 | 
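The two overlap kernels above operate on axis-aligned boxes in `(x1, y1, x2, y2)` form and are compiled through `v2xvit/utils/setup.py`. As a rough sanity check, the sketch below is a pure-NumPy restatement of `bbox_overlaps` (IoU); the helper name `bbox_overlaps_ref` and the tiny test boxes are made up for illustration and are not part of the repository.

```python
import numpy as np

def bbox_overlaps_ref(boxes, query_boxes):
    """Pure-NumPy reference for the Cython bbox_overlaps kernel.

    boxes: (N, 4) float array, query_boxes: (K, 4) float array,
    both in (x1, y1, x2, y2) pixel coordinates (inclusive, hence the +1).
    Returns an (N, K) IoU matrix.
    """
    N, K = boxes.shape[0], query_boxes.shape[0]
    overlaps = np.zeros((N, K), dtype=np.float32)
    for k in range(K):
        q = query_boxes[k]
        q_area = (q[2] - q[0] + 1) * (q[3] - q[1] + 1)
        for n in range(N):
            b = boxes[n]
            iw = min(b[2], q[2]) - max(b[0], q[0]) + 1
            ih = min(b[3], q[3]) - max(b[1], q[1]) + 1
            if iw > 0 and ih > 0:
                b_area = (b[2] - b[0] + 1) * (b[3] - b[1] + 1)
                overlaps[n, k] = iw * ih / (b_area + q_area - iw * ih)
    return overlaps

# Illustrative comparison against the compiled extension (if built in place):
# from v2xvit.utils.box_overlaps import bbox_overlaps
boxes = np.array([[0, 0, 9, 9], [5, 5, 14, 14]], dtype=np.float32)
queries = np.array([[0, 0, 9, 9]], dtype=np.float32)
print(bbox_overlaps_ref(boxes, queries))  # identical boxes give IoU 1.0
```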
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # FeaCo 2 | The official implementation of the ACM MM 2023 paper "FeaCo: Reaching Robust Feature-Level Consensus in Noisy Pose Conditions". 3 | ![FeaCo_Overview](./images/Overview.png) 4 | 5 | > [**FeaCo: Reaching Robust Feature-Level Consensus in Noisy Pose Conditions**](https://doi.org/10.1145/3581783.3611880), 6 | > Jiaming Gu\*, Jingyu Zhang\*, Muyang Zhang, Weiliang Meng, Shibiao Xu, Jiguang Zhang, Xiaopeng Zhang
7 | > *Accepted by ACM MM 2023* 8 | 9 | # Abstract 10 | Collaborative perception offers a promising solution to challenges such as occlusion and long-range data processing. However, limited sensor accuracy leads to noisy poses that misalign observations among vehicles. To address this problem, we propose FeaCo, which achieves robust Feature-level Consensus among collaborating agents under noisy pose conditions without additional training. We design an efficient Pose-error Rectification Module (PRM) to align the feature maps derived from different vehicles, reducing the adverse effect of noisy poses as well as bandwidth requirements. We also provide an effective multi-scale Cross-level Attention Module (CAM) to enhance information aggregation and interaction across scales. Our FeaCo outperforms all other localization rectification methods, as validated on both the collaborative perception simulation dataset OPV2V and the real-world dataset V2V4Real, reducing heading error and enhancing localization accuracy across various error levels. 11 | 12 | 14 | 15 | ## Installation 16 | ```bash 17 | # Set up the conda environment 18 | conda env create -f Env.yaml 19 | 20 | conda activate opencood 21 | 22 | # Install spconv 2.0; choose the correct CUDA version for your system 23 | pip install spconv-cu113 24 | 25 | # Install dependencies 26 | pip install -r requirements.txt 27 | # Build the Cython bounding-box overlap extension used for NMS 28 | python v2xvit/utils/setup.py build_ext --inplace 29 | 30 | # Install v2xvit into the environment 31 | python setup.py develop 32 | ``` 33 | 34 | ## Data Downloading 35 | All the data can be downloaded from [google drive](https://drive.google.com/drive/folders/1dkDeHlwOVbmgXcDazZvO6TFEZ6V_7WUu). If you have a good internet connection, you can directly 36 | download the complete large zip files such as `train.zip`. If downloading large files is a problem, we also split each dataset into small chunks, which can be found 37 | in the directories ending with `_chunks`, such as `train_chunks`. After downloading, run the following commands for each set to merge the chunks: 38 | ```bash 39 | cat train.zip.part* > train.zip 40 | unzip train.zip 41 | ``` 42 | 43 | ## Getting Started 44 | 45 | ### Note: 46 | 47 | - Models should be trained under the perfect (noise-free) setting and tested under the noisy setting. 48 | 49 | ### Test with pretrained model 50 | To test the pretrained FeaCo model, first download the model file from [google url](https://drive.google.com/drive/folders/1reQ7I3jNWRosjpEhVGSSKE2JoLwHIHa4?usp=sharing) and 51 | then put it under `v2xvit/logs/opv2v_feaco`. Change the `validate_path` in `v2xvit/logs/opv2v_feaco/config.yaml` to `/data/opv2v/test`. 52 | 53 | To test under the perfect setting, set `add_noise` to `false` in `v2xvit/logs/opv2v_feaco/config.yaml`. 54 | 55 | To test under the noisy setting used in our paper, change the `noise_setting` block as follows: 56 | ```yaml 57 | noise_setting: 58 | add_noise: True 59 | args: 60 | pos_std: 1 61 | rot_std: 1 62 | pos_mean: 0 63 | rot_mean: 0 64 | ``` 65 | Finally, run the following command to perform the test: 66 | ```bash 67 | python v2xvit/tools/inference.py --model_dir ${CHECKPOINT_FOLDER} 68 | ``` 69 | Arguments Explanation: 70 | - `model_dir`: the path of the checkpoint folder, e.g. `v2xvit/logs/opv2v_feaco` for FeaCo testing. 71 | 72 | ### Train your model 73 | FeaCo uses YAML files to configure all training parameters.
To train your own model 74 | from scratch or a continued checkpoint, run the following commands: 75 | 76 | ```python 77 | python v2xvit/tools/train.py --hypes_yaml ${CONFIG_FILE} [--model_dir ${CHECKPOINT_FOLDER} --half] 78 | ``` 79 | Arguments Explanation: 80 | - `hypes_yaml`: the path of the training configuration file, e.g. `v2xvit/hypes_yaml/where2comm_transformer_multiscale_resnet.yaml` for FeaCo training. 81 | - `model_dir` (optional) : the path of the checkpoints. This is used to fine-tune the trained models. When the `model_dir` is 82 | given, the trainer will discard the `hypes_yaml` and load the `config.yaml` in the checkpoint folder. 83 | - `half`(optional): if specified, hybrid-precision training will be used to save memory occupation. 84 | 85 | ## Citation 86 | If you are using our FeaCo for your research, please cite the following paper: 87 | ```bibtex 88 | @inproceedings{gu2023feaco, 89 | title={FeaCo: Reaching Robust Feature-Level Consensus in Noisy Pose Conditions}, 90 | author={Gu, Jiaming and Zhang, Jingyu and Zhang, Muyang and Meng, Weiliang and Xu, Shibiao and Zhang, Jiguang and Zhang, Xiaopeng}, 91 | booktitle={Proceedings of the 31st ACM International Conference on Multimedia}, 92 | pages={3628--3636}, 93 | year={2023} 94 | } 95 | ``` 96 | 97 | ## Acknowledgment 98 | FeaCo is built upon [OpenCOOD](https://github.com/DerrickXuNu/OpenCOOD) and [V2X-ViT](https://github.com/DerrickXuNu/v2x-vit). 99 | -------------------------------------------------------------------------------- /v2xvit/models/point_pillar_when2com.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Author: Yue Hu <18671129361@sjtu.edu.cn> 3 | # License: TDG-Attribution-NonCommercial-NoDistrib 4 | 5 | 6 | import torch.nn as nn 7 | import torch 8 | 9 | from v2xvit.models.sub_modules.pillar_vfe import PillarVFE 10 | from v2xvit.models.sub_modules.point_pillar_scatter import PointPillarScatter 11 | from v2xvit.models.sub_modules.base_bev_backbone import BaseBEVBackbone 12 | from v2xvit.models.sub_modules.downsample_conv import DownsampleConv 13 | from v2xvit.models.sub_modules.naive_compress import NaiveCompressor 14 | from v2xvit.models.sub_modules.when2com import When2comFusion 15 | 16 | 17 | DEBUG = False 18 | 19 | class PointPillarWhen2com(nn.Module): 20 | def __init__(self, args): 21 | super(PointPillarWhen2com, self).__init__() 22 | 23 | self.max_cav = args['max_cav'] 24 | # PIllar VFE 25 | self.pillar_vfe = PillarVFE(args['pillar_vfe'], 26 | num_point_features=4, 27 | voxel_size=args['voxel_size'], 28 | point_cloud_range=args['lidar_range']) 29 | self.scatter = PointPillarScatter(args['point_pillar_scatter']) 30 | self.backbone = BaseBEVBackbone(args['base_bev_backbone'], 64) 31 | 32 | # used to downsample the feature map for efficient computation 33 | self.shrink_flag = False 34 | if 'shrink_header' in args: 35 | self.shrink_flag = True 36 | self.shrink_conv = DownsampleConv(args['shrink_header']) 37 | self.compression = False 38 | 39 | if args['compression'] > 0: 40 | self.compression = True 41 | self.naive_compressor = NaiveCompressor(256, args['compression']) 42 | 43 | self.fusion_net = When2comFusion(args['v2vfusion']) 44 | 45 | self.cls_head = nn.Conv2d(128 * 2, args['anchor_number'], 46 | kernel_size=1) 47 | self.reg_head = nn.Conv2d(128 * 2, 7 * args['anchor_number'], 48 | kernel_size=1) 49 | if args['backbone_fix']: 50 | self.backbone_fix() 51 | 52 | def backbone_fix(self): 53 | """ 54 | Fix the parameters of backbone during 
finetune on timedelay。 55 | """ 56 | for p in self.pillar_vfe.parameters(): 57 | p.requires_grad = False 58 | 59 | for p in self.scatter.parameters(): 60 | p.requires_grad = False 61 | 62 | for p in self.backbone.parameters(): 63 | p.requires_grad = False 64 | 65 | if self.compression: 66 | for p in self.naive_compressor.parameters(): 67 | p.requires_grad = False 68 | if self.shrink_flag: 69 | for p in self.shrink_conv.parameters(): 70 | p.requires_grad = False 71 | 72 | for p in self.cls_head.parameters(): 73 | p.requires_grad = False 74 | for p in self.reg_head.parameters(): 75 | p.requires_grad = False 76 | 77 | def forward(self, data_dict): 78 | voxel_features = data_dict['processed_lidar']['voxel_features'] 79 | voxel_coords = data_dict['processed_lidar']['voxel_coords'] 80 | voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] 81 | record_len = data_dict['record_len'] 82 | # lidar_pose = data_dict['lidar_pose'] # [sum(cav), 6] 83 | 84 | pairwise_t_matrix = data_dict['pairwise_t_matrix'] 85 | 86 | batch_dict = {'voxel_features': voxel_features, 87 | 'voxel_coords': voxel_coords, 88 | 'voxel_num_points': voxel_num_points, 89 | 'record_len': record_len} 90 | 91 | 92 | # n, 4 -> n, c 93 | batch_dict = self.pillar_vfe(batch_dict) 94 | # n, c -> N, C, H, W 95 | batch_dict = self.scatter(batch_dict) 96 | if DEBUG: 97 | origin_feature = torch.clone(batch_dict['spatial_features']) 98 | 99 | batch_dict = self.backbone(batch_dict) 100 | # N, C, H', W'. [N, 256, 50, 176] 101 | spatial_features_2d = batch_dict['spatial_features_2d'] 102 | 103 | # downsample feature to reduce memory 104 | if self.shrink_flag: 105 | spatial_features_2d = self.shrink_conv(spatial_features_2d) 106 | # compressor 107 | if self.compression: 108 | spatial_features_2d = self.naive_compressor(spatial_features_2d) 109 | 110 | # spatial_features_2d is [sum(cav_num), 256, 50, 176] 111 | # output only contains ego 112 | # [B, 256, 50, 176] 113 | if DEBUG: 114 | self.fusion_net.forward_debug(spatial_features_2d, origin_feature,record_len, pairwise_t_matrix) 115 | raise 116 | 117 | 118 | fused_feature = self.fusion_net(spatial_features_2d, 119 | record_len, 120 | pairwise_t_matrix) 121 | 122 | psm = self.cls_head(fused_feature) 123 | rm = self.reg_head(fused_feature) 124 | 125 | output_dict = {'psm': psm, 126 | 'rm': rm} 127 | 128 | return output_dict 129 | -------------------------------------------------------------------------------- /v2xvit/data_utils/pre_processor/voxel_preprocessor.py: -------------------------------------------------------------------------------- 1 | """ 2 | Convert lidar to voxel 3 | """ 4 | import sys 5 | 6 | import numpy as np 7 | import torch 8 | 9 | from v2xvit.data_utils.pre_processor.base_preprocessor import \ 10 | BasePreprocessor 11 | 12 | 13 | class VoxelPreprocessor(BasePreprocessor): 14 | def __init__(self, preprocess_params, train): 15 | super(VoxelPreprocessor, self).__init__(preprocess_params, train) 16 | self.lidar_range = self.params['cav_lidar_range'] 17 | 18 | self.vw = self.params['args']['vw'] 19 | self.vh = self.params['args']['vh'] 20 | self.vd = self.params['args']['vd'] 21 | self.T = self.params['args']['T'] 22 | 23 | def preprocess(self, pcd_np): 24 | """ 25 | Preprocess the lidar points by voxelization. 26 | 27 | Parameters 28 | ---------- 29 | pcd_np : np.ndarray 30 | The raw lidar. 31 | 32 | Returns 33 | ------- 34 | data_dict : the structured output dictionary. 
35 | """ 36 | data_dict = {} 37 | 38 | # calculate the voxel coordinates 39 | voxel_coords = ((pcd_np[:, :3] - 40 | np.floor(np.array([self.lidar_range[0], 41 | self.lidar_range[1], 42 | self.lidar_range[2]])) / ( 43 | self.vw, self.vh, self.vd))).astype(np.int32) 44 | 45 | # convert to (D, H, W) as the paper 46 | voxel_coords = voxel_coords[:, [2, 1, 0]] 47 | voxel_coords, inv_ind, voxel_counts = np.unique(voxel_coords, axis=0, 48 | return_inverse=True, 49 | return_counts=True) 50 | 51 | voxel_features = [] 52 | 53 | for i in range(len(voxel_coords)): 54 | voxel = np.zeros((self.T, 7), dtype=np.float32) 55 | pts = pcd_np[inv_ind == i] 56 | if voxel_counts[i] > self.T: 57 | pts = pts[:self.T, :] 58 | voxel_counts[i] = self.T 59 | 60 | # augment the points 61 | voxel[:pts.shape[0], :] = np.concatenate((pts, pts[:, :3] - 62 | np.mean(pts[:, :3], 0)), 63 | axis=1) 64 | voxel_features.append(voxel) 65 | 66 | data_dict['voxel_features'] = np.array(voxel_features) 67 | data_dict['voxel_coords'] = voxel_coords 68 | 69 | return data_dict 70 | 71 | def collate_batch(self, batch): 72 | """ 73 | Customized pytorch data loader collate function. 74 | 75 | Parameters 76 | ---------- 77 | batch : list or dict 78 | List or dictionary. 79 | 80 | Returns 81 | ------- 82 | processed_batch : dict 83 | Updated lidar batch. 84 | """ 85 | 86 | if isinstance(batch, list): 87 | return self.collate_batch_list(batch) 88 | elif isinstance(batch, dict): 89 | return self.collate_batch_dict(batch) 90 | else: 91 | sys.exit('Batch has too be a list or a dictionarn') 92 | 93 | @staticmethod 94 | def collate_batch_list(batch): 95 | """ 96 | Customized pytorch data loader collate function. 97 | 98 | Parameters 99 | ---------- 100 | batch : list 101 | List of dictionary. Each dictionary represent a single frame. 102 | 103 | Returns 104 | ------- 105 | processed_batch : dict 106 | Updated lidar batch. 107 | """ 108 | voxel_features = [] 109 | voxel_coords = [] 110 | 111 | for i in range(len(batch)): 112 | voxel_features.append(batch[i]['voxel_features']) 113 | coords = batch[i]['voxel_coords'] 114 | voxel_coords.append( 115 | np.pad(coords, ((0, 0), (1, 0)), 116 | mode='constant', constant_values=i)) 117 | 118 | voxel_features = torch.from_numpy(np.concatenate(voxel_features)) 119 | voxel_coords = torch.from_numpy(np.concatenate(voxel_coords)) 120 | 121 | return {'voxel_features': voxel_features, 122 | 'voxel_coords': voxel_coords} 123 | 124 | @staticmethod 125 | def collate_batch_dict(batch: dict): 126 | """ 127 | Collate batch if the batch is a dictionary, 128 | eg: {'voxel_features': [feature1, feature2...., feature n]} 129 | 130 | Parameters 131 | ---------- 132 | batch : dict 133 | 134 | Returns 135 | ------- 136 | processed_batch : dict 137 | Updated lidar batch. 
138 | """ 139 | voxel_features = \ 140 | torch.from_numpy(np.concatenate(batch['voxel_features'])) 141 | coords = batch['voxel_coords'] 142 | voxel_coords = [] 143 | 144 | for i in range(len(coords)): 145 | voxel_coords.append( 146 | np.pad(coords[i], ((0, 0), (1, 0)), 147 | mode='constant', constant_values=i)) 148 | voxel_coords = torch.from_numpy(np.concatenate(voxel_coords)) 149 | 150 | return {'voxel_features': voxel_features, 151 | 'voxel_coords': voxel_coords} 152 | -------------------------------------------------------------------------------- /v2xvit/visualization/simple_vis.py: -------------------------------------------------------------------------------- 1 | from matplotlib import pyplot as plt 2 | import numpy as np 3 | 4 | import v2xvit.visualization.simple_plot3d.canvas_3d as canvas_3d 5 | import v2xvit.visualization.simple_plot3d.canvas_bev as canvas_bev 6 | 7 | def visualize(pred_box_tensor, gt_tensor, pcd, pc_range, save_path, method='3d', vis_gt_box=True, vis_pred_box=True, left_hand=False, uncertainty=None): 8 | """ 9 | Visualize the prediction, ground truth with point cloud together. 10 | They may be flipped in y axis. Since carla is left hand coordinate, while kitti is right hand. 11 | 12 | Parameters 13 | ---------- 14 | pred_box_tensor : torch.Tensor 15 | (N, 8, 3) prediction. 16 | 17 | gt_tensor : torch.Tensor 18 | (N, 8, 3) groundtruth bbx 19 | 20 | pcd : torch.Tensor 21 | PointCloud, (N, 4). 22 | 23 | pc_range : list 24 | [xmin, ymin, zmin, xmax, ymax, zmax] 25 | 26 | save_path : str 27 | Save the visualization results to given path. 28 | 29 | dataset : BaseDataset 30 | opencood dataset object. 31 | 32 | method: str, 'bev' or '3d' 33 | 34 | """ 35 | 36 | pc_range = [int(i) for i in pc_range] 37 | if isinstance(pcd, list): 38 | pcd_np = [x.cpu().numpy() for x in pcd] 39 | else: 40 | pcd_np = pcd.cpu().numpy() 41 | 42 | if pred_box_tensor == None: 43 | vis_pred_box = False 44 | # vis_gt_box = False 45 | 46 | if vis_pred_box: 47 | pred_box_np = pred_box_tensor.cpu().numpy() 48 | # pred_name = ['pred'] * pred_box_np.shape[0] 49 | pred_name = [''] * pred_box_np.shape[0] 50 | if uncertainty is not None: 51 | uncertainty_np = uncertainty.cpu().numpy() 52 | uncertainty_np = np.exp(uncertainty_np) 53 | d_a_square = 1.6**2 + 3.9**2 54 | 55 | if uncertainty_np.shape[1] == 3: 56 | uncertainty_np[:,:2] *= d_a_square 57 | uncertainty_np = np.sqrt(uncertainty_np) 58 | # yaw angle is in radian, it's the same in g2o SE2's setting. 
59 | 60 | pred_name = [f'x_u:{uncertainty_np[i,0]:.3f} y_u:{uncertainty_np[i,1]:.3f} a_u:{uncertainty_np[i,2]:.3f}' \ 61 | for i in range(uncertainty_np.shape[0])] 62 | 63 | elif uncertainty_np.shape[1] == 2: 64 | uncertainty_np[:,:2] *= d_a_square 65 | uncertainty_np = np.sqrt(uncertainty_np) # yaw angle is in radian 66 | 67 | pred_name = [f'x_u:{uncertainty_np[i,0]:.3f} y_u:{uncertainty_np[i,1]:3f}' \ 68 | for i in range(uncertainty_np.shape[0])] 69 | 70 | elif uncertainty_np.shape[1] == 7: 71 | uncertainty_np[:,:2] *= d_a_square 72 | uncertainty_np = np.sqrt(uncertainty_np) # yaw angle is in radian 73 | 74 | pred_name = [f'x_u:{uncertainty_np[i,0]:.3f} y_u:{uncertainty_np[i,1]:3f} a_u:{uncertainty_np[i,6]:3f}' \ 75 | for i in range(uncertainty_np.shape[0])] 76 | 77 | if vis_gt_box: 78 | gt_box_np = gt_tensor.cpu().numpy() 79 | # gt_name = ['gt'] * gt_box_np.shape[0] 80 | gt_name = [''] * gt_box_np.shape[0] 81 | 82 | if method == 'bev': 83 | canvas = canvas_bev.Canvas_BEV_heading_right(canvas_shape=((pc_range[4]-pc_range[1])*10, (pc_range[3]-pc_range[0])*10), 84 | canvas_x_range=(pc_range[0], pc_range[3]), 85 | canvas_y_range=(pc_range[1], pc_range[4]), 86 | left_hand=left_hand 87 | ) 88 | 89 | canvas_xy, valid_mask = canvas.get_canvas_coords(pcd_np) # Get Canvas Coords 90 | canvas.draw_canvas_points(canvas_xy[valid_mask]) 91 | # color_list = [(0, 206, 209),(255, 215,0)] 92 | # for i, pcd_np_t in enumerate(pcd_np[1:2]): 93 | # canvas_xy, valid_mask = canvas.get_canvas_coords(pcd_np_t) # Get Canvas Coords 94 | # canvas.draw_canvas_points(canvas_xy[valid_mask], colors=color_list[i-1]) # Only draw valid points 95 | box_line_thickness = 5 96 | if vis_gt_box: 97 | # canvas.draw_boxes(gt_box_np,colors=(0,255,0), texts=gt_name) 98 | canvas.draw_boxes(gt_box_np,colors=(0,255,0), texts=gt_name, box_line_thickness=box_line_thickness) 99 | 100 | if vis_pred_box: 101 | canvas.draw_boxes(pred_box_np, colors=(255,0,0), texts=pred_name, box_line_thickness=box_line_thickness) 102 | 103 | elif method == '3d': 104 | canvas = canvas_3d.Canvas_3D(left_hand=left_hand) 105 | canvas_xy, valid_mask = canvas.get_canvas_coords(pcd_np) 106 | canvas.draw_canvas_points(canvas_xy[valid_mask]) 107 | 108 | if vis_gt_box: 109 | canvas.draw_boxes(gt_box_np,colors=(0,255,0), texts=gt_name) 110 | if vis_pred_box: 111 | canvas.draw_boxes(pred_box_np, colors=(255,0,0), texts=pred_name) 112 | else: 113 | raise(f"Not Completed for f{method} visualization.") 114 | 115 | plt.axis("off") 116 | 117 | plt.imshow(canvas.canvas) 118 | 119 | plt.tight_layout() 120 | plt.savefig(save_path, transparent=False, dpi=400, pad_inches=0.0) 121 | plt.clf() 122 | # print(save_path) 123 | -------------------------------------------------------------------------------- /v2xvit/models/point_pillar_transformer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from v2xvit.models.sub_modules.pillar_vfe import PillarVFE 5 | from v2xvit.models.sub_modules.point_pillar_scatter import PointPillarScatter 6 | from v2xvit.models.sub_modules.base_bev_backbone import BaseBEVBackbone 7 | from v2xvit.models.sub_modules.fuse_utils import regroup 8 | from v2xvit.models.sub_modules.downsample_conv import DownsampleConv 9 | from v2xvit.models.sub_modules.naive_compress import NaiveCompressor 10 | from v2xvit.models.sub_modules.v2xvit_basic import V2XTransformer 11 | 12 | 13 | class PointPillarTransformer(nn.Module): 14 | def __init__(self, args): 15 | super(PointPillarTransformer, 
self).__init__() 16 | 17 | self.max_cav = args['max_cav'] 18 | # PIllar VFE 19 | self.pillar_vfe = PillarVFE(args['pillar_vfe'], 20 | num_point_features=4, 21 | voxel_size=args['voxel_size'], 22 | point_cloud_range=args['lidar_range']) 23 | self.scatter = PointPillarScatter(args['point_pillar_scatter']) 24 | self.backbone = BaseBEVBackbone(args['base_bev_backbone'], 64) 25 | # used to downsample the feature map for efficient computation 26 | self.shrink_flag = False 27 | if 'shrink_header' in args: 28 | self.shrink_flag = True 29 | self.shrink_conv = DownsampleConv(args['shrink_header']) 30 | self.compression = False 31 | 32 | if args['compression'] > 0: 33 | self.compression = True 34 | self.naive_compressor = NaiveCompressor(256, args['compression']) 35 | 36 | self.fusion_net = V2XTransformer(args['transformer']) 37 | 38 | self.cls_head = nn.Conv2d(128 * 2, args['anchor_number'], 39 | kernel_size=1) 40 | self.reg_head = nn.Conv2d(128 * 2, 7 * args['anchor_number'], 41 | kernel_size=1) 42 | 43 | if args['backbone_fix']: 44 | self.backbone_fix() 45 | 46 | def backbone_fix(self): 47 | """ 48 | Fix the parameters of backbone during finetune on timedelay。 49 | """ 50 | for p in self.pillar_vfe.parameters(): 51 | p.requires_grad = False 52 | 53 | for p in self.scatter.parameters(): 54 | p.requires_grad = False 55 | 56 | for p in self.backbone.parameters(): 57 | p.requires_grad = False 58 | 59 | if self.compression: 60 | for p in self.naive_compressor.parameters(): 61 | p.requires_grad = False 62 | if self.shrink_flag: 63 | for p in self.shrink_conv.parameters(): 64 | p.requires_grad = False 65 | 66 | for p in self.cls_head.parameters(): 67 | p.requires_grad = False 68 | for p in self.reg_head.parameters(): 69 | p.requires_grad = False 70 | 71 | def forward(self, data_dict): 72 | voxel_features = data_dict['processed_lidar']['voxel_features'] 73 | voxel_coords = data_dict['processed_lidar']['voxel_coords'] 74 | voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] 75 | record_len = data_dict['record_len'] 76 | spatial_correction_matrix = data_dict['spatial_correction_matrix'] 77 | 78 | # B, max_cav, 3(dt dv infra), 1, 1 79 | prior_encoding =\ 80 | data_dict['prior_encoding'].unsqueeze(-1).unsqueeze(-1) 81 | 82 | batch_dict = {'voxel_features': voxel_features, 83 | 'voxel_coords': voxel_coords, 84 | 'voxel_num_points': voxel_num_points, 85 | 'record_len': record_len} 86 | # n, 4 -> n, c 87 | batch_dict = self.pillar_vfe(batch_dict) 88 | # n, c -> N, C, H, W 89 | batch_dict = self.scatter(batch_dict) 90 | batch_dict = self.backbone(batch_dict) 91 | 92 | spatial_features_2d = batch_dict['spatial_features_2d'] 93 | # downsample feature to reduce memory 94 | if self.shrink_flag: 95 | spatial_features_2d = self.shrink_conv(spatial_features_2d) 96 | # compressor 97 | if self.compression: 98 | spatial_features_2d = self.naive_compressor(spatial_features_2d) 99 | # N, C, H, W -> B, L, C, H, W 100 | regroup_feature, mask = regroup(spatial_features_2d, 101 | record_len, 102 | self.max_cav) 103 | # prior encoding added 104 | prior_encoding = prior_encoding.repeat(1, 1, 1, 105 | regroup_feature.shape[3], 106 | regroup_feature.shape[4]) 107 | regroup_feature = torch.cat([regroup_feature, prior_encoding], dim=2) 108 | 109 | # b l c h w -> b l h w c 110 | regroup_feature = regroup_feature.permute(0, 1, 3, 4, 2) 111 | # transformer fusion 112 | fused_feature = self.fusion_net(regroup_feature, mask, spatial_correction_matrix) 113 | # b h w c -> b c h w 114 | fused_feature = fused_feature.permute(0, 
3, 1, 2) 115 | 116 | psm = self.cls_head(fused_feature) 117 | rm = self.reg_head(fused_feature) 118 | 119 | output_dict = {'psm': psm, 120 | 'rm': rm} 121 | 122 | return output_dict 123 | -------------------------------------------------------------------------------- /v2xvit/data_utils/pre_processor/sp_voxel_preprocessor.py: -------------------------------------------------------------------------------- 1 | """ 2 | Transform points to voxels using sparse conv library 3 | """ 4 | import sys 5 | 6 | import numpy as np 7 | import torch 8 | from cumm import tensorview as tv 9 | from spconv.utils import Point2VoxelCPU3d 10 | 11 | from v2xvit.data_utils.pre_processor.base_preprocessor import \ 12 | BasePreprocessor 13 | 14 | 15 | class SpVoxelPreprocessor(BasePreprocessor): 16 | def __init__(self, preprocess_params, train): 17 | super(SpVoxelPreprocessor, self).__init__(preprocess_params, 18 | train) 19 | 20 | self.lidar_range = self.params['cav_lidar_range'] 21 | self.voxel_size = self.params['args']['voxel_size'] 22 | self.max_points_per_voxel = self.params['args']['max_points_per_voxel'] 23 | 24 | if train: 25 | self.max_voxels = self.params['args']['max_voxel_train'] 26 | else: 27 | self.max_voxels = self.params['args']['max_voxel_test'] 28 | 29 | grid_size = (np.array(self.lidar_range[3:6]) - 30 | np.array(self.lidar_range[0:3])) / np.array(self.voxel_size) 31 | self.grid_size = np.round(grid_size).astype(np.int64) 32 | 33 | # use sparse conv library to generate voxel 34 | self.voxel_generator = Point2VoxelCPU3d( 35 | vsize_xyz=self.voxel_size, 36 | coors_range_xyz=self.lidar_range, 37 | max_num_points_per_voxel=self.max_points_per_voxel, 38 | num_point_features=4, 39 | max_num_voxels=self.max_voxels 40 | ) 41 | 42 | def preprocess(self, pcd_np): 43 | data_dict = {} 44 | pcd_tv = tv.from_numpy(pcd_np) 45 | voxel_output = self.voxel_generator.point_to_voxel(pcd_tv) 46 | if isinstance(voxel_output, dict): 47 | voxels, coordinates, num_points = \ 48 | voxel_output['voxels'], voxel_output['coordinates'], \ 49 | voxel_output['num_points_per_voxel'] 50 | else: 51 | voxels, coordinates, num_points = voxel_output 52 | 53 | data_dict['voxel_features'] = voxels.numpy() 54 | data_dict['voxel_coords'] = coordinates.numpy() 55 | data_dict['voxel_num_points'] = num_points.numpy() 56 | 57 | return data_dict 58 | 59 | def collate_batch(self, batch): 60 | """ 61 | Customized pytorch data loader collate function. 62 | 63 | Parameters 64 | ---------- 65 | batch : list or dict 66 | List or dictionary. 67 | 68 | Returns 69 | ------- 70 | processed_batch : dict 71 | Updated lidar batch. 72 | """ 73 | 74 | if isinstance(batch, list): 75 | return self.collate_batch_list(batch) 76 | elif isinstance(batch, dict): 77 | return self.collate_batch_dict(batch) 78 | else: 79 | sys.exit('Batch has too be a list or a dictionarn') 80 | 81 | @staticmethod 82 | def collate_batch_list(batch): 83 | """ 84 | Customized pytorch data loader collate function. 85 | 86 | Parameters 87 | ---------- 88 | batch : list 89 | List of dictionary. Each dictionary represent a single frame. 90 | 91 | Returns 92 | ------- 93 | processed_batch : dict 94 | Updated lidar batch. 
95 | """ 96 | voxel_features = [] 97 | voxel_num_points = [] 98 | voxel_coords = [] 99 | 100 | for i in range(len(batch)): 101 | voxel_features.append(batch[i]['voxel_features']) 102 | voxel_num_points.append(batch[i]['voxel_num_points']) 103 | coords = batch[i]['voxel_coords'] 104 | voxel_coords.append( 105 | np.pad(coords, ((0, 0), (1, 0)), 106 | mode='constant', constant_values=i)) 107 | 108 | voxel_num_points = torch.from_numpy(np.concatenate(voxel_num_points)) 109 | voxel_features = torch.from_numpy(np.concatenate(voxel_features)) 110 | voxel_coords = torch.from_numpy(np.concatenate(voxel_coords)) 111 | 112 | return {'voxel_features': voxel_features, 113 | 'voxel_coords': voxel_coords, 114 | 'voxel_num_points': voxel_num_points} 115 | 116 | @staticmethod 117 | def collate_batch_dict(batch: dict): 118 | """ 119 | Collate batch if the batch is a dictionary, 120 | eg: {'voxel_features': [feature1, feature2...., feature n]} 121 | 122 | Parameters 123 | ---------- 124 | batch : dict 125 | 126 | Returns 127 | ------- 128 | processed_batch : dict 129 | Updated lidar batch. 130 | """ 131 | voxel_features = \ 132 | torch.from_numpy(np.concatenate(batch['voxel_features'])) 133 | voxel_num_points = \ 134 | torch.from_numpy(np.concatenate(batch['voxel_num_points'])) 135 | coords = batch['voxel_coords'] 136 | voxel_coords = [] 137 | 138 | for i in range(len(coords)): 139 | voxel_coords.append( 140 | np.pad(coords[i], ((0, 0), (1, 0)), 141 | mode='constant', constant_values=i)) 142 | voxel_coords = torch.from_numpy(np.concatenate(voxel_coords)) 143 | 144 | return {'voxel_features': voxel_features, 145 | 'voxel_coords': voxel_coords, 146 | 'voxel_num_points': voxel_num_points} 147 | --------------------------------------------------------------------------------