├── .gitignore ├── LICENSE ├── README.md ├── image.png ├── requirements.txt ├── setup.py └── v2xvit ├── __init__.py ├── data_utils ├── __init__.py ├── augmentor │ ├── __init__.py │ ├── augment_utils.py │ └── data_augmentor.py ├── datasets │ ├── __init__.py │ ├── basedataset.py │ ├── early_fusion_dataset.py │ ├── early_fusion_vis_dataset.py │ ├── intermediate_fusion_dataset.py │ └── late_fusion_dataset.py ├── post_processor │ ├── __init__.py │ ├── base_postprocessor.py │ ├── bev_postprocessor.py │ └── voxel_postprocessor.py └── pre_processor │ ├── __init__.py │ ├── base_preprocessor.py │ ├── bev_preprocessor.py │ ├── sp_voxel_preprocessor.py │ └── voxel_preprocessor.py ├── hypes_yaml ├── __init__.py ├── how2comm │ └── v2xset_how2comm_stcformer.yaml └── yaml_utils.py ├── loss ├── __init__.py ├── pixor_loss.py ├── point_pillar_loss.py └── voxel_net_loss.py ├── models ├── __init__.py ├── comm_modules │ ├── communication.py │ └── mutual_communication.py ├── fuse_modules │ ├── __init__.py │ ├── fuse_utils.py │ ├── how2comm_deformable.py │ ├── how2comm_deformable_transformer.py │ └── stcformer.py ├── point_pillar_how2comm.py └── sub_modules │ ├── __init__.py │ ├── base_bev_backbone.py │ ├── base_bev_backbone_resnet.py │ ├── base_transformer.py │ ├── downsample_conv.py │ ├── feature_flow.py │ ├── fuse_utils.py │ ├── how2comm_preprocess.py │ ├── naive_compress.py │ ├── pillar_vfe.py │ ├── point_pillar_scatter.py │ ├── resblock.py │ ├── self_attn.py │ └── torch_transformation_utils.py ├── tools ├── __init__.py ├── debug_utils.py ├── inference.py ├── infrence_utils.py ├── multi_gpu_utils.py ├── train.py └── train_utils.py ├── utils ├── __init__.py ├── box_overlaps.pyx ├── box_utils.py ├── common_utils.py ├── eval_utils.py ├── pcd_utils.py ├── setup.py └── transformation_utils.py ├── version.py └── visualization ├── __init__.py ├── pinhole_param.json ├── vis_data_sequence.py └── vis_utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | v2xset/ 2 | # Byte-compiled / optimized / DLL files 3 | __pycache__/ 4 | *.py[cod] 5 | *$py.class 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | pip-wheel-metadata/ 25 | share/python-wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .nox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | *.py,cover 52 | .hypothesis/ 53 | .pytest_cache/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | db.sqlite3-journal 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | .python-version 87 | 88 | # pipenv 89 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 90 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 91 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 92 | # install all needed dependencies. 93 | #Pipfile.lock 94 | 95 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 96 | __pypackages__/ 97 | 98 | # Celery stuff 99 | celerybeat-schedule 100 | celerybeat.pid 101 | 102 | # SageMath parsed files 103 | *.sage.py 104 | 105 | # Environments 106 | .env 107 | .venv 108 | env/ 109 | venv/ 110 | ENV/ 111 | env.bak/ 112 | venv.bak/ 113 | 114 | # Spyder project settings 115 | .spyderproject 116 | .spyproject 117 | 118 | # Rope project settings 119 | .ropeproject 120 | 121 | # mkdocs documentation 122 | /site 123 | 124 | # mypy 125 | .mypy_cache/ 126 | .dmypy.json 127 | dmypy.json 128 | 129 | # Pyre type checker 130 | .pyre/ 131 | logs/ 132 | *.c 133 | *.so 134 | .idea 135 | opv2x 136 | .DS_Store 137 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Dicken 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # How2comm: Communication-Efficient and Collaboration-Pragmatic Multi-Agent Perception 2 | 3 | The official repository of the NeurIPS2023 paper: 4 | 5 | ![teaser](image.png) 6 | 7 | > [**How2comm: Communication-Efficient and Collaboration-Pragmatic Multi-Agent Perception**](https://openreview.net/pdf?id=Dbaxm9ujq6) 8 | > Dingkang Yang\*, Kun Yang\*, Yuzheng Wang, Jing Liu, Zhi Xu, Rongbin Yin, Peng Zhai, Lihua Zhang
9 | 10 | 11 | 12 | ## Abstract 13 | 14 | Multi-agent collaborative perception has recently received widespread attention as an emerging application in driving scenarios. Despite the advancements in previous efforts, challenges remain due to various dilemmas in the perception procedure, including communication redundancy, transmission delay, and collaboration heterogeneity. To tackle these issues, we propose *How2comm*, a collaborative perception framework that seeks a trade-off between perception performance and communication bandwidth. Our novelties lie in three aspects. First, we devise a mutual information-aware communication mechanism to maximally sustain the informative features shared by collaborators. The spatial-channel filtering is adopted to perform effective feature sparsification for efficient communication. Second, we present a flow-guided delay compensation strategy to predict future characteristics from collaborators and eliminate feature misalignment due to temporal asynchrony. Ultimately, a pragmatic collaboration transformer is introduced to integrate holistic spatial semantics and temporal context clues among agents. 15 | Our framework is thoroughly evaluated on several LiDAR-based collaborative detection datasets in real-world and simulated scenarios. Comprehensive experiments demonstrate the superiority of How2comm and the effectiveness of all its vital components. 16 | 17 | 18 | ## Installation 19 | Please refer to [OpenCOOD](https://opencood.readthedocs.io/en/latest/md_files/installation.html) and [centerformer](https://github.com/TuSimple/centerformer/blob/master/docs/INSTALL.md) for more installation details. 20 | 21 | Here we install the environment based on the OpenCOOD and centerformer repos. 22 | 23 | ```bash 24 | # Clone the OpenCOOD repo 25 | git clone https://github.com/DerrickXuNu/OpenCOOD.git 26 | cd OpenCOOD 27 | 28 | # Create a conda environment 29 | conda env create -f environment.yml 30 | conda activate opencood 31 | 32 | # Install pytorch 33 | conda install -y pytorch torchvision cudatoolkit=11.3 -c pytorch 34 | 35 | # Install spconv 36 | pip install spconv-cu113 37 | 38 | # Install the basic library for deformable attention 39 | git clone https://github.com/TuSimple/centerformer.git 40 | cd centerformer 41 | 42 | # Install requirements 43 | pip install -r requirements.txt 44 | sh setup.sh 45 | 46 | # Clone our repo 47 | git clone https://github.com/ydk122024/How2comm.git && cd How2comm 48 | 49 | # Install v2xvit into the conda environment 50 | python setup.py develop 51 | python v2xvit/utils/setup.py build_ext --inplace 52 | ``` 53 | 54 | ## Data 55 | Please download the [V2XSet](https://drive.google.com/drive/folders/1r5sPiBEvo8Xby-nMaWUTnJIPK6WhY1B6) and [OPV2V](https://drive.google.com/drive/folders/1dkDeHlwOVbmgXcDazZvO6TFEZ6V_7WUu) datasets. The dataset folders should be structured as follows: 56 | ```sh 57 | v2xset # the downloaded v2xset data 58 | ├── train 59 | ├── validate 60 | └── test 61 | opv2v # the downloaded opv2v data 62 | ├── train 63 | ├── validate 64 | └── test 65 | ``` 66 | 67 | ## Getting Started 68 | ### Test with pretrained models 69 | We provide our pretrained models on the V2XSet and OPV2V datasets. The download URLs are as follows: 70 | 71 | * Baidu Disk URL is [here](https://pan.baidu.com/share/init?surl=oTepWy7q0U_x1jXNThbyMw&pwd=vaz2). 72 | 73 | 74 | * Google Drive URL is [here](https://drive.google.com/drive/folders/1xuUAJ82BgCP4EERW6S98NjWTzF8Hqrib).
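After downloading a checkpoint from either link above, unpack it into the log folder described in the next paragraph. A rough sketch of the expected layout is shown below; the checkpoint file names are illustrative assumptions (OpenCOOD-style `net_epochX.pth` naming) and should be matched to whatever the downloaded archive actually contains:

```sh
v2xvit/logs/how2comm
├── config.yaml       # configuration read when --model_dir points here
├── net_epoch32.pth   # e.g., the epoch evaluated on V2XSet
└── net_epoch36.pth   # e.g., the epoch evaluated on OPV2V
```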
75 | 76 | 77 | To test the provided pretrained models of How2comm, please download the model file and put it under `v2xvit/logs/how2comm`. The `validate_path` in the corresponding `config.yaml` file should be changed to `v2xset/test` or `opv2v/test`. 78 | 79 | Run the following command to perform inference: 80 | ```sh 81 | python v2xvit/tools/inference.py --model_dir ${CONFIG_DIR} --eval_epoch ${EVAL_EPOCH} 82 | ``` 83 | The explanation of the arguments is as follows: 84 | - `model_dir`: the path to your saved model. 85 | - `eval_epoch`: the epoch number to evaluate. 86 | 87 | You can use the following commands to test the provided pretrained models: 88 | ```sh 89 | V2XSet dataset: python v2xvit/tools/inference.py --model_dir ${CONFIG_DIR} --eval_epoch 32 90 | OPV2V dataset: python v2xvit/tools/inference.py --model_dir ${CONFIG_DIR} --eval_epoch 36 91 | ``` 92 | 93 | ### Train your model 94 | We follow OpenCOOD and use yaml files to configure the training parameters. You can use the following command to train your own model from scratch or from a saved checkpoint: 95 | ```sh 96 | CUDA_LAUNCH_BLOCKING=1 CUDA_VISIBLE_DEVICES=1 python v2xvit/tools/train.py --hypes_yaml ${YAML_DIR} --model_dir ${CHECKPOINT_DIR} 97 | ``` 98 | The explanation of the arguments is as follows: 99 | - `hypes_yaml`: the path of the training configuration file, e.g. `v2xvit/hypes_yaml/how2comm/v2xset_how2comm_stcformer.yaml`. You can change the configuration parameters in this provided yaml file. 100 | - `model_dir` (optional): the path of the checkpoint folder, used to fine-tune trained models. When `model_dir` is 101 | given, the trainer discards `hypes_yaml` and loads the `config.yaml` in the checkpoint folder (see the usage sketch at the end of this README). 102 | 103 | ## Citation 104 | If you use How2comm in your research, please cite the following paper: 105 | ```bibtex 106 | @inproceedings{yang2023how2comm, 107 | title={How2comm: Communication-efficient and collaboration-pragmatic multi-agent perception}, 108 | author={Yang, Dingkang and Yang, Kun and Wang, Yuzheng and Liu, Jing and Xu, Zhi and Yin, Rongbin and Zhai, Peng and Zhang, Lihua}, 109 | booktitle={Thirty-seventh Conference on Neural Information Processing Systems (NeurIPS)}, 110 | year={2023} 111 | } 112 | ``` 113 | 114 | ## Acknowledgement 115 | Many thanks to Runsheng Xu for the high-quality datasets and codebases, including [V2XSet](https://drive.google.com/drive/folders/1r5sPiBEvo8Xby-nMaWUTnJIPK6WhY1B6), [OPV2V](https://drive.google.com/drive/folders/1dkDeHlwOVbmgXcDazZvO6TFEZ6V_7WUu), [OpenCOOD](https://github.com/DerrickXuNu/OpenCOOD) and [OpenCDA](https://github.com/ucla-mobility/OpenCDA). Thanks also go to [Where2comm](https://github.com/MediaBrain-SJTU/Where2comm.git) and [centerformer](https://github.com/TuSimple/centerformer.git) for their excellent codebases.
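As a concrete usage sketch of the training and inference commands above (the GPU id and all paths below are placeholders; adapt them to your own setup):

```sh
# Train from scratch with the provided How2comm configuration
CUDA_VISIBLE_DEVICES=0 python v2xvit/tools/train.py \
    --hypes_yaml v2xvit/hypes_yaml/how2comm/v2xset_how2comm_stcformer.yaml

# Resume / fine-tune: when --model_dir is given, the config.yaml inside the
# checkpoint folder is used and --hypes_yaml is ignored
CUDA_VISIBLE_DEVICES=0 python v2xvit/tools/train.py \
    --hypes_yaml v2xvit/hypes_yaml/how2comm/v2xset_how2comm_stcformer.yaml \
    --model_dir v2xvit/logs/how2comm

# Evaluate a checkpoint at a given epoch number
python v2xvit/tools/inference.py --model_dir v2xvit/logs/how2comm --eval_epoch 32
```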
116 | -------------------------------------------------------------------------------- /image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ydk122024/How2comm/8d357abdcb265ed4db5e4c6d9ee5400ea5799abe/image.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib 2 | numpy 3 | open3d 4 | opencv-python 5 | cython 6 | tensorboardX 7 | shapely 8 | einops 9 | 10 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | from os.path import dirname, realpath 5 | from setuptools import setup, find_packages, Distribution 6 | from v2xvit.version import __version__ 7 | 8 | 9 | def _read_requirements_file(): 10 | """Return the elements in requirements.txt.""" 11 | req_file_path = '%s/requirements.txt' % dirname(realpath(__file__)) 12 | with open(req_file_path) as f: 13 | return [line.strip() for line in f] 14 | 15 | 16 | setup( 17 | name='V2XViT', 18 | version=__version__, 19 | packages=find_packages(), 20 | url='https://github.com/ucla-mobility/OpenCDA.git', 21 | license='MIT', 22 | author='Runsheng Xu, Hao Xiang, Zhengzhong Tu', 23 | author_email='rxx3386@ucla.edu', 24 | description='An opensource pytorch framework for autonomous driving ' 25 | 'cooperative detection', 26 | long_description=open("README.md").read(), 27 | install_requires=_read_requirements_file(), 28 | ) 29 | -------------------------------------------------------------------------------- /v2xvit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ydk122024/How2comm/8d357abdcb265ed4db5e4c6d9ee5400ea5799abe/v2xvit/__init__.py -------------------------------------------------------------------------------- /v2xvit/data_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ydk122024/How2comm/8d357abdcb265ed4db5e4c6d9ee5400ea5799abe/v2xvit/data_utils/__init__.py -------------------------------------------------------------------------------- /v2xvit/data_utils/augmentor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ydk122024/How2comm/8d357abdcb265ed4db5e4c6d9ee5400ea5799abe/v2xvit/data_utils/augmentor/__init__.py -------------------------------------------------------------------------------- /v2xvit/data_utils/augmentor/augment_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from v2xvit.utils import common_utils 4 | 5 | 6 | def random_flip_along_x(gt_boxes, points): 7 | """ 8 | Args: 9 | gt_boxes: (N, 7 + C), [x, y, z, dx, dy, dz, heading, [vx], [vy]] 10 | points: (M, 3 + C) 11 | Returns: 12 | """ 13 | enable = np.random.choice([False, True], replace=False, p=[0.5, 0.5]) 14 | if enable: 15 | gt_boxes[:, 1] = -gt_boxes[:, 1] 16 | gt_boxes[:, 6] = -gt_boxes[:, 6] 17 | points[:, 1] = -points[:, 1] 18 | 19 | if gt_boxes.shape[1] > 7: 20 | gt_boxes[:, 8] = -gt_boxes[:, 8] 21 | 22 | return gt_boxes, points 23 | 24 | 25 | def random_flip_along_y(gt_boxes, points): 26 | """ 27 | Args: 28 | gt_boxes: (N, 7 + C), [x, y, z, dx, dy, dz, heading, [vx], [vy]] 29 | points: (M, 3 + C) 30 | Returns: 31 
| """ 32 | enable = np.random.choice([False, True], replace=False, p=[0.5, 0.5]) 33 | if enable: 34 | gt_boxes[:, 0] = -gt_boxes[:, 0] 35 | gt_boxes[:, 6] = -(gt_boxes[:, 6] + np.pi) 36 | points[:, 0] = -points[:, 0] 37 | 38 | if gt_boxes.shape[1] > 7: 39 | gt_boxes[:, 7] = -gt_boxes[:, 7] 40 | 41 | return gt_boxes, points 42 | 43 | 44 | def global_rotation(gt_boxes, points, rot_range): 45 | """ 46 | Args: 47 | gt_boxes: (N, 7 + C), [x, y, z, dx, dy, dz, heading, [vx], [vy]] 48 | points: (M, 3 + C), 49 | rot_range: [min, max] 50 | Returns: 51 | """ 52 | noise_rotation = np.random.uniform(rot_range[0], 53 | rot_range[1]) 54 | points = common_utils.rotate_points_along_z(points[np.newaxis, :, :], 55 | np.array([noise_rotation]))[0] 56 | 57 | gt_boxes[:, 0:3] = \ 58 | common_utils.rotate_points_along_z(gt_boxes[np.newaxis, :, 0:3], 59 | np.array([noise_rotation]))[0] 60 | gt_boxes[:, 6] += noise_rotation 61 | 62 | if gt_boxes.shape[1] > 7: 63 | gt_boxes[:, 7:9] = common_utils.rotate_points_along_z( 64 | np.hstack((gt_boxes[:, 7:9], np.zeros((gt_boxes.shape[0], 1))))[ 65 | np.newaxis, :, :], 66 | np.array([noise_rotation]))[0][:, 0:2] 67 | 68 | return gt_boxes, points 69 | 70 | 71 | def global_scaling(gt_boxes, points, scale_range): 72 | """ 73 | Args: 74 | gt_boxes: (N, 7), [x, y, z, dx, dy, dz, heading] 75 | points: (M, 3 + C), 76 | scale_range: [min, max] 77 | Returns: 78 | """ 79 | if scale_range[1] - scale_range[0] < 1e-3: 80 | return gt_boxes, points 81 | noise_scale = np.random.uniform(scale_range[0], scale_range[1]) 82 | points[:, :3] *= noise_scale 83 | gt_boxes[:, :6] *= noise_scale 84 | 85 | return gt_boxes, points 86 | -------------------------------------------------------------------------------- /v2xvit/data_utils/augmentor/data_augmentor.py: -------------------------------------------------------------------------------- 1 | """ 2 | Class for data augmentation 3 | """ 4 | from functools import partial 5 | 6 | from v2xvit.data_utils.augmentor import augment_utils 7 | 8 | 9 | class DataAugmentor(object): 10 | """ 11 | Data Augmentor. 12 | 13 | Parameters 14 | ---------- 15 | augment_config : list 16 | A list of augmentation configuration. 17 | 18 | Attributes 19 | ---------- 20 | data_augmentor_queue : list 21 | The list of data augmented functions. 
22 | """ 23 | 24 | def __init__(self, augment_config, train=True): 25 | self.data_augmentor_queue = [] 26 | self.train = train 27 | 28 | for cur_cfg in augment_config: 29 | cur_augmentor = getattr(self, cur_cfg['NAME'])(config=cur_cfg) 30 | self.data_augmentor_queue.append(cur_augmentor) 31 | 32 | def random_world_flip(self, data_dict=None, config=None): 33 | if data_dict is None: 34 | return partial(self.random_world_flip, config=config) 35 | 36 | gt_boxes, gt_mask, points = data_dict['object_bbx_center'], \ 37 | data_dict['object_bbx_mask'], \ 38 | data_dict['lidar_np'] 39 | gt_boxes_valid = gt_boxes[gt_mask == 1] 40 | 41 | for cur_axis in config['ALONG_AXIS_LIST']: 42 | assert cur_axis in ['x', 'y'] 43 | gt_boxes_valid, points = getattr(augment_utils, 44 | 'random_flip_along_%s' % cur_axis)( 45 | gt_boxes_valid, points, 46 | ) 47 | 48 | gt_boxes[:gt_boxes_valid.shape[0], :] = gt_boxes_valid 49 | 50 | data_dict['object_bbx_center'] = gt_boxes 51 | data_dict['object_bbx_mask'] = gt_mask 52 | data_dict['lidar_np'] = points 53 | 54 | return data_dict 55 | 56 | def random_world_rotation(self, data_dict=None, config=None): 57 | if data_dict is None: 58 | return partial(self.random_world_rotation, config=config) 59 | 60 | rot_range = config['WORLD_ROT_ANGLE'] 61 | if not isinstance(rot_range, list): 62 | rot_range = [-rot_range, rot_range] 63 | 64 | gt_boxes, gt_mask, points = data_dict['object_bbx_center'], \ 65 | data_dict['object_bbx_mask'], \ 66 | data_dict['lidar_np'] 67 | gt_boxes_valid = gt_boxes[gt_mask == 1] 68 | gt_boxes_valid, points = augment_utils.global_rotation( 69 | gt_boxes_valid, points, rot_range=rot_range 70 | ) 71 | gt_boxes[:gt_boxes_valid.shape[0], :] = gt_boxes_valid 72 | 73 | data_dict['object_bbx_center'] = gt_boxes 74 | data_dict['object_bbx_mask'] = gt_mask 75 | data_dict['lidar_np'] = points 76 | 77 | return data_dict 78 | 79 | def random_world_scaling(self, data_dict=None, config=None): 80 | if data_dict is None: 81 | return partial(self.random_world_scaling, config=config) 82 | 83 | gt_boxes, gt_mask, points = data_dict['object_bbx_center'], \ 84 | data_dict['object_bbx_mask'], \ 85 | data_dict['lidar_np'] 86 | gt_boxes_valid = gt_boxes[gt_mask == 1] 87 | 88 | gt_boxes_valid, points = augment_utils.global_scaling( 89 | gt_boxes_valid, points, config['WORLD_SCALE_RANGE'] 90 | ) 91 | gt_boxes[:gt_boxes_valid.shape[0], :] = gt_boxes_valid 92 | 93 | data_dict['object_bbx_center'] = gt_boxes 94 | data_dict['object_bbx_mask'] = gt_mask 95 | data_dict['lidar_np'] = points 96 | 97 | return data_dict 98 | 99 | def forward(self, data_dict): 100 | """ 101 | Args: 102 | data_dict: 103 | points: (N, 3 + C_in) 104 | gt_boxes: optional, (N, 7) [x, y, z, dx, dy, dz, heading] 105 | gt_names: optional, (N), string 106 | ... 
107 | 108 | Returns: 109 | """ 110 | if self.train: 111 | for cur_augmentor in self.data_augmentor_queue: 112 | data_dict = cur_augmentor(data_dict=data_dict) 113 | 114 | return data_dict 115 | -------------------------------------------------------------------------------- /v2xvit/data_utils/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from v2xvit.data_utils.datasets.late_fusion_dataset import LateFusionDataset 2 | from v2xvit.data_utils.datasets.early_fusion_dataset import EarlyFusionDataset 3 | from v2xvit.data_utils.datasets.intermediate_fusion_dataset import IntermediateFusionDataset 4 | 5 | __all__ = { 6 | 'LateFusionDataset': LateFusionDataset, 7 | 'EarlyFusionDataset': EarlyFusionDataset, 8 | 'IntermediateFusionDataset': IntermediateFusionDataset 9 | } 10 | 11 | # the final range for evaluation 12 | GT_RANGE = [-140, -40, -3, 140, 40, 1] 13 | # The communication range for cavs 14 | COM_RANGE = 70 15 | 16 | 17 | def build_dataset(dataset_cfg, visualize=False, train=True): 18 | dataset_name = dataset_cfg['fusion']['core_method'] 19 | error_message = f"{dataset_name} is not found. " \ 20 | f"Please add your processor file's name in opencood/" \ 21 | f"data_utils/datasets/init.py" 22 | assert dataset_name in ['LateFusionDataset', 'EarlyFusionDataset', 23 | 'IntermediateFusionDataset'], error_message 24 | 25 | dataset = __all__[dataset_name]( 26 | params=dataset_cfg, 27 | visualize=visualize, 28 | train=train 29 | ) 30 | 31 | return dataset 32 | -------------------------------------------------------------------------------- /v2xvit/data_utils/datasets/early_fusion_vis_dataset.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is a dataset for early fusion visualization only. 3 | """ 4 | from collections import OrderedDict 5 | 6 | import numpy as np 7 | import torch 8 | 9 | from v2xvit.utils import box_utils 10 | from v2xvit.data_utils.post_processor import build_postprocessor 11 | from v2xvit.data_utils.datasets import basedataset 12 | from v2xvit.data_utils.pre_processor import build_preprocessor 13 | from v2xvit.utils.pcd_utils import \ 14 | mask_points_by_range, mask_ego_points, shuffle_points, \ 15 | downsample_lidar_minimum 16 | 17 | 18 | class EarlyFusionVisDataset(basedataset.BaseDataset): 19 | def __init__(self, params, visualize, train=True): 20 | super(EarlyFusionVisDataset, self).__init__(params, visualize, train) 21 | self.pre_processor = build_preprocessor(params['preprocess'], 22 | train) 23 | self.post_processor = build_postprocessor(params['postprocess'], train) 24 | 25 | def __getitem__(self, idx): 26 | base_data_dict = self.retrieve_base_data(idx) 27 | 28 | processed_data_dict = OrderedDict() 29 | processed_data_dict['ego'] = {} 30 | 31 | ego_id = -1 32 | ego_lidar_pose = [] 33 | 34 | # first find the ego vehicle's lidar pose 35 | for cav_id, cav_content in base_data_dict.items(): 36 | if cav_content['ego']: 37 | ego_id = cav_id 38 | ego_lidar_pose = cav_content['params']['lidar_pose'] 39 | break 40 | 41 | assert ego_id != -1 42 | assert len(ego_lidar_pose) > 0 43 | 44 | projected_lidar_stack = [] 45 | object_stack = [] 46 | object_id_stack = [] 47 | 48 | # loop over all CAVs to process information 49 | for cav_id, selected_cav_base in base_data_dict.items(): 50 | selected_cav_processed = self.get_item_single_car( 51 | selected_cav_base, 52 | ego_lidar_pose) 53 | # all these lidar and object coordinates are projected to ego 54 | # already. 
55 | projected_lidar_stack.append( 56 | selected_cav_processed['projected_lidar']) 57 | object_stack.append(selected_cav_processed['object_bbx_center']) 58 | object_id_stack += selected_cav_processed['object_ids'] 59 | 60 | # exclude all repetitive objects 61 | unique_indices = \ 62 | [object_id_stack.index(x) for x in set(object_id_stack)] 63 | object_stack = np.vstack(object_stack) 64 | object_stack = object_stack[unique_indices] 65 | 66 | # make sure bounding boxes across all frames have the same number 67 | object_bbx_center = \ 68 | np.zeros((self.params['postprocess']['max_num'], 7)) 69 | mask = np.zeros(self.params['postprocess']['max_num']) 70 | object_bbx_center[:object_stack.shape[0], :] = object_stack 71 | mask[:object_stack.shape[0]] = 1 72 | 73 | # convert list to numpy array, (N, 4) 74 | projected_lidar_stack = np.vstack(projected_lidar_stack) 75 | 76 | # data augmentation 77 | projected_lidar_stack, object_bbx_center, mask = \ 78 | self.augment(projected_lidar_stack, object_bbx_center, mask) 79 | 80 | # we do lidar filtering in the stacked lidar 81 | projected_lidar_stack = mask_points_by_range(projected_lidar_stack, 82 | self.params['preprocess'][ 83 | 'cav_lidar_range']) 84 | # augmentation may remove some of the bbx out of range 85 | object_bbx_center_valid = object_bbx_center[mask == 1] 86 | object_bbx_center_valid = \ 87 | box_utils.mask_boxes_outside_range_numpy(object_bbx_center_valid, 88 | self.params['preprocess'][ 89 | 'cav_lidar_range'], 90 | self.params['postprocess'][ 91 | 'order'] 92 | ) 93 | mask[object_bbx_center_valid.shape[0]:] = 0 94 | object_bbx_center[:object_bbx_center_valid.shape[0]] = \ 95 | object_bbx_center_valid 96 | object_bbx_center[object_bbx_center_valid.shape[0]:] = 0 97 | 98 | processed_data_dict['ego'].update( 99 | {'object_bbx_center': object_bbx_center, 100 | 'object_bbx_mask': mask, 101 | 'object_ids': [object_id_stack[i] for i in unique_indices], 102 | 'origin_lidar': projected_lidar_stack 103 | }) 104 | 105 | return processed_data_dict 106 | 107 | def get_item_single_car(self, selected_cav_base, ego_pose): 108 | """ 109 | Project the lidar and bbx to ego space first, and then do clipping. 110 | 111 | Parameters 112 | ---------- 113 | selected_cav_base : dict 114 | The dictionary contains a single CAV's raw information. 115 | ego_pose : list 116 | The ego vehicle lidar pose under world coordinate. 117 | 118 | Returns 119 | ------- 120 | selected_cav_processed : dict 121 | The dictionary contains the cav's processed information. 
122 | """ 123 | selected_cav_processed = {} 124 | 125 | # calculate the transformation matrix 126 | transformation_matrix = \ 127 | selected_cav_base['params']['transformation_matrix'] 128 | 129 | # retrieve objects under ego coordinates 130 | object_bbx_center, object_bbx_mask, object_ids = \ 131 | self.post_processor.generate_object_center([selected_cav_base], 132 | ego_pose) 133 | 134 | # filter lidar 135 | lidar_np = selected_cav_base['lidar_np'] 136 | lidar_np = shuffle_points(lidar_np) 137 | # remove points that hit itself 138 | lidar_np = mask_ego_points(lidar_np) 139 | # project the lidar to ego space 140 | lidar_np[:, :3] = \ 141 | box_utils.project_points_by_matrix_torch(lidar_np[:, :3], 142 | transformation_matrix) 143 | 144 | selected_cav_processed.update( 145 | {'object_bbx_center': object_bbx_center[object_bbx_mask == 1], 146 | 'object_ids': object_ids, 147 | 'projected_lidar': lidar_np}) 148 | 149 | return selected_cav_processed 150 | 151 | def collate_batch_train(self, batch): 152 | """ 153 | Customized collate function for pytorch dataloader during training 154 | for late fusion dataset. 155 | 156 | Parameters 157 | ---------- 158 | batch : dict 159 | 160 | Returns 161 | ------- 162 | batch : dict 163 | Reformatted batch. 164 | """ 165 | # during training, we only care about ego. 166 | output_dict = {'ego': {}} 167 | 168 | object_bbx_center = [] 169 | object_bbx_mask = [] 170 | origin_lidar = [] 171 | 172 | for i in range(len(batch)): 173 | ego_dict = batch[i]['ego'] 174 | object_bbx_center.append(ego_dict['object_bbx_center']) 175 | object_bbx_mask.append(ego_dict['object_bbx_mask']) 176 | origin_lidar.append(ego_dict['origin_lidar']) 177 | 178 | # convert to numpy, (B, max_num, 7) 179 | object_bbx_center = torch.from_numpy(np.array(object_bbx_center)) 180 | object_bbx_mask = torch.from_numpy(np.array(object_bbx_mask)) 181 | output_dict['ego'].update({'object_bbx_center': object_bbx_center, 182 | 'object_bbx_mask': object_bbx_mask}) 183 | 184 | origin_lidar = \ 185 | np.array(downsample_lidar_minimum(pcd_np_list=origin_lidar)) 186 | origin_lidar = torch.from_numpy(origin_lidar) 187 | output_dict['ego'].update({'origin_lidar': origin_lidar}) 188 | 189 | return output_dict 190 | -------------------------------------------------------------------------------- /v2xvit/data_utils/post_processor/__init__.py: -------------------------------------------------------------------------------- 1 | from v2xvit.data_utils.post_processor.voxel_postprocessor import VoxelPostprocessor 2 | from v2xvit.data_utils.post_processor.bev_postprocessor import BevPostprocessor 3 | 4 | __all__ = { 5 | 'VoxelPostprocessor': VoxelPostprocessor, 6 | 'BevPostprocessor': BevPostprocessor, 7 | } 8 | 9 | 10 | def build_postprocessor(anchor_cfg, train): 11 | process_method_name = anchor_cfg['core_method'] 12 | assert process_method_name in ['VoxelPostprocessor', 'BevPostprocessor'] 13 | anchor_generator = __all__[process_method_name]( 14 | anchor_params=anchor_cfg, 15 | train=train 16 | ) 17 | 18 | return anchor_generator 19 | -------------------------------------------------------------------------------- /v2xvit/data_utils/post_processor/base_postprocessor.py: -------------------------------------------------------------------------------- 1 | """ 2 | Template for AnchorGenerator 3 | """ 4 | 5 | import numpy as np 6 | import torch 7 | 8 | from v2xvit.utils import box_utils 9 | 10 | 11 | class BasePostprocessor(object): 12 | """ 13 | Template for Anchor generator. 
14 | 15 | Parameters 16 | ---------- 17 | anchor_params : dict 18 | The dictionary containing all anchor-related parameters. 19 | train : bool 20 | Indicate train or test mode. 21 | 22 | Attributes 23 | ---------- 24 | bbx_dict : dictionary 25 | Contain all objects information across the cav, key: id, value: bbx 26 | coordinates (1, 7) 27 | """ 28 | 29 | def __init__(self, anchor_params, train=True): 30 | self.params = anchor_params 31 | self.bbx_dict = {} 32 | self.train = train 33 | 34 | def generate_anchor_box(self): 35 | # needs to be overloaded 36 | return None 37 | 38 | def generate_label(self, *argv): 39 | return None 40 | 41 | def generate_gt_bbx(self, data_dict): 42 | """ 43 | The base postprocessor will generate 3d groundtruth bounding box. 44 | 45 | Parameters 46 | ---------- 47 | data_dict : dict 48 | The dictionary containing the origin input data of model. 49 | 50 | Returns 51 | ------- 52 | gt_box3d_tensor : torch.Tensor 53 | The groundtruth bounding box tensor, shape (N, 8, 3). 54 | """ 55 | gt_box3d_list = [] 56 | # used to avoid repetitive bounding box 57 | object_id_list = [] 58 | 59 | for cav_id, cav_content in data_dict.items(): 60 | # used to project gt bounding box to ego space. 61 | # the transformation matrix for gt should always be based on 62 | # current timestamp (object transformation matrix is for 63 | # late fusion only since other fusion method already did 64 | # the transformation in the preprocess) 65 | transformation_matrix = cav_content['transformation_matrix'] \ 66 | if 'gt_transformation_matrix' not in cav_content \ 67 | else cav_content['gt_transformation_matrix'] 68 | 69 | object_bbx_center = cav_content['object_bbx_center'] 70 | object_bbx_mask = cav_content['object_bbx_mask'] 71 | object_ids = cav_content['object_ids'] 72 | object_bbx_center = object_bbx_center[object_bbx_mask == 1] 73 | 74 | # convert center to corner 75 | object_bbx_corner = \ 76 | box_utils.boxes_to_corners_3d(object_bbx_center, 77 | self.params['order']) 78 | projected_object_bbx_corner = \ 79 | box_utils.project_box3d(object_bbx_corner.float(), 80 | transformation_matrix) 81 | gt_box3d_list.append(projected_object_bbx_corner) 82 | 83 | # append the corresponding ids 84 | object_id_list += object_ids 85 | 86 | # gt bbx 3d 87 | gt_box3d_list = torch.vstack(gt_box3d_list) 88 | # some of the bbx may be repetitive, use the id list to filter 89 | gt_box3d_selected_indices = \ 90 | [object_id_list.index(x) for x in set(object_id_list)] 91 | gt_box3d_tensor = gt_box3d_list[gt_box3d_selected_indices] 92 | 93 | # filter the gt_box to make sure all bbx are in the range 94 | mask = \ 95 | box_utils.get_mask_for_boxes_within_range_torch(gt_box3d_tensor) 96 | gt_box3d_tensor = gt_box3d_tensor[mask, :, :] 97 | 98 | return gt_box3d_tensor 99 | 100 | def generate_object_center(self, 101 | cav_contents, 102 | reference_lidar_pose): 103 | """ 104 | Retrieve all objects in a format of (n, 7), where 7 represents 105 | x, y, z, l, w, h, yaw or x, y, z, h, w, l, yaw. 106 | 107 | Parameters 108 | ---------- 109 | cav_contents : list 110 | List of dictionary, save all cavs' information. 111 | 112 | reference_lidar_pose : list 113 | The final target lidar pose with length 6. 114 | 115 | Returns 116 | ------- 117 | object_np : np.ndarray 118 | Shape is (max_num, 7). 119 | mask : np.ndarray 120 | Shape is (max_num,). 121 | object_ids : list 122 | Length is number of bbx in current sample. 
123 | """ 124 | from v2xvit.data_utils.datasets import GT_RANGE 125 | 126 | tmp_object_dict = {} 127 | for cav_content in cav_contents: 128 | tmp_object_dict.update(cav_content['params']['vehicles']) 129 | 130 | output_dict = {} 131 | filter_range = self.params['anchor_args']['cav_lidar_range'] \ 132 | if self.train else GT_RANGE 133 | 134 | box_utils.project_world_objects(tmp_object_dict, 135 | output_dict, 136 | reference_lidar_pose, 137 | filter_range, 138 | self.params['order']) 139 | 140 | object_np = np.zeros((self.params['max_num'], 7)) 141 | mask = np.zeros(self.params['max_num']) 142 | object_ids = [] 143 | 144 | for i, (object_id, object_bbx) in enumerate(output_dict.items()): 145 | object_np[i] = object_bbx[0, :] 146 | mask[i] = 1 147 | object_ids.append(object_id) 148 | 149 | return object_np, mask, object_ids 150 | -------------------------------------------------------------------------------- /v2xvit/data_utils/pre_processor/__init__.py: -------------------------------------------------------------------------------- 1 | from v2xvit.data_utils.pre_processor.base_preprocessor import BasePreprocessor 2 | from v2xvit.data_utils.pre_processor.voxel_preprocessor import VoxelPreprocessor 3 | from v2xvit.data_utils.pre_processor.bev_preprocessor import BevPreprocessor 4 | from v2xvit.data_utils.pre_processor.sp_voxel_preprocessor import SpVoxelPreprocessor 5 | 6 | __all__ = { 7 | 'BasePreprocessor': BasePreprocessor, 8 | 'VoxelPreprocessor': VoxelPreprocessor, 9 | 'BevPreprocessor': BevPreprocessor, 10 | 'SpVoxelPreprocessor': SpVoxelPreprocessor 11 | } 12 | 13 | 14 | def build_preprocessor(preprocess_cfg, train): 15 | process_method_name = preprocess_cfg['core_method'] 16 | error_message = f"{process_method_name} is not found. " \ 17 | f"Please add your processor file's name in opencood/" \ 18 | f"data_utils/processor/init.py" 19 | assert process_method_name in ['BasePreprocessor', 'VoxelPreprocessor', 20 | 'BevPreprocessor', 'SpVoxelPreprocessor'], \ 21 | error_message 22 | 23 | processor = __all__[process_method_name]( 24 | preprocess_params=preprocess_cfg, 25 | train=train 26 | ) 27 | 28 | return processor 29 | -------------------------------------------------------------------------------- /v2xvit/data_utils/pre_processor/base_preprocessor.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from v2xvit.utils import pcd_utils 4 | 5 | 6 | class BasePreprocessor(object): 7 | """ 8 | Basic Lidar pre-processor. 9 | 10 | Parameters 11 | ---------- 12 | preprocess_params : dict 13 | The dictionary containing all parameters of the preprocessing. 14 | 15 | train : bool 16 | Train or test mode. 17 | """ 18 | 19 | def __init__(self, preprocess_params, train): 20 | self.params = preprocess_params 21 | self.train = train 22 | 23 | def preprocess(self, pcd_np): 24 | """ 25 | Preprocess the lidar points by simple sampling. 26 | 27 | Parameters 28 | ---------- 29 | pcd_np : np.ndarray 30 | The raw lidar. 31 | 32 | Returns 33 | ------- 34 | data_dict : the output dictionary. 35 | """ 36 | data_dict = {} 37 | sample_num = self.params['args']['sample_num'] 38 | 39 | pcd_np = pcd_utils.downsample_lidar(pcd_np, sample_num) 40 | data_dict['downsample_lidar'] = pcd_np 41 | 42 | return data_dict 43 | 44 | def project_points_to_bev_map(self, points, ratio=0.1): 45 | """ 46 | Project points to BEV occupancy map with default ratio=0.1. 
47 | 48 | Parameters 49 | ---------- 50 | points : np.ndarray 51 | (N, 3) / (N, 4) 52 | 53 | ratio : float 54 | Discretization parameters. Default is 0.1. 55 | 56 | Returns 57 | ------- 58 | bev_map : np.ndarray 59 | BEV occupancy map including projected points with shape 60 | (img_row, img_col). 61 | 62 | """ 63 | L1, W1, H1, L2, W2, H2 = self.params["cav_lidar_range"] 64 | img_row = int((L2 - L1) / ratio) 65 | img_col = int((W2 - W1) / ratio) 66 | bev_map = np.zeros((img_row, img_col)) 67 | bev_origin = np.array([L1, W1, H1]).reshape(1, -1) 68 | # (N, 3) 69 | indices = ((points[:, :3] - bev_origin) / ratio).astype(int) 70 | mask = np.logical_and(indices[:, 0] > 0, indices[:, 0] < img_row) 71 | mask = np.logical_and(mask, np.logical_and(indices[:, 1] > 0, 72 | indices[:, 1] < img_col)) 73 | indices = indices[mask, :] 74 | bev_map[indices[:, 0], indices[:, 1]] = 1 75 | return bev_map 76 | -------------------------------------------------------------------------------- /v2xvit/data_utils/pre_processor/bev_preprocessor.py: -------------------------------------------------------------------------------- 1 | """ 2 | Convert lidar to bev 3 | """ 4 | 5 | import numpy as np 6 | import torch 7 | from v2xvit.data_utils.pre_processor.base_preprocessor import \ 8 | BasePreprocessor 9 | 10 | class BevPreprocessor(BasePreprocessor): 11 | def __init__(self, preprocess_params, train): 12 | super(BevPreprocessor, self).__init__(preprocess_params, train) 13 | self.lidar_range = self.params['cav_lidar_range'] 14 | self.geometry_param = preprocess_params["geometry_param"] 15 | 16 | def preprocess(self, pcd_raw): 17 | """ 18 | Preprocess the lidar points to BEV representations. 19 | 20 | Parameters 21 | ---------- 22 | pcd_raw : np.ndarray 23 | The raw lidar. 24 | 25 | Returns 26 | ------- 27 | data_dict : the structured output dictionary. 28 | """ 29 | bev = np.zeros(self.geometry_param['input_shape'], dtype=np.float32) 30 | intensity_map_count = np.zeros((bev.shape[0], bev.shape[1]), dtype=np.int) 31 | bev_origin = np.array( 32 | [self.geometry_param["L1"], self.geometry_param["W1"], 33 | self.geometry_param["H1"]]).reshape(1, -1) 34 | 35 | indices = ((pcd_raw[:, :3] - bev_origin) / self.geometry_param[ 36 | "res"]).astype(int) 37 | ## bev[indices[:, 0], indices[:, 1], indices[:, 2]] = 1 38 | # np.add.at(bev, (indices[:, 0], indices[:, 1], indices[:, 2]), 1) 39 | # bev[indices[:, 0], indices[:, 1], -1] += pcd_raw[:, 3] 40 | # intensity_map_count[indices[:, 0], indices[:, 1]] += 1 41 | 42 | for i in range(indices.shape[0]): 43 | bev[indices[i, 0], indices[i, 1], indices[i, 2]] = 1 44 | bev[indices[i, 0], indices[i, 1], -1] += pcd_raw[i, 3] 45 | intensity_map_count[indices[i, 0], indices[i, 1]] += 1 46 | divide_mask = intensity_map_count!=0 47 | bev[divide_mask, -1] = np.divide(bev[divide_mask, -1], intensity_map_count[divide_mask]) 48 | 49 | data_dict = { 50 | "bev_input": np.transpose(bev, (2, 0, 1)) 51 | } 52 | return data_dict 53 | 54 | @staticmethod 55 | def collate_batch_list(batch): 56 | """ 57 | Customized pytorch data loader collate function. 58 | 59 | Parameters 60 | ---------- 61 | batch : list 62 | List of dictionary. Each dictionary represent a single frame. 63 | 64 | Returns 65 | ------- 66 | processed_batch : dict 67 | Updated lidar batch. 68 | """ 69 | bev_input_list = [ 70 | x["bev_input"][np.newaxis, ...] 
for x in batch 71 | ] 72 | processed_batch = { 73 | "bev_input": torch.from_numpy( 74 | np.concatenate(bev_input_list, axis=0)) 75 | } 76 | return processed_batch 77 | @staticmethod 78 | def collate_batch_dict(batch): 79 | """ 80 | Customized pytorch data loader collate function. 81 | 82 | Parameters 83 | ---------- 84 | batch : dict 85 | Dict of list. Each element represents a CAV. 86 | 87 | Returns 88 | ------- 89 | processed_batch : dict 90 | Updated lidar batch. 91 | """ 92 | bev_input_list = [ 93 | x[np.newaxis, ...] for x in batch["bev_input"] 94 | ] 95 | processed_batch = { 96 | "bev_input": torch.from_numpy( 97 | np.concatenate(bev_input_list, axis=0)) 98 | } 99 | return processed_batch 100 | 101 | def collate_batch(self, batch): 102 | """ 103 | Customized pytorch data loader collate function. 104 | 105 | Parameters 106 | ---------- 107 | batch : list / dict 108 | Batched data. 109 | Returns 110 | ------- 111 | processed_batch : dict 112 | Updated lidar batch. 113 | """ 114 | if isinstance(batch, list): 115 | return self.collate_batch_list(batch) 116 | elif isinstance(batch, dict): 117 | return self.collate_batch_dict(batch) 118 | else: 119 | raise NotImplemented 120 | 121 | -------------------------------------------------------------------------------- /v2xvit/data_utils/pre_processor/sp_voxel_preprocessor.py: -------------------------------------------------------------------------------- 1 | """ 2 | Transform points to voxels using sparse conv library 3 | """ 4 | import sys 5 | 6 | import numpy as np 7 | import torch 8 | from cumm import tensorview as tv 9 | from spconv.utils import Point2VoxelCPU3d 10 | 11 | from v2xvit.data_utils.pre_processor.base_preprocessor import \ 12 | BasePreprocessor 13 | 14 | 15 | class SpVoxelPreprocessor(BasePreprocessor): 16 | def __init__(self, preprocess_params, train): 17 | super(SpVoxelPreprocessor, self).__init__(preprocess_params, 18 | train) 19 | 20 | self.lidar_range = self.params['cav_lidar_range'] 21 | self.voxel_size = self.params['args']['voxel_size'] 22 | self.max_points_per_voxel = self.params['args']['max_points_per_voxel'] 23 | 24 | if train: 25 | self.max_voxels = self.params['args']['max_voxel_train'] 26 | else: 27 | self.max_voxels = self.params['args']['max_voxel_test'] 28 | 29 | grid_size = (np.array(self.lidar_range[3:6]) - 30 | np.array(self.lidar_range[0:3])) / np.array(self.voxel_size) 31 | self.grid_size = np.round(grid_size).astype(np.int64) 32 | 33 | # use sparse conv library to generate voxel 34 | self.voxel_generator = Point2VoxelCPU3d( 35 | vsize_xyz=self.voxel_size, 36 | coors_range_xyz=self.lidar_range, 37 | max_num_points_per_voxel=self.max_points_per_voxel, 38 | num_point_features=4, 39 | max_num_voxels=self.max_voxels 40 | ) 41 | 42 | def preprocess(self, pcd_np): 43 | data_dict = {} 44 | pcd_tv = tv.from_numpy(pcd_np) 45 | voxel_output = self.voxel_generator.point_to_voxel(pcd_tv) 46 | if isinstance(voxel_output, dict): 47 | voxels, coordinates, num_points = \ 48 | voxel_output['voxels'], voxel_output['coordinates'], \ 49 | voxel_output['num_points_per_voxel'] 50 | else: 51 | voxels, coordinates, num_points = voxel_output 52 | 53 | data_dict['voxel_features'] = voxels.numpy() 54 | data_dict['voxel_coords'] = coordinates.numpy() 55 | data_dict['voxel_num_points'] = num_points.numpy() 56 | 57 | return data_dict 58 | 59 | def collate_batch(self, batch): 60 | """ 61 | Customized pytorch data loader collate function. 62 | 63 | Parameters 64 | ---------- 65 | batch : list or dict 66 | List or dictionary. 
67 | 68 | Returns 69 | ------- 70 | processed_batch : dict 71 | Updated lidar batch. 72 | """ 73 | 74 | if isinstance(batch, list): 75 | return self.collate_batch_list(batch) 76 | elif isinstance(batch, dict): 77 | return self.collate_batch_dict(batch) 78 | else: 79 | sys.exit('Batch has too be a list or a dictionarn') 80 | 81 | @staticmethod 82 | def collate_batch_list(batch): 83 | """ 84 | Customized pytorch data loader collate function. 85 | 86 | Parameters 87 | ---------- 88 | batch : list 89 | List of dictionary. Each dictionary represent a single frame. 90 | 91 | Returns 92 | ------- 93 | processed_batch : dict 94 | Updated lidar batch. 95 | """ 96 | voxel_features = [] 97 | voxel_num_points = [] 98 | voxel_coords = [] 99 | 100 | for i in range(len(batch)): 101 | voxel_features.append(batch[i]['voxel_features']) 102 | voxel_num_points.append(batch[i]['voxel_num_points']) 103 | coords = batch[i]['voxel_coords'] 104 | voxel_coords.append( 105 | np.pad(coords, ((0, 0), (1, 0)), 106 | mode='constant', constant_values=i)) 107 | 108 | voxel_num_points = torch.from_numpy(np.concatenate(voxel_num_points)) 109 | voxel_features = torch.from_numpy(np.concatenate(voxel_features)) 110 | voxel_coords = torch.from_numpy(np.concatenate(voxel_coords)) 111 | 112 | return {'voxel_features': voxel_features, 113 | 'voxel_coords': voxel_coords, 114 | 'voxel_num_points': voxel_num_points} 115 | 116 | @staticmethod 117 | def collate_batch_dict(batch: dict): 118 | """ 119 | Collate batch if the batch is a dictionary, 120 | eg: {'voxel_features': [feature1, feature2...., feature n]} 121 | 122 | Parameters 123 | ---------- 124 | batch : dict 125 | 126 | Returns 127 | ------- 128 | processed_batch : dict 129 | Updated lidar batch. 130 | """ 131 | voxel_features = \ 132 | torch.from_numpy(np.concatenate(batch['voxel_features'])) 133 | voxel_num_points = \ 134 | torch.from_numpy(np.concatenate(batch['voxel_num_points'])) 135 | coords = batch['voxel_coords'] 136 | voxel_coords = [] 137 | 138 | for i in range(len(coords)): 139 | voxel_coords.append( 140 | np.pad(coords[i], ((0, 0), (1, 0)), 141 | mode='constant', constant_values=i)) 142 | voxel_coords = torch.from_numpy(np.concatenate(voxel_coords)) 143 | 144 | return {'voxel_features': voxel_features, 145 | 'voxel_coords': voxel_coords, 146 | 'voxel_num_points': voxel_num_points} 147 | -------------------------------------------------------------------------------- /v2xvit/data_utils/pre_processor/voxel_preprocessor.py: -------------------------------------------------------------------------------- 1 | """ 2 | Convert lidar to voxel 3 | """ 4 | import sys 5 | 6 | import numpy as np 7 | import torch 8 | 9 | from v2xvit.data_utils.pre_processor.base_preprocessor import \ 10 | BasePreprocessor 11 | 12 | 13 | class VoxelPreprocessor(BasePreprocessor): 14 | def __init__(self, preprocess_params, train): 15 | super(VoxelPreprocessor, self).__init__(preprocess_params, train) 16 | self.lidar_range = self.params['cav_lidar_range'] 17 | 18 | self.vw = self.params['args']['vw'] 19 | self.vh = self.params['args']['vh'] 20 | self.vd = self.params['args']['vd'] 21 | self.T = self.params['args']['T'] 22 | 23 | def preprocess(self, pcd_np): 24 | """ 25 | Preprocess the lidar points by voxelization. 26 | 27 | Parameters 28 | ---------- 29 | pcd_np : np.ndarray 30 | The raw lidar. 31 | 32 | Returns 33 | ------- 34 | data_dict : the structured output dictionary. 
35 | """ 36 | data_dict = {} 37 | 38 | # calculate the voxel coordinates 39 | voxel_coords = ((pcd_np[:, :3] - 40 | np.floor(np.array([self.lidar_range[0], 41 | self.lidar_range[1], 42 | self.lidar_range[2]])) / ( 43 | self.vw, self.vh, self.vd))).astype(np.int32) 44 | 45 | # convert to (D, H, W) as the paper 46 | voxel_coords = voxel_coords[:, [2, 1, 0]] 47 | voxel_coords, inv_ind, voxel_counts = np.unique(voxel_coords, axis=0, 48 | return_inverse=True, 49 | return_counts=True) 50 | 51 | voxel_features = [] 52 | 53 | for i in range(len(voxel_coords)): 54 | voxel = np.zeros((self.T, 7), dtype=np.float32) 55 | pts = pcd_np[inv_ind == i] 56 | if voxel_counts[i] > self.T: 57 | pts = pts[:self.T, :] 58 | voxel_counts[i] = self.T 59 | 60 | # augment the points 61 | voxel[:pts.shape[0], :] = np.concatenate((pts, pts[:, :3] - 62 | np.mean(pts[:, :3], 0)), 63 | axis=1) 64 | voxel_features.append(voxel) 65 | 66 | data_dict['voxel_features'] = np.array(voxel_features) 67 | data_dict['voxel_coords'] = voxel_coords 68 | 69 | return data_dict 70 | 71 | def collate_batch(self, batch): 72 | """ 73 | Customized pytorch data loader collate function. 74 | 75 | Parameters 76 | ---------- 77 | batch : list or dict 78 | List or dictionary. 79 | 80 | Returns 81 | ------- 82 | processed_batch : dict 83 | Updated lidar batch. 84 | """ 85 | 86 | if isinstance(batch, list): 87 | return self.collate_batch_list(batch) 88 | elif isinstance(batch, dict): 89 | return self.collate_batch_dict(batch) 90 | else: 91 | sys.exit('Batch has too be a list or a dictionarn') 92 | 93 | @staticmethod 94 | def collate_batch_list(batch): 95 | """ 96 | Customized pytorch data loader collate function. 97 | 98 | Parameters 99 | ---------- 100 | batch : list 101 | List of dictionary. Each dictionary represent a single frame. 102 | 103 | Returns 104 | ------- 105 | processed_batch : dict 106 | Updated lidar batch. 107 | """ 108 | voxel_features = [] 109 | voxel_coords = [] 110 | 111 | for i in range(len(batch)): 112 | voxel_features.append(batch[i]['voxel_features']) 113 | coords = batch[i]['voxel_coords'] 114 | voxel_coords.append( 115 | np.pad(coords, ((0, 0), (1, 0)), 116 | mode='constant', constant_values=i)) 117 | 118 | voxel_features = torch.from_numpy(np.concatenate(voxel_features)) 119 | voxel_coords = torch.from_numpy(np.concatenate(voxel_coords)) 120 | 121 | return {'voxel_features': voxel_features, 122 | 'voxel_coords': voxel_coords} 123 | 124 | @staticmethod 125 | def collate_batch_dict(batch: dict): 126 | """ 127 | Collate batch if the batch is a dictionary, 128 | eg: {'voxel_features': [feature1, feature2...., feature n]} 129 | 130 | Parameters 131 | ---------- 132 | batch : dict 133 | 134 | Returns 135 | ------- 136 | processed_batch : dict 137 | Updated lidar batch. 
138 | """ 139 | voxel_features = \ 140 | torch.from_numpy(np.concatenate(batch['voxel_features'])) 141 | coords = batch['voxel_coords'] 142 | voxel_coords = [] 143 | 144 | for i in range(len(coords)): 145 | voxel_coords.append( 146 | np.pad(coords[i], ((0, 0), (1, 0)), 147 | mode='constant', constant_values=i)) 148 | voxel_coords = torch.from_numpy(np.concatenate(voxel_coords)) 149 | 150 | return {'voxel_features': voxel_features, 151 | 'voxel_coords': voxel_coords} 152 | -------------------------------------------------------------------------------- /v2xvit/hypes_yaml/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ydk122024/How2comm/8d357abdcb265ed4db5e4c6d9ee5400ea5799abe/v2xvit/hypes_yaml/__init__.py -------------------------------------------------------------------------------- /v2xvit/hypes_yaml/how2comm/v2xset_how2comm_stcformer.yaml: -------------------------------------------------------------------------------- 1 | name: v2xset_how2comm_stcformer_opv2v 2 | # root_dir: '/data/v2xset/train' 3 | # validate_dir: '/data/v2xset/validate' 4 | root_dir: '/data/opv2v/train' 5 | validate_dir: '/data/opv2v/validate' 6 | 7 | wild_setting: 8 | async: false 9 | async_overhead: 0 10 | seed: 20 11 | loc_err: false 12 | xyz_std: 0 13 | ryp_std: 0 14 | data_size: 1.06 15 | transmission_speed: 27 16 | backbone_delay: 0 17 | 18 | 19 | yaml_parser: "load_point_pillar_params" 20 | train_params: 21 | batch_size: &batch_size 1 22 | epoches: 20 23 | eval_freq: 2 24 | save_freq: 1 25 | max_cav: &max_cav 5 26 | frame: &frame 1 27 | 28 | 29 | fusion: 30 | core_method: 'IntermediateFusionDataset' 31 | args: 32 | cur_ego_pose_flag: true 33 | 34 | preprocess: 35 | core_method: 'SpVoxelPreprocessor' 36 | args: 37 | voxel_size: &voxel_size [0.4, 0.4, 4] 38 | max_points_per_voxel: 32 39 | max_voxel_train: 32000 40 | max_voxel_test: 70000 41 | cav_lidar_range: &cav_lidar [-140.8, -40, -3, 140.8, 40, 1] 42 | 43 | data_augment: 44 | - NAME: random_world_flip 45 | ALONG_AXIS_LIST: [ 'x' ] 46 | 47 | - NAME: random_world_rotation 48 | WORLD_ROT_ANGLE: [ -0.78539816, 0.78539816 ] 49 | 50 | - NAME: random_world_scaling 51 | WORLD_SCALE_RANGE: [ 0.95, 1.05 ] 52 | 53 | 54 | postprocess: 55 | core_method: 'VoxelPostprocessor' 56 | gt_range: *cav_lidar 57 | anchor_args: 58 | cav_lidar_range: *cav_lidar 59 | l: 3.9 60 | w: 1.6 61 | h: 1.56 62 | r: [0, 90] 63 | feature_stride: 2 64 | num: &achor_num 2 65 | target_args: 66 | pos_threshold: 0.6 67 | neg_threshold: 0.45 68 | score_threshold: 0.20 69 | order: 'hwl' 70 | max_num: 100 71 | nms_thresh: 0.15 72 | 73 | 74 | model: 75 | core_method: point_pillar_how2comm 76 | 77 | args: 78 | voxel_size: *voxel_size 79 | lidar_range: *cav_lidar 80 | anchor_number: *achor_num 81 | max_cav: *max_cav 82 | compression: 0 83 | backbone_fix: false 84 | flow_flag: true 85 | 86 | pillar_vfe: 87 | use_norm: true 88 | with_distance: false 89 | use_absolute_xyz: true 90 | num_filters: [64] 91 | point_pillar_scatter: 92 | num_features: 64 93 | 94 | base_bev_backbone: 95 | resnet: True 96 | layer_nums: &layer_nums [3, 4, 5] 97 | layer_strides: [2, 2, 2] 98 | num_filters: &num_filters [64, 128, 256] 99 | upsample_strides: [1, 2, 4] 100 | num_upsample_filter: [128, 128, 128] 101 | compression: 0 102 | voxel_size: *voxel_size 103 | shrink_header: 104 | kernal_size: [ 3 ] 105 | stride: [ 1 ] 106 | padding: [ 1 ] 107 | dim: [ 256 ] 108 | input_dim: 384 109 | 110 | fusion_args: 111 | voxel_size: *voxel_size 112 | 
downsample_rate: 1 113 | in_channels: 256 114 | n_head: 8 115 | dropout_rate: 0 116 | only_attention: true 117 | communication: 118 | thre: 0.01 119 | compressed_dim: 2 120 | request_flag: True 121 | gaussian_smooth: 122 | k_size: 5 123 | c_sigma: 1.0 124 | communication_flag: True 125 | agg_operator: 126 | mode: 'STCFormer' 127 | feature_dim: 256 128 | n_head: 8 129 | depth: 1 130 | embed_pos: None 131 | with_spe: false 132 | with_scm: false 133 | hetero: False 134 | multi_scale: true 135 | frame: *frame 136 | layer_nums: *layer_nums 137 | num_filters: *num_filters 138 | temporal_fusion: 139 | height: [100, 50, 25, 100] 140 | width: [352, 176, 88, 352] 141 | layers: 1 142 | channel: [64, 128, 256, 256] 143 | gate: False 144 | n_head: 5 145 | rte_ratio: 1 146 | hidden_dim: [64, 128, 256, 256] 147 | delay: 0 148 | compressed_dim: 2 149 | only_ego: False 150 | 151 | 152 | loss: 153 | core_method: point_pillar_loss 154 | args: 155 | cls_weight: 1.0 156 | reg: 2.0 157 | 158 | optimizer: 159 | core_method: Adam 160 | lr: 0.002 161 | args: 162 | eps: 1e-10 163 | weight_decay: 1e-4 164 | 165 | lr_scheduler: 166 | core_method: multistep 167 | gamma: 0.1 168 | step_size: [10, 20] -------------------------------------------------------------------------------- /v2xvit/hypes_yaml/yaml_utils.py: -------------------------------------------------------------------------------- 1 | import re 2 | import yaml 3 | import os 4 | import math 5 | 6 | import numpy as np 7 | 8 | 9 | def load_yaml(file, opt=None): 10 | """ 11 | Load yaml file and return a dictionary. 12 | 13 | Parameters 14 | ---------- 15 | file : string 16 | yaml file path. 17 | 18 | opt : argparser 19 | Argparser. 20 | Returns 21 | ------- 22 | param : dict 23 | A dictionary that contains defined parameters. 24 | """ 25 | if opt and opt.model_dir: 26 | file = os.path.join(opt.model_dir, 'config.yaml') 27 | 28 | stream = open(file, 'r') 29 | loader = yaml.Loader 30 | loader.add_implicit_resolver( 31 | u'tag:yaml.org,2002:float', 32 | re.compile(u'''^(?: 33 | [-+]?(?:[0-9][0-9_]*)\\.[0-9_]*(?:[eE][-+]?[0-9]+)? 34 | |[-+]?(?:[0-9][0-9_]*)(?:[eE][-+]?[0-9]+) 35 | |\\.[0-9_]+(?:[eE][-+][0-9]+)? 36 | |[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\\.[0-9_]* 37 | |[-+]?\\.(?:inf|Inf|INF) 38 | |\\.(?:nan|NaN|NAN))$''', re.X), 39 | list(u'-+0123456789.')) 40 | param = yaml.load(stream, Loader=loader) 41 | if "yaml_parser" in param: 42 | param = eval(param["yaml_parser"])(param) 43 | 44 | return param 45 | 46 | 47 | def load_voxel_params(param): 48 | """ 49 | Based on the lidar range and resolution of voxel, calcuate the anchor box 50 | and target resolution. 51 | 52 | Parameters 53 | ---------- 54 | param : dict 55 | Original loaded parameter dictionary. 
56 | 57 | Returns 58 | ------- 59 | param : dict 60 | Modified parameter dictionary with new attribute `anchor_args[W][H][L]` 61 | """ 62 | anchor_args = param['postprocess']['anchor_args'] 63 | cav_lidar_range = anchor_args['cav_lidar_range'] 64 | voxel_size = param['preprocess']['args']['voxel_size'] 65 | 66 | vw = voxel_size[0] 67 | vh = voxel_size[1] 68 | vd = voxel_size[2] 69 | 70 | anchor_args['vw'] = vw 71 | anchor_args['vh'] = vh 72 | anchor_args['vd'] = vd 73 | 74 | anchor_args['W'] = int((cav_lidar_range[3] - cav_lidar_range[0]) / vw) 75 | anchor_args['H'] = int((cav_lidar_range[4] - cav_lidar_range[1]) / vh) 76 | anchor_args['D'] = int((cav_lidar_range[5] - cav_lidar_range[2]) / vd) 77 | 78 | param['postprocess'].update({'anchor_args': anchor_args}) 79 | # sometimes we just want to visualize the data without implementing model 80 | if 'model' in param: 81 | param['model']['args']['W'] = anchor_args['W'] 82 | param['model']['args']['H'] = anchor_args['H'] 83 | param['model']['args']['D'] = anchor_args['D'] 84 | return param 85 | 86 | 87 | def load_point_pillar_params(param): 88 | """ 89 | Based on the lidar range and resolution of voxel, calcuate the anchor box 90 | and target resolution. 91 | 92 | Parameters 93 | ---------- 94 | param : dict 95 | Original loaded parameter dictionary. 96 | 97 | Returns 98 | ------- 99 | param : dict 100 | Modified parameter dictionary with new attribute. 101 | """ 102 | cav_lidar_range = param['preprocess']['cav_lidar_range'] 103 | voxel_size = param['preprocess']['args']['voxel_size'] 104 | 105 | grid_size = (np.array(cav_lidar_range[3:6]) - np.array( 106 | cav_lidar_range[0:3])) / \ 107 | np.array(voxel_size) 108 | grid_size = np.round(grid_size).astype(np.int64) 109 | param['model']['args']['point_pillar_scatter']['grid_size'] = grid_size 110 | 111 | anchor_args = param['postprocess']['anchor_args'] 112 | 113 | vw = voxel_size[0] 114 | vh = voxel_size[1] 115 | vd = voxel_size[2] 116 | 117 | anchor_args['vw'] = vw 118 | anchor_args['vh'] = vh 119 | anchor_args['vd'] = vd 120 | 121 | anchor_args['W'] = math.ceil((cav_lidar_range[3] - cav_lidar_range[0]) / vw) 122 | anchor_args['H'] = math.ceil((cav_lidar_range[4] - cav_lidar_range[1]) / vh) 123 | anchor_args['D'] = math.ceil((cav_lidar_range[5] - cav_lidar_range[2]) / vd) 124 | 125 | param['postprocess'].update({'anchor_args': anchor_args}) 126 | 127 | return param 128 | 129 | def load_second_params(param): 130 | """ 131 | Based on the lidar range and resolution of voxel, calcuate the anchor box 132 | and target resolution. 133 | 134 | Parameters 135 | ---------- 136 | param : dict 137 | Original loaded parameter dictionary. 138 | 139 | Returns 140 | ------- 141 | param : dict 142 | Modified parameter dictionary with new attribute. 
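# Worked example of load_point_pillar_params() with the values from
# v2xset_how2comm_stcformer.yaml above (cav_lidar_range [-140.8, -40, -3,
# 140.8, 40, 1] and voxel_size [0.4, 0.4, 4]); only numpy and math are needed:
import math
import numpy as np

cav_lidar_range = [-140.8, -40, -3, 140.8, 40, 1]
voxel_size = [0.4, 0.4, 4]

grid_size = np.round((np.array(cav_lidar_range[3:6]) -
                      np.array(cav_lidar_range[0:3])) / np.array(voxel_size)).astype(np.int64)
print(grid_size)     # [704 200 1] -> a 704 x 200 pillar pseudo-image

W = math.ceil((cav_lidar_range[3] - cav_lidar_range[0]) / voxel_size[0])   # 704
H = math.ceil((cav_lidar_range[4] - cav_lidar_range[1]) / voxel_size[1])   # 200
D = math.ceil((cav_lidar_range[5] - cav_lidar_range[2]) / voxel_size[2])   # 1
# With feature_stride: 2 in the postprocess block, anchors therefore live on a
# 352 x 100 map, matching the first width/height entries of temporal_fusion.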
143 | """ 144 | cav_lidar_range = param['preprocess']['cav_lidar_range'] 145 | voxel_size = param['preprocess']['args']['voxel_size'] 146 | 147 | grid_size = (np.array(cav_lidar_range[3:6]) - np.array( 148 | cav_lidar_range[0:3])) / \ 149 | np.array(voxel_size) 150 | grid_size = np.round(grid_size).astype(np.int64) 151 | param['model']['args']['grid_size'] = grid_size 152 | 153 | anchor_args = param['postprocess']['anchor_args'] 154 | 155 | vw = voxel_size[0] 156 | vh = voxel_size[1] 157 | vd = voxel_size[2] 158 | 159 | anchor_args['vw'] = vw 160 | anchor_args['vh'] = vh 161 | anchor_args['vd'] = vd 162 | 163 | anchor_args['W'] = math.ceil((cav_lidar_range[3] - cav_lidar_range[0]) / vw) 164 | anchor_args['H'] = math.ceil((cav_lidar_range[4] - cav_lidar_range[1]) / vh) 165 | anchor_args['D'] = math.ceil((cav_lidar_range[5] - cav_lidar_range[2]) / vd) 166 | 167 | param['postprocess'].update({'anchor_args': anchor_args}) 168 | 169 | return param 170 | 171 | def load_bev_params(param): 172 | """ 173 | Load bev related geometry parameters s.t. boundary, resolutions, input 174 | shape, target shape etc. 175 | 176 | Parameters 177 | ---------- 178 | param : dict 179 | Original loaded parameter dictionary. 180 | 181 | Returns 182 | ------- 183 | param : dict 184 | Modified parameter dictionary with new attribute `geometry_param`. 185 | 186 | """ 187 | res = param["preprocess"]["args"]["res"] 188 | L1, W1, H1, L2, W2, H2 = param["preprocess"]["cav_lidar_range"] 189 | downsample_rate = param["preprocess"]["args"]["downsample_rate"] 190 | 191 | def f(low, high, r): 192 | return int((high - low) / r) 193 | 194 | input_shape = ( 195 | int((f(L1, L2, res))), 196 | int((f(W1, W2, res))), 197 | int((f(H1, H2, res)) + 1) 198 | ) 199 | label_shape = ( 200 | int(input_shape[0] / downsample_rate), 201 | int(input_shape[1] / downsample_rate), 202 | 7 203 | ) 204 | geometry_param = { 205 | 'L1': L1, 206 | 'L2': L2, 207 | 'W1': W1, 208 | 'W2': W2, 209 | 'H1': H1, 210 | 'H2': H2, 211 | "downsample_rate": downsample_rate, 212 | "input_shape": input_shape, 213 | "label_shape": label_shape, 214 | "res": res 215 | } 216 | param["preprocess"]["geometry_param"] = geometry_param 217 | param["postprocess"]["geometry_param"] = geometry_param 218 | param["model"]["args"]["geometry_param"] = geometry_param 219 | return param 220 | 221 | 222 | def save_yaml(data, save_name): 223 | """ 224 | Save the dictionary into a yaml file. 225 | 226 | Parameters 227 | ---------- 228 | data : dict 229 | The dictionary contains all data. 230 | 231 | save_name : string 232 | Full path of the output yaml file. 
233 | """ 234 | 235 | with open(save_name, 'w') as outfile: 236 | yaml.dump(data, outfile, default_flow_style=False) 237 | -------------------------------------------------------------------------------- /v2xvit/loss/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ydk122024/How2comm/8d357abdcb265ed4db5e4c6d9ee5400ea5799abe/v2xvit/loss/__init__.py -------------------------------------------------------------------------------- /v2xvit/loss/pixor_loss.py: -------------------------------------------------------------------------------- 1 | from functools import reduce 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | 8 | class PixorLoss(nn.Module): 9 | def __init__(self, args): 10 | super(PixorLoss, self).__init__() 11 | self.alpha = args["alpha"] 12 | self.beta = args["beta"] 13 | self.loss_dict = {} 14 | 15 | def forward(self, output_dict, target_dict): 16 | """ 17 | Compute loss for pixor network 18 | Parameters 19 | ---------- 20 | output_dict : dict 21 | The dictionary that contains the output. 22 | 23 | target_dict : dict 24 | The dictionary that contains the target. 25 | 26 | Returns 27 | ------- 28 | total_loss : torch.Tensor 29 | Total loss. 30 | 31 | """ 32 | targets = target_dict["label_map"] 33 | cls_preds, loc_preds = output_dict["cls"], output_dict["reg"] 34 | 35 | cls_targets, loc_targets = targets.split([1, 6], dim=1) 36 | pos_count = cls_targets.sum() 37 | neg_count = (cls_targets == 0).sum() 38 | w1, w2 = neg_count / (pos_count + neg_count), pos_count / ( 39 | pos_count + neg_count) 40 | weights = torch.ones_like(cls_preds.reshape(-1)) 41 | weights[cls_targets.reshape(-1) == 1] = w1 42 | weights[cls_targets.reshape(-1) == 0] = w2 43 | # cls_targets = cls_targets.float() 44 | # cls_loss = F.binary_cross_entropy_with_logits(input=cls_preds.reshape(-1), target=cls_targets.reshape(-1), weight=weights, 45 | # reduction='mean') 46 | cls_loss = F.binary_cross_entropy_with_logits( 47 | input=cls_preds, target=cls_targets, 48 | reduction='mean') 49 | pos_pixels = cls_targets.sum() 50 | 51 | loc_loss = F.smooth_l1_loss(cls_targets * loc_preds, 52 | cls_targets * loc_targets, 53 | reduction='sum') 54 | loc_loss = loc_loss / pos_pixels if pos_pixels > 0 else loc_loss 55 | 56 | total_loss = self.alpha * cls_loss + self.beta * loc_loss 57 | 58 | self.loss_dict.update({'total_loss': total_loss, 59 | 'reg_loss': loc_loss, 60 | 'cls_loss': cls_loss}) 61 | 62 | return total_loss 63 | 64 | def logging(self, epoch, batch_id, batch_len, writer): 65 | """ 66 | Print out the loss function for current iteration. 67 | 68 | Parameters 69 | ---------- 70 | epoch : int 71 | Current epoch for training. 72 | batch_id : int 73 | The current batch. 
74 | batch_len : int 75 | Total batch length in one iteration of training, 76 | writer : SummaryWriter 77 | Used to visualize on tensorboard 78 | """ 79 | total_loss = self.loss_dict['total_loss'] 80 | reg_loss = self.loss_dict['reg_loss'] 81 | cls_loss = self.loss_dict['cls_loss'] 82 | 83 | print("[epoch %d][%d/%d], || Loss: %.4f || cls Loss: %.4f" 84 | " || reg Loss: %.4f" % ( 85 | epoch, batch_id + 1, batch_len, 86 | total_loss.item(), cls_loss.item(), reg_loss.item())) 87 | 88 | writer.add_scalar('Regression_loss', reg_loss.item(), 89 | epoch * batch_len + batch_id) 90 | writer.add_scalar('Confidence_loss', cls_loss.item(), 91 | epoch * batch_len + batch_id) 92 | 93 | 94 | def test(): 95 | torch.manual_seed(0) 96 | loss = PixorLoss(None) 97 | pred = torch.sigmoid(torch.randn(1, 7, 2, 3)) 98 | label = torch.zeros(1, 7, 2, 3) 99 | loss = loss(pred, label) 100 | print(loss) 101 | 102 | 103 | if __name__ == "__main__": 104 | test() 105 | -------------------------------------------------------------------------------- /v2xvit/loss/point_pillar_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import numpy as np 5 | 6 | 7 | class WeightedSmoothL1Loss(nn.Module): 8 | """ 9 | Code-wise Weighted Smooth L1 Loss modified based on fvcore.nn.smooth_l1_loss 10 | https://github.com/facebookresearch/fvcore/blob/master/fvcore/nn/smooth_l1_loss.py 11 | | 0.5 * x ** 2 / beta if abs(x) < beta 12 | smoothl1(x) = | 13 | | abs(x) - 0.5 * beta otherwise, 14 | where x = input - target. 15 | """ 16 | def __init__(self, beta: float = 1.0 / 9.0, code_weights: list = None): 17 | """ 18 | Args: 19 | beta: Scalar float. 20 | L1 to L2 change point. 21 | For beta values < 1e-5, L1 loss is computed. 22 | code_weights: (#codes) float list if not None. 23 | Code-wise weights. 24 | """ 25 | super(WeightedSmoothL1Loss, self).__init__() 26 | self.beta = beta 27 | if code_weights is not None: 28 | self.code_weights = np.array(code_weights, dtype=np.float32) 29 | self.code_weights = torch.from_numpy(self.code_weights).cuda() 30 | 31 | @staticmethod 32 | def smooth_l1_loss(diff, beta): 33 | if beta < 1e-5: 34 | loss = torch.abs(diff) 35 | else: 36 | n = torch.abs(diff) 37 | loss = torch.where(n < beta, 0.5 * n ** 2 / beta, n - 0.5 * beta) 38 | 39 | return loss 40 | 41 | def forward(self, input: torch.Tensor, 42 | target: torch.Tensor, weights: torch.Tensor = None): 43 | """ 44 | Args: 45 | input: (B, #anchors, #codes) float tensor. 46 | Ecoded predicted locations of objects. 47 | target: (B, #anchors, #codes) float tensor. 48 | Regression targets. 49 | weights: (B, #anchors) float tensor if not None. 50 | 51 | Returns: 52 | loss: (B, #anchors) float tensor. 53 | Weighted smooth l1 loss without reduction. 
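# Quick numeric check of WeightedSmoothL1Loss.smooth_l1_loss with the default
# beta = 1/9: residuals below beta are penalised quadratically, larger ones
# linearly, exactly as in the docstring formula above.
import torch

beta = 1.0 / 9.0
diff = torch.tensor([0.05, 0.5])      # |0.05| < beta, |0.5| >= beta
n = diff.abs()
loss = torch.where(n < beta, 0.5 * n ** 2 / beta, n - 0.5 * beta)
print(loss)                           # tensor([0.0113, 0.4444]) (approx.)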
54 | """ 55 | target = torch.where(torch.isnan(target), input, target) # ignore nan targets 56 | 57 | diff = input - target 58 | loss = self.smooth_l1_loss(diff, self.beta) 59 | 60 | # anchor-wise weighting 61 | if weights is not None: 62 | assert weights.shape[0] == loss.shape[0] and weights.shape[1] == loss.shape[1] 63 | loss = loss * weights.unsqueeze(-1) 64 | 65 | return loss 66 | 67 | 68 | class PointPillarLoss(nn.Module): 69 | def __init__(self, args): 70 | super(PointPillarLoss, self).__init__() 71 | self.reg_loss_func = WeightedSmoothL1Loss() 72 | self.alpha = 0.25 73 | self.gamma = 2.0 74 | 75 | self.cls_weight = args['cls_weight'] 76 | self.reg_coe = args['reg'] 77 | self.loss_dict = {} 78 | 79 | def forward(self, output_dict, target_dict, prefix=''): 80 | """ 81 | Parameters 82 | ---------- 83 | output_dict : dict 84 | target_dict : dict 85 | """ 86 | rm = output_dict['rm{}'.format(prefix)] 87 | psm = output_dict['psm{}'.format(prefix)] 88 | targets = target_dict['targets'] 89 | 90 | cls_preds = psm.permute(0, 2, 3, 1).contiguous() 91 | 92 | box_cls_labels = target_dict['pos_equal_one'] 93 | box_cls_labels = box_cls_labels.view(psm.shape[0], -1).contiguous() 94 | 95 | positives = box_cls_labels > 0 96 | negatives = box_cls_labels == 0 97 | negative_cls_weights = negatives * 1.0 98 | cls_weights = (negative_cls_weights + 1.0 * positives).float() 99 | reg_weights = positives.float() 100 | 101 | pos_normalizer = positives.sum(1, keepdim=True).float() 102 | reg_weights /= torch.clamp(pos_normalizer, min=1.0) 103 | cls_weights /= torch.clamp(pos_normalizer, min=1.0) 104 | cls_targets = box_cls_labels 105 | cls_targets = cls_targets.unsqueeze(dim=-1) 106 | 107 | cls_targets = cls_targets.squeeze(dim=-1) 108 | one_hot_targets = torch.zeros( 109 | *list(cls_targets.shape), 2, 110 | dtype=cls_preds.dtype, device=cls_targets.device 111 | ) 112 | one_hot_targets.scatter_(-1, cls_targets.unsqueeze(dim=-1).long(), 1.0) 113 | cls_preds = cls_preds.view(psm.shape[0], -1, 1) 114 | one_hot_targets = one_hot_targets[..., 1:] 115 | 116 | cls_loss_src = self.cls_loss_func(cls_preds, 117 | one_hot_targets, 118 | weights=cls_weights) # [N, M] 119 | cls_loss = cls_loss_src.sum() / psm.shape[0] 120 | conf_loss = cls_loss * self.cls_weight 121 | 122 | # regression 123 | rm = rm.permute(0, 2, 3, 1).contiguous() 124 | rm = rm.view(rm.size(0), -1, 7) 125 | targets = targets.view(targets.size(0), -1, 7) 126 | box_preds_sin, reg_targets_sin = self.add_sin_difference(rm, 127 | targets) 128 | loc_loss_src =\ 129 | self.reg_loss_func(box_preds_sin, 130 | reg_targets_sin, 131 | weights=reg_weights) 132 | reg_loss = loc_loss_src.sum() / rm.shape[0] 133 | reg_loss *= self.reg_coe 134 | 135 | total_loss = reg_loss + conf_loss 136 | 137 | self.loss_dict.update({'total_loss{}'.format(prefix): total_loss, 138 | 'reg_loss{}'.format(prefix): reg_loss, 139 | 'conf_loss{}'.format(prefix): conf_loss}) 140 | 141 | return total_loss 142 | 143 | def cls_loss_func(self, input: torch.Tensor, 144 | target: torch.Tensor, 145 | weights: torch.Tensor): 146 | """ 147 | Args: 148 | input: (B, #anchors, #classes) float tensor. 149 | Predicted logits for each class 150 | target: (B, #anchors, #classes) float tensor. 151 | One-hot encoded classification targets 152 | weights: (B, #anchors) float tensor. 153 | Anchor-wise weights. 154 | 155 | Returns: 156 | weighted_loss: (B, #anchors, #classes) float tensor after weighting. 
157 | """ 158 | pred_sigmoid = torch.sigmoid(input) 159 | alpha_weight = target * self.alpha + (1 - target) * (1 - self.alpha) 160 | pt = target * (1.0 - pred_sigmoid) + (1.0 - target) * pred_sigmoid 161 | focal_weight = alpha_weight * torch.pow(pt, self.gamma) 162 | 163 | bce_loss = self.sigmoid_cross_entropy_with_logits(input, target) 164 | 165 | loss = focal_weight * bce_loss 166 | 167 | if weights.shape.__len__() == 2 or \ 168 | (weights.shape.__len__() == 1 and target.shape.__len__() == 2): 169 | weights = weights.unsqueeze(-1) 170 | 171 | assert weights.shape.__len__() == loss.shape.__len__() 172 | 173 | return loss * weights 174 | 175 | @staticmethod 176 | def sigmoid_cross_entropy_with_logits(input: torch.Tensor, target: torch.Tensor): 177 | """ PyTorch Implementation for tf.nn.sigmoid_cross_entropy_with_logits: 178 | max(x, 0) - x * z + log(1 + exp(-abs(x))) in 179 | https://www.tensorflow.org/api_docs/python/tf/nn/sigmoid_cross_entropy_with_logits 180 | 181 | Args: 182 | input: (B, #anchors, #classes) float tensor. 183 | Predicted logits for each class 184 | target: (B, #anchors, #classes) float tensor. 185 | One-hot encoded classification targets 186 | 187 | Returns: 188 | loss: (B, #anchors, #classes) float tensor. 189 | Sigmoid cross entropy loss without reduction 190 | """ 191 | loss = torch.clamp(input, min=0) - input * target + \ 192 | torch.log1p(torch.exp(-torch.abs(input))) 193 | return loss 194 | 195 | @staticmethod 196 | def add_sin_difference(boxes1, boxes2, dim=6): 197 | assert dim != -1 198 | rad_pred_encoding = torch.sin(boxes1[..., dim:dim + 1]) * \ 199 | torch.cos(boxes2[..., dim:dim + 1]) 200 | rad_tg_encoding = torch.cos(boxes1[..., dim:dim + 1]) * \ 201 | torch.sin(boxes2[..., dim:dim + 1]) 202 | 203 | boxes1 = torch.cat([boxes1[..., :dim], rad_pred_encoding, 204 | boxes1[..., dim + 1:]], dim=-1) 205 | boxes2 = torch.cat([boxes2[..., :dim], rad_tg_encoding, 206 | boxes2[..., dim + 1:]], dim=-1) 207 | return boxes1, boxes2 208 | 209 | 210 | def logging(self, epoch, batch_id, batch_len, writer, pbar=None): 211 | """ 212 | Print out the loss function for current iteration. 213 | 214 | Parameters 215 | ---------- 216 | epoch : int 217 | Current epoch for training. 218 | batch_id : int 219 | The current batch. 
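# What add_sin_difference() above buys for the yaw channel (dim=6): after the
# substitution, the residual on that channel equals
# sin(t_pred)*cos(t_gt) - cos(t_pred)*sin(t_gt) = sin(t_pred - t_gt),
# so the smooth-L1 on heading becomes periodic and a 2*pi wrap costs nothing.
import math
import torch

theta_pred = torch.tensor([0.1])
theta_gt = theta_pred + 2 * math.pi           # same heading, wrapped once
encoded_diff = torch.sin(theta_pred) * torch.cos(theta_gt) - \
               torch.cos(theta_pred) * torch.sin(theta_gt)
print(encoded_diff)                           # ~0, whereas the raw difference is -2*pi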
220 | batch_len : int 221 | Total batch length in one iteration of training, 222 | writer : SummaryWriter 223 | Used to visualize on tensorboard 224 | """ 225 | total_loss = self.loss_dict['total_loss'] 226 | reg_loss = self.loss_dict['reg_loss'] 227 | conf_loss = self.loss_dict['conf_loss'] 228 | if pbar is None: 229 | print("[epoch %d][%d/%d], || Loss: %.4f || Conf Loss: %.4f" 230 | " || Loc Loss: %.4f" % ( 231 | epoch, batch_id + 1, batch_len, 232 | total_loss.item(), conf_loss.item(), reg_loss.item())) 233 | else: 234 | pbar.set_description("[epoch %d][%d/%d], || Loss: %.4f || Conf Loss: %.4f" 235 | " || Loc Loss: %.4f" % ( 236 | epoch, batch_id + 1, batch_len, 237 | total_loss.item(), conf_loss.item(), reg_loss.item())) 238 | 239 | 240 | writer.add_scalar('Regression_loss', reg_loss.item(), 241 | epoch*batch_len + batch_id) 242 | writer.add_scalar('Confidence_loss', conf_loss.item(), 243 | epoch*batch_len + batch_id) 244 | -------------------------------------------------------------------------------- /v2xvit/loss/voxel_net_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class VoxelNetLoss(nn.Module): 7 | def __init__(self, args): 8 | super(VoxelNetLoss, self).__init__() 9 | self.smoothl1loss = nn.SmoothL1Loss(size_average=False) 10 | self.alpha = args['alpha'] 11 | self.beta = args['beta'] 12 | self.reg_coe = args['reg'] 13 | self.loss_dict = {} 14 | 15 | def forward(self, output_dict, target_dict): 16 | """ 17 | Parameters 18 | ---------- 19 | output_dict : dict 20 | target_dict : dict 21 | """ 22 | rm = output_dict['rm'] 23 | psm = output_dict['psm'] 24 | 25 | pos_equal_one = target_dict['pos_equal_one'] 26 | neg_equal_one = target_dict['neg_equal_one'] 27 | targets = target_dict['targets'] 28 | 29 | p_pos = F.sigmoid(psm.permute(0, 2, 3, 1)) 30 | rm = rm.permute(0, 2, 3, 1).contiguous() 31 | rm = rm.view(rm.size(0), rm.size(1), rm.size(2), -1, 7) 32 | targets = targets.view(targets.size(0), targets.size(1), 33 | targets.size(2), -1, 7) 34 | pos_equal_one_for_reg = pos_equal_one.unsqueeze( 35 | pos_equal_one.dim()).expand(-1, -1, -1, -1, 7) 36 | 37 | rm_pos = rm * pos_equal_one_for_reg 38 | targets_pos = targets * pos_equal_one_for_reg 39 | 40 | cls_pos_loss = -pos_equal_one * torch.log(p_pos + 1e-6) 41 | cls_pos_loss = cls_pos_loss.sum() / (pos_equal_one.sum() + 1e-6) 42 | 43 | cls_neg_loss = -neg_equal_one * torch.log(1 - p_pos + 1e-6) 44 | cls_neg_loss = cls_neg_loss.sum() / (neg_equal_one.sum() + 1e-6) 45 | 46 | reg_loss = self.smoothl1loss(rm_pos, targets_pos) 47 | reg_loss = reg_loss / (pos_equal_one.sum() + 1e-6) 48 | conf_loss = self.alpha * cls_pos_loss + self.beta * cls_neg_loss 49 | 50 | total_loss = self.reg_coe * reg_loss + conf_loss 51 | 52 | self.loss_dict.update({'total_loss': total_loss, 53 | 'reg_loss': reg_loss, 54 | 'conf_loss': conf_loss}) 55 | 56 | return total_loss 57 | 58 | def logging(self, epoch, batch_id, batch_len, writer): 59 | """ 60 | Print out the loss function for current iteration. 61 | 62 | Parameters 63 | ---------- 64 | epoch : int 65 | Current epoch for training. 66 | batch_id : int 67 | The current batch. 
68 | batch_len : int 69 | Total batch length in one iteration of training, 70 | writer : SummaryWriter 71 | Used to visualize on tensorboard 72 | """ 73 | total_loss = self.loss_dict['total_loss'] 74 | reg_loss = self.loss_dict['reg_loss'] 75 | conf_loss = self.loss_dict['conf_loss'] 76 | 77 | print("[epoch %d][%d/%d], || Loss: %.4f || Conf Loss: %.4f" 78 | " || Loc Loss: %.4f" % ( 79 | epoch, batch_id + 1, batch_len, 80 | total_loss.item(), conf_loss.item(), reg_loss.item())) 81 | 82 | writer.add_scalar('Regression_loss', reg_loss.item(), 83 | epoch*batch_len + batch_id) 84 | writer.add_scalar('Confidence_loss', conf_loss.item(), 85 | epoch*batch_len + batch_id) 86 | -------------------------------------------------------------------------------- /v2xvit/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ydk122024/How2comm/8d357abdcb265ed4db5e4c6d9ee5400ea5799abe/v2xvit/models/__init__.py -------------------------------------------------------------------------------- /v2xvit/models/comm_modules/communication.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | 5 | class Communication(nn.Module): 6 | def __init__(self, args): 7 | super(Communication, self).__init__() 8 | 9 | self.smooth = False 10 | self.thre = args['thre'] 11 | self.compressed_dim = args['compressed_dim'] 12 | if 'gaussian_smooth' in args: 13 | self.smooth = True 14 | kernel_size = args['gaussian_smooth']['k_size'] 15 | c_sigma = args['gaussian_smooth']['c_sigma'] 16 | self.gaussian_filter = nn.Conv2d(1, 1, kernel_size=kernel_size, stride=1, padding=(kernel_size-1)//2) 17 | self.init_gaussian_filter(kernel_size, c_sigma) 18 | self.gaussian_filter.requires_grad = False 19 | 20 | def init_gaussian_filter(self, k_size=5, sigma=1): 21 | def _gen_gaussian_kernel(k_size=5, sigma=1): 22 | center = k_size // 2 23 | x, y = np.mgrid[0 - center : k_size - center, 0 - center : k_size - center] 24 | g = 1 / (2 * np.pi * sigma) * np.exp(-(np.square(x) + np.square(y)) / (2 * np.square(sigma))) 25 | return g 26 | gaussian_kernel = _gen_gaussian_kernel(k_size, sigma) 27 | self.gaussian_filter.weight.data = torch.Tensor(gaussian_kernel).to(self.gaussian_filter.weight.device).unsqueeze(0).unsqueeze(0) 28 | self.gaussian_filter.bias.data.zero_() 29 | 30 | def forward(self, psm): 31 | B = len(psm) 32 | _, _, H, W = psm[0].shape 33 | 34 | 35 | private_confidence_maps = [] 36 | private_communication_masks = [] 37 | communication_rates = [] 38 | 39 | for b in range(B): 40 | ori_private_communication_maps = psm[b].sigmoid().max(dim=1)[0].unsqueeze(1) 41 | 42 | if self.smooth: 43 | private_communication_maps = self.gaussian_filter(ori_private_communication_maps) 44 | else: 45 | private_communication_maps = ori_private_communication_maps 46 | private_confidence_maps.append(private_communication_maps) 47 | 48 | 49 | ones_mask = torch.ones_like(private_communication_maps).to(private_communication_maps.device) 50 | zeros_mask = torch.zeros_like(private_communication_maps).to(private_communication_maps) 51 | 52 | private_mask = torch.where(private_communication_maps > self.thre, ones_mask, zeros_mask) 53 | cav_num = private_mask.shape[0] 54 | private_rate = private_mask[1:].sum()/((cav_num-1) * H * W) 55 | 56 | private_mask_nodiag = private_mask.clone() 57 | ones_mask = torch.ones_like(private_mask).to(private_mask.device) 58 | private_mask_nodiag[::2] = ones_mask[::2] 
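# Standalone sketch of the two steps Communication.forward() applies to each
# confidence map, using the yaml settings (gaussian_smooth k_size = 5,
# c_sigma = 1.0, thre = 0.01): smooth with a fixed Gaussian kernel, then
# threshold into a binary mask whose mean over the non-ego agents is the
# communication rate.  Toy tensors only, not tied to real detections.
import numpy as np
import torch

def gen_gaussian_kernel(k_size=5, sigma=1.0):
    # same formula as _gen_gaussian_kernel() above
    center = k_size // 2
    x, y = np.mgrid[-center:k_size - center, -center:k_size - center]
    return 1 / (2 * np.pi * sigma) * np.exp(-(x ** 2 + y ** 2) / (2 * sigma ** 2))

print(gen_gaussian_kernel()[2, 2])            # peak ~0.159 = 1 / (2 * pi)

thre = 0.01
cav_num, anchors, H, W = 3, 2, 4, 4
psm = torch.randn(cav_num, anchors, H, W)     # toy classification logits
conf = psm.sigmoid().max(dim=1)[0].unsqueeze(1)
mask = (conf > thre).float()                  # 1 = this location is transmitted
rate = mask[1:].sum() / ((cav_num - 1) * H * W)
print(rate)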
59 | private_communication_masks.append(private_mask_nodiag) 60 | communication_rates.append(private_rate) 61 | 62 | communication_rates = sum(communication_rates)/B 63 | private_mask = torch.cat(private_communication_masks, dim=0) 64 | 65 | return private_mask, communication_rates, private_confidence_maps 66 | -------------------------------------------------------------------------------- /v2xvit/models/comm_modules/mutual_communication.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | import torch.nn.functional as F 5 | import torch.optim as optim 6 | import random 7 | 8 | 9 | class Channel_Request_Attention(nn.Module): 10 | def __init__(self, in_planes, ratio=16): 11 | super(Channel_Request_Attention, self).__init__() 12 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 13 | self.max_pool = nn.AdaptiveMaxPool2d(1) 14 | 15 | self.sharedMLP = nn.Sequential( 16 | nn.Conv2d(in_planes, in_planes // ratio, 1, bias=False), nn.ReLU(), 17 | nn.Conv2d(in_planes // ratio, in_planes, 1, bias=False)) 18 | self.sigmoid = nn.Sigmoid() 19 | 20 | def forward(self, x): 21 | avgout = self.sharedMLP(self.avg_pool(x)) 22 | maxout = self.sharedMLP(self.max_pool(x)) 23 | return self.sigmoid(avgout + maxout) 24 | 25 | class Spatial_Request_Attention(nn.Module): 26 | def __init__(self, kernel_size=3): 27 | super(Spatial_Request_Attention, self).__init__() 28 | assert kernel_size in (3, 7), "kernel size must be 3 or 7" 29 | padding = 3 if kernel_size == 7 else 1 30 | 31 | self.conv = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False) 32 | self.sigmoid = nn.Sigmoid() 33 | 34 | def forward(self, x): 35 | avgout = torch.mean(x, dim=1, keepdim=True) 36 | maxout, _ = torch.max(x, dim=1, keepdim=True) 37 | x = torch.cat([avgout, maxout], dim=1) 38 | x = self.conv(x) 39 | return self.sigmoid(x) 40 | 41 | 42 | class StatisticsNetwork(nn.Module): 43 | def __init__(self, img_feature_channels: int): 44 | 45 | super().__init__() 46 | self.conv1 = nn.Conv2d( 47 | in_channels=img_feature_channels, out_channels=img_feature_channels*2, kernel_size=1, stride=1 48 | ) 49 | self.conv2 = nn.Conv2d( 50 | in_channels=img_feature_channels*2, out_channels=img_feature_channels*2, kernel_size=1, stride=1 51 | ) 52 | self.conv3 = nn.Conv2d( 53 | in_channels=img_feature_channels*2, out_channels=1, kernel_size=1, stride=1) 54 | self.relu = nn.ReLU() 55 | 56 | def forward(self, concat_feature: torch.Tensor) -> torch.Tensor: 57 | x = self.conv1(concat_feature) 58 | x = self.relu(x) 59 | x = self.conv2(x) 60 | x = self.relu(x) 61 | local_statistics = self.conv3(x) 62 | return local_statistics 63 | 64 | 65 | class DeepInfoMaxLoss(nn.Module): 66 | def __init__(self, loss_coeff=1) -> None: 67 | super().__init__() 68 | self.loss_coeff = loss_coeff 69 | 70 | def __call__(self, T: torch.Tensor, T_prime: torch.Tensor) -> float: 71 | 72 | joint_expectation = (-F.softplus(-T)).mean() 73 | marginal_expectation = F.softplus(T_prime).mean() 74 | mutual_info = joint_expectation - marginal_expectation 75 | 76 | return -mutual_info*self.loss_coeff 77 | 78 | 79 | class Communication(nn.Module): 80 | def __init__(self, args, in_planes): 81 | super(Communication, self).__init__() 82 | self.channel_request = Channel_Request_Attention(in_planes) 83 | self.spatial_request = Spatial_Request_Attention() 84 | self.channel_fusion = nn.Conv2d(in_planes*2, in_planes, 1, bias=False) 85 | self.spatial_fusion = nn.Conv2d(2, 1, 1, bias=False) 86 | self.statisticsNetwork 
= StatisticsNetwork(in_planes*2) 87 | self.mutual_loss = DeepInfoMaxLoss() 88 | self.request_flag = args['request_flag'] 89 | 90 | self.smooth = False 91 | self.thre = args['thre'] 92 | if 'gaussian_smooth' in args: 93 | self.smooth = True 94 | kernel_size = args['gaussian_smooth']['k_size'] 95 | self.kernel_size = kernel_size 96 | c_sigma = args['gaussian_smooth']['c_sigma'] 97 | self.gaussian_filter = nn.Conv2d( 98 | 1, 1, kernel_size=kernel_size, stride=1, padding=(kernel_size-1)//2) 99 | self.init_gaussian_filter(kernel_size, c_sigma) 100 | self.gaussian_filter.requires_grad = False 101 | 102 | x = torch.arange(-(kernel_size - 1) // 2, (kernel_size + 1) // 2, dtype=torch.float32) 103 | d1_gaussian_filter = torch.exp(-x**2 / (2 * c_sigma**2)) 104 | d1_gaussian_filter /= d1_gaussian_filter.sum() 105 | 106 | self.d1_gaussian_filter = d1_gaussian_filter.view(1, 1, kernel_size).cuda() 107 | 108 | def init_gaussian_filter(self, k_size=5, sigma=1): 109 | def _gen_gaussian_kernel(k_size=5, sigma=1): 110 | center = k_size // 2 111 | x, y = np.mgrid[0 - center: k_size - 112 | center, 0 - center: k_size - center] 113 | g = 1 / (2 * np.pi * sigma) * np.exp(-(np.square(x) + 114 | np.square(y)) / (2 * np.square(sigma))) 115 | return g 116 | gaussian_kernel = _gen_gaussian_kernel(k_size, sigma) 117 | gaussian_kernel = torch.Tensor(gaussian_kernel).to( 118 | self.gaussian_filter.weight.device).unsqueeze(0).unsqueeze(0) 119 | self.gaussian_filter.weight.data = gaussian_kernel 120 | self.gaussian_filter.bias.data.zero_() 121 | 122 | def forward(self, feat_list,confidence_map_list=None): 123 | send_feats = [] 124 | comm_rate_list = [] 125 | sparse_mask_list = [] 126 | total_loss = torch.zeros(1).to(feat_list[0].device) 127 | for bs in range(len(feat_list)): 128 | agent_feature = feat_list[bs] 129 | cav_num, C, H, W = agent_feature.shape 130 | if cav_num == 1: 131 | send_feats.append(agent_feature) 132 | ones_mask = torch.ones(cav_num, C, H, W).to(feat_list[0].device) 133 | sparse_mask_list.append(ones_mask) 134 | continue 135 | 136 | collaborator_feature = torch.tensor([]).to(agent_feature.device) 137 | sparse_batch_mask = torch.tensor([]).to(agent_feature.device) 138 | 139 | agent_channel_attention = self.channel_request( 140 | agent_feature) 141 | agent_spatial_attention = self.spatial_request( 142 | agent_feature) 143 | agent_activation = torch.mean(agent_feature, dim=1, keepdims=True).sigmoid() 144 | agent_activation = self.gaussian_filter(agent_activation) 145 | 146 | ego_channel_request = ( 147 | 1 - agent_channel_attention[0, ]).unsqueeze(0) 148 | ego_spatial_request = ( 149 | 1 - agent_spatial_attention[0, ]).unsqueeze(0) 150 | 151 | 152 | for i in range(cav_num - 1): 153 | if self.request_flag: 154 | channel_coefficient = self.channel_fusion(torch.cat( 155 | [ego_channel_request, agent_channel_attention[i+1, ].unsqueeze(0)], dim=1)) 156 | spatial_coefficient = self.spatial_fusion(torch.cat( 157 | [ego_spatial_request, agent_spatial_attention[i+1, ].unsqueeze(0)], dim=1)) 158 | else: 159 | channel_coefficient = agent_channel_attention[i+1, ].unsqueeze( 160 | 0) 161 | spatial_coefficient = agent_spatial_attention[i+1, ].unsqueeze( 162 | 0) 163 | 164 | spatial_coefficient = spatial_coefficient.sigmoid() 165 | channel_coefficient = channel_coefficient.sigmoid() 166 | smoth_channel_coefficient = F.conv1d(channel_coefficient.reshape(1,1,C), self.d1_gaussian_filter, padding=(self.kernel_size - 1) // 2) 167 | channel_coefficient = smoth_channel_coefficient.reshape(1,C,1,1) 168 | 169 | 
spatial_coefficient = self.gaussian_filter(spatial_coefficient) 170 | sparse_matrix = channel_coefficient * spatial_coefficient 171 | temp_activation = agent_activation[i+1, ].unsqueeze(0) 172 | sparse_matrix = sparse_matrix * temp_activation 173 | 174 | if self.thre > 0: 175 | ones_mask = torch.ones_like( 176 | sparse_matrix).to(sparse_matrix.device) 177 | zeros_mask = torch.zeros_like( 178 | sparse_matrix).to(sparse_matrix.device) 179 | sparse_mask = torch.where( 180 | sparse_matrix > self.thre, ones_mask, zeros_mask) 181 | else: 182 | K = int(C * H * W * random.uniform(0, 0.3)) 183 | communication_maps = sparse_matrix.reshape(1, C * H * W) 184 | _, indices = torch.topk(communication_maps, k=K, sorted=False) 185 | communication_mask = torch.zeros_like(communication_maps).to(communication_maps.device) 186 | ones_fill = torch.ones(1, K, dtype=communication_maps.dtype, device=communication_maps.device) 187 | sparse_mask = torch.scatter(communication_mask, -1, indices, ones_fill).reshape(1, C, H, W) 188 | 189 | comm_rate = sparse_mask.sum()/(C*H*W) 190 | comm_rate_list.append(comm_rate) 191 | 192 | collaborator_feature = torch.cat( 193 | [collaborator_feature, agent_feature[i+1, ].unsqueeze(0)*sparse_mask], dim=0) 194 | sparse_batch_mask = torch.cat( 195 | [sparse_batch_mask, sparse_mask], dim=0) 196 | 197 | 198 | org_feature = agent_feature.clone() 199 | sparse_feature = torch.cat( 200 | [agent_feature[:1], collaborator_feature], dim=0) 201 | send_feats.append(sparse_feature) 202 | ego_mask = torch.ones_like(agent_feature[:1]).to( 203 | agent_feature[:1].device) 204 | sparse_batch_mask = torch.cat( 205 | [ego_mask, sparse_batch_mask], dim=0) 206 | sparse_mask_list.append(sparse_batch_mask) 207 | 208 | org_feature_prime = torch.cat( 209 | [org_feature[1:], org_feature[0].unsqueeze(0)], dim=0) 210 | local_mutual = self.statisticsNetwork( 211 | torch.cat([org_feature, sparse_feature], dim=1)) 212 | local_mutual_prime = self.statisticsNetwork( 213 | torch.cat([org_feature_prime, sparse_feature], dim=1)) 214 | loss = self.mutual_loss(local_mutual, local_mutual_prime) 215 | total_loss += loss 216 | 217 | if len(comm_rate_list) > 0: 218 | mean_rate = sum(comm_rate_list) / len(comm_rate_list) 219 | else: 220 | mean_rate = torch.tensor(0).to(feat_list[0].device) 221 | sparse_mask = torch.cat(sparse_mask_list, dim=0) 222 | 223 | return send_feats, total_loss, mean_rate, sparse_mask 224 | -------------------------------------------------------------------------------- /v2xvit/models/fuse_modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ydk122024/How2comm/8d357abdcb265ed4db5e4c6d9ee5400ea5799abe/v2xvit/models/fuse_modules/__init__.py -------------------------------------------------------------------------------- /v2xvit/models/fuse_modules/fuse_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | from einops import rearrange 5 | from v2xvit.utils.common_utils import torch_tensor_to_numpy 6 | 7 | 8 | def regroup(dense_feature, record_len, max_len): 9 | """ 10 | Regroup the data based on the record_len. 11 | Parameters 12 | ---------- 13 | dense_feature : torch.Tensor 14 | N, C, H, W 15 | record_len : list 16 | [sample1_len, sample2_len, ...] 
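# The DeepInfoMaxLoss used above is a Jensen-Shannon style mutual-information
# lower bound in the spirit of Deep InfoMax: the statistics network T scores
# matched pairs (original feature, its sparsified version), T_prime scores
# mismatched pairs, and the loss maximises E[-softplus(-T)] - E[softplus(T_prime)].
# A toy check that matched pairs scoring higher drives the loss down:
import torch
import torch.nn.functional as F

def dim_loss(T, T_prime, coeff=1.0):
    mutual_info = (-F.softplus(-T)).mean() - F.softplus(T_prime).mean()
    return -mutual_info * coeff

good = dim_loss(torch.full((4, 1), 3.0), torch.full((4, 1), -3.0))
bad = dim_loss(torch.full((4, 1), -3.0), torch.full((4, 1), 3.0))
print(good.item() < bad.item())               # True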
17 | max_len : int 18 | Maximum cav number 19 | Returns 20 | ------- 21 | regroup_feature : torch.Tensor 22 | B, L, C, H, W 23 | """ 24 | cum_sum_len = list(np.cumsum(torch_tensor_to_numpy(record_len))) 25 | split_features = torch.tensor_split(dense_feature, 26 | cum_sum_len[:-1]) 27 | regroup_features = [] 28 | mask = [] 29 | 30 | for split_feature in split_features: 31 | # M, C, H, W 32 | feature_shape = split_feature.shape 33 | 34 | # the maximum M is 5 as most 5 cavs 35 | padding_len = max_len - feature_shape[0] 36 | mask.append([1] * feature_shape[0] + [0] * padding_len) 37 | 38 | padding_tensor = torch.zeros(padding_len, feature_shape[1], 39 | feature_shape[2], feature_shape[3]) 40 | padding_tensor = padding_tensor.to(split_feature.device) 41 | 42 | split_feature = torch.cat([split_feature, padding_tensor], 43 | dim=0) 44 | 45 | # 1, 5C, H, W 46 | split_feature = split_feature.view(-1, 47 | feature_shape[2], 48 | feature_shape[3]).unsqueeze(0) 49 | regroup_features.append(split_feature) 50 | 51 | # B, 5C, H, W 52 | regroup_features = torch.cat(regroup_features, dim=0) 53 | # B, L, C, H, W 54 | regroup_features = rearrange(regroup_features, 55 | 'b (l c) h w -> b l c h w', 56 | l=max_len) 57 | mask = torch.from_numpy(np.array(mask)).to(regroup_features.device) 58 | 59 | return regroup_features, mask -------------------------------------------------------------------------------- /v2xvit/models/fuse_modules/how2comm_deformable.py: -------------------------------------------------------------------------------- 1 | from turtle import update 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | import numpy as np 6 | from torch.nn import functional as F 7 | from torch import batch_norm, einsum 8 | from einops import rearrange, repeat 9 | from icecream import ic 10 | 11 | from v2xvit.models.sub_modules.torch_transformation_utils import warp_affine_simple 12 | from v2xvit.models.comm_modules.communication import Communication 13 | from v2xvit.models.sub_modules.how2comm_preprocess import How2commPreprocess 14 | from v2xvit.models.fuse_modules.stcformer import STCFormer 15 | 16 | class How2comm(nn.Module): 17 | def __init__(self, args, args_pre): 18 | super(How2comm, self).__init__() 19 | 20 | self.max_cav = 5 21 | self.communication = False 22 | self.round = 1 23 | if 'communication' in args: 24 | self.communication = True 25 | self.naive_communication = Communication(args['communication']) 26 | if 'round' in args['communication']: 27 | self.round = args['communication']['round'] 28 | print("communication:", self.communication) 29 | self.communication_flag = args['communication_flag'] 30 | self.discrete_ratio = args['voxel_size'][0] 31 | self.downsample_rate = args['downsample_rate'] 32 | self.async_flag = True 33 | self.channel_fuse = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=7, stride=1, padding=3) 34 | 35 | self.agg_mode = args['agg_operator']['mode'] 36 | self.multi_scale = args['multi_scale'] 37 | self.how2comm = How2commPreprocess(args_pre, channel=64, delay=1) 38 | if self.multi_scale: 39 | layer_nums = args['layer_nums'] 40 | num_filters = args['num_filters'] 41 | self.num_levels = len(layer_nums) 42 | self.fuse_modules = nn.ModuleList() 43 | for idx in range(self.num_levels): 44 | if self.agg_mode == 'STCFormer': 45 | fuse_network = STCFormer( 46 | channel=num_filters[idx], args=args['temporal_fusion'], idx=idx) 47 | self.fuse_modules.append(fuse_network) 48 | 49 | def regroup(self, x, record_len): 50 | cum_sum_len = 
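# Example shapes for regroup() above: five CAV feature maps coming from two
# samples (record_len = [2, 3]) are zero-padded to max_len = 5 agents each.
# Toy sizes; assumes the v2xvit package in this repo is importable.
import torch
from v2xvit.models.fuse_modules.fuse_utils import regroup

dense_feature = torch.randn(5, 64, 100, 352)          # N, C, H, W
record_len = torch.tensor([2, 3])
features, mask = regroup(dense_feature, record_len, max_len=5)
print(features.shape)     # torch.Size([2, 5, 64, 100, 352])
print(mask)               # [[1, 1, 0, 0, 0], [1, 1, 1, 0, 0]]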
torch.cumsum(record_len, dim=0) 51 | split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) 52 | return split_x 53 | 54 | def forward(self, x, psm, record_len, pairwise_t_matrix, backbone=None, heads=None, history=None): 55 | _, C, H, W = x.shape 56 | B, L = pairwise_t_matrix.shape[:2] 57 | 58 | pairwise_t_matrix = pairwise_t_matrix[:, :, :, [ 59 | 0, 1], :][:, :, :, :, [0, 1, 3]] 60 | pairwise_t_matrix[..., 0, 1] = pairwise_t_matrix[..., 0, 1] * H / W 61 | pairwise_t_matrix[..., 1, 0] = pairwise_t_matrix[..., 1, 0] * W / H 62 | pairwise_t_matrix[..., 0, 2] = pairwise_t_matrix[..., 0, 63 | 2] / (self.downsample_rate * self.discrete_ratio * W) * 2 64 | pairwise_t_matrix[..., 1, 2] = pairwise_t_matrix[..., 1, 65 | 2] / (self.downsample_rate * self.discrete_ratio * H) * 2 66 | 67 | 68 | if history and self.async_flag: 69 | feat_final, offset_loss = self.how2comm(x, history, record_len, backbone, heads) 70 | x = feat_final 71 | else: 72 | offset_loss = torch.zeros(1).to(x.device) 73 | neighbor_psm_list = [] 74 | if history: 75 | his = history[0] 76 | else: 77 | his = x 78 | 79 | if self.multi_scale: 80 | ups = [] 81 | ups_temporal = [] 82 | ups_exclusive = [] 83 | ups_common = [] 84 | with_resnet = True if hasattr(backbone, 'resnet') else False 85 | if with_resnet: 86 | feats = backbone.resnet(x) 87 | history_feats = backbone.resnet(his) 88 | 89 | for i in range(self.num_levels): 90 | x = feats[i] if with_resnet else backbone.blocks[i](x) 91 | his = history_feats[i] if with_resnet else backbone.blocks[i](his) 92 | 93 | if i == 0: 94 | if self.communication: 95 | batch_confidence_maps = self.regroup(psm, record_len) 96 | _, _, confidence_maps = self.naive_communication(batch_confidence_maps) 97 | 98 | batch_temp_features = self.regroup(x, record_len) 99 | batch_temp_features_his = self.regroup(his, record_len) 100 | temp_list = [] 101 | temp_psm_list = [] 102 | history_list = [] 103 | for b in range(B): 104 | N = record_len[b] 105 | t_matrix = pairwise_t_matrix[b][:N, :N, :, :] 106 | temp_features = batch_temp_features[b] 107 | C, H, W = temp_features.shape[1:] 108 | neighbor_feature = warp_affine_simple(temp_features, 109 | t_matrix[0, 110 | :, :, :], 111 | (H, W)) 112 | temp_list.append(neighbor_feature) 113 | 114 | temp_features_his = batch_temp_features_his[b] 115 | C, H, W = temp_features_his.shape[1:] 116 | neighbor_feature_his = warp_affine_simple(temp_features_his, 117 | t_matrix[0, 118 | :, :, :], 119 | (H, W)) 120 | history_list.append(neighbor_feature_his) 121 | 122 | temp_psm_list.append(warp_affine_simple(confidence_maps[b], t_matrix[0, :, :, :], (H, W))) 123 | x = torch.cat(temp_list, dim=0) 124 | his = torch.cat(history_list, dim=0) 125 | if self.communication_flag: 126 | sparse_feats, commu_loss, communication_rates, sparse_history = self.how2comm.communication( 127 | x, record_len,history_list,temp_psm_list) 128 | x = F.interpolate(sparse_feats, scale_factor=1, mode='bilinear', align_corners=False) 129 | x = self.channel_fuse(x) 130 | his = F.interpolate(sparse_history, scale_factor=1, mode='bilinear', align_corners=False) 131 | his = self.channel_fuse(his) 132 | else: 133 | communication_rates = torch.tensor(0).to(x.device) 134 | commu_loss = torch.zeros(1).to(x.device) 135 | else: 136 | communication_rates = torch.tensor(0).to(x.device) 137 | 138 | batch_node_features = self.regroup(x, record_len) 139 | batch_node_features_his = self.regroup(his, record_len) 140 | 141 | x_fuse = [] 142 | x_temporal = [] 143 | x_exclusive = [] 144 | x_common = [] 145 | for b in range(B): 146 
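# How the forward() above rescales pairwise_t_matrix for warp_affine_simple():
# metric translations are mapped to the normalised [-1, 1] coordinates that
# torch.nn.functional.affine_grid expects.  With the yaml values
# (downsample_rate 1, voxel 0.4 m) and a 704-pixel-wide spatial_features map,
# the map spans 0.4 * 704 = 281.6 m, so a 140.8 m offset (half the x-range)
# becomes a normalised offset of 1.0, i.e. half of the 2-unit-wide grid.
downsample_rate, discrete_ratio, W = 1, 0.4, 704
t_x_metres = 140.8
t_x_norm = t_x_metres / (downsample_rate * discrete_ratio * W) * 2
print(t_x_norm)           # 1.0 (up to float rounding)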
| N = record_len[b] 147 | t_matrix = pairwise_t_matrix[b][:N, :N, :, :] 148 | node_features = batch_node_features[b] 149 | node_features_his = batch_node_features_his[b] 150 | if i == 0: 151 | neighbor_feature = node_features 152 | neighbor_feature_his = node_features_his 153 | neighbor_psm = warp_affine_simple( 154 | confidence_maps[b], t_matrix[0, :, :, :], (H, W)) 155 | 156 | else: 157 | C, H, W = node_features.shape[1:] 158 | neighbor_feature = warp_affine_simple(node_features, 159 | t_matrix[0, 160 | :, :, :], 161 | (H, W)) 162 | neighbor_feature_his = warp_affine_simple(node_features_his, 163 | t_matrix[0, 164 | :, :, :], 165 | (H, W)) 166 | 167 | feature_shape = neighbor_feature.shape 168 | padding_len = self.max_cav - feature_shape[0] 169 | padding_feature = torch.zeros(padding_len, feature_shape[1], 170 | feature_shape[2], feature_shape[3]) 171 | padding_feature = padding_feature.to( 172 | neighbor_feature.device) 173 | neighbor_feature = torch.cat([neighbor_feature, padding_feature], 174 | dim=0) 175 | 176 | if i == 0: 177 | padding_map = torch.zeros( 178 | padding_len, 1, feature_shape[2], feature_shape[3]) 179 | padding_map = padding_map.to(neighbor_feature.device) 180 | neighbor_psm = torch.cat( 181 | [neighbor_psm, padding_map], dim=0) 182 | neighbor_psm_list.append(neighbor_psm) 183 | 184 | if self.agg_mode == "STCFormer": 185 | fusion, output_list = self.fuse_modules[i](neighbor_feature, neighbor_psm_list[b], neighbor_feature_his, i) 186 | x_fuse.append(fusion) 187 | x_temporal.append(output_list[0]) 188 | x_exclusive.append(output_list[1]) 189 | x_common.append(output_list[2]) 190 | 191 | x_fuse = torch.stack(x_fuse) 192 | x_temporal = torch.stack(x_temporal) 193 | x_exclusive = torch.stack(x_exclusive) 194 | x_common = torch.stack(x_common) 195 | 196 | if len(backbone.deblocks) > 0: 197 | ups.append(backbone.deblocks[i](x_fuse)) 198 | ups_temporal.append(backbone.deblocks[i](x_temporal)) 199 | ups_exclusive.append(backbone.deblocks[i](x_exclusive)) 200 | ups_common.append(backbone.deblocks[i](x_common)) 201 | else: 202 | ups.append(x_fuse) 203 | 204 | if len(ups) > 1: 205 | x_fuse = torch.cat(ups, dim=1) 206 | x_temporal = torch.cat(ups_temporal, dim=1) 207 | x_exclusive = torch.cat(ups_exclusive, dim=1) 208 | x_common = torch.cat(ups_common, dim=1) 209 | elif len(ups) == 1: 210 | x_fuse = ups[0] 211 | 212 | if len(backbone.deblocks) > self.num_levels: 213 | x_fuse = backbone.deblocks[-1](x_fuse) 214 | 215 | return x_fuse, communication_rates, {}, offset_loss, commu_loss, None, [x_temporal, x_exclusive, x_common] 216 | -------------------------------------------------------------------------------- /v2xvit/models/fuse_modules/stcformer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import os 3 | from torch import nn 4 | import torch.nn.functional as F 5 | from icecream import ic 6 | import numpy as np 7 | from v2xvit.models.fuse_modules.how2comm_deformable_transformer import RPN_transformer_deformable_mtf_singlescale 8 | 9 | 10 | class ScaledDotProductAttention(nn.Module): 11 | def __init__(self, dim): 12 | super(ScaledDotProductAttention, self).__init__() 13 | self.sqrt_dim = np.sqrt(dim) 14 | 15 | def forward(self, query, key, value): 16 | score = torch.bmm(query, key.transpose(1, 2)) / self.sqrt_dim 17 | attn = F.softmax(score, -1) 18 | context = torch.bmm(attn, value) 19 | return context 20 | 21 | 22 | class TemporalAttention(nn.Module): 23 | def __init__(self, feature_dim): 24 | super(TemporalAttention, 
self).__init__() 25 | self.att = ScaledDotProductAttention(feature_dim) 26 | self.hidden_dim = feature_dim * 2 27 | self.conv_query = nn.Conv2d( 28 | feature_dim, self.hidden_dim, kernel_size=3, padding=1) 29 | self.conv_key = nn.Conv2d( 30 | feature_dim, self.hidden_dim, kernel_size=3, padding=1) 31 | self.conv_value = nn.Conv2d( 32 | feature_dim, self.hidden_dim, kernel_size=3, padding=1) 33 | self.conv_temporal_key = nn.Conv1d( 34 | self.hidden_dim, self.hidden_dim, kernel_size=1, stride=1) 35 | self.conv_temporal_value = nn.Conv1d( 36 | self.hidden_dim, self.hidden_dim, kernel_size=1, stride=1) 37 | self.pool = nn.AdaptiveAvgPool2d((1, 1)) 38 | self.conv_feat = nn.Conv2d( 39 | self.hidden_dim, feature_dim, kernel_size=3, padding=1) 40 | 41 | def forward(self, x): 42 | frame, C, H, W = x.shape 43 | ego = x[:1] 44 | query = self.conv_query(ego) 45 | query = query.view(1, self.hidden_dim, -1).permute(2, 0, 1) 46 | 47 | 48 | key = self.conv_key(x) 49 | key_avg = key 50 | value = self.conv_value(x) 51 | val_avg = value 52 | key = key.view(frame, self.hidden_dim, -1).permute(2, 0, 1) 53 | value = value.view(frame, self.hidden_dim, - 54 | 1).permute(2, 0, 1) 55 | 56 | 57 | key_avg = self.pool(key_avg).squeeze(-1).squeeze(-1) 58 | val_avg = self.pool(val_avg).squeeze(-1).squeeze(-1) 59 | key_avg = self.conv_temporal_key( 60 | key_avg.unsqueeze(0).permute(0, 2, 1)) 61 | val_avg = self.conv_temporal_value( 62 | val_avg.unsqueeze(0).permute(0, 2, 1)) 63 | key_avg = key_avg.permute(0, 2, 1) 64 | val_avg = val_avg.permute(0, 2, 1) 65 | key = key * key_avg 66 | value = value * val_avg 67 | 68 | 69 | x = self.att(query, key, value) 70 | x = x.permute(1, 2, 0).view(1, self.hidden_dim, H, W) 71 | out = self.conv_feat(x) 72 | 73 | return out 74 | 75 | 76 | class LateFusion(nn.Module): 77 | def __init__(self, channel): 78 | super(LateFusion, self).__init__() 79 | self.channel = channel 80 | self.gate_1 = nn.Conv2d( 81 | self.channel, 1, kernel_size=3, stride=1, padding=1) 82 | self.gate_2 = nn.Conv2d( 83 | self.channel, 1, kernel_size=3, stride=1, padding=1) 84 | 85 | def forward(self, exc, com): 86 | weight_1 = self.gate_1(exc) 87 | weight_2 = self.gate_2(com) 88 | weights = torch.cat([weight_1, weight_2], dim=1) 89 | weights = torch.softmax(weights, dim=1) 90 | final = weights[:, :1, :, :] * exc + \ 91 | weights[:, 1:, :, :] * com # 92 | 93 | return final 94 | 95 | 96 | class Decoupling(nn.Module): 97 | def __init__(self): 98 | super(Decoupling, self).__init__() 99 | self.exclusive_thre = 0.01 100 | self.common_thre = 0.01 101 | 102 | def forward(self, feat, confidence): 103 | 104 | ego_confi = confidence[:1] 105 | exclusive_list = [] 106 | exclusive_map_list = [ego_confi] 107 | common_list = [] 108 | common_map_list = [ego_confi] 109 | for n in range(1, feat.shape[0]): 110 | exclusive_map = (1 - ego_confi) * \ 111 | confidence[n].unsqueeze(0) # 112 | exclusive_map_list.append(exclusive_map) 113 | common_map = ego_confi * confidence[n].unsqueeze(0) 114 | common_map_list.append(common_map) 115 | ones_mask = torch.ones_like(exclusive_map).to(exclusive_map.device) 116 | zeros_mask = torch.zeros_like( 117 | exclusive_map).to(exclusive_map.device) 118 | exclusive_mask = torch.where( 119 | exclusive_map > self.exclusive_thre, ones_mask, zeros_mask) 120 | common_mask = torch.where( 121 | common_map > self.common_thre, ones_mask, zeros_mask) 122 | 123 | exclusive_list.append(feat[n].unsqueeze(0) * exclusive_mask) 124 | common_list.append(feat[n].unsqueeze(0) * common_mask) 125 | 126 | return 
torch.cat(exclusive_list, dim=0), torch.cat(common_list, dim=0), torch.cat(exclusive_map_list, dim=0), torch.cat(common_map_list, dim=0) 127 | 128 | 129 | class FeedForward(nn.Module): 130 | def __init__(self, dim, hidden_dim, dropout=0.): 131 | super().__init__() 132 | self.net = nn.Sequential( 133 | nn.Linear(dim, hidden_dim), 134 | nn.GELU(), 135 | nn.Dropout(dropout), 136 | nn.Linear(hidden_dim, dim), 137 | nn.Dropout(dropout) 138 | ) 139 | 140 | def forward(self, x): 141 | return self.net(x) 142 | 143 | 144 | class STCFormer(nn.Module): 145 | def __init__(self, channel, args, idx): 146 | super(STCFormer, self).__init__() 147 | 148 | self.decoupling = Decoupling() 149 | self.scale = [1, 0.5, 0.25] 150 | self.temporal_self_attention = TemporalAttention(channel) 151 | self.layer_norm = nn.LayerNorm( 152 | [channel, args['height'][idx], args['width'][idx]]) 153 | self.exclusive_encoder = RPN_transformer_deformable_mtf_singlescale( 154 | channel=channel, points=9) 155 | self.common_encoder = RPN_transformer_deformable_mtf_singlescale( 156 | channel=channel, points=3) 157 | self.late_fusion = LateFusion(channel=channel) 158 | self.time_embedding = nn.Linear(1, channel) 159 | 160 | def forward(self, neighbor_feat, neighbor_confidence, history_feat, level): 161 | if level > 0: 162 | neighbor_confidence = F.interpolate( 163 | neighbor_confidence, scale_factor=self.scale[level]) 164 | exclusive_feat, common_feat, exclusive_map, common_map = self.decoupling( 165 | neighbor_feat, neighbor_confidence) 166 | 167 | ego_feat = neighbor_feat[:1] 168 | history_feat = torch.cat([ego_feat, history_feat], dim=0) 169 | 170 | delay = [0.0] + [-1.0] * (history_feat.shape[0] -1) 171 | delay = torch.tensor([delay]).to(ego_feat.device) 172 | time_embed = self.time_embedding(delay[:, :, None]) 173 | time_embed = time_embed.reshape(history_feat.shape[0], -1, 1, 1) 174 | history_feat = history_feat + time_embed 175 | 176 | x = self.temporal_self_attention(history_feat) 177 | ego_feat = x 178 | temporal_feat = ego_feat 179 | 180 | exclusive_feat = torch.cat( 181 | [ego_feat, exclusive_feat], dim=0) 182 | common_feat = torch.cat([ego_feat, common_feat], dim=0) 183 | ego_exclusive_feat = self.exclusive_encoder( 184 | exclusive_feat, exclusive_map).unsqueeze(0) 185 | ego_common_feat = self.common_encoder( 186 | common_feat, common_map).unsqueeze(0) 187 | 188 | 189 | x = self.late_fusion(ego_exclusive_feat, ego_common_feat) 190 | ego_feat = x 191 | 192 | return ego_feat[0], [temporal_feat[0], ego_exclusive_feat[0], ego_common_feat[0]] 193 | -------------------------------------------------------------------------------- /v2xvit/models/point_pillar_how2comm.py: -------------------------------------------------------------------------------- 1 | from numpy import record 2 | import torch.nn as nn 3 | 4 | from v2xvit.models.sub_modules.pillar_vfe import PillarVFE 5 | from v2xvit.models.sub_modules.point_pillar_scatter import PointPillarScatter 6 | from v2xvit.models.sub_modules.base_bev_backbone import BaseBEVBackbone 7 | from v2xvit.models.sub_modules.base_bev_backbone_resnet import ResNetBEVBackbone 8 | from v2xvit.models.sub_modules.downsample_conv import DownsampleConv 9 | from v2xvit.models.sub_modules.naive_compress import NaiveCompressor 10 | from v2xvit.models.fuse_modules.how2comm_deformable import How2comm 11 | import torch 12 | from v2xvit.models.sub_modules.torch_transformation_utils import warp_affine_simple 13 | 14 | def transform_feature(feature_list, delay): 15 | return feature_list[delay] 16 | 17 | 18 | 
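# Intuition for the Decoupling module above: with ego confidence e and a
# collaborator's confidence c at the same BEV location,
# exclusive = (1 - e) * c highlights what only the collaborator sees, while
# common = e * c highlights what both agents see; each map is then thresholded
# at 0.01 to mask the collaborator's features for the two STCFormer branches.
ego_conf, collab_conf = 0.05, 0.80     # ego occluded, collaborator confident
print((1 - ego_conf) * collab_conf)    # ~0.76 -> exclusive branch dominates
print(ego_conf * collab_conf)          # ~0.04

ego_conf, collab_conf = 0.90, 0.80     # both confident about this location
print((1 - ego_conf) * collab_conf)    # ~0.08
print(ego_conf * collab_conf)          # ~0.72 -> common branch dominates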
class PointPillarHow2comm(nn.Module): 19 | def __init__(self, args): 20 | super(PointPillarHow2comm, self).__init__() 21 | 22 | self.pillar_vfe = PillarVFE(args['pillar_vfe'], 23 | num_point_features=4, 24 | voxel_size=args['voxel_size'], 25 | point_cloud_range=args['lidar_range']) 26 | self.scatter = PointPillarScatter(args['point_pillar_scatter']) 27 | if 'resnet' in args['base_bev_backbone']: 28 | self.backbone = ResNetBEVBackbone(args['base_bev_backbone'], 64) 29 | else: 30 | self.backbone = BaseBEVBackbone(args['base_bev_backbone'], 64) 31 | 32 | # used to downsample the feature map for efficient computation 33 | self.shrink_flag = False 34 | if 'shrink_header' in args: 35 | self.shrink_flag = True 36 | self.shrink_conv = DownsampleConv(args['shrink_header']) 37 | self.compression = False 38 | 39 | if args['compression'] > 0: 40 | self.compression = True 41 | self.naive_compressor = NaiveCompressor(256, args['compression']) 42 | 43 | self.dcn = False 44 | 45 | self.fusion_net = How2comm(args['fusion_args'], args) 46 | self.frame = args['fusion_args']['frame'] 47 | self.delay = 1 48 | self.discrete_ratio = args['fusion_args']['voxel_size'][0] 49 | self.downsample_rate = args['fusion_args']['downsample_rate'] 50 | self.multi_scale = args['fusion_args']['multi_scale'] 51 | 52 | self.cls_head = nn.Conv2d(128 * 2, args['anchor_number'], 53 | kernel_size=1) 54 | self.reg_head = nn.Conv2d(128 * 2, 7 * args['anchor_number'], 55 | kernel_size=1) 56 | if args['backbone_fix']: 57 | self.backbone_fix() 58 | 59 | def backbone_fix(self): 60 | """ 61 | Fix the parameters of backbone during finetune on timedelay。 62 | """ 63 | for p in self.pillar_vfe.parameters(): 64 | p.requires_grad = False 65 | 66 | for p in self.scatter.parameters(): 67 | p.requires_grad = False 68 | 69 | for p in self.backbone.parameters(): 70 | p.requires_grad = False 71 | 72 | if self.compression: 73 | for p in self.naive_compressor.parameters(): 74 | p.requires_grad = False 75 | if self.shrink_flag: 76 | for p in self.shrink_conv.parameters(): 77 | p.requires_grad = False 78 | 79 | for p in self.cls_head.parameters(): 80 | p.requires_grad = False 81 | for p in self.reg_head.parameters(): 82 | p.requires_grad = False 83 | 84 | def regroup(self, x, record_len): 85 | cum_sum_len = torch.cumsum(record_len, dim=0) 86 | split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) 87 | return split_x 88 | 89 | def forward(self, data_dict_list): 90 | batch_dict_list = [] 91 | feature_list = [] 92 | feature_2d_list = [] 93 | matrix_list = [] 94 | regroup_feature_list = [] 95 | regroup_feature_list_large = [] 96 | 97 | 98 | for origin_data in data_dict_list: 99 | data_dict = origin_data['ego'] 100 | voxel_features = data_dict['processed_lidar']['voxel_features'] 101 | voxel_coords = data_dict['processed_lidar']['voxel_coords'] 102 | voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] 103 | record_len = data_dict['record_len'] 104 | 105 | pairwise_t_matrix = data_dict['pairwise_t_matrix'] 106 | batch_dict = {'voxel_features': voxel_features, 107 | 'voxel_coords': voxel_coords, 108 | 'voxel_num_points': voxel_num_points, 109 | 'record_len': record_len} 110 | # n, 4 -> n, c encoding voxel feature using point-pillar method 111 | batch_dict = self.pillar_vfe(batch_dict) 112 | # n, c -> N, C, H, W 113 | batch_dict = self.scatter(batch_dict) 114 | batch_dict = self.backbone(batch_dict) 115 | # N, C, H', W' 116 | spatial_features_2d = batch_dict['spatial_features_2d'] 117 | 118 | # downsample feature to reduce memory 119 | if 
self.shrink_flag: 120 | spatial_features_2d = self.shrink_conv(spatial_features_2d) 121 | # compressor 122 | if self.compression: 123 | spatial_features_2d = self.naive_compressor( 124 | spatial_features_2d) 125 | # dcn 126 | if self.dcn: 127 | spatial_features_2d = self.dcn_net(spatial_features_2d) 128 | 129 | batch_dict_list.append(batch_dict) 130 | spatial_features = batch_dict['spatial_features'] 131 | feature_list.append(spatial_features) 132 | feature_2d_list.append(spatial_features_2d) 133 | matrix_list.append(pairwise_t_matrix) 134 | regroup_feature_list.append(self.regroup( 135 | spatial_features_2d, record_len)) 136 | regroup_feature_list_large.append( 137 | self.regroup(spatial_features, record_len)) 138 | 139 | pairwise_t_matrix = matrix_list[0].clone().detach() 140 | 141 | 142 | history_feature = transform_feature(regroup_feature_list_large, self.delay) 143 | spatial_features = feature_list[0] 144 | spatial_features_2d = feature_2d_list[0] 145 | batch_dict = batch_dict_list[0] 146 | record_len = batch_dict['record_len'] 147 | psm_single = self.cls_head(spatial_features_2d) 148 | 149 | if self.delay == 0: 150 | fused_feature, communication_rates, result_dict, offset_loss, commu_loss, _, _ = self.fusion_net(spatial_features, psm_single, record_len,pairwise_t_matrix,self.backbone,[self.shrink_conv, self.cls_head, self.reg_head]) 151 | elif self.delay > 0: 152 | fused_feature, communication_rates, result_dict, offset_loss, commu_loss, _, _ = self.fusion_net(spatial_features, psm_single,record_len,pairwise_t_matrix,self.backbone,[self.shrink_conv, self.cls_head, self.reg_head], history=history_feature) 153 | if self.shrink_flag: 154 | fused_feature = self.shrink_conv(fused_feature) 155 | 156 | psm = self.cls_head(fused_feature) 157 | rm = self.reg_head(fused_feature) 158 | 159 | output_dict = {'psm': psm, 160 | 'rm': rm 161 | } 162 | 163 | output_dict.update(result_dict) 164 | output_dict.update({'comm_rate': communication_rates, 165 | "offset_loss": offset_loss, 166 | 'commu_loss': commu_loss 167 | }) 168 | return output_dict 169 | -------------------------------------------------------------------------------- /v2xvit/models/sub_modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ydk122024/How2comm/8d357abdcb265ed4db5e4c6d9ee5400ea5799abe/v2xvit/models/sub_modules/__init__.py -------------------------------------------------------------------------------- /v2xvit/models/sub_modules/base_bev_backbone.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | 5 | 6 | class BaseBEVBackbone(nn.Module): 7 | def __init__(self, model_cfg, input_channels): 8 | super().__init__() 9 | self.model_cfg = model_cfg 10 | 11 | if 'layer_nums' in self.model_cfg: 12 | 13 | assert len(self.model_cfg['layer_nums']) == \ 14 | len(self.model_cfg['layer_strides']) == \ 15 | len(self.model_cfg['num_filters']) 16 | 17 | layer_nums = self.model_cfg['layer_nums'] 18 | layer_strides = self.model_cfg['layer_strides'] 19 | num_filters = self.model_cfg['num_filters'] 20 | else: 21 | layer_nums = layer_strides = num_filters = [] 22 | 23 | if 'upsample_strides' in self.model_cfg: 24 | assert len(self.model_cfg['upsample_strides']) \ 25 | == len(self.model_cfg['num_upsample_filter']) 26 | 27 | num_upsample_filters = self.model_cfg['num_upsample_filter'] 28 | upsample_strides = self.model_cfg['upsample_strides'] 29 | 30 | else: 31 | 
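# Approximate tensor shapes through PointPillarHow2comm above, assuming the
# yaml settings earlier in this dump (704 x 200 pillar grid, ResNet backbone
# strides [2, 2, 2] with upsample strides [1, 2, 4] to 128 channels each,
# shrink_header to 256 channels, 2 anchors).  N is the number of CAVs across
# the batch, B the number of samples; exact values depend on the data.
shape_walk = [
    ("pillar_vfe + scatter -> spatial_features",      ("N", 64, 200, 704)),
    ("backbone, 3 levels concatenated",               ("N", 384, 100, 352)),
    ("shrink_conv -> spatial_features_2d",            ("N", 256, 100, 352)),
    ("cls_head(spatial_features_2d) -> psm_single",   ("N", 2, 100, 352)),
    ("cls_head / reg_head on fused feature",          ("B", 2, 100, 352)),  # rm has 7 * 2 = 14 channels
]
for stage, shape in shape_walk:
    print(f"{stage:48s} {shape}")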
upsample_strides = num_upsample_filters = [] 32 | 33 | num_levels = len(layer_nums) 34 | c_in_list = [input_channels, *num_filters[:-1]] 35 | 36 | self.blocks = nn.ModuleList() 37 | self.deblocks = nn.ModuleList() 38 | 39 | for idx in range(num_levels): 40 | cur_layers = [ 41 | nn.ZeroPad2d(1), 42 | nn.Conv2d( 43 | c_in_list[idx], num_filters[idx], kernel_size=3, 44 | stride=layer_strides[idx], padding=0, bias=False 45 | ), 46 | nn.BatchNorm2d(num_filters[idx], eps=1e-3, momentum=0.01), 47 | nn.ReLU() 48 | ] 49 | for k in range(layer_nums[idx]): 50 | cur_layers.extend([ 51 | nn.Conv2d(num_filters[idx], num_filters[idx], 52 | kernel_size=3, padding=1, bias=False), 53 | nn.BatchNorm2d(num_filters[idx], eps=1e-3, momentum=0.01), 54 | nn.ReLU() 55 | ]) 56 | 57 | self.blocks.append(nn.Sequential(*cur_layers)) 58 | if len(upsample_strides) > 0: 59 | stride = upsample_strides[idx] 60 | if stride >= 1: 61 | self.deblocks.append(nn.Sequential( 62 | nn.ConvTranspose2d( 63 | num_filters[idx], num_upsample_filters[idx], 64 | upsample_strides[idx], 65 | stride=upsample_strides[idx], bias=False 66 | ), 67 | nn.BatchNorm2d(num_upsample_filters[idx], 68 | eps=1e-3, momentum=0.01), 69 | nn.ReLU() 70 | )) 71 | else: 72 | stride = np.round(1 / stride).astype(np.int) 73 | self.deblocks.append(nn.Sequential( 74 | nn.Conv2d( 75 | num_filters[idx], num_upsample_filters[idx], 76 | stride, 77 | stride=stride, bias=False 78 | ), 79 | nn.BatchNorm2d(num_upsample_filters[idx], eps=1e-3, 80 | momentum=0.01), 81 | nn.ReLU() 82 | )) 83 | 84 | c_in = sum(num_upsample_filters) 85 | if len(upsample_strides) > num_levels: 86 | self.deblocks.append(nn.Sequential( 87 | nn.ConvTranspose2d(c_in, c_in, upsample_strides[-1], 88 | stride=upsample_strides[-1], bias=False), 89 | nn.BatchNorm2d(c_in, eps=1e-3, momentum=0.01), 90 | nn.ReLU(), 91 | )) 92 | 93 | self.num_bev_features = c_in 94 | 95 | def forward(self, data_dict): 96 | spatial_features = data_dict['spatial_features'] 97 | 98 | ups = [] 99 | ret_dict = {} 100 | x = spatial_features 101 | 102 | for i in range(len(self.blocks)): 103 | x = self.blocks[i](x) 104 | 105 | stride = int(spatial_features.shape[2] / x.shape[2]) 106 | ret_dict['spatial_features_%dx' % stride] = x 107 | 108 | if len(self.deblocks) > 0: 109 | ups.append(self.deblocks[i](x)) 110 | else: 111 | ups.append(x) 112 | 113 | if len(ups) > 1: 114 | x = torch.cat(ups, dim=1) 115 | elif len(ups) == 1: 116 | x = ups[0] 117 | 118 | if len(self.deblocks) > len(self.blocks): 119 | x = self.deblocks[-1](x) 120 | 121 | data_dict['spatial_features_2d'] = x 122 | return data_dict 123 | -------------------------------------------------------------------------------- /v2xvit/models/sub_modules/base_bev_backbone_resnet.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | 5 | from v2xvit.models.sub_modules.resblock import ResNetModified, BasicBlock 6 | 7 | DEBUG = False 8 | 9 | class ResNetBEVBackbone(nn.Module): 10 | def __init__(self, model_cfg, input_channels): 11 | super().__init__() 12 | self.model_cfg = model_cfg 13 | 14 | if 'layer_nums' in self.model_cfg: 15 | 16 | assert len(self.model_cfg['layer_nums']) == \ 17 | len(self.model_cfg['layer_strides']) == \ 18 | len(self.model_cfg['num_filters']) 19 | 20 | layer_nums = self.model_cfg['layer_nums'] 21 | layer_strides = self.model_cfg['layer_strides'] 22 | num_filters = self.model_cfg['num_filters'] 23 | else: 24 | layer_nums = layer_strides = num_filters = [] 25 | 26 
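# --- Illustrative sketch (not part of the original file) ----------------------
# Both BEV backbones (BaseBEVBackbone above and ResNetBEVBackbone here) parse
# the same model_cfg keys. The config values and tensor shapes below are
# hypothetical, chosen only to show how the pieces fit together.
import torch
from v2xvit.models.sub_modules.base_bev_backbone import BaseBEVBackbone

example_cfg = {
    'layer_nums': [3, 5, 8],              # conv layers per pyramid level
    'layer_strides': [2, 2, 2],           # downsampling stride of each level
    'num_filters': [64, 128, 256],        # output channels of each level
    'upsample_strides': [1, 2, 4],        # deconv strides back to one scale
    'num_upsample_filter': [128, 128, 128],
}
backbone = BaseBEVBackbone(example_cfg, input_channels=64)
dummy = {'spatial_features': torch.randn(1, 64, 200, 704)}
out = backbone(dummy)
# All three levels are upsampled to a common resolution and concatenated:
# out['spatial_features_2d'].shape == (1, 384, 100, 352)
# -------------------------------------------------------------------------------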
| if 'upsample_strides' in self.model_cfg: 27 | assert len(self.model_cfg['upsample_strides']) \ 28 | == len(self.model_cfg['num_upsample_filter']) 29 | 30 | num_upsample_filters = self.model_cfg['num_upsample_filter'] 31 | upsample_strides = self.model_cfg['upsample_strides'] 32 | 33 | else: 34 | upsample_strides = num_upsample_filters = [] 35 | 36 | self.resnet = ResNetModified(BasicBlock, 37 | layer_nums, 38 | layer_strides, 39 | num_filters) 40 | 41 | num_levels = len(layer_nums) 42 | self.num_levels = len(layer_nums) 43 | self.deblocks = nn.ModuleList() 44 | 45 | for idx in range(num_levels): 46 | if len(upsample_strides) > 0: 47 | stride = upsample_strides[idx] 48 | if stride >= 1: 49 | self.deblocks.append(nn.Sequential( 50 | nn.ConvTranspose2d( 51 | num_filters[idx], num_upsample_filters[idx], 52 | upsample_strides[idx], 53 | stride=upsample_strides[idx], bias=False 54 | ), 55 | nn.BatchNorm2d(num_upsample_filters[idx], 56 | eps=1e-3, momentum=0.01), 57 | nn.ReLU() 58 | )) 59 | else: 60 | stride = np.round(1 / stride).astype(np.int) 61 | self.deblocks.append(nn.Sequential( 62 | nn.Conv2d( 63 | num_filters[idx], num_upsample_filters[idx], 64 | stride, 65 | stride=stride, bias=False 66 | ), 67 | nn.BatchNorm2d(num_upsample_filters[idx], eps=1e-3, 68 | momentum=0.01), 69 | nn.ReLU() 70 | )) 71 | 72 | c_in = sum(num_upsample_filters) 73 | if len(upsample_strides) > num_levels: 74 | self.deblocks.append(nn.Sequential( 75 | nn.ConvTranspose2d(c_in, c_in, upsample_strides[-1], 76 | stride=upsample_strides[-1], bias=False), 77 | nn.BatchNorm2d(c_in, eps=1e-3, momentum=0.01), 78 | nn.ReLU(), 79 | )) 80 | 81 | self.num_bev_features = c_in 82 | 83 | def forward(self, data_dict): 84 | spatial_features = data_dict['spatial_features'] 85 | 86 | x = self.resnet(spatial_features) # tuple of features 87 | ups = [] 88 | 89 | for i in range(self.num_levels): 90 | if len(self.deblocks) > 0: 91 | ups.append(self.deblocks[i](x[i])) 92 | else: 93 | ups.append(x[i]) 94 | 95 | if len(ups) > 1: 96 | x = torch.cat(ups, dim=1) 97 | elif len(ups) == 1: 98 | x = ups[0] 99 | 100 | if len(self.deblocks) > self.num_levels: 101 | x = self.deblocks[-1](x) 102 | 103 | data_dict['spatial_features_2d'] = x 104 | return data_dict 105 | -------------------------------------------------------------------------------- /v2xvit/models/sub_modules/base_transformer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | from einops import rearrange 5 | 6 | 7 | class PreNorm(nn.Module): 8 | def __init__(self, dim, fn): 9 | super().__init__() 10 | self.norm = nn.LayerNorm(dim) 11 | self.fn = fn 12 | 13 | def forward(self, x, **kwargs): 14 | return self.fn(self.norm(x), **kwargs) 15 | 16 | 17 | class FeedForward(nn.Module): 18 | def __init__(self, dim, hidden_dim, dropout=0.): 19 | super().__init__() 20 | self.net = nn.Sequential( 21 | nn.Linear(dim, hidden_dim), 22 | nn.GELU(), 23 | nn.Dropout(dropout), 24 | nn.Linear(hidden_dim, dim), 25 | nn.Dropout(dropout) 26 | ) 27 | 28 | def forward(self, x): 29 | return self.net(x) 30 | 31 | 32 | class CavAttention(nn.Module): 33 | """ 34 | Vanilla CAV attention. 
35 | """ 36 | def __init__(self, dim, heads, dim_head=64, dropout=0.1): 37 | super().__init__() 38 | inner_dim = heads * dim_head 39 | 40 | self.heads = heads 41 | self.scale = dim_head ** -0.5 42 | 43 | self.attend = nn.Softmax(dim=-1) 44 | self.to_qkv = nn.Linear(dim, inner_dim * 3, bias=False) 45 | 46 | self.to_out = nn.Sequential( 47 | nn.Linear(inner_dim, dim), 48 | nn.Dropout(dropout) 49 | ) 50 | 51 | def forward(self, x, mask, prior_encoding): 52 | # x: (B, L, H, W, C) -> (B, H, W, L, C) 53 | # mask: (B, L) 54 | x = x.permute(0, 2, 3, 1, 4) 55 | # mask: (B, 1, H, W, L, 1) 56 | mask = mask.unsqueeze(1) 57 | 58 | # qkv: [(B, H, W, L, C_inner) *3] 59 | qkv = self.to_qkv(x).chunk(3, dim=-1) 60 | # q: (B, M, H, W, L, C) 61 | q, k, v = map(lambda t: rearrange(t, 'b h w l (m c) -> b m h w l c', 62 | m=self.heads), qkv) 63 | 64 | # attention, (B, M, H, W, L, L) 65 | att_map = torch.einsum('b m h w i c, b m h w j c -> b m h w i j', 66 | q, k) * self.scale 67 | # add mask 68 | att_map = att_map.masked_fill(mask == 0, -float('inf')) 69 | # softmax 70 | att_map = self.attend(att_map) 71 | 72 | # out:(B, M, H, W, L, C_head) 73 | out = torch.einsum('b m h w i j, b m h w j c -> b m h w i c', att_map, 74 | v) 75 | out = rearrange(out, 'b m h w l c -> b h w l (m c)', 76 | m=self.heads) 77 | out = self.to_out(out) 78 | # (B L H W C) 79 | out = out.permute(0, 3, 1, 2, 4) 80 | return out 81 | 82 | 83 | class BaseEncoder(nn.Module): 84 | def __init__(self, dim, depth, heads, dim_head, mlp_dim, dropout=0.): 85 | super().__init__() 86 | self.layers = nn.ModuleList([]) 87 | for _ in range(depth): 88 | self.layers.append(nn.ModuleList([ 89 | PreNorm(dim, CavAttention(dim, 90 | heads=heads, 91 | dim_head=dim_head, 92 | dropout=dropout)), 93 | PreNorm(dim, FeedForward(dim, mlp_dim, dropout=dropout)) 94 | ])) 95 | 96 | def forward(self, x, mask): 97 | for attn, ff in self.layers: 98 | x = attn(x, mask=mask) + x 99 | x = ff(x) + x 100 | return x 101 | 102 | 103 | class BaseTransformer(nn.Module): 104 | def __init__(self, args): 105 | super().__init__() 106 | 107 | dim = args['dim'] 108 | depth = args['depth'] 109 | heads = args['heads'] 110 | dim_head = args['dim_head'] 111 | mlp_dim = args['mlp_dim'] 112 | dropout = args['dropout'] 113 | max_cav = args['max_cav'] 114 | 115 | self.encoder = BaseEncoder(dim, depth, heads, dim_head, mlp_dim, 116 | dropout) 117 | 118 | def forward(self, x, mask): 119 | # B, L, H, W, C 120 | output = self.encoder(x, mask) 121 | # B, H, W, C 122 | output = output[:, 0] 123 | 124 | return output -------------------------------------------------------------------------------- /v2xvit/models/sub_modules/downsample_conv.py: -------------------------------------------------------------------------------- 1 | """ 2 | Class used to downsample features by 3*3 conv 3 | """ 4 | 5 | import torch 6 | import torch.nn as nn 7 | 8 | 9 | class DoubleConv(nn.Module): 10 | """ 11 | Double convoltuion 12 | Args: 13 | in_channels: input channel num 14 | out_channels: output channel num 15 | """ 16 | 17 | def __init__(self, in_channels, out_channels, kernel_size, 18 | stride, padding): 19 | super().__init__() 20 | self.double_conv = nn.Sequential( 21 | nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, 22 | stride=stride, padding=padding), 23 | nn.ReLU(inplace=True), 24 | nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1), 25 | nn.ReLU(inplace=True) 26 | ) 27 | 28 | def forward(self, x): 29 | return self.double_conv(x) 30 | 31 | 32 | class DownsampleConv(nn.Module): 33 | def 
__init__(self, config): 34 | super(DownsampleConv, self).__init__() 35 | self.layers = nn.ModuleList([]) 36 | input_dim = config['input_dim'] 37 | 38 | for (ksize, dim, stride, padding) in zip(config['kernal_size'], 39 | config['dim'], 40 | config['stride'], 41 | config['padding']): 42 | self.layers.append(DoubleConv(input_dim, 43 | dim, 44 | kernel_size=ksize, 45 | stride=stride, 46 | padding=padding)) 47 | input_dim = dim 48 | 49 | def forward(self, x): 50 | for i in range(len(self.layers)): 51 | x = self.layers[i](x) 52 | return x -------------------------------------------------------------------------------- /v2xvit/models/sub_modules/fuse_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | from einops import rearrange 5 | from v2xvit.utils.common_utils import torch_tensor_to_numpy 6 | 7 | 8 | def regroup(dense_feature, record_len, max_len): 9 | """ 10 | Regroup the data based on the record_len. 11 | 12 | Parameters 13 | ---------- 14 | dense_feature : torch.Tensor 15 | N, C, H, W 16 | record_len : list 17 | [sample1_len, sample2_len, ...] 18 | max_len : int 19 | Maximum cav number 20 | 21 | Returns 22 | ------- 23 | regroup_feature : torch.Tensor 24 | B, L, C, H, W 25 | """ 26 | cum_sum_len = list(np.cumsum(torch_tensor_to_numpy(record_len))) 27 | split_features = torch.tensor_split(dense_feature, 28 | cum_sum_len[:-1]) 29 | regroup_features = [] 30 | mask = [] 31 | 32 | for split_feature in split_features: 33 | # M, C, H, W 34 | feature_shape = split_feature.shape 35 | 36 | # the maximum M is 5 as most 5 cavs 37 | padding_len = max_len - feature_shape[0] 38 | mask.append([1] * feature_shape[0] + [0] * padding_len) 39 | 40 | padding_tensor = torch.zeros(padding_len, feature_shape[1], 41 | feature_shape[2], feature_shape[3]) 42 | padding_tensor = padding_tensor.to(split_feature.device) 43 | 44 | split_feature = torch.cat([split_feature, padding_tensor], 45 | dim=0) 46 | 47 | # 1, 5C, H, W 48 | split_feature = split_feature.view(-1, 49 | feature_shape[2], 50 | feature_shape[3]).unsqueeze(0) 51 | regroup_features.append(split_feature) 52 | 53 | # B, 5C, H, W 54 | regroup_features = torch.cat(regroup_features, dim=0) 55 | # B, L, C, H, W 56 | regroup_features = rearrange(regroup_features, 57 | 'b (l c) h w -> b l c h w', 58 | l=max_len) 59 | mask = torch.from_numpy(np.array(mask)).to(regroup_features.device) 60 | 61 | return regroup_features, mask 62 | -------------------------------------------------------------------------------- /v2xvit/models/sub_modules/how2comm_preprocess.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | from torch.nn import functional as F 5 | from v2xvit.models.sub_modules.feature_flow import FlowGenerator, ResNetBEVBackbone 6 | from v2xvit.models.comm_modules.mutual_communication import Communication 7 | 8 | 9 | class How2commPreprocess(nn.Module): 10 | def __init__(self, args, channel, delay): 11 | super(How2commPreprocess, self).__init__() 12 | self.flow_flag = args['flow_flag'] 13 | self.channel = channel 14 | self.frame = args['fusion_args']['frame'] 15 | self.delay = delay 16 | self.flow = FlowGenerator(args) 17 | 18 | self.commu_module = Communication( 19 | args['fusion_args']['communication'], in_planes=self.channel) 20 | 21 | def regroup(self, x, record_len): 22 | cum_sum_len = torch.cumsum(record_len, dim=0) 23 | split_x = torch.tensor_split(x, 
cum_sum_len[:-1].cpu()) 24 | return split_x 25 | 26 | def get_grid(self, flow): 27 | m, n = flow.shape[-2:] 28 | shifts_x = torch.arange( 29 | 0, n, 1, dtype=torch.float32, device=flow.device) 30 | shifts_y = torch.arange( 31 | 0, m, 1, dtype=torch.float32, device=flow.device) 32 | shifts_y, shifts_x = torch.meshgrid(shifts_y, shifts_x) 33 | 34 | grid_dst = torch.stack((shifts_x, shifts_y)).unsqueeze(0) 35 | workspace = torch.tensor( 36 | [(n - 1) / 2, (m - 1) / 2]).view(1, 2, 1, 1).to(flow.device) 37 | 38 | flow_grid = ((flow + grid_dst) / workspace - 1).permute(0, 2, 3, 1) 39 | 40 | return flow_grid 41 | 42 | def resample(self, feats, flow): 43 | flow_grid = self.get_grid(flow) 44 | warped_feats = F.grid_sample( 45 | feats, flow_grid, mode="bilinear", padding_mode="border") 46 | 47 | return warped_feats 48 | 49 | def communication(self, feats, record_len, history_list, confidence_map_list): 50 | feat_list = self.regroup(feats, record_len) 51 | sparse_feat_list, commu_loss, commu_rate, sparse_mask = self.commu_module( 52 | feat_list,confidence_map_list) 53 | sparse_feats = torch.cat(sparse_feat_list, dim=0) 54 | sparse_history_list = [] 55 | for i in range(len(sparse_feat_list)): 56 | sparse_history = torch.cat([history_list[i][:1], sparse_feat_list[i][1:]], dim=0) 57 | sparse_history_list.append(sparse_history) 58 | sparse_history = torch.cat(sparse_history_list, dim=0) 59 | return sparse_feats, commu_loss, commu_rate, sparse_history 60 | 61 | def forward(self, feat_curr, feat_history, record_len, backbone=None, heads=None): 62 | feat_curr = self.regroup(feat_curr, record_len) 63 | B = len(feat_curr) 64 | feat_list = [[] for _ in range(B)] 65 | for bs in range(B): 66 | feat_list[bs] += [feat_curr[bs], feat_history[bs]] 67 | 68 | if self.flow_flag: 69 | feat_final, offset_loss = self.flow(feat_list) 70 | else: 71 | offset_loss = torch.zeros(1).to(record_len.device) 72 | x_list = [] 73 | for bs in range(B): 74 | delayed_colla_feat = feat_list[bs][self.delay][1:] 75 | ego_feat = feat_list[bs][0][:1] 76 | x_list.append( 77 | torch.cat([ego_feat, delayed_colla_feat], dim=0)) 78 | feat_final = torch.cat(x_list, dim=0) 79 | 80 | return feat_final, offset_loss 81 | -------------------------------------------------------------------------------- /v2xvit/models/sub_modules/naive_compress.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class NaiveCompressor(nn.Module): 6 | def __init__(self, input_dim, compress_raito): 7 | super().__init__() 8 | self.encoder = nn.Sequential( 9 | nn.Conv2d(input_dim, input_dim//compress_raito, kernel_size=3, 10 | stride=1, padding=1), 11 | nn.BatchNorm2d(input_dim//compress_raito, eps=1e-3, momentum=0.01), 12 | nn.ReLU() 13 | ) 14 | self.decoder = nn.Sequential( 15 | nn.Conv2d(input_dim//compress_raito, input_dim, kernel_size=3, 16 | stride=1, padding=1), 17 | nn.BatchNorm2d(input_dim, eps=1e-3, momentum=0.01), 18 | nn.ReLU(), 19 | nn.Conv2d(input_dim, input_dim, kernel_size=3, stride=1, padding=1), 20 | nn.BatchNorm2d(input_dim, eps=1e-3, 21 | momentum=0.01), 22 | nn.ReLU() 23 | ) 24 | 25 | def forward(self, x): 26 | x = self.encoder(x) 27 | x = self.decoder(x) 28 | 29 | return x -------------------------------------------------------------------------------- /v2xvit/models/sub_modules/pillar_vfe.py: -------------------------------------------------------------------------------- 1 | """ 2 | Pillar VFE, credits to OpenPCDet. 
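# (Descriptive note, not part of the original file.) PillarVFE below augments
# every point in a pillar with its offset from the pillar's point mean
# (f_cluster) and from the pillar's grid-cell center (f_center), optionally
# appends the point's Euclidean distance, masks out padded points, and then
# max-pools the PFN layer outputs over the points of each pillar, yielding one
# feature vector per pillar ('pillar_features').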
3 | """ 4 | 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | 10 | class PFNLayer(nn.Module): 11 | def __init__(self, 12 | in_channels, 13 | out_channels, 14 | use_norm=True, 15 | last_layer=False): 16 | super().__init__() 17 | 18 | self.last_vfe = last_layer 19 | self.use_norm = use_norm 20 | if not self.last_vfe: 21 | out_channels = out_channels // 2 22 | 23 | if self.use_norm: 24 | self.linear = nn.Linear(in_channels, out_channels, bias=False) 25 | self.norm = nn.BatchNorm1d(out_channels, eps=1e-3, momentum=0.01) 26 | else: 27 | self.linear = nn.Linear(in_channels, out_channels, bias=True) 28 | 29 | self.part = 50000 30 | 31 | def forward(self, inputs): 32 | if inputs.shape[0] > self.part: 33 | # nn.Linear performs randomly when batch size is too large 34 | num_parts = inputs.shape[0] // self.part 35 | part_linear_out = [self.linear( 36 | inputs[num_part * self.part:(num_part + 1) * self.part]) 37 | for num_part in range(num_parts + 1)] 38 | x = torch.cat(part_linear_out, dim=0) 39 | else: 40 | x = self.linear(inputs) 41 | torch.backends.cudnn.enabled = False 42 | x = self.norm(x.permute(0, 2, 1)).permute(0, 2, 43 | 1) if self.use_norm else x 44 | torch.backends.cudnn.enabled = True 45 | x = F.relu(x) 46 | x_max = torch.max(x, dim=1, keepdim=True)[0] 47 | 48 | if self.last_vfe: 49 | return x_max 50 | else: 51 | x_repeat = x_max.repeat(1, inputs.shape[1], 1) 52 | x_concatenated = torch.cat([x, x_repeat], dim=2) 53 | return x_concatenated 54 | 55 | 56 | class PillarVFE(nn.Module): 57 | def __init__(self, model_cfg, num_point_features, voxel_size, 58 | point_cloud_range): 59 | super().__init__() 60 | self.model_cfg = model_cfg 61 | 62 | self.use_norm = self.model_cfg['use_norm'] 63 | self.with_distance = self.model_cfg['with_distance'] 64 | 65 | self.use_absolute_xyz = self.model_cfg['use_absolute_xyz'] 66 | num_point_features += 6 if self.use_absolute_xyz else 3 67 | if self.with_distance: 68 | num_point_features += 1 69 | 70 | self.num_filters = self.model_cfg['num_filters'] 71 | assert len(self.num_filters) > 0 72 | num_filters = [num_point_features] + list(self.num_filters) 73 | 74 | pfn_layers = [] 75 | for i in range(len(num_filters) - 1): 76 | in_filters = num_filters[i] 77 | out_filters = num_filters[i + 1] 78 | pfn_layers.append( 79 | PFNLayer(in_filters, out_filters, self.use_norm, 80 | last_layer=(i >= len(num_filters) - 2)) 81 | ) 82 | self.pfn_layers = nn.ModuleList(pfn_layers) 83 | 84 | self.voxel_x = voxel_size[0] 85 | self.voxel_y = voxel_size[1] 86 | self.voxel_z = voxel_size[2] 87 | self.x_offset = self.voxel_x / 2 + point_cloud_range[0] 88 | self.y_offset = self.voxel_y / 2 + point_cloud_range[1] 89 | self.z_offset = self.voxel_z / 2 + point_cloud_range[2] 90 | 91 | def get_output_feature_dim(self): 92 | return self.num_filters[-1] 93 | 94 | @staticmethod 95 | def get_paddings_indicator(actual_num, max_num, axis=0): 96 | actual_num = torch.unsqueeze(actual_num, axis + 1) 97 | max_num_shape = [1] * len(actual_num.shape) 98 | max_num_shape[axis + 1] = -1 99 | max_num = torch.arange(max_num, 100 | dtype=torch.int, 101 | device=actual_num.device).view(max_num_shape) 102 | paddings_indicator = actual_num.int() > max_num 103 | return paddings_indicator 104 | 105 | def forward(self, batch_dict): 106 | 107 | voxel_features, voxel_num_points, coords = \ 108 | batch_dict['voxel_features'], batch_dict['voxel_num_points'], \ 109 | batch_dict['voxel_coords'] 110 | points_mean = \ 111 | voxel_features[:, :, :3].sum(dim=1, keepdim=True) / \ 112 | 
voxel_num_points.type_as(voxel_features).view(-1, 1, 1) 113 | f_cluster = voxel_features[:, :, :3] - points_mean 114 | 115 | f_center = torch.zeros_like(voxel_features[:, :, :3]) 116 | f_center[:, :, 0] = voxel_features[:, :, 0] - ( 117 | coords[:, 3].to(voxel_features.dtype).unsqueeze( 118 | 1) * self.voxel_x + self.x_offset) 119 | f_center[:, :, 1] = voxel_features[:, :, 1] - ( 120 | coords[:, 2].to(voxel_features.dtype).unsqueeze( 121 | 1) * self.voxel_y + self.y_offset) 122 | f_center[:, :, 2] = voxel_features[:, :, 2] - ( 123 | coords[:, 1].to(voxel_features.dtype).unsqueeze( 124 | 1) * self.voxel_z + self.z_offset) 125 | 126 | if self.use_absolute_xyz: 127 | features = [voxel_features, f_cluster, f_center] 128 | else: 129 | features = [voxel_features[..., 3:], f_cluster, f_center] 130 | 131 | if self.with_distance: 132 | points_dist = torch.norm(voxel_features[:, :, :3], 2, 2, 133 | keepdim=True) 134 | features.append(points_dist) 135 | features = torch.cat(features, dim=-1) 136 | 137 | voxel_count = features.shape[1] 138 | mask = self.get_paddings_indicator(voxel_num_points, voxel_count, 139 | axis=0) 140 | mask = torch.unsqueeze(mask, -1).type_as(voxel_features) 141 | features *= mask 142 | for pfn in self.pfn_layers: 143 | features = pfn(features) 144 | features = features.squeeze() 145 | batch_dict['pillar_features'] = features 146 | return batch_dict 147 | -------------------------------------------------------------------------------- /v2xvit/models/sub_modules/point_pillar_scatter.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class PointPillarScatter(nn.Module): 6 | def __init__(self, model_cfg): 7 | super().__init__() 8 | 9 | self.model_cfg = model_cfg 10 | self.num_bev_features = self.model_cfg['num_features'] 11 | self.nx, self.ny, self.nz = model_cfg['grid_size'] 12 | assert self.nz == 1 13 | 14 | def forward(self, batch_dict): 15 | pillar_features, coords = batch_dict['pillar_features'], batch_dict[ 16 | 'voxel_coords'] 17 | batch_spatial_features = [] 18 | batch_size = coords[:, 0].max().int().item() + 1 19 | 20 | for batch_idx in range(batch_size): 21 | spatial_feature = torch.zeros( 22 | self.num_bev_features, 23 | self.nz * self.nx * self.ny, 24 | dtype=pillar_features.dtype, 25 | device=pillar_features.device) 26 | 27 | batch_mask = coords[:, 0] == batch_idx 28 | this_coords = coords[batch_mask, :] 29 | 30 | indices = this_coords[:, 1] + \ 31 | this_coords[:, 2] * self.nx + \ 32 | this_coords[:, 3] 33 | indices = indices.type(torch.long) 34 | 35 | pillars = pillar_features[batch_mask, :] 36 | pillars = pillars.t() 37 | spatial_feature[:, indices] = pillars 38 | batch_spatial_features.append(spatial_feature) 39 | 40 | batch_spatial_features = \ 41 | torch.stack(batch_spatial_features, 0) 42 | batch_spatial_features = \ 43 | batch_spatial_features.view(batch_size, self.num_bev_features * 44 | self.nz, self.ny, self.nx) 45 | batch_dict['spatial_features'] = batch_spatial_features 46 | 47 | return batch_dict 48 | 49 | -------------------------------------------------------------------------------- /v2xvit/models/sub_modules/self_attn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | 7 | class ScaledDotProductAttention(nn.Module): 8 | """ 9 | Scaled Dot-Product Attention proposed in "Attention Is All You Need" 10 | Compute the dot 
products of the query with all keys, divide each by sqrt(dim), 11 | and apply a softmax function to obtain the weights on the values 12 | Args: dim, mask 13 | dim (int): dimention of attention 14 | mask (torch.Tensor): tensor containing indices to be masked 15 | Inputs: query, key, value, mask 16 | - **query** (batch, q_len, d_model): tensor containing projection vector for decoder. 17 | - **key** (batch, k_len, d_model): tensor containing projection vector for encoder. 18 | - **value** (batch, v_len, d_model): tensor containing features of the encoded input sequence. 19 | - **mask** (-): tensor containing indices to be masked 20 | Returns: context, attn 21 | - **context**: tensor containing the context vector from attention mechanism. 22 | - **attn**: tensor containing the attention (alignment) from the encoder outputs. 23 | """ 24 | 25 | def __init__(self, dim): 26 | super(ScaledDotProductAttention, self).__init__() 27 | self.sqrt_dim = np.sqrt(dim) 28 | 29 | def forward(self, query, key, value): 30 | score = torch.bmm(query, key.transpose(1, 2)) / self.sqrt_dim 31 | attn = F.softmax(score, -1) 32 | context = torch.bmm(attn, value) 33 | return context 34 | 35 | 36 | class AttFusion(nn.Module): 37 | def __init__(self, feature_dim): 38 | super(AttFusion, self).__init__() 39 | self.att = ScaledDotProductAttention(feature_dim) 40 | 41 | def forward(self, x, record_len): 42 | split_x = self.regroup(x, record_len) 43 | batch_size = len(record_len) 44 | C, W, H = split_x[0].shape[1:] 45 | out = [] 46 | for xx in split_x: 47 | cav_num = xx.shape[0] 48 | query = xx[0,:].unsqueeze(0) 49 | query = query.view(1,C,-1).permute(2, 0, 1) 50 | key = xx.view(cav_num, C, -1).permute(2, 0, 1) 51 | value = xx.view(cav_num, C, -1).permute(2, 0, 1) 52 | h = self.att(query, key, value) 53 | h = h.permute(1, 2, 0).view(1, C, W, H)[0, ...].unsqueeze(0) 54 | out.append(h) 55 | return torch.cat(out, dim=0) 56 | 57 | def regroup(self, x, record_len): 58 | cum_sum_len = torch.cumsum(record_len, dim=0) 59 | split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) 60 | return split_x -------------------------------------------------------------------------------- /v2xvit/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ydk122024/How2comm/8d357abdcb265ed4db5e4c6d9ee5400ea5799abe/v2xvit/tools/__init__.py -------------------------------------------------------------------------------- /v2xvit/tools/debug_utils.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import torch 4 | from torch.utils.data import DataLoader 5 | 6 | import v2xvit.hypes_yaml.yaml_utils as yaml_utils 7 | from v2xvit.tools import train_utils 8 | from v2xvit.data_utils.datasets import build_dataset 9 | from v2xvit.visualization import vis_utils 10 | 11 | 12 | def test_parser(): 13 | parser = argparse.ArgumentParser(description="synthetic data generation") 14 | parser.add_argument('--model_dir', type=str, required=True, 15 | help='Continued training path') 16 | parser.add_argument('--fusion_method', type=str, default='late', 17 | help='late, early or intermediate') 18 | opt = parser.parse_args() 19 | return opt 20 | 21 | 22 | def test_bev_post_processing(): 23 | opt = test_parser() 24 | assert opt.fusion_method in ['late', 'early', 'intermediate'] 25 | 26 | hypes = yaml_utils.load_yaml(None, opt) 27 | 28 | print('Dataset Building') 29 | opencood_dataset = build_dataset(hypes, visualize=True, train=False) 30 | 
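# (Descriptive note, not part of the original file.) The loop below does not
# use the network's predictions at all: the ground-truth label map is packed
# into output_dict ('cls'/'reg') and pushed through the BEV post-processor, so
# the rendered boxes are decoded ground truth. It serves as a sanity check of
# the post-processing and visualization pipeline, e.g. (path hypothetical):
#   python v2xvit/tools/debug_utils.py --model_dir v2xvit/logs/<run_dir> --fusion_method late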
data_loader = DataLoader(opencood_dataset, 31 | batch_size=1, 32 | num_workers=0, 33 | collate_fn=opencood_dataset.collate_batch_test, 34 | shuffle=False, 35 | pin_memory=False, 36 | drop_last=False) 37 | 38 | print('Creating Model') 39 | model = train_utils.create_model(hypes) 40 | # we assume gpu is necessary 41 | if torch.cuda.is_available(): 42 | model.cuda() 43 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 44 | 45 | print('Loading Model from checkpoint') 46 | saved_path = opt.model_dir 47 | _, model = train_utils.load_saved_model(saved_path, model) 48 | model.eval() 49 | for i, batch_data in enumerate(data_loader): 50 | batch_data = train_utils.to_device(batch_data, device) 51 | label_map = batch_data["ego"]["label_dict"]["label_map"] 52 | output_dict = { 53 | "cls": label_map[:, 0, :, :], 54 | "reg": label_map[:, 1:, :, :] 55 | } 56 | gt_box_tensor, _ = opencood_dataset.post_processor.post_process_debug( 57 | batch_data["ego"], output_dict) 58 | vis_utils.visualize_single_sample_output_bev(gt_box_tensor, 59 | batch_data['ego'][ 60 | 'origin_lidar'].squeeze( 61 | 0), 62 | opencood_dataset) 63 | 64 | 65 | if __name__ == '__main__': 66 | test_bev_post_processing() 67 | -------------------------------------------------------------------------------- /v2xvit/tools/inference.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import time 4 | from collections import OrderedDict 5 | 6 | import torch 7 | import os 8 | import open3d as o3d 9 | from torch.utils.data import DataLoader 10 | 11 | import v2xvit.hypes_yaml.yaml_utils as yaml_utils 12 | from v2xvit.tools import train_utils, infrence_utils 13 | from v2xvit.data_utils.datasets import build_dataset 14 | from v2xvit.visualization import vis_utils 15 | from v2xvit.utils import eval_utils 16 | import torch.multiprocessing 17 | torch.multiprocessing.set_sharing_strategy('file_system') 18 | 19 | 20 | def test_parser(): 21 | parser = argparse.ArgumentParser(description="synthetic data generation") 22 | parser.add_argument('--model_dir', type=str, required=True, 23 | help='Continued training path') 24 | parser.add_argument('--fusion_method', required=False, type=str, 25 | default='intermediate_with_comm', 26 | help='late, early or intermediate') 27 | parser.add_argument('--show_vis', action='store_true', 28 | help='whether to show image visualization result') 29 | parser.add_argument('--show_sequence', action='store_true', 30 | help='whether to show video visualization result.' 
31 | 'it can note be set true with show_vis together ') 32 | parser.add_argument('--eval_epoch', type=str, default=14, 33 | help='Set the checkpoint') 34 | parser.add_argument('--save_vis', action='store_true', 35 | help='whether to save visualization result') 36 | parser.add_argument('--save_npy', action='store_true', 37 | help='whether to save prediction and gt result' 38 | 'in npy file') 39 | parser.add_argument('--comm_thre', type=float, default=None, 40 | help='Communication confidence threshold') 41 | parser.add_argument('--score_thre', type=float, default=None, 42 | help='Confidence score threshold') 43 | parser.add_argument('--xyz_std', type=float, default=None, 44 | help='position error') 45 | parser.add_argument('--ryp_std', type=float, default=None, 46 | help='rotation error') 47 | opt = parser.parse_args() 48 | return opt 49 | 50 | 51 | def main(): 52 | opt = test_parser() 53 | assert opt.fusion_method in ['late', 'early', 'intermediate',"intermediate_with_comm"] 54 | assert not (opt.show_vis and opt.show_sequence), \ 55 | 'you can only visualize ' \ 56 | 'the results in single ' \ 57 | 'image mode or video mode' 58 | 59 | hypes = yaml_utils.load_yaml(None, opt) 60 | if opt.comm_thre is not None: 61 | hypes['model']['args']['fusion_args']['communication']['thre'] = opt.comm_thre 62 | if opt.score_thre is not None: 63 | hypes['postprocess']['target_args']['score_threshold'] = opt.score_thre 64 | score_threshold = hypes['postprocess']['target_args']['score_threshold'] 65 | if opt.xyz_std is not None: 66 | hypes['wild_setting']['xyz_std'] = opt.xyz_std 67 | if opt.ryp_std is not None: 68 | hypes['wild_setting']['ryp_std'] = opt.ryp_std 69 | 70 | print('Dataset Building') 71 | opencood_dataset = build_dataset(hypes, visualize=True, train=False) 72 | data_loader = DataLoader(opencood_dataset, 73 | batch_size=1, 74 | num_workers=10, 75 | collate_fn=opencood_dataset.collate_batch_test, 76 | shuffle=False, 77 | pin_memory=False, 78 | drop_last=False) 79 | 80 | print('Creating Model') 81 | model = train_utils.create_model(hypes) 82 | # we assume gpu is necessary 83 | if torch.cuda.is_available(): 84 | model.cuda() 85 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 86 | 87 | print('Loading Model from checkpoint') 88 | saved_path = opt.model_dir 89 | last_epoch = train_utils.findLastCheckpoint(saved_path) 90 | if opt.eval_epoch is None: 91 | epoch_id_list = [last_epoch] 92 | else: 93 | epoch_id_list = [opt.eval_epoch] 94 | 95 | for epoch_id in epoch_id_list: 96 | epoch_id, model = train_utils.load_saved_model(saved_path, model, epoch_id) 97 | model.eval() 98 | 99 | # Create the dictionary for evaluation 100 | result_stat = {0.3: {'tp': [], 'fp': [], 'gt': 0}, 101 | 0.5: {'tp': [], 'fp': [], 'gt': 0}, 102 | 0.7: {'tp': [], 'fp': [], 'gt': 0}} 103 | 104 | total_comm_rates = [] 105 | for i, batch_data_list in enumerate(data_loader): 106 | print("{}".format(i)) 107 | with torch.no_grad(): 108 | torch.cuda.synchronize() 109 | batch_data = batch_data_list[0] 110 | batch_data = train_utils.to_device(batch_data, device) 111 | batch_data_list = train_utils.to_device(batch_data_list, device) 112 | if opt.fusion_method == 'late': 113 | pred_box_tensor, pred_score, gt_box_tensor = \ 114 | infrence_utils.inference_late_fusion(batch_data, 115 | model, 116 | opencood_dataset) 117 | elif opt.fusion_method == 'early': 118 | pred_box_tensor, pred_score, gt_box_tensor = \ 119 | infrence_utils.inference_early_fusion(batch_data, 120 | model, 121 | opencood_dataset) 122 | elif 
opt.fusion_method == 'intermediate': 123 | pred_box_tensor, pred_score, gt_box_tensor = \ 124 | infrence_utils.inference_intermediate_fusion(batch_data_list, 125 | model, 126 | opencood_dataset) 127 | elif opt.fusion_method == 'intermediate_with_comm': 128 | pred_box_tensor, pred_score, gt_box_tensor, comm_rates = \ 129 | infrence_utils.inference_intermediate_fusion_withcomm(batch_data_list, 130 | model, 131 | opencood_dataset) 132 | total_comm_rates.append(comm_rates) 133 | else: 134 | raise NotImplementedError('Only early, late and intermediate' 135 | 'fusion is supported.') 136 | eval_utils.caluclate_tp_fp(pred_box_tensor, 137 | pred_score, 138 | gt_box_tensor, 139 | result_stat, 140 | 0.3) 141 | eval_utils.caluclate_tp_fp(pred_box_tensor, 142 | pred_score, 143 | gt_box_tensor, 144 | result_stat, 145 | 0.5) 146 | eval_utils.caluclate_tp_fp(pred_box_tensor, 147 | pred_score, 148 | gt_box_tensor, 149 | result_stat, 150 | 0.7) 151 | if opt.save_npy: 152 | npy_save_path = os.path.join(opt.model_dir, 'npy') 153 | if not os.path.exists(npy_save_path): 154 | os.makedirs(npy_save_path) 155 | infrence_utils.save_prediction_gt(pred_box_tensor, 156 | gt_box_tensor, 157 | batch_data['ego'][ 158 | 'origin_lidar'][0], 159 | i, 160 | npy_save_path) 161 | 162 | if opt.show_vis or opt.save_vis: 163 | vis_save_path = '' 164 | if opt.save_vis: 165 | vis_save_path = os.path.join(opt.model_dir, 'vis') 166 | if not os.path.exists(vis_save_path): 167 | os.makedirs(vis_save_path) 168 | vis_save_path = os.path.join(vis_save_path, '%05d.png' % i) 169 | 170 | opencood_dataset.visualize_result(pred_box_tensor, 171 | gt_box_tensor, 172 | batch_data['ego'][ 173 | 'origin_lidar'][0], 174 | opt.show_vis, 175 | vis_save_path, 176 | dataset=opencood_dataset) 177 | if len(total_comm_rates) > 0: 178 | comm_rates = (sum(total_comm_rates)/len(total_comm_rates)).item() 179 | else: 180 | comm_rates = 0 181 | ap_30, ap_50, ap_70 = eval_utils.eval_final_results(result_stat, opt.model_dir) 182 | current_time = time.ctime() 183 | 184 | with open(os.path.join(saved_path, 'result.txt'), 'a+') as f: 185 | msg = 'Epoch: {} | AP @0.3: {:.04f} | AP @0.5: {:.04f} | AP @0.7: {:.04f} | comm_rate: {:.06f}\n'.format(epoch_id, ap_30, ap_50, ap_70, comm_rates) 186 | if opt.comm_thre is not None: 187 | msg = 'Epoch: {} | AP @0.3: {:.04f} | AP @0.5: {:.04f} | AP @0.7: {:.04f} | comm_rate: {:.06f} | comm_thre: {:.04f} | score_threshold: {:.02f} | xyz_std: {:.01f} | ryp_std: {:.01f} | time: {}\n'.format(epoch_id, ap_30, ap_50, ap_70, comm_rates, opt.comm_thre,score_threshold,opt.xyz_std,opt.ryp_std,current_time) 188 | f.write(msg) 189 | print(msg) 190 | 191 | if __name__ == '__main__': 192 | main() -------------------------------------------------------------------------------- /v2xvit/tools/infrence_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import OrderedDict 3 | 4 | import numpy as np 5 | import torch 6 | 7 | from v2xvit.utils.common_utils import torch_tensor_to_numpy 8 | 9 | 10 | def inference_late_fusion(batch_data, model, dataset): 11 | """ 12 | Model inference for late fusion. 13 | 14 | Parameters 15 | ---------- 16 | batch_data : dict 17 | model : opencood.object 18 | dataset : opencood.LateFusionDataset 19 | 20 | Returns 21 | ------- 22 | pred_box_tensor : torch.Tensor 23 | The tensor of prediction bounding box after NMS. 24 | gt_box_tensor : torch.Tensor 25 | The tensor of gt bounding box. 
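# (Illustrative usage note, not part of the original file.) A typical,
# hypothetical invocation of the evaluation script above; --model_dir is the
# run folder created by train.py, which holds config.yaml and the
# net_epoch*.pth checkpoints:
#   python v2xvit/tools/inference.py \
#       --model_dir v2xvit/logs/<run_dir> \
#       --fusion_method intermediate_with_comm \
#       --eval_epoch 14 --save_npy
# AP@0.3/0.5/0.7 and the average communication rate are appended to result.txt
# inside --model_dir.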
26 | """ 27 | output_dict = OrderedDict() 28 | 29 | for cav_id, cav_content in batch_data.items(): 30 | output_dict[cav_id] = model(cav_content) 31 | 32 | pred_box_tensor, pred_score, gt_box_tensor = \ 33 | dataset.post_process(batch_data, 34 | output_dict) 35 | 36 | return pred_box_tensor, pred_score, gt_box_tensor 37 | 38 | 39 | def inference_early_fusion(batch_data, model, dataset): 40 | """ 41 | Model inference for early fusion. 42 | 43 | Parameters 44 | ---------- 45 | batch_data : dict 46 | model : opencood.object 47 | dataset : opencood.EarlyFusionDataset 48 | 49 | Returns 50 | ------- 51 | pred_box_tensor : torch.Tensor 52 | The tensor of prediction bounding box after NMS. 53 | gt_box_tensor : torch.Tensor 54 | The tensor of gt bounding box. 55 | """ 56 | output_dict = OrderedDict() 57 | cav_content = batch_data 58 | 59 | output_dict['ego'] = model(cav_content) 60 | 61 | pred_box_tensor, pred_score, gt_box_tensor = \ 62 | dataset.post_process(batch_data[0], 63 | output_dict) 64 | 65 | return pred_box_tensor, pred_score, gt_box_tensor 66 | 67 | def inference_intermediate_fusion_withcomm(batch_data_list, model, dataset, tail=""): 68 | """ 69 | Model inference for early fusion. 70 | 71 | Parameters 72 | ---------- 73 | batch_data : dict 74 | model : opencood.object 75 | dataset : opencood.EarlyFusionDataset 76 | 77 | Returns 78 | ------- 79 | pred_box_tensor : torch.Tensor 80 | The tensor of prediction bounding box after NMS. 81 | gt_box_tensor : torch.Tensor 82 | The tensor of gt bounding box. 83 | """ 84 | output_dict = OrderedDict() 85 | batch_data = batch_data_list[0] 86 | 87 | output_dict['ego'] = model(batch_data_list) 88 | 89 | pred_box_tensor, pred_score, gt_box_tensor = \ 90 | dataset.post_process(batch_data, 91 | output_dict) 92 | comm_rates = output_dict['ego']['comm_rate'] 93 | return pred_box_tensor, pred_score, gt_box_tensor, comm_rates 94 | 95 | 96 | def inference_intermediate_fusion(batch_data, model, dataset): 97 | """ 98 | Model inference for early fusion. 99 | 100 | Parameters 101 | ---------- 102 | batch_data : dict 103 | model : opencood.object 104 | dataset : opencood.EarlyFusionDataset 105 | 106 | Returns 107 | ------- 108 | pred_box_tensor : torch.Tensor 109 | The tensor of prediction bounding box after NMS. 110 | gt_box_tensor : torch.Tensor 111 | The tensor of gt bounding box. 112 | """ 113 | return inference_early_fusion(batch_data, model, dataset) 114 | 115 | 116 | def save_prediction_gt(pred_tensor, gt_tensor, pcd, timestamp, save_path): 117 | """ 118 | Save prediction and gt tensor to txt file. 
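# (Descriptive note, not part of the original file.) Despite the wording above,
# save_prediction_gt below writes NumPy files rather than txt: '%04d_pcd.npy',
# '%04d_pred.npy' and '%04d_gt.npy' are saved into save_path for each
# evaluated frame index.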
119 | """ 120 | pred_np = torch_tensor_to_numpy(pred_tensor) 121 | gt_np = torch_tensor_to_numpy(gt_tensor) 122 | pcd_np = torch_tensor_to_numpy(pcd) 123 | 124 | np.save(os.path.join(save_path, '%04d_pcd.npy' % timestamp), pcd_np) 125 | np.save(os.path.join(save_path, '%04d_pred.npy' % timestamp), pred_np) 126 | np.save(os.path.join(save_path, '%04d_gt.npy' % timestamp), gt_np) 127 | -------------------------------------------------------------------------------- /v2xvit/tools/multi_gpu_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.distributed as dist 4 | 5 | 6 | def get_dist_info(): 7 | if dist.is_available() and dist.is_initialized(): 8 | rank = dist.get_rank() 9 | world_size = dist.get_world_size() 10 | else: 11 | rank = 0 12 | world_size = 1 13 | return rank, world_size 14 | 15 | 16 | def init_distributed_mode(args): 17 | if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ: 18 | args.rank = int(os.environ["RANK"]) 19 | args.world_size = int(os.environ['WORLD_SIZE']) 20 | args.gpu = int(os.environ['LOCAL_RANK']) 21 | elif 'SLURM_PROCID' in os.environ: 22 | args.rank = int(os.environ['SLURM_PROCID']) 23 | args.gpu = args.rank % torch.cuda.device_count() 24 | else: 25 | print('Not using distributed mode') 26 | args.distributed = False 27 | return 28 | 29 | args.distributed = True 30 | 31 | torch.cuda.set_device(args.gpu) 32 | args.dist_backend = 'nccl' 33 | print('| distributed init (rank {}): {}'.format( 34 | args.rank, args.dist_url), flush=True) 35 | torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url, 36 | world_size=args.world_size, rank=args.rank) 37 | torch.distributed.barrier() 38 | setup_for_distributed(args.rank == 0) 39 | 40 | 41 | def setup_for_distributed(is_master): 42 | """ 43 | This function disables printing when not in master process 44 | """ 45 | import builtins as __builtin__ 46 | builtin_print = __builtin__.print 47 | 48 | def print(*args, **kwargs): 49 | force = kwargs.pop('force', False) 50 | if is_master or force: 51 | builtin_print(*args, **kwargs) 52 | 53 | __builtin__.print = print -------------------------------------------------------------------------------- /v2xvit/tools/train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os,sys,random 3 | import statistics 4 | 5 | import torch 6 | import os,time 7 | torch.autograd.set_detect_anomaly(True) 8 | import tqdm 9 | from torch.utils.data import DataLoader, DistributedSampler 10 | from tensorboardX import SummaryWriter 11 | 12 | import v2xvit.hypes_yaml.yaml_utils as yaml_utils 13 | from v2xvit.tools import train_utils,infrence_utils 14 | from v2xvit.data_utils.datasets import build_dataset 15 | from v2xvit.tools import multi_gpu_utils 16 | 17 | 18 | def train_parser(): 19 | parser = argparse.ArgumentParser(description="synthetic data generation") 20 | parser.add_argument("--hypes_yaml", type=str, required=True, 21 | help='data generation yaml file needed ') 22 | parser.add_argument('--model_dir', default='', 23 | help='Continued training path') 24 | parser.add_argument("--half", action='store_true', help="whether train with half precision") 25 | parser.add_argument('--dist_url', default='env://', 26 | help='url used to set up distributed training') 27 | opt = parser.parse_args() 28 | return opt 29 | 30 | def main(): 31 | opt = train_parser() 32 | hypes = yaml_utils.load_yaml(opt.hypes_yaml, opt) 33 | 
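# (Illustrative usage note, not part of the original file.)
# init_distributed_mode() below only switches DDP on when RANK / WORLD_SIZE /
# LOCAL_RANK are present in the environment, which torchrun sets automatically.
# Hypothetical launch commands for this script:
#   # single GPU, no DDP
#   python v2xvit/tools/train.py --hypes_yaml v2xvit/hypes_yaml/how2comm/v2xset_how2comm_stcformer.yaml
#   # 2 GPUs with DDP
#   torchrun --nproc_per_node=2 v2xvit/tools/train.py \
#       --hypes_yaml v2xvit/hypes_yaml/how2comm/v2xset_how2comm_stcformer.yaml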
multi_gpu_utils.init_distributed_mode(opt) 34 | 35 | print('-----------------Dataset Building------------------') 36 | opencood_train_dataset = build_dataset(hypes, visualize=False, train=True) 37 | opencood_validate_dataset = build_dataset(hypes, 38 | visualize=False, 39 | train=False) 40 | if opt.distributed: 41 | sampler_train = DistributedSampler(opencood_train_dataset) 42 | sampler_val = DistributedSampler(opencood_validate_dataset, 43 | shuffle=False) 44 | 45 | batch_sampler_train = torch.utils.data.BatchSampler( 46 | sampler_train, hypes['train_params']['batch_size'], drop_last=True) 47 | 48 | train_loader = DataLoader(opencood_train_dataset, 49 | batch_sampler=batch_sampler_train, 50 | num_workers=8, 51 | collate_fn=opencood_train_dataset.collate_batch_train) 52 | val_loader = DataLoader(opencood_validate_dataset, 53 | sampler=sampler_val, 54 | num_workers=8, 55 | collate_fn=opencood_train_dataset.collate_batch_train, 56 | drop_last=False) 57 | else: 58 | train_loader = DataLoader(opencood_train_dataset, 59 | batch_size=hypes['train_params']['batch_size'], 60 | num_workers=8, 61 | collate_fn=opencood_train_dataset.collate_batch_train, 62 | shuffle=True, 63 | pin_memory=False, 64 | drop_last=True) 65 | val_loader = DataLoader(opencood_validate_dataset, 66 | batch_size=hypes['train_params']['batch_size'], 67 | num_workers=8, 68 | collate_fn=opencood_train_dataset.collate_batch_train, 69 | shuffle=False, 70 | pin_memory=False, 71 | drop_last=True) 72 | 73 | print('---------------Creating Model------------------') 74 | model = train_utils.create_model(hypes) 75 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 76 | 77 | # if we want to train from last checkpoint. 78 | if opt.model_dir: 79 | saved_path = opt.model_dir 80 | init_epoch, model = train_utils.load_saved_model(saved_path, model) 81 | 82 | else: 83 | init_epoch = 0 84 | # if we train the model from scratch, we need to create a folder 85 | # to save the model, 86 | saved_path = train_utils.setup_train(hypes) 87 | 88 | # we assume gpu is necessary 89 | if torch.cuda.is_available(): 90 | model.to(device) 91 | model_without_ddp = model 92 | 93 | if opt.distributed: 94 | model = \ 95 | torch.nn.parallel.DistributedDataParallel(model, 96 | device_ids=[opt.gpu], 97 | find_unused_parameters=True) 98 | model_without_ddp = model.module 99 | 100 | # define the loss 101 | criterion = train_utils.create_loss(hypes) 102 | 103 | # optimizer setup 104 | # optimizer = train_utils.setup_optimizer(hypes, model) 105 | optimizer = train_utils.setup_optimizer(hypes, model_without_ddp) 106 | # lr scheduler setup 107 | num_steps = len(train_loader) 108 | scheduler = train_utils.setup_lr_schedular(hypes, optimizer, num_steps) 109 | 110 | # record training 111 | writer = SummaryWriter(saved_path) 112 | 113 | # half precision training 114 | if opt.half: 115 | scaler = torch.cuda.amp.GradScaler() 116 | 117 | print('Training start') 118 | epoches = hypes['train_params']['epoches'] 119 | # used to help schedule learning rate 120 | for epoch in range(init_epoch, max(epoches, init_epoch)): 121 | if hypes['lr_scheduler']['core_method'] != 'cosineannealwarm': 122 | scheduler.step(epoch) 123 | if hypes['lr_scheduler']['core_method'] == 'cosineannealwarm': 124 | scheduler.step_update(epoch * num_steps + 0) 125 | for param_group in optimizer.param_groups: 126 | print('learning rate %.7f' % param_group["lr"]) 127 | 128 | if opt.distributed: 129 | sampler_train.set_epoch(epoch) 130 | 131 | pbar2 = tqdm.tqdm(total=len(train_loader), leave=True) 
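# (Descriptive note, not part of the original file.) The block below is a
# consistency filter: each entry of batch_data_list holds one temporal frame of
# the same scene, and the whole batch is skipped whenever the number of
# collaborating agents (record_len) differs across those frames, presumably
# because the temporal fusion expects a fixed agent count per sample.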
132 | record_len_list = [] 133 | for i, batch_data_list in enumerate(train_loader): 134 | for v in batch_data_list: 135 | record_len_list.append(v['ego']['record_len'][0].item()) 136 | if len(set(record_len_list)) != 1: 137 | record_len_list = [] 138 | continue 139 | print(record_len_list) 140 | record_len_list = [] 141 | # the model will be evaluation mode during validation 142 | model.train() 143 | model.zero_grad() 144 | optimizer.zero_grad() 145 | 146 | batch_data = batch_data_list[0] 147 | 148 | batch_data_list = train_utils.to_device(batch_data_list, device) 149 | batch_data = train_utils.to_device(batch_data, device) 150 | 151 | # case1 : late fusion train --> only ego needed 152 | # case2 : early fusion train --> all data projected to ego 153 | # case3 : intermediate fusion --> ['ego']['processed_lidar'] 154 | # becomes a list, which containing all data from other cavs 155 | # as well 156 | if not opt.half: 157 | ouput_dict = model(batch_data_list) 158 | final_loss = criterion(ouput_dict, 159 | batch_data['ego']['label_dict']) 160 | final_loss += ouput_dict["offset_loss"][0] + ouput_dict["commu_loss"][0] 161 | else: 162 | with torch.cuda.amp.autocast(): 163 | ouput_dict = model(batch_data_list) 164 | # first argument is always your output dictionary, 165 | # second argument is always your label dictionary. 166 | final_loss = criterion(ouput_dict, 167 | batch_data['ego']['label_dict']) 168 | final_loss += ouput_dict["offset_loss"][0] + ouput_dict["commu_loss"][0] 169 | criterion.logging(epoch, i, len(train_loader), writer) 170 | pbar2.update(1) 171 | time.sleep(0.001) 172 | # back-propagation 173 | if not opt.half: 174 | final_loss.backward() 175 | optimizer.step() 176 | else: 177 | scaler.scale(final_loss).backward() 178 | scaler.step(optimizer) 179 | scaler.update() 180 | 181 | if epoch % hypes['train_params']['save_freq'] == 0: 182 | torch.save(model.state_dict(), 183 | os.path.join(saved_path, 184 | 'net_epoch%d.pth' % (epoch + 1))) 185 | 186 | print('Training Finished, checkpoints saved to %s' % saved_path) 187 | torch.cuda.empty_cache() 188 | 189 | if __name__ == '__main__': 190 | main() 191 | -------------------------------------------------------------------------------- /v2xvit/tools/train_utils.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import importlib 3 | import yaml 4 | import os 5 | import re 6 | from datetime import datetime 7 | 8 | import torch 9 | import torch.optim as optim 10 | 11 | def findLastCheckpoint(save_dir): 12 | file_list = glob.glob(os.path.join(save_dir, '*epoch*.pth')) 13 | if file_list: 14 | epochs_exist = [] 15 | for file_ in file_list: 16 | result = re.findall(".*epoch(.*).pth.*", file_) 17 | epochs_exist.append(int(result[0])) 18 | initial_epoch_ = max(epochs_exist) 19 | else: 20 | initial_epoch_ = 0 21 | return initial_epoch_ 22 | 23 | 24 | def load_saved_model(saved_path, model, epoch=None): 25 | """ 26 | Load saved model if exiseted 27 | 28 | Parameters 29 | __________ 30 | saved_path : str 31 | model saved path 32 | model : opencood object 33 | The model instance. 34 | 35 | Returns 36 | ------- 37 | model : opencood object 38 | The model instance loaded pretrained params. 
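# (Descriptive note, not part of the original file.) Checkpoint resolution used
# below: if net_latest.pth exists it is always preferred and the returned epoch
# is pinned to 100; otherwise the requested epoch (or, by default, the highest
# net_epoch<N>.pth found by findLastCheckpoint) is loaded, 'module.' prefixes
# from DataParallel are stripped, and parameters with mismatched shapes or
# missing keys are skipped instead of raising.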
39 | """ 40 | assert os.path.exists(saved_path), '{} not found'.format(saved_path) 41 | 42 | if os.path.exists(os.path.join(saved_path, 'net_latest.pth')): 43 | model.load_state_dict(torch.load( 44 | os.path.join(saved_path, 45 | 'net_latest.pth'))) 46 | return 100, model 47 | else: 48 | if epoch is None: 49 | initial_epoch = findLastCheckpoint(saved_path) 50 | else: 51 | initial_epoch = int(epoch) 52 | 53 | if initial_epoch > 0: 54 | print('resuming by loading epoch %d' % initial_epoch) 55 | 56 | state_dict_ = torch.load(os.path.join(saved_path, 'net_epoch%d.pth' % initial_epoch), map_location="cuda:0") 57 | state_dict = {} 58 | # convert data_parallal to model 59 | for k in state_dict_: 60 | if k.startswith('module') and not k.startswith('module_list'): 61 | state_dict[k[7:]] = state_dict_[k] 62 | else: 63 | state_dict[k] = state_dict_[k] 64 | 65 | model_state_dict = model.state_dict() 66 | 67 | for k in state_dict: 68 | if k in model_state_dict: 69 | if state_dict[k].shape != model_state_dict[k].shape: 70 | print('Skip loading parameter {}, required shape{}, ' \ 71 | 'loaded shape{}.'.format( 72 | k, model_state_dict[k].shape, state_dict[k].shape)) 73 | state_dict[k] = model_state_dict[k] 74 | else: 75 | print('Drop parameter {}.'.format(k)) 76 | for k in model_state_dict: 77 | if not (k in state_dict): 78 | print('No param {}.'.format(k)) 79 | state_dict[k] = model_state_dict[k] 80 | model.load_state_dict(state_dict, strict=False) 81 | return initial_epoch, model 82 | 83 | 84 | def setup_train(hypes): 85 | """ 86 | Create folder for saved model based on current timestep and model name 87 | 88 | Parameters 89 | ---------- 90 | hypes: dict 91 | Config yaml dictionary for training: 92 | """ 93 | model_name = hypes['name'] 94 | current_time = datetime.now() 95 | 96 | folder_name = current_time.strftime("_%Y_%m_%d_%H_%M_%S") 97 | folder_name = model_name + folder_name 98 | 99 | current_path = os.path.dirname(__file__) 100 | current_path = os.path.join(current_path, '../logs') 101 | 102 | full_path = os.path.join(current_path, folder_name) 103 | 104 | if not os.path.exists(full_path): 105 | os.makedirs(full_path) 106 | # save the yaml file 107 | save_name = os.path.join(full_path, 'config.yaml') 108 | with open(save_name, 'w') as outfile: 109 | yaml.dump(hypes, outfile) 110 | 111 | return full_path 112 | 113 | 114 | def create_model(hypes): 115 | """ 116 | Import the module "models/[model_name].py 117 | 118 | Parameters 119 | __________ 120 | hypes : dict 121 | Dictionary containing parameters. 122 | 123 | Returns 124 | ------- 125 | model : opencood,object 126 | Model object. 127 | """ 128 | backbone_name = hypes['model']['core_method'] 129 | backbone_config = hypes['model']['args'] 130 | 131 | model_filename = "v2xvit.models." + backbone_name 132 | model_lib = importlib.import_module(model_filename) 133 | model = None 134 | target_model_name = backbone_name.replace('_', '') 135 | 136 | for name, cls in model_lib.__dict__.items(): 137 | if name.lower() == target_model_name.lower(): 138 | model = cls 139 | 140 | if model is None: 141 | print('backbone not found in models folder. Please make sure you ' 142 | 'have a python file named %s and has a class ' 143 | 'called %s ignoring upper/lower case' % (model_filename, 144 | target_model_name)) 145 | exit(0) 146 | instance = model(backbone_config) 147 | return instance 148 | 149 | 150 | def create_loss(hypes): 151 | """ 152 | Create the loss function based on the given loss name. 
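# (Descriptive note, not part of the original file.) create_model above and
# create_loss below resolve classes purely by naming convention: the
# core_method string names a module under v2xvit.models / v2xvit.loss, and the
# class whose lower-cased name equals the core_method with underscores removed
# is instantiated. For example, core_method 'point_pillar_how2comm' imports
# v2xvit.models.point_pillar_how2comm and builds PointPillarHow2comm with
# hypes['model']['args'].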
153 | 154 | Parameters 155 | ---------- 156 | hypes : dict 157 | Configuration params for training. 158 | Returns 159 | ------- 160 | criterion : opencood.object 161 | The loss function. 162 | """ 163 | loss_func_name = hypes['loss']['core_method'] 164 | loss_func_config = hypes['loss']['args'] 165 | 166 | loss_filename = "v2xvit.loss." + loss_func_name 167 | loss_lib = importlib.import_module(loss_filename) 168 | loss_func = None 169 | target_loss_name = loss_func_name.replace('_', '') 170 | 171 | for name, lfunc in loss_lib.__dict__.items(): 172 | if name.lower() == target_loss_name.lower(): 173 | loss_func = lfunc 174 | 175 | if loss_func is None: 176 | print('loss function not found in loss folder. Please make sure you ' 177 | 'have a python file named %s and has a class ' 178 | 'called %s ignoring upper/lower case' % (loss_filename, 179 | target_loss_name)) 180 | exit(0) 181 | 182 | criterion = loss_func(loss_func_config) 183 | return criterion 184 | 185 | 186 | def setup_optimizer(hypes, model): 187 | """ 188 | Create optimizer corresponding to the yaml file 189 | 190 | Parameters 191 | ---------- 192 | hypes : dict 193 | The training configurations. 194 | model : opencood model 195 | The pytorch model 196 | """ 197 | method_dict = hypes['optimizer'] 198 | optimizer_method = getattr(optim, method_dict['core_method'], None) 199 | if not optimizer_method: 200 | raise ValueError('{} is not supported'.format(method_dict['name'])) 201 | if 'args' in method_dict: 202 | return optimizer_method(filter(lambda p: p.requires_grad, 203 | model.parameters()), 204 | lr=method_dict['lr'], 205 | **method_dict['args']) 206 | else: 207 | return optimizer_method(filter(lambda p: p.requires_grad, 208 | model.parameters()), 209 | lr=method_dict['lr']) 210 | 211 | 212 | def setup_lr_schedular(hypes, optimizer, init_epoch=None): 213 | """ 214 | Set up the learning rate schedular. 215 | 216 | Parameters 217 | ---------- 218 | hypes : dict 219 | The training configurations. 
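# --- Illustrative config sketch (not part of the original file) ---------------
# setup_optimizer (above) and setup_lr_schedular (here) read blocks like the
# following from the training yaml; the key names follow the code, the values
# are hypothetical:
#   optimizer:
#     core_method: Adam          # any class name from torch.optim
#     lr: 0.001
#     args:                      # optional, forwarded as keyword arguments
#       weight_decay: 1.0e-4
#   lr_scheduler:
#     core_method: multistep     # 'step', 'multistep', anything else -> exponential
#     step_size: [10, 20]        # milestones for 'multistep', int for 'step'
#     gamma: 0.1
# -------------------------------------------------------------------------------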
220 | 221 | optimizer : torch.optimizer 222 | """ 223 | lr_schedule_config = hypes['lr_scheduler'] 224 | last_epoch = init_epoch if init_epoch is not None else 0 225 | 226 | 227 | if lr_schedule_config['core_method'] == 'step': 228 | from torch.optim.lr_scheduler import StepLR 229 | step_size = lr_schedule_config['step_size'] 230 | gamma = lr_schedule_config['gamma'] 231 | scheduler = StepLR(optimizer, step_size=step_size, gamma=gamma) 232 | 233 | elif lr_schedule_config['core_method'] == 'multistep': 234 | from torch.optim.lr_scheduler import MultiStepLR 235 | milestones = lr_schedule_config['step_size'] 236 | gamma = lr_schedule_config['gamma'] 237 | scheduler = MultiStepLR(optimizer, 238 | milestones=milestones, 239 | gamma=gamma) 240 | 241 | else: 242 | from torch.optim.lr_scheduler import ExponentialLR 243 | gamma = lr_schedule_config['gamma'] 244 | scheduler = ExponentialLR(optimizer, gamma) 245 | 246 | for _ in range(last_epoch): 247 | scheduler.step() 248 | 249 | return scheduler 250 | 251 | 252 | def to_device(inputs, device): 253 | if isinstance(inputs, list): 254 | return [to_device(x, device) for x in inputs] 255 | elif isinstance(inputs, dict): 256 | return {k: to_device(v, device) for k, v in inputs.items()} 257 | else: 258 | if isinstance(inputs, int) or isinstance(inputs, float) \ 259 | or isinstance(inputs, str): 260 | return inputs 261 | return inputs.to(device) 262 | -------------------------------------------------------------------------------- /v2xvit/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ydk122024/How2comm/8d357abdcb265ed4db5e4c6d9ee5400ea5799abe/v2xvit/utils/__init__.py -------------------------------------------------------------------------------- /v2xvit/utils/box_overlaps.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | from cython.parallel import prange, parallel 11 | 12 | 13 | DTYPE = np.float32 14 | ctypedef float DTYPE_t 15 | 16 | 17 | def bbox_overlaps( 18 | np.ndarray[DTYPE_t, ndim=2] boxes, 19 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 20 | """ 21 | Parameters 22 | ---------- 23 | boxes: (N, 4) ndarray of float 24 | query_boxes: (K, 4) ndarray of float 25 | Returns 26 | ------- 27 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 28 | """ 29 | cdef unsigned int N = boxes.shape[0] 30 | cdef unsigned int K = query_boxes.shape[0] 31 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 32 | cdef DTYPE_t iw, ih, box_area 33 | cdef DTYPE_t ua 34 | cdef unsigned int k, n 35 | for k in range(K): 36 | box_area = ( 37 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 38 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 39 | ) 40 | for n in range(N): 41 | iw = ( 42 | min(boxes[n, 2], query_boxes[k, 2]) - 43 | max(boxes[n, 0], query_boxes[k, 0]) + 1 44 | ) 45 | if iw > 0: 46 | ih = ( 47 | min(boxes[n, 3], query_boxes[k, 3]) - 48 | max(boxes[n, 1], query_boxes[k, 1]) + 1 49 | ) 50 | if ih > 0: 51 | ua = float( 52 | (boxes[n, 2] - boxes[n, 0] + 1) * 53 | (boxes[n, 3] - boxes[n, 1] + 1) + 54 | box_area - iw * ih 55 | ) 56 | overlaps[n, k] = iw * ih / ua 57 | return overlaps 58 | 59 
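# --- Illustrative usage sketch (not part of the original file) ----------------
# bbox_overlaps above takes float32 [x1, y1, x2, y2] boxes and returns the
# (N, K) IoU matrix (with the +1 pixel convention). The extension must be
# compiled first via the setup.py shipped next to this file; the build command
# and import path below are assumptions:
#   python v2xvit/utils/setup.py build_ext --inplace
import numpy as np
from v2xvit.utils.box_overlaps import bbox_overlaps   # assumed import path

boxes = np.array([[0, 0, 10, 10]], dtype=np.float32)
query = np.array([[5, 5, 15, 15], [20, 20, 30, 30]], dtype=np.float32)
iou = bbox_overlaps(boxes, query)   # shape (1, 2); the second query box gives 0
# -------------------------------------------------------------------------------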
| def bbox_intersections( 60 | np.ndarray[DTYPE_t, ndim=2] boxes, 61 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 62 | """ 63 | For each query box compute the intersection ratio covered by boxes 64 | ---------- 65 | Parameters 66 | ---------- 67 | boxes: (N, 4) ndarray of float 68 | query_boxes: (K, 4) ndarray of float 69 | Returns 70 | ------- 71 | overlaps: (N, K) ndarray of intersec between boxes and query_boxes 72 | """ 73 | cdef unsigned int N = boxes.shape[0] 74 | cdef unsigned int K = query_boxes.shape[0] 75 | cdef np.ndarray[DTYPE_t, ndim=2] intersec = np.zeros((N, K), dtype=DTYPE) 76 | cdef DTYPE_t iw, ih, box_area 77 | cdef DTYPE_t ua 78 | cdef unsigned int k, n 79 | for k in range(K): 80 | box_area = ( 81 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 82 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 83 | ) 84 | for n in range(N): 85 | iw = ( 86 | min(boxes[n, 2], query_boxes[k, 2]) - 87 | max(boxes[n, 0], query_boxes[k, 0]) + 1 88 | ) 89 | if iw > 0: 90 | ih = ( 91 | min(boxes[n, 3], query_boxes[k, 3]) - 92 | max(boxes[n, 1], query_boxes[k, 1]) + 1 93 | ) 94 | if ih > 0: 95 | intersec[n, k] = iw * ih / box_area 96 | return intersec 97 | 98 | # Compute bounding box voting 99 | def box_vote( 100 | np.ndarray[float, ndim=2] dets_NMS, 101 | np.ndarray[float, ndim=2] dets_all): 102 | cdef np.ndarray[float, ndim=2] dets_voted = np.zeros((dets_NMS.shape[0], dets_NMS.shape[1]), dtype=np.float32) 103 | cdef unsigned int N = dets_NMS.shape[0] 104 | cdef unsigned int M = dets_all.shape[0] 105 | 106 | cdef np.ndarray[float, ndim=1] det 107 | cdef np.ndarray[float, ndim=1] acc_box 108 | cdef float acc_score 109 | 110 | cdef np.ndarray[float, ndim=1] det2 111 | cdef float bi0, bi1, bit2, bi3 112 | cdef float iw, ih, ua 113 | 114 | cdef float thresh=0.5 115 | 116 | for i in range(N): 117 | det = dets_NMS[i, :] 118 | acc_box = np.zeros((4), dtype=np.float32) 119 | acc_score = 0.0 120 | 121 | for m in range(M): 122 | det2 = dets_all[m, :] 123 | 124 | bi0 = max(det[0], det2[0]) 125 | bi1 = max(det[1], det2[1]) 126 | bi2 = min(det[2], det2[2]) 127 | bi3 = min(det[3], det2[3]) 128 | 129 | iw = bi2 - bi0 + 1 130 | ih = bi3 - bi1 + 1 131 | 132 | if not (iw > 0 and ih > 0): 133 | continue 134 | 135 | ua = (det[2] - det[0] + 1) * (det[3] - det[1] + 1) + (det2[2] - det2[0] + 1) * (det2[3] - det2[1] + 1) - iw * ih 136 | ov = iw * ih / ua 137 | 138 | if (ov < thresh): 139 | continue 140 | 141 | acc_box += det2[4] * det2[0:4] 142 | acc_score += det2[4] 143 | 144 | dets_voted[i][0:4] = acc_box / acc_score 145 | dets_voted[i][4] = det[4] # Keep the original score 146 | 147 | return dets_voted 148 | -------------------------------------------------------------------------------- /v2xvit/utils/common_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Common utilities 3 | """ 4 | 5 | import numpy as np 6 | import torch 7 | from shapely.geometry import Polygon 8 | 9 | 10 | def check_numpy_to_torch(x): 11 | if isinstance(x, np.ndarray): 12 | return torch.from_numpy(x).float(), True 13 | return x, False 14 | 15 | 16 | def check_contain_nan(x): 17 | if isinstance(x, dict): 18 | return any(check_contain_nan(v) for k, v in x.items()) 19 | if isinstance(x, list): 20 | return any(check_contain_nan(itm) for itm in x) 21 | if isinstance(x, int) or isinstance(x, float): 22 | return False 23 | if isinstance(x, np.ndarray): 24 | return np.any(np.isnan(x)) 25 | return torch.any(x.isnan()).detach().cpu().item() 26 | 27 | 28 | def rotate_points_along_z(points, angle): 29 | 
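The small helpers above (check_numpy_to_torch, check_contain_nan) are what allow rotate_points_along_z, whose signature appears just above, to accept either numpy arrays or torch tensors and to hand back the same type it was given. A short sketch with made-up values:

import numpy as np
import torch
from v2xvit.utils.common_utils import (check_numpy_to_torch, check_contain_nan,
                                       rotate_points_along_z)

pts = np.random.rand(2, 100, 4).astype(np.float32)      # (B, N, 3 + C) point clouds
angles = np.array([0.0, np.pi / 2], dtype=np.float32)   # one yaw angle (radians) per batch entry
rotated = rotate_points_along_z(pts, angles)             # numpy in -> numpy out
_, was_numpy = check_numpy_to_torch(pts)                 # was_numpy is True
print(check_contain_nan({'points': pts,
                         'flow': torch.tensor([float('nan')])}))  # True: NaN in a nested tensor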
""" 30 | Args: 31 | points: (B, N, 3 + C) 32 | angle: (B), radians, angle along z-axis, angle increases x ==> y 33 | Returns: 34 | 35 | """ 36 | points, is_numpy = check_numpy_to_torch(points) 37 | angle, _ = check_numpy_to_torch(angle) 38 | 39 | cosa = torch.cos(angle) 40 | sina = torch.sin(angle) 41 | zeros = angle.new_zeros(points.shape[0]) 42 | ones = angle.new_ones(points.shape[0]) 43 | rot_matrix = torch.stack(( 44 | cosa, sina, zeros, 45 | -sina, cosa, zeros, 46 | zeros, zeros, ones 47 | ), dim=1).view(-1, 3, 3).float() 48 | points_rot = torch.matmul(points[:, :, 0:3].float(), rot_matrix) 49 | points_rot = torch.cat((points_rot, points[:, :, 3:]), dim=-1) 50 | return points_rot.numpy() if is_numpy else points_rot 51 | 52 | 53 | def rotate_points_along_z_2d(points, angle): 54 | """ 55 | Rorate the points along z-axis. 56 | Parameters 57 | ---------- 58 | points : torch.Tensor / np.ndarray 59 | (N, 2). 60 | angle : torch.Tensor / np.ndarray 61 | (N,) 62 | 63 | Returns 64 | ------- 65 | points_rot : torch.Tensor / np.ndarray 66 | Rorated points with shape (N, 2) 67 | 68 | """ 69 | points, is_numpy = check_numpy_to_torch(points) 70 | angle, _ = check_numpy_to_torch(angle) 71 | cosa = torch.cos(angle) 72 | sina = torch.sin(angle) 73 | # (N, 2, 2) 74 | rot_matrix = torch.stack((cosa, sina, -sina, cosa), dim=1).view(-1, 2, 75 | 2).float() 76 | points_rot = torch.einsum("ik, ikj->ij", points.float(), rot_matrix) 77 | return points_rot.numpy() if is_numpy else points_rot 78 | 79 | 80 | def remove_ego_from_objects(objects, ego_id): 81 | """ 82 | Avoid adding ego vehicle to the object dictionary. 83 | 84 | Parameters 85 | ---------- 86 | objects : dict 87 | The dictionary contained all objects. 88 | 89 | ego_id : int 90 | Ego id. 91 | """ 92 | if ego_id in objects: 93 | del objects[ego_id] 94 | 95 | 96 | def retrieve_ego_id(base_data_dict): 97 | """ 98 | Retrieve the ego vehicle id from sample(origin format). 99 | 100 | Parameters 101 | ---------- 102 | base_data_dict : dict 103 | Data sample in origin format. 104 | 105 | Returns 106 | ------- 107 | ego_id : str 108 | The id of ego vehicle. 109 | """ 110 | ego_id = None 111 | 112 | for cav_id, cav_content in base_data_dict.items(): 113 | if cav_content['ego']: 114 | ego_id = cav_id 115 | break 116 | return ego_id 117 | 118 | 119 | def compute_iou(box, boxes): 120 | """ 121 | Compute iou between box and boxes list 122 | Parameters 123 | ---------- 124 | box : shapely.geometry.Polygon 125 | Bounding box Polygon. 126 | 127 | boxes : list 128 | List of shapely.geometry.Polygon. 129 | 130 | Returns 131 | ------- 132 | iou : np.ndarray 133 | Array of iou between box and boxes. 134 | 135 | """ 136 | # Calculate intersection areas 137 | iou = [box.intersection(b).area / box.union(b).area for b in boxes] 138 | 139 | return np.array(iou, dtype=np.float32) 140 | 141 | 142 | def convert_format(boxes_array): 143 | """ 144 | Convert boxes array to shapely.geometry.Polygon format. 145 | Parameters 146 | ---------- 147 | boxes_array : np.ndarray 148 | (N, 4, 2) or (N, 8, 3). 149 | 150 | Returns 151 | ------- 152 | list of converted shapely.geometry.Polygon object. 153 | 154 | """ 155 | polygons = [Polygon([(box[i, 0], box[i, 1]) for i in range(4)]) for box in 156 | boxes_array] 157 | return np.array(polygons) 158 | 159 | 160 | def torch_tensor_to_numpy(torch_tensor): 161 | """ 162 | Convert a torch tensor to numpy. 163 | 164 | Parameters 165 | ---------- 166 | torch_tensor : torch.Tensor 167 | 168 | Returns 169 | ------- 170 | A numpy array. 
171 | """ 172 | return torch_tensor.numpy() if not torch_tensor.is_cuda else \ 173 | torch_tensor.cpu().detach().numpy() 174 | -------------------------------------------------------------------------------- /v2xvit/utils/eval_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | import torch 5 | 6 | from v2xvit.utils import common_utils 7 | from v2xvit.hypes_yaml import yaml_utils 8 | 9 | 10 | def voc_ap(rec, prec): 11 | """ 12 | VOC 2010 Average Precision. 13 | """ 14 | rec.insert(0, 0.0) 15 | rec.append(1.0) 16 | mrec = rec[:] 17 | 18 | prec.insert(0, 0.0) 19 | prec.append(0.0) 20 | mpre = prec[:] 21 | 22 | for i in range(len(mpre) - 2, -1, -1): 23 | mpre[i] = max(mpre[i], mpre[i + 1]) 24 | 25 | i_list = [] 26 | for i in range(1, len(mrec)): 27 | if mrec[i] != mrec[i - 1]: 28 | i_list.append(i) 29 | 30 | ap = 0.0 31 | for i in i_list: 32 | ap += ((mrec[i] - mrec[i - 1]) * mpre[i]) 33 | return ap, mrec, mpre 34 | 35 | 36 | def caluclate_tp_fp(det_boxes, det_score, gt_boxes, result_stat, iou_thresh): 37 | """ 38 | Calculate the true positive and false positive numbers of the current 39 | frames. 40 | 41 | Parameters 42 | ---------- 43 | det_boxes : torch.Tensor 44 | The detection bounding box, shape (N, 8, 3) or (N, 4, 2). 45 | det_score :torch.Tensor 46 | The confidence score for each preditect bounding box. 47 | gt_boxes : torch.Tensor 48 | The groundtruth bounding box. 49 | result_stat: dict 50 | A dictionary contains fp, tp and gt number. 51 | iou_thresh : float 52 | The iou thresh. 53 | """ 54 | # fp, tp and gt in the current frame 55 | fp = [] 56 | tp = [] 57 | gt = gt_boxes.shape[0] 58 | if det_boxes is not None: 59 | # convert bounding boxes to numpy array 60 | det_boxes = common_utils.torch_tensor_to_numpy(det_boxes) 61 | det_score = common_utils.torch_tensor_to_numpy(det_score) 62 | gt_boxes = common_utils.torch_tensor_to_numpy(gt_boxes) 63 | 64 | # sort the prediction bounding box by score 65 | score_order_descend = np.argsort(-det_score) 66 | det_polygon_list = list(common_utils.convert_format(det_boxes)) 67 | gt_polygon_list = list(common_utils.convert_format(gt_boxes)) 68 | 69 | # match prediction and gt bounding box 70 | for i in range(score_order_descend.shape[0]): 71 | det_polygon = det_polygon_list[score_order_descend[i]] 72 | ious = common_utils.compute_iou(det_polygon, gt_polygon_list) 73 | 74 | if len(gt_polygon_list) == 0 or np.max(ious) < iou_thresh: 75 | fp.append(1) 76 | tp.append(0) 77 | continue 78 | 79 | fp.append(0) 80 | tp.append(1) 81 | 82 | gt_index = np.argmax(ious) 83 | gt_polygon_list.pop(gt_index) 84 | 85 | result_stat[iou_thresh]['fp'] += fp 86 | result_stat[iou_thresh]['tp'] += tp 87 | result_stat[iou_thresh]['gt'] += gt 88 | 89 | 90 | def calculate_ap(result_stat, iou): 91 | """ 92 | Calculate the average precision and recall, and save them into a txt. 93 | 94 | Parameters 95 | ---------- 96 | result_stat : dict 97 | A dictionary contains fp, tp and gt number. 
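The evaluation path ties the polygon helpers convert_format and compute_iou from common_utils.py above to the per-frame caluclate_tp_fp routine: it is called once per frame and per IoU threshold, accumulating into a shared result_stat dictionary that eval_final_results (further below in this file) reduces to AP numbers. A minimal single-frame sketch; the boxes and the local save path are made up:

import torch
from v2xvit.utils.eval_utils import caluclate_tp_fp, eval_final_results

result_stat = {t: {'tp': [], 'fp': [], 'gt': 0} for t in (0.3, 0.5, 0.7)}

det_boxes = torch.tensor([[[0., 0.], [2., 0.], [2., 2.], [0., 2.]]])  # (1, 4, 2) prediction
det_score = torch.tensor([0.9])
gt_boxes = torch.tensor([[[0., 0.], [2., 0.], [2., 2.], [0., 2.]]])   # perfect match

for t in (0.3, 0.5, 0.7):
    caluclate_tp_fp(det_boxes, det_score, gt_boxes, result_stat, t)

ap_30, ap_50, ap_70 = eval_final_results(result_stat, save_path='.')  # also writes ./eval.yaml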
98 | iou : float 99 | """ 100 | iou_5 = result_stat[iou] 101 | 102 | fp = iou_5['fp'] 103 | tp = iou_5['tp'] 104 | assert len(fp) == len(tp) 105 | 106 | gt_total = iou_5['gt'] 107 | 108 | cumsum = 0 109 | for idx, val in enumerate(fp): 110 | fp[idx] += cumsum 111 | cumsum += val 112 | 113 | cumsum = 0 114 | for idx, val in enumerate(tp): 115 | tp[idx] += cumsum 116 | cumsum += val 117 | 118 | rec = tp[:] 119 | for idx, val in enumerate(tp): 120 | rec[idx] = float(tp[idx]) / gt_total 121 | 122 | prec = tp[:] 123 | for idx, val in enumerate(tp): 124 | prec[idx] = float(tp[idx]) / (fp[idx] + tp[idx]) 125 | 126 | ap, mrec, mprec = voc_ap(rec[:], prec[:]) 127 | 128 | return ap, mrec, mprec 129 | 130 | 131 | def eval_final_results(result_stat, save_path): 132 | dump_dict = {} 133 | 134 | ap_30, mrec_30, mpre_30 = calculate_ap(result_stat, 0.30) 135 | ap_50, mrec_50, mpre_50 = calculate_ap(result_stat, 0.50) 136 | ap_70, mrec_70, mpre_70 = calculate_ap(result_stat, 0.70) 137 | 138 | dump_dict.update({'ap30': ap_30, 139 | 'ap_50': ap_50, 140 | 'ap_70': ap_70, 141 | 'mpre_50': mpre_50, 142 | 'mrec_50': mrec_50, 143 | 'mpre_70': mpre_70, 144 | 'mrec_70': mrec_70, 145 | }) 146 | yaml_utils.save_yaml(dump_dict, os.path.join(save_path, 'eval.yaml')) 147 | 148 | print('The Average Precision at IOU 0.3 is %.2f, ' 149 | 'The Average Precision at IOU 0.5 is %.2f, ' 150 | 'The Average Precision at IOU 0.7 is %.2f' % (ap_30, ap_50, ap_70)) 151 | return ap_30,ap_50,ap_70 152 | -------------------------------------------------------------------------------- /v2xvit/utils/pcd_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility functions related to point cloud 3 | """ 4 | 5 | import open3d as o3d 6 | import numpy as np 7 | 8 | 9 | def pcd_to_np(pcd_file): 10 | """ 11 | Read pcd and return numpy array. 12 | 13 | Parameters 14 | ---------- 15 | pcd_file : str 16 | The pcd file that contains the point cloud. 17 | 18 | Returns 19 | ------- 20 | pcd : o3d.PointCloud 21 | PointCloud object, used for visualization 22 | pcd_np : np.ndarray 23 | The lidar data in numpy format, shape:(n, 4) 24 | 25 | """ 26 | pcd = o3d.io.read_point_cloud(pcd_file) 27 | 28 | xyz = np.asarray(pcd.points) 29 | # we save the intensity in the first channel 30 | intensity = np.expand_dims(np.asarray(pcd.colors)[:, 0], -1) 31 | pcd_np = np.hstack((xyz, intensity)) 32 | 33 | return np.asarray(pcd_np, dtype=np.float32) 34 | 35 | 36 | def mask_points_by_range(points, limit_range): 37 | """ 38 | Remove the lidar points out of the boundary. 39 | 40 | Parameters 41 | ---------- 42 | points : np.ndarray 43 | Lidar points under lidar sensor coordinate system. 44 | 45 | limit_range : list 46 | [x_min, y_min, z_min, x_max, y_max, z_max] 47 | 48 | Returns 49 | ------- 50 | points : np.ndarray 51 | Filtered lidar points. 52 | """ 53 | 54 | mask = (points[:, 0] > limit_range[0]) & (points[:, 0] < limit_range[3])\ 55 | & (points[:, 1] > limit_range[1]) & ( 56 | points[:, 1] < limit_range[4]) \ 57 | & (points[:, 2] > limit_range[2]) & ( 58 | points[:, 2] < limit_range[5]) 59 | 60 | points = points[mask] 61 | 62 | return points 63 | 64 | 65 | def mask_ego_points(points): 66 | """ 67 | Remove the lidar points of the ego vehicle itself. 68 | 69 | Parameters 70 | ---------- 71 | points : np.ndarray 72 | Lidar points under lidar sensor coordinate system. 73 | 74 | Returns 75 | ------- 76 | points : np.ndarray 77 | Filtered lidar points. 
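A typical preprocessing chain built from the helpers above: load a .pcd file, crop it to the detection range, then drop the ego vehicle's own returns with mask_ego_points (defined next). The file name and range values below are placeholders, not taken from the original code.

from v2xvit.utils.pcd_utils import (pcd_to_np, mask_points_by_range,
                                    mask_ego_points)

lidar = pcd_to_np('000068.pcd')                     # (n, 4): x, y, z, intensity
cav_lidar_range = [-140.8, -40, -3, 140.8, 40, 1]   # [x_min, y_min, z_min, x_max, y_max, z_max], illustrative
lidar = mask_points_by_range(lidar, cav_lidar_range)
lidar = mask_ego_points(lidar)                      # remove hits on the ego body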
78 | """ 79 | mask = (points[:, 0] >= -1.95) & (points[:, 0] <= 2.95) \ 80 | & (points[:, 1] >= -1.1) & (points[:, 1] <= 1.1) 81 | points = points[np.logical_not(mask)] 82 | 83 | return points 84 | 85 | 86 | def shuffle_points(points): 87 | shuffle_idx = np.random.permutation(points.shape[0]) 88 | points = points[shuffle_idx] 89 | 90 | return points 91 | 92 | 93 | def lidar_project(lidar_data, extrinsic): 94 | """ 95 | Given the extrinsic matrix, project lidar data to another space. 96 | 97 | Parameters 98 | ---------- 99 | lidar_data : np.ndarray 100 | Lidar data, shape: (n, 4) 101 | 102 | extrinsic : np.ndarray 103 | Extrinsic matrix, shape: (4, 4) 104 | 105 | Returns 106 | ------- 107 | projected_lidar : np.ndarray 108 | Projected lida data, shape: (n, 4) 109 | """ 110 | 111 | lidar_xyz = lidar_data[:, :3].T 112 | # (3, n) -> (4, n), homogeneous transformation 113 | lidar_xyz = np.r_[lidar_xyz, [np.ones(lidar_xyz.shape[1])]] 114 | lidar_int = lidar_data[:, 3] 115 | 116 | # transform to ego vehicle space, (3, n) 117 | project_lidar_xyz = np.dot(extrinsic, lidar_xyz)[:3, :] 118 | # (n, 3) 119 | project_lidar_xyz = project_lidar_xyz.T 120 | # concatenate the intensity with xyz, (n, 4) 121 | projected_lidar = np.hstack((project_lidar_xyz, 122 | np.expand_dims(lidar_int, -1))) 123 | 124 | return projected_lidar 125 | 126 | 127 | def projected_lidar_stack(projected_lidar_list): 128 | """ 129 | Stack all projected lidar together. 130 | 131 | Parameters 132 | ---------- 133 | projected_lidar_list : list 134 | The list containing all projected lidar. 135 | 136 | Returns 137 | ------- 138 | stack_lidar : np.ndarray 139 | Stack all projected lidar data together. 140 | """ 141 | stack_lidar = [] 142 | for lidar_data in projected_lidar_list: 143 | stack_lidar.append(lidar_data) 144 | 145 | return np.vstack(stack_lidar) 146 | 147 | 148 | def downsample_lidar(pcd_np, num): 149 | """ 150 | Downsample the lidar points to a certain number. 151 | 152 | Parameters 153 | ---------- 154 | pcd_np : np.ndarray 155 | The lidar points, (n, 4). 156 | 157 | num : int 158 | The downsample target number. 159 | 160 | Returns 161 | ------- 162 | pcd_np : np.ndarray 163 | The downsampled lidar points. 164 | """ 165 | assert pcd_np.shape[0] >= num 166 | 167 | selected_index = np.random.choice((pcd_np.shape[0]), 168 | num, 169 | replace=False) 170 | pcd_np = pcd_np[selected_index] 171 | 172 | return pcd_np 173 | 174 | 175 | def downsample_lidar_minimum(pcd_np_list): 176 | """ 177 | Given a list of pcd, find the minimum number and downsample all 178 | point clouds to the minimum number. 179 | 180 | Parameters 181 | ---------- 182 | pcd_np_list : list 183 | A list of pcd numpy array(n, 4). 184 | Returns 185 | ------- 186 | pcd_np_list : list 187 | Downsampled point clouds. 
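lidar_project and projected_lidar_stack above are usually combined with x1_to_x2 from transformation_utils.py (listed further below) to merge a neighbouring CAV's cloud into the ego frame. The poses and point counts here are made up:

import numpy as np
from v2xvit.utils.pcd_utils import lidar_project, projected_lidar_stack
from v2xvit.utils.transformation_utils import x1_to_x2

ego_pose = [10.0, 2.0, 0.3, 0.0, 90.0, 0.0]          # [x, y, z, roll, yaw, pitch]
cav_pose = [25.0, 5.0, 0.3, 0.0, 88.0, 0.0]
cav_to_ego = x1_to_x2(cav_pose, ego_pose)            # (4, 4) transform: cav lidar -> ego frame

ego_lidar = np.random.rand(1000, 4).astype(np.float32)
cav_lidar = np.random.rand(800, 4).astype(np.float32)
cav_in_ego = lidar_project(cav_lidar, cav_to_ego)
merged = projected_lidar_stack([ego_lidar, cav_in_ego])  # (1800, 4) stacked cloud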
188 | """ 189 | minimum = np.Inf 190 | 191 | for i in range(len(pcd_np_list)): 192 | num = pcd_np_list[i].shape[0] 193 | minimum = num if minimum > num else minimum 194 | 195 | for (i, pcd_np) in enumerate(pcd_np_list): 196 | pcd_np_list[i] = downsample_lidar(pcd_np, minimum) 197 | 198 | return pcd_np_list 199 | -------------------------------------------------------------------------------- /v2xvit/utils/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from Cython.Build import cythonize 3 | import numpy 4 | setup( 5 | name='box overlaps', 6 | ext_modules=cythonize('v2xvit/utils/box_overlaps.pyx'), 7 | include_dirs=[numpy.get_include()] 8 | ) -------------------------------------------------------------------------------- /v2xvit/utils/transformation_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Transformation utils 3 | """ 4 | 5 | import numpy as np 6 | 7 | 8 | def x_to_world(pose): 9 | """ 10 | The transformation matrix from x-coordinate system to carla world system 11 | 12 | Parameters 13 | ---------- 14 | pose : list 15 | [x, y, z, roll, yaw, pitch] 16 | 17 | Returns 18 | ------- 19 | matrix : np.ndarray 20 | The transformation matrix. 21 | """ 22 | x, y, z, roll, yaw, pitch = pose[:] 23 | 24 | # used for rotation matrix 25 | c_y = np.cos(np.radians(yaw)) 26 | s_y = np.sin(np.radians(yaw)) 27 | c_r = np.cos(np.radians(roll)) 28 | s_r = np.sin(np.radians(roll)) 29 | c_p = np.cos(np.radians(pitch)) 30 | s_p = np.sin(np.radians(pitch)) 31 | 32 | matrix = np.identity(4) 33 | # translation matrix 34 | matrix[0, 3] = x 35 | matrix[1, 3] = y 36 | matrix[2, 3] = z 37 | 38 | # rotation matrix 39 | matrix[0, 0] = c_p * c_y 40 | matrix[0, 1] = c_y * s_p * s_r - s_y * c_r 41 | matrix[0, 2] = -c_y * s_p * c_r - s_y * s_r 42 | matrix[1, 0] = s_y * c_p 43 | matrix[1, 1] = s_y * s_p * s_r + c_y * c_r 44 | matrix[1, 2] = -s_y * s_p * c_r + c_y * s_r 45 | matrix[2, 0] = s_p 46 | matrix[2, 1] = -c_p * s_r 47 | matrix[2, 2] = c_p * c_r 48 | 49 | return matrix 50 | 51 | 52 | def x1_to_x2(x1, x2): 53 | """ 54 | Transformation matrix from x1 to x2. 55 | 56 | Parameters 57 | ---------- 58 | x1 : list 59 | The pose of x1 under world coordinates. 60 | x2 : list 61 | The pose of x2 under world coordinates. 62 | 63 | Returns 64 | ------- 65 | transformation_matrix : np.ndarray 66 | The transformation matrix. 67 | 68 | """ 69 | x1_to_world = x_to_world(x1) 70 | x2_to_world = x_to_world(x2) 71 | world_to_x2 = np.linalg.inv(x2_to_world) 72 | 73 | transformation_matrix = np.dot(world_to_x2, x1_to_world) 74 | return transformation_matrix 75 | 76 | 77 | def dist_to_continuous(p_dist, displacement_dist, res, downsample_rate): 78 | """ 79 | Convert points discretized format to continuous space for BEV representation. 80 | Parameters 81 | ---------- 82 | p_dist : numpy.array 83 | Points in discretized coorindates. 84 | 85 | displacement_dist : numpy.array 86 | Discretized coordinates of bottom left origin. 87 | 88 | res : float 89 | Discretization resolution. 90 | 91 | downsample_rate : int 92 | Dowmsamping rate. 93 | 94 | Returns 95 | ------- 96 | p_continuous : numpy.array 97 | Points in continuous coorindates. 
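The setup script above is normally run from the repository root, e.g. python v2xvit/utils/setup.py build_ext --inplace, since the .pyx path it cythonizes is relative to the working directory. A quick sanity check for the pose transforms just defined; the pose values are arbitrary:

import numpy as np
from v2xvit.utils.transformation_utils import x_to_world, x1_to_x2

pose = [12.0, -4.5, 0.3, 0.0, 35.0, 0.0]   # [x, y, z, roll, yaw, pitch], angles in degrees
T = x_to_world(pose)                        # 4x4 pose-to-world matrix
assert np.allclose(x1_to_x2(pose, pose), np.eye(4), atol=1e-6)  # self-transform is identity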
98 | 99 | """ 100 | p_dist = np.copy(p_dist) 101 | p_dist = p_dist + displacement_dist 102 | p_continuous = p_dist * res * downsample_rate 103 | return p_continuous 104 | -------------------------------------------------------------------------------- /v2xvit/version.py: -------------------------------------------------------------------------------- 1 | """Specifies the current version number of v2xvit.""" 2 | 3 | __version__ = "0.1.0" 4 | -------------------------------------------------------------------------------- /v2xvit/visualization/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ydk122024/How2comm/8d357abdcb265ed4db5e4c6d9ee5400ea5799abe/v2xvit/visualization/__init__.py -------------------------------------------------------------------------------- /v2xvit/visualization/pinhole_param.json: -------------------------------------------------------------------------------- 1 | { 2 | "class_name" : "PinholeCameraParameters", 3 | "extrinsic" : 4 | [ 5 | 1.0, 6 | -0.0, 7 | -0.0, 8 | 0.0, 9 | 0.0, 10 | -1.0, 11 | -0.0, 12 | 0.0, 13 | 0.0, 14 | -0.0, 15 | -1.0, 16 | 0.0, 17 | 14.870189666748047, 18 | 0.0001621246337890625, 19 | 141.0903074604017, 20 | 1.0 21 | ], 22 | "intrinsic" : 23 | { 24 | "height" : 1025, 25 | "intrinsic_matrix" : 26 | [ 27 | 887.67603887904966, 28 | 0.0, 29 | 0.0, 30 | 0.0, 31 | 887.67603887904966, 32 | 0.0, 33 | 926.0, 34 | 512.0, 35 | 1.0 36 | ], 37 | "width" : 1853 38 | }, 39 | "version_major" : 1, 40 | "version_minor" : 0 41 | } -------------------------------------------------------------------------------- /v2xvit/visualization/vis_data_sequence.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | from torch.utils.data import DataLoader 4 | 5 | from v2xvit.hypes_yaml.yaml_utils import load_yaml 6 | from v2xvit.visualization import vis_utils 7 | from v2xvit.data_utils.datasets.early_fusion_vis_dataset import \ 8 | EarlyFusionVisDataset 9 | 10 | 11 | def vis_parser(): 12 | parser = argparse.ArgumentParser(description="data visualization") 13 | parser.add_argument('--color_mode', type=str, default="intensity", 14 | help='lidar color rendering mode, e.g. intensity,' 15 | 'z-value or constant.') 16 | opt = parser.parse_args() 17 | return opt 18 | 19 | 20 | if __name__ == '__main__': 21 | current_path = os.path.dirname(os.path.realpath(__file__)) 22 | params = load_yaml(os.path.join(current_path, 23 | '../hypes_yaml/visualization.yaml')) 24 | 25 | opencda_dataset = EarlyFusionVisDataset(params, visualize=True, 26 | train=False) 27 | data_loader = DataLoader(opencda_dataset, batch_size=1, num_workers=8, 28 | collate_fn=opencda_dataset.collate_batch_train, 29 | shuffle=False, 30 | pin_memory=False) 31 | 32 | opt = vis_parser() 33 | vis_utils.visualize_sequence_dataloader(data_loader, 34 | params['postprocess']['order'], 35 | color_mode=opt.color_mode) 36 | --------------------------------------------------------------------------------
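The visualization entry point above can be launched, for example, as python v2xvit/visualization/vis_data_sequence.py --color_mode z-value (the other modes per its help string are intensity and constant). Note that it loads ../hypes_yaml/visualization.yaml relative to its own location, i.e. v2xvit/hypes_yaml/visualization.yaml, which is not part of this listing and must supply the dataset paths and the postprocess order the visualizer expects.

Finally, a small numeric example for dist_to_continuous from transformation_utils.py above, since its docstring alone is terse; all values are made up:

import numpy as np
from v2xvit.utils.transformation_utils import dist_to_continuous

cells = np.array([[10, 4], [37, 12]])   # discretised BEV cell indices
origin = np.array([-100, -40])          # discretised bottom-left origin, illustrative
points = dist_to_continuous(cells, origin, res=0.4, downsample_rate=2)
# equivalent to (cells + origin) * 0.4 * 2, i.e. metric x/y coordinates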