├── .gitignore ├── LICENSE ├── README.md ├── image.png ├── requirements.txt ├── setup.py └── v2xvit ├── __init__.py ├── data_utils ├── __init__.py ├── augmentor │ ├── __init__.py │ ├── augment_utils.py │ └── data_augmentor.py ├── datasets │ ├── __init__.py │ ├── basedataset.py │ ├── early_fusion_dataset.py │ ├── early_fusion_vis_dataset.py │ ├── intermediate_fusion_dataset.py │ └── late_fusion_dataset.py ├── post_processor │ ├── __init__.py │ ├── base_postprocessor.py │ ├── bev_postprocessor.py │ └── voxel_postprocessor.py └── pre_processor │ ├── __init__.py │ ├── base_preprocessor.py │ ├── bev_preprocessor.py │ ├── sp_voxel_preprocessor.py │ └── voxel_preprocessor.py ├── hypes_yaml ├── __init__.py ├── how2comm │ └── v2xset_how2comm_stcformer.yaml └── yaml_utils.py ├── loss ├── __init__.py ├── pixor_loss.py ├── point_pillar_loss.py └── voxel_net_loss.py ├── models ├── __init__.py ├── comm_modules │ ├── communication.py │ └── mutual_communication.py ├── fuse_modules │ ├── __init__.py │ ├── fuse_utils.py │ ├── how2comm_deformable.py │ ├── how2comm_deformable_transformer.py │ └── stcformer.py ├── point_pillar_how2comm.py └── sub_modules │ ├── __init__.py │ ├── base_bev_backbone.py │ ├── base_bev_backbone_resnet.py │ ├── base_transformer.py │ ├── downsample_conv.py │ ├── feature_flow.py │ ├── fuse_utils.py │ ├── how2comm_preprocess.py │ ├── naive_compress.py │ ├── pillar_vfe.py │ ├── point_pillar_scatter.py │ ├── resblock.py │ ├── self_attn.py │ └── torch_transformation_utils.py ├── tools ├── __init__.py ├── debug_utils.py ├── inference.py ├── infrence_utils.py ├── multi_gpu_utils.py ├── train.py └── train_utils.py ├── utils ├── __init__.py ├── box_overlaps.pyx ├── box_utils.py ├── common_utils.py ├── eval_utils.py ├── pcd_utils.py ├── setup.py └── transformation_utils.py ├── version.py └── visualization ├── __init__.py ├── pinhole_param.json ├── vis_data_sequence.py └── vis_utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | v2xset/ 2 | # Byte-compiled / optimized / DLL files 3 | __pycache__/ 4 | *.py[cod] 5 | *$py.class 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | pip-wheel-metadata/ 25 | share/python-wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .nox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | *.py,cover 52 | .hypothesis/ 53 | .pytest_cache/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | db.sqlite3-journal 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | .python-version 87 | 88 | # pipenv 89 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 90 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 91 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 92 | # install all needed dependencies. 93 | #Pipfile.lock 94 | 95 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 96 | __pypackages__/ 97 | 98 | # Celery stuff 99 | celerybeat-schedule 100 | celerybeat.pid 101 | 102 | # SageMath parsed files 103 | *.sage.py 104 | 105 | # Environments 106 | .env 107 | .venv 108 | env/ 109 | venv/ 110 | ENV/ 111 | env.bak/ 112 | venv.bak/ 113 | 114 | # Spyder project settings 115 | .spyderproject 116 | .spyproject 117 | 118 | # Rope project settings 119 | .ropeproject 120 | 121 | # mkdocs documentation 122 | /site 123 | 124 | # mypy 125 | .mypy_cache/ 126 | .dmypy.json 127 | dmypy.json 128 | 129 | # Pyre type checker 130 | .pyre/ 131 | logs/ 132 | *.c 133 | *.so 134 | .idea 135 | opv2x 136 | .DS_Store 137 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Dicken 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # How2comm: Communication-Efficient and Collaboration-Pragmatic Multi-Agent Perception 2 | 3 | The official repository of the NeurIPS2023 paper: 4 | 5 | ![teaser](image.png) 6 | 7 | > [**How2comm: Communication-Efficient and Collaboration-Pragmatic Multi-Agent Perception**](https://openreview.net/pdf?id=Dbaxm9ujq6) 8 | > Dingkang Yang\*, Kun Yang\*, Yuzheng Wang, Jing Liu, Zhi Xu, Rongbin Yin, Peng Zhai, Lihua Zhang
9 | 10 | 11 | 12 | ## Abstract 13 | 14 | Multi-agent collaborative perception has recently received widespread attention as an emerging application in driving scenarios. Despite the advancements in previous efforts, challenges remain due to various dilemmas in the perception procedure, including communication redundancy, transmission delay, and collaboration heterogeneity. To tackle these issues, we propose *How2comm*, a collaborative perception framework that seeks a trade-off between perception performance and communication bandwidth. Our novelties lie in three aspects. First, we devise a mutual information-aware communication mechanism to maximally sustain the informative features shared by collaborators. The spatial-channel filtering is adopted to perform effective feature sparsification for efficient communication. Second, we present a flow-guided delay compensation strategy to predict future characteristics from collaborators and eliminate feature misalignment due to temporal asynchrony. Ultimately, a pragmatic collaboration transformer is introduced to integrate holistic spatial semantics and temporal context clues among agents. 15 | Our framework is thoroughly evaluated on several LiDAR-based collaborative detection datasets in real-world and simulated scenarios. Comprehensive experiments demonstrate the superiority of How2comm and the effectiveness of all its vital components. 16 | 17 | 18 | ## Installation 19 | Please refer to [OpenCOOD](https://opencood.readthedocs.io/en/latest/md_files/installation.html) and [centerformer](https://github.com/TuSimple/centerformer/blob/master/docs/INSTALL.md) for more installation details. 20 | 21 | Here we install the environment based on the OpenCOOD and centerformer repos. 22 | 23 | ```bash 24 | # Clone the OpenCOOD repo 25 | git clone https://github.com/DerrickXuNu/OpenCOOD.git 26 | cd OpenCOOD 27 | 28 | # Create a conda environment 29 | conda env create -f environment.yml 30 | conda activate opencood 31 | 32 | # Install pytorch 33 | conda install -y pytorch torchvision cudatoolkit=11.3 -c pytorch 34 | 35 | # Install spconv 36 | pip install spconv-cu113 37 | 38 | # Install the basic library for deformable attention 39 | git clone https://github.com/TuSimple/centerformer.git 40 | cd centerformer 41 | 42 | # Install requirements 43 | pip install -r requirements.txt 44 | sh setup.sh 45 | 46 | # Clone our repo 47 | git clone https://github.com/ydk122024/How2comm.git && cd How2comm 48 | 49 | # Install v2xvit into the conda environment 50 | python setup.py develop 51 | python v2xvit/utils/setup.py build_ext --inplace 52 | ``` 53 | 54 | ## Data 55 | Please download the [V2XSet](https://drive.google.com/drive/folders/1r5sPiBEvo8Xby-nMaWUTnJIPK6WhY1B6) and [OPV2V](https://drive.google.com/drive/folders/1dkDeHlwOVbmgXcDazZvO6TFEZ6V_7WUu) datasets. The dataset folders should be structured as follows: 56 | ```sh 57 | v2xset # the downloaded v2xset data 58 | ├── train 59 | ├── validate 60 | └── test 61 | opv2v # the downloaded opv2v data 62 | ├── train 63 | ├── validate 64 | └── test 65 | ``` 66 | 67 | ## Getting Started 68 | ### Test with pretrained models 69 | We provide our pretrained models on the V2XSet and OPV2V datasets. The download URLs are as follows: 70 | 71 | * Baidu Disk URL is [here](https://pan.baidu.com/share/init?surl=oTepWy7q0U_x1jXNThbyMw&pwd=vaz2). 72 | 73 | 74 | * Google Drive URL is [here](https://drive.google.com/drive/folders/1xuUAJ82BgCP4EERW6S98NjWTzF8Hqrib).
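After downloading a checkpoint from either link above, unpack it into the log folder described in the next paragraph. A rough sketch of the expected layout is shown below; the checkpoint file names are illustrative assumptions (OpenCOOD-style `net_epochX.pth` naming) and should be matched to whatever the downloaded archive actually contains:

```sh
v2xvit/logs/how2comm
├── config.yaml       # configuration read when --model_dir points here
├── net_epoch32.pth   # e.g., the epoch evaluated on V2XSet
└── net_epoch36.pth   # e.g., the epoch evaluated on OPV2V
```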
75 | 76 | 77 | To test the provided pretrained models of How2comm, please download the model file and put it under `v2xvit/logs/how2comm`. The `validate_path` in the corresponding `config.yaml` file should be changed to `v2xset/test` or `opv2v/test`. 78 | 79 | Run the following command to perform inference: 80 | ```sh 81 | python v2xvit/tools/inference.py --model_dir ${CONFIG_DIR} --eval_epoch ${EVAL_EPOCH} 82 | ``` 83 | The explanation of the arguments is as follows: 84 | - `model_dir`: the path to your saved model. 85 | - `eval_epoch`: the epoch number to evaluate. 86 | 87 | You can use the following commands to test the provided pretrained models: 88 | ```sh 89 | V2XSet dataset: python v2xvit/tools/inference.py --model_dir ${CONFIG_DIR} --eval_epoch 32 90 | OPV2V dataset: python v2xvit/tools/inference.py --model_dir ${CONFIG_DIR} --eval_epoch 36 91 | ``` 92 | 93 | ### Train your model 94 | We follow OpenCOOD and use yaml files to configure the training parameters. You can use the following command to train your own model from scratch or from a saved checkpoint: 95 | ```sh 96 | CUDA_LAUNCH_BLOCKING=1 CUDA_VISIBLE_DEVICES=1 python v2xvit/tools/train.py --hypes_yaml ${YAML_DIR} --model_dir ${CHECKPOINT_DIR} 97 | ``` 98 | The explanation of the arguments is as follows: 99 | - `hypes_yaml`: the path of the training configuration file, e.g. `v2xvit/hypes_yaml/how2comm/v2xset_how2comm_stcformer.yaml`. You can change the configuration parameters in this provided yaml file. 100 | - `model_dir` (optional): the path of the checkpoint folder, used to fine-tune trained models. When `model_dir` is 101 | given, the trainer discards `hypes_yaml` and loads the `config.yaml` in the checkpoint folder (see the usage sketch at the end of this README). 102 | 103 | ## Citation 104 | If you use How2comm in your research, please cite the following paper: 105 | ```bibtex 106 | @inproceedings{yang2023how2comm, 107 | title={How2comm: Communication-efficient and collaboration-pragmatic multi-agent perception}, 108 | author={Yang, Dingkang and Yang, Kun and Wang, Yuzheng and Liu, Jing and Xu, Zhi and Yin, Rongbin and Zhai, Peng and Zhang, Lihua}, 109 | booktitle={Thirty-seventh Conference on Neural Information Processing Systems (NeurIPS)}, 110 | year={2023} 111 | } 112 | ``` 113 | 114 | ## Acknowledgement 115 | Many thanks to Runsheng Xu for the high-quality datasets and codebases, including [V2XSet](https://drive.google.com/drive/folders/1r5sPiBEvo8Xby-nMaWUTnJIPK6WhY1B6), [OPV2V](https://drive.google.com/drive/folders/1dkDeHlwOVbmgXcDazZvO6TFEZ6V_7WUu), [OpenCOOD](https://github.com/DerrickXuNu/OpenCOOD) and [OpenCDA](https://github.com/ucla-mobility/OpenCDA). Thanks also go to [Where2comm](https://github.com/MediaBrain-SJTU/Where2comm.git) and [centerformer](https://github.com/TuSimple/centerformer.git) for their excellent codebases.
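As a concrete usage sketch of the training and inference commands above (the GPU id and all paths below are placeholders; adapt them to your own setup):

```sh
# Train from scratch with the provided How2comm configuration
CUDA_VISIBLE_DEVICES=0 python v2xvit/tools/train.py \
    --hypes_yaml v2xvit/hypes_yaml/how2comm/v2xset_how2comm_stcformer.yaml

# Resume / fine-tune: when --model_dir is given, the config.yaml inside the
# checkpoint folder is used and --hypes_yaml is ignored
CUDA_VISIBLE_DEVICES=0 python v2xvit/tools/train.py \
    --hypes_yaml v2xvit/hypes_yaml/how2comm/v2xset_how2comm_stcformer.yaml \
    --model_dir v2xvit/logs/how2comm

# Evaluate a checkpoint at a given epoch number
python v2xvit/tools/inference.py --model_dir v2xvit/logs/how2comm --eval_epoch 32
```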
116 | -------------------------------------------------------------------------------- /image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ydk122024/How2comm/8d357abdcb265ed4db5e4c6d9ee5400ea5799abe/image.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib 2 | numpy 3 | open3d 4 | opencv-python 5 | cython 6 | tensorboardX 7 | shapely 8 | einops 9 | 10 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | from os.path import dirname, realpath 5 | from setuptools import setup, find_packages, Distribution 6 | from v2xvit.version import __version__ 7 | 8 | 9 | def _read_requirements_file(): 10 | """Return the elements in requirements.txt.""" 11 | req_file_path = '%s/requirements.txt' % dirname(realpath(__file__)) 12 | with open(req_file_path) as f: 13 | return [line.strip() for line in f] 14 | 15 | 16 | setup( 17 | name='V2XViT', 18 | version=__version__, 19 | packages=find_packages(), 20 | url='https://github.com/ucla-mobility/OpenCDA.git', 21 | license='MIT', 22 | author='Runsheng Xu, Hao Xiang, Zhengzhong Tu', 23 | author_email='rxx3386@ucla.edu', 24 | description='An opensource pytorch framework for autonomous driving ' 25 | 'cooperative detection', 26 | long_description=open("README.md").read(), 27 | install_requires=_read_requirements_file(), 28 | ) 29 | -------------------------------------------------------------------------------- /v2xvit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ydk122024/How2comm/8d357abdcb265ed4db5e4c6d9ee5400ea5799abe/v2xvit/__init__.py -------------------------------------------------------------------------------- /v2xvit/data_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ydk122024/How2comm/8d357abdcb265ed4db5e4c6d9ee5400ea5799abe/v2xvit/data_utils/__init__.py -------------------------------------------------------------------------------- /v2xvit/data_utils/augmentor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ydk122024/How2comm/8d357abdcb265ed4db5e4c6d9ee5400ea5799abe/v2xvit/data_utils/augmentor/__init__.py -------------------------------------------------------------------------------- /v2xvit/data_utils/augmentor/augment_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from v2xvit.utils import common_utils 4 | 5 | 6 | def random_flip_along_x(gt_boxes, points): 7 | """ 8 | Args: 9 | gt_boxes: (N, 7 + C), [x, y, z, dx, dy, dz, heading, [vx], [vy]] 10 | points: (M, 3 + C) 11 | Returns: 12 | """ 13 | enable = np.random.choice([False, True], replace=False, p=[0.5, 0.5]) 14 | if enable: 15 | gt_boxes[:, 1] = -gt_boxes[:, 1] 16 | gt_boxes[:, 6] = -gt_boxes[:, 6] 17 | points[:, 1] = -points[:, 1] 18 | 19 | if gt_boxes.shape[1] > 7: 20 | gt_boxes[:, 8] = -gt_boxes[:, 8] 21 | 22 | return gt_boxes, points 23 | 24 | 25 | def random_flip_along_y(gt_boxes, points): 26 | """ 27 | Args: 28 | gt_boxes: (N, 7 + C), [x, y, z, dx, dy, dz, heading, [vx], [vy]] 29 | points: (M, 3 + C) 30 | Returns: 31 
| """ 32 | enable = np.random.choice([False, True], replace=False, p=[0.5, 0.5]) 33 | if enable: 34 | gt_boxes[:, 0] = -gt_boxes[:, 0] 35 | gt_boxes[:, 6] = -(gt_boxes[:, 6] + np.pi) 36 | points[:, 0] = -points[:, 0] 37 | 38 | if gt_boxes.shape[1] > 7: 39 | gt_boxes[:, 7] = -gt_boxes[:, 7] 40 | 41 | return gt_boxes, points 42 | 43 | 44 | def global_rotation(gt_boxes, points, rot_range): 45 | """ 46 | Args: 47 | gt_boxes: (N, 7 + C), [x, y, z, dx, dy, dz, heading, [vx], [vy]] 48 | points: (M, 3 + C), 49 | rot_range: [min, max] 50 | Returns: 51 | """ 52 | noise_rotation = np.random.uniform(rot_range[0], 53 | rot_range[1]) 54 | points = common_utils.rotate_points_along_z(points[np.newaxis, :, :], 55 | np.array([noise_rotation]))[0] 56 | 57 | gt_boxes[:, 0:3] = \ 58 | common_utils.rotate_points_along_z(gt_boxes[np.newaxis, :, 0:3], 59 | np.array([noise_rotation]))[0] 60 | gt_boxes[:, 6] += noise_rotation 61 | 62 | if gt_boxes.shape[1] > 7: 63 | gt_boxes[:, 7:9] = common_utils.rotate_points_along_z( 64 | np.hstack((gt_boxes[:, 7:9], np.zeros((gt_boxes.shape[0], 1))))[ 65 | np.newaxis, :, :], 66 | np.array([noise_rotation]))[0][:, 0:2] 67 | 68 | return gt_boxes, points 69 | 70 | 71 | def global_scaling(gt_boxes, points, scale_range): 72 | """ 73 | Args: 74 | gt_boxes: (N, 7), [x, y, z, dx, dy, dz, heading] 75 | points: (M, 3 + C), 76 | scale_range: [min, max] 77 | Returns: 78 | """ 79 | if scale_range[1] - scale_range[0] < 1e-3: 80 | return gt_boxes, points 81 | noise_scale = np.random.uniform(scale_range[0], scale_range[1]) 82 | points[:, :3] *= noise_scale 83 | gt_boxes[:, :6] *= noise_scale 84 | 85 | return gt_boxes, points 86 | -------------------------------------------------------------------------------- /v2xvit/data_utils/augmentor/data_augmentor.py: -------------------------------------------------------------------------------- 1 | """ 2 | Class for data augmentation 3 | """ 4 | from functools import partial 5 | 6 | from v2xvit.data_utils.augmentor import augment_utils 7 | 8 | 9 | class DataAugmentor(object): 10 | """ 11 | Data Augmentor. 12 | 13 | Parameters 14 | ---------- 15 | augment_config : list 16 | A list of augmentation configuration. 17 | 18 | Attributes 19 | ---------- 20 | data_augmentor_queue : list 21 | The list of data augmented functions. 
22 | """ 23 | 24 | def __init__(self, augment_config, train=True): 25 | self.data_augmentor_queue = [] 26 | self.train = train 27 | 28 | for cur_cfg in augment_config: 29 | cur_augmentor = getattr(self, cur_cfg['NAME'])(config=cur_cfg) 30 | self.data_augmentor_queue.append(cur_augmentor) 31 | 32 | def random_world_flip(self, data_dict=None, config=None): 33 | if data_dict is None: 34 | return partial(self.random_world_flip, config=config) 35 | 36 | gt_boxes, gt_mask, points = data_dict['object_bbx_center'], \ 37 | data_dict['object_bbx_mask'], \ 38 | data_dict['lidar_np'] 39 | gt_boxes_valid = gt_boxes[gt_mask == 1] 40 | 41 | for cur_axis in config['ALONG_AXIS_LIST']: 42 | assert cur_axis in ['x', 'y'] 43 | gt_boxes_valid, points = getattr(augment_utils, 44 | 'random_flip_along_%s' % cur_axis)( 45 | gt_boxes_valid, points, 46 | ) 47 | 48 | gt_boxes[:gt_boxes_valid.shape[0], :] = gt_boxes_valid 49 | 50 | data_dict['object_bbx_center'] = gt_boxes 51 | data_dict['object_bbx_mask'] = gt_mask 52 | data_dict['lidar_np'] = points 53 | 54 | return data_dict 55 | 56 | def random_world_rotation(self, data_dict=None, config=None): 57 | if data_dict is None: 58 | return partial(self.random_world_rotation, config=config) 59 | 60 | rot_range = config['WORLD_ROT_ANGLE'] 61 | if not isinstance(rot_range, list): 62 | rot_range = [-rot_range, rot_range] 63 | 64 | gt_boxes, gt_mask, points = data_dict['object_bbx_center'], \ 65 | data_dict['object_bbx_mask'], \ 66 | data_dict['lidar_np'] 67 | gt_boxes_valid = gt_boxes[gt_mask == 1] 68 | gt_boxes_valid, points = augment_utils.global_rotation( 69 | gt_boxes_valid, points, rot_range=rot_range 70 | ) 71 | gt_boxes[:gt_boxes_valid.shape[0], :] = gt_boxes_valid 72 | 73 | data_dict['object_bbx_center'] = gt_boxes 74 | data_dict['object_bbx_mask'] = gt_mask 75 | data_dict['lidar_np'] = points 76 | 77 | return data_dict 78 | 79 | def random_world_scaling(self, data_dict=None, config=None): 80 | if data_dict is None: 81 | return partial(self.random_world_scaling, config=config) 82 | 83 | gt_boxes, gt_mask, points = data_dict['object_bbx_center'], \ 84 | data_dict['object_bbx_mask'], \ 85 | data_dict['lidar_np'] 86 | gt_boxes_valid = gt_boxes[gt_mask == 1] 87 | 88 | gt_boxes_valid, points = augment_utils.global_scaling( 89 | gt_boxes_valid, points, config['WORLD_SCALE_RANGE'] 90 | ) 91 | gt_boxes[:gt_boxes_valid.shape[0], :] = gt_boxes_valid 92 | 93 | data_dict['object_bbx_center'] = gt_boxes 94 | data_dict['object_bbx_mask'] = gt_mask 95 | data_dict['lidar_np'] = points 96 | 97 | return data_dict 98 | 99 | def forward(self, data_dict): 100 | """ 101 | Args: 102 | data_dict: 103 | points: (N, 3 + C_in) 104 | gt_boxes: optional, (N, 7) [x, y, z, dx, dy, dz, heading] 105 | gt_names: optional, (N), string 106 | ... 
107 | 108 | Returns: 109 | """ 110 | if self.train: 111 | for cur_augmentor in self.data_augmentor_queue: 112 | data_dict = cur_augmentor(data_dict=data_dict) 113 | 114 | return data_dict 115 | -------------------------------------------------------------------------------- /v2xvit/data_utils/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from v2xvit.data_utils.datasets.late_fusion_dataset import LateFusionDataset 2 | from v2xvit.data_utils.datasets.early_fusion_dataset import EarlyFusionDataset 3 | from v2xvit.data_utils.datasets.intermediate_fusion_dataset import IntermediateFusionDataset 4 | 5 | __all__ = { 6 | 'LateFusionDataset': LateFusionDataset, 7 | 'EarlyFusionDataset': EarlyFusionDataset, 8 | 'IntermediateFusionDataset': IntermediateFusionDataset 9 | } 10 | 11 | # the final range for evaluation 12 | GT_RANGE = [-140, -40, -3, 140, 40, 1] 13 | # The communication range for cavs 14 | COM_RANGE = 70 15 | 16 | 17 | def build_dataset(dataset_cfg, visualize=False, train=True): 18 | dataset_name = dataset_cfg['fusion']['core_method'] 19 | error_message = f"{dataset_name} is not found. " \ 20 | f"Please add your processor file's name in opencood/" \ 21 | f"data_utils/datasets/init.py" 22 | assert dataset_name in ['LateFusionDataset', 'EarlyFusionDataset', 23 | 'IntermediateFusionDataset'], error_message 24 | 25 | dataset = __all__[dataset_name]( 26 | params=dataset_cfg, 27 | visualize=visualize, 28 | train=train 29 | ) 30 | 31 | return dataset 32 | -------------------------------------------------------------------------------- /v2xvit/data_utils/datasets/early_fusion_vis_dataset.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is a dataset for early fusion visualization only. 3 | """ 4 | from collections import OrderedDict 5 | 6 | import numpy as np 7 | import torch 8 | 9 | from v2xvit.utils import box_utils 10 | from v2xvit.data_utils.post_processor import build_postprocessor 11 | from v2xvit.data_utils.datasets import basedataset 12 | from v2xvit.data_utils.pre_processor import build_preprocessor 13 | from v2xvit.utils.pcd_utils import \ 14 | mask_points_by_range, mask_ego_points, shuffle_points, \ 15 | downsample_lidar_minimum 16 | 17 | 18 | class EarlyFusionVisDataset(basedataset.BaseDataset): 19 | def __init__(self, params, visualize, train=True): 20 | super(EarlyFusionVisDataset, self).__init__(params, visualize, train) 21 | self.pre_processor = build_preprocessor(params['preprocess'], 22 | train) 23 | self.post_processor = build_postprocessor(params['postprocess'], train) 24 | 25 | def __getitem__(self, idx): 26 | base_data_dict = self.retrieve_base_data(idx) 27 | 28 | processed_data_dict = OrderedDict() 29 | processed_data_dict['ego'] = {} 30 | 31 | ego_id = -1 32 | ego_lidar_pose = [] 33 | 34 | # first find the ego vehicle's lidar pose 35 | for cav_id, cav_content in base_data_dict.items(): 36 | if cav_content['ego']: 37 | ego_id = cav_id 38 | ego_lidar_pose = cav_content['params']['lidar_pose'] 39 | break 40 | 41 | assert ego_id != -1 42 | assert len(ego_lidar_pose) > 0 43 | 44 | projected_lidar_stack = [] 45 | object_stack = [] 46 | object_id_stack = [] 47 | 48 | # loop over all CAVs to process information 49 | for cav_id, selected_cav_base in base_data_dict.items(): 50 | selected_cav_processed = self.get_item_single_car( 51 | selected_cav_base, 52 | ego_lidar_pose) 53 | # all these lidar and object coordinates are projected to ego 54 | # already. 
55 | projected_lidar_stack.append( 56 | selected_cav_processed['projected_lidar']) 57 | object_stack.append(selected_cav_processed['object_bbx_center']) 58 | object_id_stack += selected_cav_processed['object_ids'] 59 | 60 | # exclude all repetitive objects 61 | unique_indices = \ 62 | [object_id_stack.index(x) for x in set(object_id_stack)] 63 | object_stack = np.vstack(object_stack) 64 | object_stack = object_stack[unique_indices] 65 | 66 | # make sure bounding boxes across all frames have the same number 67 | object_bbx_center = \ 68 | np.zeros((self.params['postprocess']['max_num'], 7)) 69 | mask = np.zeros(self.params['postprocess']['max_num']) 70 | object_bbx_center[:object_stack.shape[0], :] = object_stack 71 | mask[:object_stack.shape[0]] = 1 72 | 73 | # convert list to numpy array, (N, 4) 74 | projected_lidar_stack = np.vstack(projected_lidar_stack) 75 | 76 | # data augmentation 77 | projected_lidar_stack, object_bbx_center, mask = \ 78 | self.augment(projected_lidar_stack, object_bbx_center, mask) 79 | 80 | # we do lidar filtering in the stacked lidar 81 | projected_lidar_stack = mask_points_by_range(projected_lidar_stack, 82 | self.params['preprocess'][ 83 | 'cav_lidar_range']) 84 | # augmentation may remove some of the bbx out of range 85 | object_bbx_center_valid = object_bbx_center[mask == 1] 86 | object_bbx_center_valid = \ 87 | box_utils.mask_boxes_outside_range_numpy(object_bbx_center_valid, 88 | self.params['preprocess'][ 89 | 'cav_lidar_range'], 90 | self.params['postprocess'][ 91 | 'order'] 92 | ) 93 | mask[object_bbx_center_valid.shape[0]:] = 0 94 | object_bbx_center[:object_bbx_center_valid.shape[0]] = \ 95 | object_bbx_center_valid 96 | object_bbx_center[object_bbx_center_valid.shape[0]:] = 0 97 | 98 | processed_data_dict['ego'].update( 99 | {'object_bbx_center': object_bbx_center, 100 | 'object_bbx_mask': mask, 101 | 'object_ids': [object_id_stack[i] for i in unique_indices], 102 | 'origin_lidar': projected_lidar_stack 103 | }) 104 | 105 | return processed_data_dict 106 | 107 | def get_item_single_car(self, selected_cav_base, ego_pose): 108 | """ 109 | Project the lidar and bbx to ego space first, and then do clipping. 110 | 111 | Parameters 112 | ---------- 113 | selected_cav_base : dict 114 | The dictionary contains a single CAV's raw information. 115 | ego_pose : list 116 | The ego vehicle lidar pose under world coordinate. 117 | 118 | Returns 119 | ------- 120 | selected_cav_processed : dict 121 | The dictionary contains the cav's processed information. 
122 | """ 123 | selected_cav_processed = {} 124 | 125 | # calculate the transformation matrix 126 | transformation_matrix = \ 127 | selected_cav_base['params']['transformation_matrix'] 128 | 129 | # retrieve objects under ego coordinates 130 | object_bbx_center, object_bbx_mask, object_ids = \ 131 | self.post_processor.generate_object_center([selected_cav_base], 132 | ego_pose) 133 | 134 | # filter lidar 135 | lidar_np = selected_cav_base['lidar_np'] 136 | lidar_np = shuffle_points(lidar_np) 137 | # remove points that hit itself 138 | lidar_np = mask_ego_points(lidar_np) 139 | # project the lidar to ego space 140 | lidar_np[:, :3] = \ 141 | box_utils.project_points_by_matrix_torch(lidar_np[:, :3], 142 | transformation_matrix) 143 | 144 | selected_cav_processed.update( 145 | {'object_bbx_center': object_bbx_center[object_bbx_mask == 1], 146 | 'object_ids': object_ids, 147 | 'projected_lidar': lidar_np}) 148 | 149 | return selected_cav_processed 150 | 151 | def collate_batch_train(self, batch): 152 | """ 153 | Customized collate function for pytorch dataloader during training 154 | for late fusion dataset. 155 | 156 | Parameters 157 | ---------- 158 | batch : dict 159 | 160 | Returns 161 | ------- 162 | batch : dict 163 | Reformatted batch. 164 | """ 165 | # during training, we only care about ego. 166 | output_dict = {'ego': {}} 167 | 168 | object_bbx_center = [] 169 | object_bbx_mask = [] 170 | origin_lidar = [] 171 | 172 | for i in range(len(batch)): 173 | ego_dict = batch[i]['ego'] 174 | object_bbx_center.append(ego_dict['object_bbx_center']) 175 | object_bbx_mask.append(ego_dict['object_bbx_mask']) 176 | origin_lidar.append(ego_dict['origin_lidar']) 177 | 178 | # convert to numpy, (B, max_num, 7) 179 | object_bbx_center = torch.from_numpy(np.array(object_bbx_center)) 180 | object_bbx_mask = torch.from_numpy(np.array(object_bbx_mask)) 181 | output_dict['ego'].update({'object_bbx_center': object_bbx_center, 182 | 'object_bbx_mask': object_bbx_mask}) 183 | 184 | origin_lidar = \ 185 | np.array(downsample_lidar_minimum(pcd_np_list=origin_lidar)) 186 | origin_lidar = torch.from_numpy(origin_lidar) 187 | output_dict['ego'].update({'origin_lidar': origin_lidar}) 188 | 189 | return output_dict 190 | -------------------------------------------------------------------------------- /v2xvit/data_utils/post_processor/__init__.py: -------------------------------------------------------------------------------- 1 | from v2xvit.data_utils.post_processor.voxel_postprocessor import VoxelPostprocessor 2 | from v2xvit.data_utils.post_processor.bev_postprocessor import BevPostprocessor 3 | 4 | __all__ = { 5 | 'VoxelPostprocessor': VoxelPostprocessor, 6 | 'BevPostprocessor': BevPostprocessor, 7 | } 8 | 9 | 10 | def build_postprocessor(anchor_cfg, train): 11 | process_method_name = anchor_cfg['core_method'] 12 | assert process_method_name in ['VoxelPostprocessor', 'BevPostprocessor'] 13 | anchor_generator = __all__[process_method_name]( 14 | anchor_params=anchor_cfg, 15 | train=train 16 | ) 17 | 18 | return anchor_generator 19 | -------------------------------------------------------------------------------- /v2xvit/data_utils/post_processor/base_postprocessor.py: -------------------------------------------------------------------------------- 1 | """ 2 | Template for AnchorGenerator 3 | """ 4 | 5 | import numpy as np 6 | import torch 7 | 8 | from v2xvit.utils import box_utils 9 | 10 | 11 | class BasePostprocessor(object): 12 | """ 13 | Template for Anchor generator. 
14 | 15 | Parameters 16 | ---------- 17 | anchor_params : dict 18 | The dictionary containing all anchor-related parameters. 19 | train : bool 20 | Indicate train or test mode. 21 | 22 | Attributes 23 | ---------- 24 | bbx_dict : dictionary 25 | Contain all objects information across the cav, key: id, value: bbx 26 | coordinates (1, 7) 27 | """ 28 | 29 | def __init__(self, anchor_params, train=True): 30 | self.params = anchor_params 31 | self.bbx_dict = {} 32 | self.train = train 33 | 34 | def generate_anchor_box(self): 35 | # needs to be overloaded 36 | return None 37 | 38 | def generate_label(self, *argv): 39 | return None 40 | 41 | def generate_gt_bbx(self, data_dict): 42 | """ 43 | The base postprocessor will generate 3d groundtruth bounding box. 44 | 45 | Parameters 46 | ---------- 47 | data_dict : dict 48 | The dictionary containing the origin input data of model. 49 | 50 | Returns 51 | ------- 52 | gt_box3d_tensor : torch.Tensor 53 | The groundtruth bounding box tensor, shape (N, 8, 3). 54 | """ 55 | gt_box3d_list = [] 56 | # used to avoid repetitive bounding box 57 | object_id_list = [] 58 | 59 | for cav_id, cav_content in data_dict.items(): 60 | # used to project gt bounding box to ego space. 61 | # the transformation matrix for gt should always be based on 62 | # current timestamp (object transformation matrix is for 63 | # late fusion only since other fusion method already did 64 | # the transformation in the preprocess) 65 | transformation_matrix = cav_content['transformation_matrix'] \ 66 | if 'gt_transformation_matrix' not in cav_content \ 67 | else cav_content['gt_transformation_matrix'] 68 | 69 | object_bbx_center = cav_content['object_bbx_center'] 70 | object_bbx_mask = cav_content['object_bbx_mask'] 71 | object_ids = cav_content['object_ids'] 72 | object_bbx_center = object_bbx_center[object_bbx_mask == 1] 73 | 74 | # convert center to corner 75 | object_bbx_corner = \ 76 | box_utils.boxes_to_corners_3d(object_bbx_center, 77 | self.params['order']) 78 | projected_object_bbx_corner = \ 79 | box_utils.project_box3d(object_bbx_corner.float(), 80 | transformation_matrix) 81 | gt_box3d_list.append(projected_object_bbx_corner) 82 | 83 | # append the corresponding ids 84 | object_id_list += object_ids 85 | 86 | # gt bbx 3d 87 | gt_box3d_list = torch.vstack(gt_box3d_list) 88 | # some of the bbx may be repetitive, use the id list to filter 89 | gt_box3d_selected_indices = \ 90 | [object_id_list.index(x) for x in set(object_id_list)] 91 | gt_box3d_tensor = gt_box3d_list[gt_box3d_selected_indices] 92 | 93 | # filter the gt_box to make sure all bbx are in the range 94 | mask = \ 95 | box_utils.get_mask_for_boxes_within_range_torch(gt_box3d_tensor) 96 | gt_box3d_tensor = gt_box3d_tensor[mask, :, :] 97 | 98 | return gt_box3d_tensor 99 | 100 | def generate_object_center(self, 101 | cav_contents, 102 | reference_lidar_pose): 103 | """ 104 | Retrieve all objects in a format of (n, 7), where 7 represents 105 | x, y, z, l, w, h, yaw or x, y, z, h, w, l, yaw. 106 | 107 | Parameters 108 | ---------- 109 | cav_contents : list 110 | List of dictionary, save all cavs' information. 111 | 112 | reference_lidar_pose : list 113 | The final target lidar pose with length 6. 114 | 115 | Returns 116 | ------- 117 | object_np : np.ndarray 118 | Shape is (max_num, 7). 119 | mask : np.ndarray 120 | Shape is (max_num,). 121 | object_ids : list 122 | Length is number of bbx in current sample. 
123 | """ 124 | from v2xvit.data_utils.datasets import GT_RANGE 125 | 126 | tmp_object_dict = {} 127 | for cav_content in cav_contents: 128 | tmp_object_dict.update(cav_content['params']['vehicles']) 129 | 130 | output_dict = {} 131 | filter_range = self.params['anchor_args']['cav_lidar_range'] \ 132 | if self.train else GT_RANGE 133 | 134 | box_utils.project_world_objects(tmp_object_dict, 135 | output_dict, 136 | reference_lidar_pose, 137 | filter_range, 138 | self.params['order']) 139 | 140 | object_np = np.zeros((self.params['max_num'], 7)) 141 | mask = np.zeros(self.params['max_num']) 142 | object_ids = [] 143 | 144 | for i, (object_id, object_bbx) in enumerate(output_dict.items()): 145 | object_np[i] = object_bbx[0, :] 146 | mask[i] = 1 147 | object_ids.append(object_id) 148 | 149 | return object_np, mask, object_ids 150 | -------------------------------------------------------------------------------- /v2xvit/data_utils/pre_processor/__init__.py: -------------------------------------------------------------------------------- 1 | from v2xvit.data_utils.pre_processor.base_preprocessor import BasePreprocessor 2 | from v2xvit.data_utils.pre_processor.voxel_preprocessor import VoxelPreprocessor 3 | from v2xvit.data_utils.pre_processor.bev_preprocessor import BevPreprocessor 4 | from v2xvit.data_utils.pre_processor.sp_voxel_preprocessor import SpVoxelPreprocessor 5 | 6 | __all__ = { 7 | 'BasePreprocessor': BasePreprocessor, 8 | 'VoxelPreprocessor': VoxelPreprocessor, 9 | 'BevPreprocessor': BevPreprocessor, 10 | 'SpVoxelPreprocessor': SpVoxelPreprocessor 11 | } 12 | 13 | 14 | def build_preprocessor(preprocess_cfg, train): 15 | process_method_name = preprocess_cfg['core_method'] 16 | error_message = f"{process_method_name} is not found. " \ 17 | f"Please add your processor file's name in opencood/" \ 18 | f"data_utils/processor/init.py" 19 | assert process_method_name in ['BasePreprocessor', 'VoxelPreprocessor', 20 | 'BevPreprocessor', 'SpVoxelPreprocessor'], \ 21 | error_message 22 | 23 | processor = __all__[process_method_name]( 24 | preprocess_params=preprocess_cfg, 25 | train=train 26 | ) 27 | 28 | return processor 29 | -------------------------------------------------------------------------------- /v2xvit/data_utils/pre_processor/base_preprocessor.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from v2xvit.utils import pcd_utils 4 | 5 | 6 | class BasePreprocessor(object): 7 | """ 8 | Basic Lidar pre-processor. 9 | 10 | Parameters 11 | ---------- 12 | preprocess_params : dict 13 | The dictionary containing all parameters of the preprocessing. 14 | 15 | train : bool 16 | Train or test mode. 17 | """ 18 | 19 | def __init__(self, preprocess_params, train): 20 | self.params = preprocess_params 21 | self.train = train 22 | 23 | def preprocess(self, pcd_np): 24 | """ 25 | Preprocess the lidar points by simple sampling. 26 | 27 | Parameters 28 | ---------- 29 | pcd_np : np.ndarray 30 | The raw lidar. 31 | 32 | Returns 33 | ------- 34 | data_dict : the output dictionary. 35 | """ 36 | data_dict = {} 37 | sample_num = self.params['args']['sample_num'] 38 | 39 | pcd_np = pcd_utils.downsample_lidar(pcd_np, sample_num) 40 | data_dict['downsample_lidar'] = pcd_np 41 | 42 | return data_dict 43 | 44 | def project_points_to_bev_map(self, points, ratio=0.1): 45 | """ 46 | Project points to BEV occupancy map with default ratio=0.1. 
47 | 48 | Parameters 49 | ---------- 50 | points : np.ndarray 51 | (N, 3) / (N, 4) 52 | 53 | ratio : float 54 | Discretization parameters. Default is 0.1. 55 | 56 | Returns 57 | ------- 58 | bev_map : np.ndarray 59 | BEV occupancy map including projected points with shape 60 | (img_row, img_col). 61 | 62 | """ 63 | L1, W1, H1, L2, W2, H2 = self.params["cav_lidar_range"] 64 | img_row = int((L2 - L1) / ratio) 65 | img_col = int((W2 - W1) / ratio) 66 | bev_map = np.zeros((img_row, img_col)) 67 | bev_origin = np.array([L1, W1, H1]).reshape(1, -1) 68 | # (N, 3) 69 | indices = ((points[:, :3] - bev_origin) / ratio).astype(int) 70 | mask = np.logical_and(indices[:, 0] > 0, indices[:, 0] < img_row) 71 | mask = np.logical_and(mask, np.logical_and(indices[:, 1] > 0, 72 | indices[:, 1] < img_col)) 73 | indices = indices[mask, :] 74 | bev_map[indices[:, 0], indices[:, 1]] = 1 75 | return bev_map 76 | -------------------------------------------------------------------------------- /v2xvit/data_utils/pre_processor/bev_preprocessor.py: -------------------------------------------------------------------------------- 1 | """ 2 | Convert lidar to bev 3 | """ 4 | 5 | import numpy as np 6 | import torch 7 | from v2xvit.data_utils.pre_processor.base_preprocessor import \ 8 | BasePreprocessor 9 | 10 | class BevPreprocessor(BasePreprocessor): 11 | def __init__(self, preprocess_params, train): 12 | super(BevPreprocessor, self).__init__(preprocess_params, train) 13 | self.lidar_range = self.params['cav_lidar_range'] 14 | self.geometry_param = preprocess_params["geometry_param"] 15 | 16 | def preprocess(self, pcd_raw): 17 | """ 18 | Preprocess the lidar points to BEV representations. 19 | 20 | Parameters 21 | ---------- 22 | pcd_raw : np.ndarray 23 | The raw lidar. 24 | 25 | Returns 26 | ------- 27 | data_dict : the structured output dictionary. 28 | """ 29 | bev = np.zeros(self.geometry_param['input_shape'], dtype=np.float32) 30 | intensity_map_count = np.zeros((bev.shape[0], bev.shape[1]), dtype=np.int) 31 | bev_origin = np.array( 32 | [self.geometry_param["L1"], self.geometry_param["W1"], 33 | self.geometry_param["H1"]]).reshape(1, -1) 34 | 35 | indices = ((pcd_raw[:, :3] - bev_origin) / self.geometry_param[ 36 | "res"]).astype(int) 37 | ## bev[indices[:, 0], indices[:, 1], indices[:, 2]] = 1 38 | # np.add.at(bev, (indices[:, 0], indices[:, 1], indices[:, 2]), 1) 39 | # bev[indices[:, 0], indices[:, 1], -1] += pcd_raw[:, 3] 40 | # intensity_map_count[indices[:, 0], indices[:, 1]] += 1 41 | 42 | for i in range(indices.shape[0]): 43 | bev[indices[i, 0], indices[i, 1], indices[i, 2]] = 1 44 | bev[indices[i, 0], indices[i, 1], -1] += pcd_raw[i, 3] 45 | intensity_map_count[indices[i, 0], indices[i, 1]] += 1 46 | divide_mask = intensity_map_count!=0 47 | bev[divide_mask, -1] = np.divide(bev[divide_mask, -1], intensity_map_count[divide_mask]) 48 | 49 | data_dict = { 50 | "bev_input": np.transpose(bev, (2, 0, 1)) 51 | } 52 | return data_dict 53 | 54 | @staticmethod 55 | def collate_batch_list(batch): 56 | """ 57 | Customized pytorch data loader collate function. 58 | 59 | Parameters 60 | ---------- 61 | batch : list 62 | List of dictionary. Each dictionary represent a single frame. 63 | 64 | Returns 65 | ------- 66 | processed_batch : dict 67 | Updated lidar batch. 68 | """ 69 | bev_input_list = [ 70 | x["bev_input"][np.newaxis, ...] 
for x in batch 71 | ] 72 | processed_batch = { 73 | "bev_input": torch.from_numpy( 74 | np.concatenate(bev_input_list, axis=0)) 75 | } 76 | return processed_batch 77 | @staticmethod 78 | def collate_batch_dict(batch): 79 | """ 80 | Customized pytorch data loader collate function. 81 | 82 | Parameters 83 | ---------- 84 | batch : dict 85 | Dict of list. Each element represents a CAV. 86 | 87 | Returns 88 | ------- 89 | processed_batch : dict 90 | Updated lidar batch. 91 | """ 92 | bev_input_list = [ 93 | x[np.newaxis, ...] for x in batch["bev_input"] 94 | ] 95 | processed_batch = { 96 | "bev_input": torch.from_numpy( 97 | np.concatenate(bev_input_list, axis=0)) 98 | } 99 | return processed_batch 100 | 101 | def collate_batch(self, batch): 102 | """ 103 | Customized pytorch data loader collate function. 104 | 105 | Parameters 106 | ---------- 107 | batch : list / dict 108 | Batched data. 109 | Returns 110 | ------- 111 | processed_batch : dict 112 | Updated lidar batch. 113 | """ 114 | if isinstance(batch, list): 115 | return self.collate_batch_list(batch) 116 | elif isinstance(batch, dict): 117 | return self.collate_batch_dict(batch) 118 | else: 119 | raise NotImplemented 120 | 121 | -------------------------------------------------------------------------------- /v2xvit/data_utils/pre_processor/sp_voxel_preprocessor.py: -------------------------------------------------------------------------------- 1 | """ 2 | Transform points to voxels using sparse conv library 3 | """ 4 | import sys 5 | 6 | import numpy as np 7 | import torch 8 | from cumm import tensorview as tv 9 | from spconv.utils import Point2VoxelCPU3d 10 | 11 | from v2xvit.data_utils.pre_processor.base_preprocessor import \ 12 | BasePreprocessor 13 | 14 | 15 | class SpVoxelPreprocessor(BasePreprocessor): 16 | def __init__(self, preprocess_params, train): 17 | super(SpVoxelPreprocessor, self).__init__(preprocess_params, 18 | train) 19 | 20 | self.lidar_range = self.params['cav_lidar_range'] 21 | self.voxel_size = self.params['args']['voxel_size'] 22 | self.max_points_per_voxel = self.params['args']['max_points_per_voxel'] 23 | 24 | if train: 25 | self.max_voxels = self.params['args']['max_voxel_train'] 26 | else: 27 | self.max_voxels = self.params['args']['max_voxel_test'] 28 | 29 | grid_size = (np.array(self.lidar_range[3:6]) - 30 | np.array(self.lidar_range[0:3])) / np.array(self.voxel_size) 31 | self.grid_size = np.round(grid_size).astype(np.int64) 32 | 33 | # use sparse conv library to generate voxel 34 | self.voxel_generator = Point2VoxelCPU3d( 35 | vsize_xyz=self.voxel_size, 36 | coors_range_xyz=self.lidar_range, 37 | max_num_points_per_voxel=self.max_points_per_voxel, 38 | num_point_features=4, 39 | max_num_voxels=self.max_voxels 40 | ) 41 | 42 | def preprocess(self, pcd_np): 43 | data_dict = {} 44 | pcd_tv = tv.from_numpy(pcd_np) 45 | voxel_output = self.voxel_generator.point_to_voxel(pcd_tv) 46 | if isinstance(voxel_output, dict): 47 | voxels, coordinates, num_points = \ 48 | voxel_output['voxels'], voxel_output['coordinates'], \ 49 | voxel_output['num_points_per_voxel'] 50 | else: 51 | voxels, coordinates, num_points = voxel_output 52 | 53 | data_dict['voxel_features'] = voxels.numpy() 54 | data_dict['voxel_coords'] = coordinates.numpy() 55 | data_dict['voxel_num_points'] = num_points.numpy() 56 | 57 | return data_dict 58 | 59 | def collate_batch(self, batch): 60 | """ 61 | Customized pytorch data loader collate function. 62 | 63 | Parameters 64 | ---------- 65 | batch : list or dict 66 | List or dictionary. 
67 | 68 | Returns 69 | ------- 70 | processed_batch : dict 71 | Updated lidar batch. 72 | """ 73 | 74 | if isinstance(batch, list): 75 | return self.collate_batch_list(batch) 76 | elif isinstance(batch, dict): 77 | return self.collate_batch_dict(batch) 78 | else: 79 | sys.exit('Batch has too be a list or a dictionarn') 80 | 81 | @staticmethod 82 | def collate_batch_list(batch): 83 | """ 84 | Customized pytorch data loader collate function. 85 | 86 | Parameters 87 | ---------- 88 | batch : list 89 | List of dictionary. Each dictionary represent a single frame. 90 | 91 | Returns 92 | ------- 93 | processed_batch : dict 94 | Updated lidar batch. 95 | """ 96 | voxel_features = [] 97 | voxel_num_points = [] 98 | voxel_coords = [] 99 | 100 | for i in range(len(batch)): 101 | voxel_features.append(batch[i]['voxel_features']) 102 | voxel_num_points.append(batch[i]['voxel_num_points']) 103 | coords = batch[i]['voxel_coords'] 104 | voxel_coords.append( 105 | np.pad(coords, ((0, 0), (1, 0)), 106 | mode='constant', constant_values=i)) 107 | 108 | voxel_num_points = torch.from_numpy(np.concatenate(voxel_num_points)) 109 | voxel_features = torch.from_numpy(np.concatenate(voxel_features)) 110 | voxel_coords = torch.from_numpy(np.concatenate(voxel_coords)) 111 | 112 | return {'voxel_features': voxel_features, 113 | 'voxel_coords': voxel_coords, 114 | 'voxel_num_points': voxel_num_points} 115 | 116 | @staticmethod 117 | def collate_batch_dict(batch: dict): 118 | """ 119 | Collate batch if the batch is a dictionary, 120 | eg: {'voxel_features': [feature1, feature2...., feature n]} 121 | 122 | Parameters 123 | ---------- 124 | batch : dict 125 | 126 | Returns 127 | ------- 128 | processed_batch : dict 129 | Updated lidar batch. 130 | """ 131 | voxel_features = \ 132 | torch.from_numpy(np.concatenate(batch['voxel_features'])) 133 | voxel_num_points = \ 134 | torch.from_numpy(np.concatenate(batch['voxel_num_points'])) 135 | coords = batch['voxel_coords'] 136 | voxel_coords = [] 137 | 138 | for i in range(len(coords)): 139 | voxel_coords.append( 140 | np.pad(coords[i], ((0, 0), (1, 0)), 141 | mode='constant', constant_values=i)) 142 | voxel_coords = torch.from_numpy(np.concatenate(voxel_coords)) 143 | 144 | return {'voxel_features': voxel_features, 145 | 'voxel_coords': voxel_coords, 146 | 'voxel_num_points': voxel_num_points} 147 | -------------------------------------------------------------------------------- /v2xvit/data_utils/pre_processor/voxel_preprocessor.py: -------------------------------------------------------------------------------- 1 | """ 2 | Convert lidar to voxel 3 | """ 4 | import sys 5 | 6 | import numpy as np 7 | import torch 8 | 9 | from v2xvit.data_utils.pre_processor.base_preprocessor import \ 10 | BasePreprocessor 11 | 12 | 13 | class VoxelPreprocessor(BasePreprocessor): 14 | def __init__(self, preprocess_params, train): 15 | super(VoxelPreprocessor, self).__init__(preprocess_params, train) 16 | self.lidar_range = self.params['cav_lidar_range'] 17 | 18 | self.vw = self.params['args']['vw'] 19 | self.vh = self.params['args']['vh'] 20 | self.vd = self.params['args']['vd'] 21 | self.T = self.params['args']['T'] 22 | 23 | def preprocess(self, pcd_np): 24 | """ 25 | Preprocess the lidar points by voxelization. 26 | 27 | Parameters 28 | ---------- 29 | pcd_np : np.ndarray 30 | The raw lidar. 31 | 32 | Returns 33 | ------- 34 | data_dict : the structured output dictionary. 
35 | """ 36 | data_dict = {} 37 | 38 | # calculate the voxel coordinates 39 | voxel_coords = ((pcd_np[:, :3] - 40 | np.floor(np.array([self.lidar_range[0], 41 | self.lidar_range[1], 42 | self.lidar_range[2]])) / ( 43 | self.vw, self.vh, self.vd))).astype(np.int32) 44 | 45 | # convert to (D, H, W) as the paper 46 | voxel_coords = voxel_coords[:, [2, 1, 0]] 47 | voxel_coords, inv_ind, voxel_counts = np.unique(voxel_coords, axis=0, 48 | return_inverse=True, 49 | return_counts=True) 50 | 51 | voxel_features = [] 52 | 53 | for i in range(len(voxel_coords)): 54 | voxel = np.zeros((self.T, 7), dtype=np.float32) 55 | pts = pcd_np[inv_ind == i] 56 | if voxel_counts[i] > self.T: 57 | pts = pts[:self.T, :] 58 | voxel_counts[i] = self.T 59 | 60 | # augment the points 61 | voxel[:pts.shape[0], :] = np.concatenate((pts, pts[:, :3] - 62 | np.mean(pts[:, :3], 0)), 63 | axis=1) 64 | voxel_features.append(voxel) 65 | 66 | data_dict['voxel_features'] = np.array(voxel_features) 67 | data_dict['voxel_coords'] = voxel_coords 68 | 69 | return data_dict 70 | 71 | def collate_batch(self, batch): 72 | """ 73 | Customized pytorch data loader collate function. 74 | 75 | Parameters 76 | ---------- 77 | batch : list or dict 78 | List or dictionary. 79 | 80 | Returns 81 | ------- 82 | processed_batch : dict 83 | Updated lidar batch. 84 | """ 85 | 86 | if isinstance(batch, list): 87 | return self.collate_batch_list(batch) 88 | elif isinstance(batch, dict): 89 | return self.collate_batch_dict(batch) 90 | else: 91 | sys.exit('Batch has too be a list or a dictionarn') 92 | 93 | @staticmethod 94 | def collate_batch_list(batch): 95 | """ 96 | Customized pytorch data loader collate function. 97 | 98 | Parameters 99 | ---------- 100 | batch : list 101 | List of dictionary. Each dictionary represent a single frame. 102 | 103 | Returns 104 | ------- 105 | processed_batch : dict 106 | Updated lidar batch. 107 | """ 108 | voxel_features = [] 109 | voxel_coords = [] 110 | 111 | for i in range(len(batch)): 112 | voxel_features.append(batch[i]['voxel_features']) 113 | coords = batch[i]['voxel_coords'] 114 | voxel_coords.append( 115 | np.pad(coords, ((0, 0), (1, 0)), 116 | mode='constant', constant_values=i)) 117 | 118 | voxel_features = torch.from_numpy(np.concatenate(voxel_features)) 119 | voxel_coords = torch.from_numpy(np.concatenate(voxel_coords)) 120 | 121 | return {'voxel_features': voxel_features, 122 | 'voxel_coords': voxel_coords} 123 | 124 | @staticmethod 125 | def collate_batch_dict(batch: dict): 126 | """ 127 | Collate batch if the batch is a dictionary, 128 | eg: {'voxel_features': [feature1, feature2...., feature n]} 129 | 130 | Parameters 131 | ---------- 132 | batch : dict 133 | 134 | Returns 135 | ------- 136 | processed_batch : dict 137 | Updated lidar batch. 
138 | """ 139 | voxel_features = \ 140 | torch.from_numpy(np.concatenate(batch['voxel_features'])) 141 | coords = batch['voxel_coords'] 142 | voxel_coords = [] 143 | 144 | for i in range(len(coords)): 145 | voxel_coords.append( 146 | np.pad(coords[i], ((0, 0), (1, 0)), 147 | mode='constant', constant_values=i)) 148 | voxel_coords = torch.from_numpy(np.concatenate(voxel_coords)) 149 | 150 | return {'voxel_features': voxel_features, 151 | 'voxel_coords': voxel_coords} 152 | -------------------------------------------------------------------------------- /v2xvit/hypes_yaml/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ydk122024/How2comm/8d357abdcb265ed4db5e4c6d9ee5400ea5799abe/v2xvit/hypes_yaml/__init__.py -------------------------------------------------------------------------------- /v2xvit/hypes_yaml/how2comm/v2xset_how2comm_stcformer.yaml: -------------------------------------------------------------------------------- 1 | name: v2xset_how2comm_stcformer_opv2v 2 | # root_dir: '/data/v2xset/train' 3 | # validate_dir: '/data/v2xset/validate' 4 | root_dir: '/data/opv2v/train' 5 | validate_dir: '/data/opv2v/validate' 6 | 7 | wild_setting: 8 | async: false 9 | async_overhead: 0 10 | seed: 20 11 | loc_err: false 12 | xyz_std: 0 13 | ryp_std: 0 14 | data_size: 1.06 15 | transmission_speed: 27 16 | backbone_delay: 0 17 | 18 | 19 | yaml_parser: "load_point_pillar_params" 20 | train_params: 21 | batch_size: &batch_size 1 22 | epoches: 20 23 | eval_freq: 2 24 | save_freq: 1 25 | max_cav: &max_cav 5 26 | frame: &frame 1 27 | 28 | 29 | fusion: 30 | core_method: 'IntermediateFusionDataset' 31 | args: 32 | cur_ego_pose_flag: true 33 | 34 | preprocess: 35 | core_method: 'SpVoxelPreprocessor' 36 | args: 37 | voxel_size: &voxel_size [0.4, 0.4, 4] 38 | max_points_per_voxel: 32 39 | max_voxel_train: 32000 40 | max_voxel_test: 70000 41 | cav_lidar_range: &cav_lidar [-140.8, -40, -3, 140.8, 40, 1] 42 | 43 | data_augment: 44 | - NAME: random_world_flip 45 | ALONG_AXIS_LIST: [ 'x' ] 46 | 47 | - NAME: random_world_rotation 48 | WORLD_ROT_ANGLE: [ -0.78539816, 0.78539816 ] 49 | 50 | - NAME: random_world_scaling 51 | WORLD_SCALE_RANGE: [ 0.95, 1.05 ] 52 | 53 | 54 | postprocess: 55 | core_method: 'VoxelPostprocessor' 56 | gt_range: *cav_lidar 57 | anchor_args: 58 | cav_lidar_range: *cav_lidar 59 | l: 3.9 60 | w: 1.6 61 | h: 1.56 62 | r: [0, 90] 63 | feature_stride: 2 64 | num: &achor_num 2 65 | target_args: 66 | pos_threshold: 0.6 67 | neg_threshold: 0.45 68 | score_threshold: 0.20 69 | order: 'hwl' 70 | max_num: 100 71 | nms_thresh: 0.15 72 | 73 | 74 | model: 75 | core_method: point_pillar_how2comm 76 | 77 | args: 78 | voxel_size: *voxel_size 79 | lidar_range: *cav_lidar 80 | anchor_number: *achor_num 81 | max_cav: *max_cav 82 | compression: 0 83 | backbone_fix: false 84 | flow_flag: true 85 | 86 | pillar_vfe: 87 | use_norm: true 88 | with_distance: false 89 | use_absolute_xyz: true 90 | num_filters: [64] 91 | point_pillar_scatter: 92 | num_features: 64 93 | 94 | base_bev_backbone: 95 | resnet: True 96 | layer_nums: &layer_nums [3, 4, 5] 97 | layer_strides: [2, 2, 2] 98 | num_filters: &num_filters [64, 128, 256] 99 | upsample_strides: [1, 2, 4] 100 | num_upsample_filter: [128, 128, 128] 101 | compression: 0 102 | voxel_size: *voxel_size 103 | shrink_header: 104 | kernal_size: [ 3 ] 105 | stride: [ 1 ] 106 | padding: [ 1 ] 107 | dim: [ 256 ] 108 | input_dim: 384 109 | 110 | fusion_args: 111 | voxel_size: *voxel_size 112 | 
downsample_rate: 1 113 | in_channels: 256 114 | n_head: 8 115 | dropout_rate: 0 116 | only_attention: true 117 | communication: 118 | thre: 0.01 119 | compressed_dim: 2 120 | request_flag: True 121 | gaussian_smooth: 122 | k_size: 5 123 | c_sigma: 1.0 124 | communication_flag: True 125 | agg_operator: 126 | mode: 'STCFormer' 127 | feature_dim: 256 128 | n_head: 8 129 | depth: 1 130 | embed_pos: None 131 | with_spe: false 132 | with_scm: false 133 | hetero: False 134 | multi_scale: true 135 | frame: *frame 136 | layer_nums: *layer_nums 137 | num_filters: *num_filters 138 | temporal_fusion: 139 | height: [100, 50, 25, 100] 140 | width: [352, 176, 88, 352] 141 | layers: 1 142 | channel: [64, 128, 256, 256] 143 | gate: False 144 | n_head: 5 145 | rte_ratio: 1 146 | hidden_dim: [64, 128, 256, 256] 147 | delay: 0 148 | compressed_dim: 2 149 | only_ego: False 150 | 151 | 152 | loss: 153 | core_method: point_pillar_loss 154 | args: 155 | cls_weight: 1.0 156 | reg: 2.0 157 | 158 | optimizer: 159 | core_method: Adam 160 | lr: 0.002 161 | args: 162 | eps: 1e-10 163 | weight_decay: 1e-4 164 | 165 | lr_scheduler: 166 | core_method: multistep 167 | gamma: 0.1 168 | step_size: [10, 20] -------------------------------------------------------------------------------- /v2xvit/hypes_yaml/yaml_utils.py: -------------------------------------------------------------------------------- 1 | import re 2 | import yaml 3 | import os 4 | import math 5 | 6 | import numpy as np 7 | 8 | 9 | def load_yaml(file, opt=None): 10 | """ 11 | Load yaml file and return a dictionary. 12 | 13 | Parameters 14 | ---------- 15 | file : string 16 | yaml file path. 17 | 18 | opt : argparser 19 | Argparser. 20 | Returns 21 | ------- 22 | param : dict 23 | A dictionary that contains defined parameters. 24 | """ 25 | if opt and opt.model_dir: 26 | file = os.path.join(opt.model_dir, 'config.yaml') 27 | 28 | stream = open(file, 'r') 29 | loader = yaml.Loader 30 | loader.add_implicit_resolver( 31 | u'tag:yaml.org,2002:float', 32 | re.compile(u'''^(?: 33 | [-+]?(?:[0-9][0-9_]*)\\.[0-9_]*(?:[eE][-+]?[0-9]+)? 34 | |[-+]?(?:[0-9][0-9_]*)(?:[eE][-+]?[0-9]+) 35 | |\\.[0-9_]+(?:[eE][-+][0-9]+)? 36 | |[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\\.[0-9_]* 37 | |[-+]?\\.(?:inf|Inf|INF) 38 | |\\.(?:nan|NaN|NAN))$''', re.X), 39 | list(u'-+0123456789.')) 40 | param = yaml.load(stream, Loader=loader) 41 | if "yaml_parser" in param: 42 | param = eval(param["yaml_parser"])(param) 43 | 44 | return param 45 | 46 | 47 | def load_voxel_params(param): 48 | """ 49 | Based on the lidar range and resolution of voxel, calcuate the anchor box 50 | and target resolution. 51 | 52 | Parameters 53 | ---------- 54 | param : dict 55 | Original loaded parameter dictionary. 
56 | 57 | Returns 58 | ------- 59 | param : dict 60 | Modified parameter dictionary with new attribute `anchor_args[W][H][L]` 61 | """ 62 | anchor_args = param['postprocess']['anchor_args'] 63 | cav_lidar_range = anchor_args['cav_lidar_range'] 64 | voxel_size = param['preprocess']['args']['voxel_size'] 65 | 66 | vw = voxel_size[0] 67 | vh = voxel_size[1] 68 | vd = voxel_size[2] 69 | 70 | anchor_args['vw'] = vw 71 | anchor_args['vh'] = vh 72 | anchor_args['vd'] = vd 73 | 74 | anchor_args['W'] = int((cav_lidar_range[3] - cav_lidar_range[0]) / vw) 75 | anchor_args['H'] = int((cav_lidar_range[4] - cav_lidar_range[1]) / vh) 76 | anchor_args['D'] = int((cav_lidar_range[5] - cav_lidar_range[2]) / vd) 77 | 78 | param['postprocess'].update({'anchor_args': anchor_args}) 79 | # sometimes we just want to visualize the data without implementing model 80 | if 'model' in param: 81 | param['model']['args']['W'] = anchor_args['W'] 82 | param['model']['args']['H'] = anchor_args['H'] 83 | param['model']['args']['D'] = anchor_args['D'] 84 | return param 85 | 86 | 87 | def load_point_pillar_params(param): 88 | """ 89 | Based on the lidar range and resolution of voxel, calcuate the anchor box 90 | and target resolution. 91 | 92 | Parameters 93 | ---------- 94 | param : dict 95 | Original loaded parameter dictionary. 96 | 97 | Returns 98 | ------- 99 | param : dict 100 | Modified parameter dictionary with new attribute. 101 | """ 102 | cav_lidar_range = param['preprocess']['cav_lidar_range'] 103 | voxel_size = param['preprocess']['args']['voxel_size'] 104 | 105 | grid_size = (np.array(cav_lidar_range[3:6]) - np.array( 106 | cav_lidar_range[0:3])) / \ 107 | np.array(voxel_size) 108 | grid_size = np.round(grid_size).astype(np.int64) 109 | param['model']['args']['point_pillar_scatter']['grid_size'] = grid_size 110 | 111 | anchor_args = param['postprocess']['anchor_args'] 112 | 113 | vw = voxel_size[0] 114 | vh = voxel_size[1] 115 | vd = voxel_size[2] 116 | 117 | anchor_args['vw'] = vw 118 | anchor_args['vh'] = vh 119 | anchor_args['vd'] = vd 120 | 121 | anchor_args['W'] = math.ceil((cav_lidar_range[3] - cav_lidar_range[0]) / vw) 122 | anchor_args['H'] = math.ceil((cav_lidar_range[4] - cav_lidar_range[1]) / vh) 123 | anchor_args['D'] = math.ceil((cav_lidar_range[5] - cav_lidar_range[2]) / vd) 124 | 125 | param['postprocess'].update({'anchor_args': anchor_args}) 126 | 127 | return param 128 | 129 | def load_second_params(param): 130 | """ 131 | Based on the lidar range and resolution of voxel, calcuate the anchor box 132 | and target resolution. 133 | 134 | Parameters 135 | ---------- 136 | param : dict 137 | Original loaded parameter dictionary. 138 | 139 | Returns 140 | ------- 141 | param : dict 142 | Modified parameter dictionary with new attribute. 
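# Worked example of load_point_pillar_params() with the values from
# v2xset_how2comm_stcformer.yaml above (cav_lidar_range [-140.8, -40, -3,
# 140.8, 40, 1] and voxel_size [0.4, 0.4, 4]); only numpy and math are needed:
import math
import numpy as np

cav_lidar_range = [-140.8, -40, -3, 140.8, 40, 1]
voxel_size = [0.4, 0.4, 4]

grid_size = np.round((np.array(cav_lidar_range[3:6]) -
                      np.array(cav_lidar_range[0:3])) / np.array(voxel_size)).astype(np.int64)
print(grid_size)     # [704 200 1] -> a 704 x 200 pillar pseudo-image

W = math.ceil((cav_lidar_range[3] - cav_lidar_range[0]) / voxel_size[0])   # 704
H = math.ceil((cav_lidar_range[4] - cav_lidar_range[1]) / voxel_size[1])   # 200
D = math.ceil((cav_lidar_range[5] - cav_lidar_range[2]) / voxel_size[2])   # 1
# With feature_stride: 2 in the postprocess block, anchors therefore live on a
# 352 x 100 map, matching the first width/height entries of temporal_fusion.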
143 | """ 144 | cav_lidar_range = param['preprocess']['cav_lidar_range'] 145 | voxel_size = param['preprocess']['args']['voxel_size'] 146 | 147 | grid_size = (np.array(cav_lidar_range[3:6]) - np.array( 148 | cav_lidar_range[0:3])) / \ 149 | np.array(voxel_size) 150 | grid_size = np.round(grid_size).astype(np.int64) 151 | param['model']['args']['grid_size'] = grid_size 152 | 153 | anchor_args = param['postprocess']['anchor_args'] 154 | 155 | vw = voxel_size[0] 156 | vh = voxel_size[1] 157 | vd = voxel_size[2] 158 | 159 | anchor_args['vw'] = vw 160 | anchor_args['vh'] = vh 161 | anchor_args['vd'] = vd 162 | 163 | anchor_args['W'] = math.ceil((cav_lidar_range[3] - cav_lidar_range[0]) / vw) 164 | anchor_args['H'] = math.ceil((cav_lidar_range[4] - cav_lidar_range[1]) / vh) 165 | anchor_args['D'] = math.ceil((cav_lidar_range[5] - cav_lidar_range[2]) / vd) 166 | 167 | param['postprocess'].update({'anchor_args': anchor_args}) 168 | 169 | return param 170 | 171 | def load_bev_params(param): 172 | """ 173 | Load bev related geometry parameters s.t. boundary, resolutions, input 174 | shape, target shape etc. 175 | 176 | Parameters 177 | ---------- 178 | param : dict 179 | Original loaded parameter dictionary. 180 | 181 | Returns 182 | ------- 183 | param : dict 184 | Modified parameter dictionary with new attribute `geometry_param`. 185 | 186 | """ 187 | res = param["preprocess"]["args"]["res"] 188 | L1, W1, H1, L2, W2, H2 = param["preprocess"]["cav_lidar_range"] 189 | downsample_rate = param["preprocess"]["args"]["downsample_rate"] 190 | 191 | def f(low, high, r): 192 | return int((high - low) / r) 193 | 194 | input_shape = ( 195 | int((f(L1, L2, res))), 196 | int((f(W1, W2, res))), 197 | int((f(H1, H2, res)) + 1) 198 | ) 199 | label_shape = ( 200 | int(input_shape[0] / downsample_rate), 201 | int(input_shape[1] / downsample_rate), 202 | 7 203 | ) 204 | geometry_param = { 205 | 'L1': L1, 206 | 'L2': L2, 207 | 'W1': W1, 208 | 'W2': W2, 209 | 'H1': H1, 210 | 'H2': H2, 211 | "downsample_rate": downsample_rate, 212 | "input_shape": input_shape, 213 | "label_shape": label_shape, 214 | "res": res 215 | } 216 | param["preprocess"]["geometry_param"] = geometry_param 217 | param["postprocess"]["geometry_param"] = geometry_param 218 | param["model"]["args"]["geometry_param"] = geometry_param 219 | return param 220 | 221 | 222 | def save_yaml(data, save_name): 223 | """ 224 | Save the dictionary into a yaml file. 225 | 226 | Parameters 227 | ---------- 228 | data : dict 229 | The dictionary contains all data. 230 | 231 | save_name : string 232 | Full path of the output yaml file. 
233 | """ 234 | 235 | with open(save_name, 'w') as outfile: 236 | yaml.dump(data, outfile, default_flow_style=False) 237 | -------------------------------------------------------------------------------- /v2xvit/loss/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ydk122024/How2comm/8d357abdcb265ed4db5e4c6d9ee5400ea5799abe/v2xvit/loss/__init__.py -------------------------------------------------------------------------------- /v2xvit/loss/pixor_loss.py: -------------------------------------------------------------------------------- 1 | from functools import reduce 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | 8 | class PixorLoss(nn.Module): 9 | def __init__(self, args): 10 | super(PixorLoss, self).__init__() 11 | self.alpha = args["alpha"] 12 | self.beta = args["beta"] 13 | self.loss_dict = {} 14 | 15 | def forward(self, output_dict, target_dict): 16 | """ 17 | Compute loss for pixor network 18 | Parameters 19 | ---------- 20 | output_dict : dict 21 | The dictionary that contains the output. 22 | 23 | target_dict : dict 24 | The dictionary that contains the target. 25 | 26 | Returns 27 | ------- 28 | total_loss : torch.Tensor 29 | Total loss. 30 | 31 | """ 32 | targets = target_dict["label_map"] 33 | cls_preds, loc_preds = output_dict["cls"], output_dict["reg"] 34 | 35 | cls_targets, loc_targets = targets.split([1, 6], dim=1) 36 | pos_count = cls_targets.sum() 37 | neg_count = (cls_targets == 0).sum() 38 | w1, w2 = neg_count / (pos_count + neg_count), pos_count / ( 39 | pos_count + neg_count) 40 | weights = torch.ones_like(cls_preds.reshape(-1)) 41 | weights[cls_targets.reshape(-1) == 1] = w1 42 | weights[cls_targets.reshape(-1) == 0] = w2 43 | # cls_targets = cls_targets.float() 44 | # cls_loss = F.binary_cross_entropy_with_logits(input=cls_preds.reshape(-1), target=cls_targets.reshape(-1), weight=weights, 45 | # reduction='mean') 46 | cls_loss = F.binary_cross_entropy_with_logits( 47 | input=cls_preds, target=cls_targets, 48 | reduction='mean') 49 | pos_pixels = cls_targets.sum() 50 | 51 | loc_loss = F.smooth_l1_loss(cls_targets * loc_preds, 52 | cls_targets * loc_targets, 53 | reduction='sum') 54 | loc_loss = loc_loss / pos_pixels if pos_pixels > 0 else loc_loss 55 | 56 | total_loss = self.alpha * cls_loss + self.beta * loc_loss 57 | 58 | self.loss_dict.update({'total_loss': total_loss, 59 | 'reg_loss': loc_loss, 60 | 'cls_loss': cls_loss}) 61 | 62 | return total_loss 63 | 64 | def logging(self, epoch, batch_id, batch_len, writer): 65 | """ 66 | Print out the loss function for current iteration. 67 | 68 | Parameters 69 | ---------- 70 | epoch : int 71 | Current epoch for training. 72 | batch_id : int 73 | The current batch. 
74 | batch_len : int 75 | Total batch length in one iteration of training, 76 | writer : SummaryWriter 77 | Used to visualize on tensorboard 78 | """ 79 | total_loss = self.loss_dict['total_loss'] 80 | reg_loss = self.loss_dict['reg_loss'] 81 | cls_loss = self.loss_dict['cls_loss'] 82 | 83 | print("[epoch %d][%d/%d], || Loss: %.4f || cls Loss: %.4f" 84 | " || reg Loss: %.4f" % ( 85 | epoch, batch_id + 1, batch_len, 86 | total_loss.item(), cls_loss.item(), reg_loss.item())) 87 | 88 | writer.add_scalar('Regression_loss', reg_loss.item(), 89 | epoch * batch_len + batch_id) 90 | writer.add_scalar('Confidence_loss', cls_loss.item(), 91 | epoch * batch_len + batch_id) 92 | 93 | 94 | def test(): 95 | torch.manual_seed(0) 96 | loss = PixorLoss(None) 97 | pred = torch.sigmoid(torch.randn(1, 7, 2, 3)) 98 | label = torch.zeros(1, 7, 2, 3) 99 | loss = loss(pred, label) 100 | print(loss) 101 | 102 | 103 | if __name__ == "__main__": 104 | test() 105 | -------------------------------------------------------------------------------- /v2xvit/loss/point_pillar_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import numpy as np 5 | 6 | 7 | class WeightedSmoothL1Loss(nn.Module): 8 | """ 9 | Code-wise Weighted Smooth L1 Loss modified based on fvcore.nn.smooth_l1_loss 10 | https://github.com/facebookresearch/fvcore/blob/master/fvcore/nn/smooth_l1_loss.py 11 | | 0.5 * x ** 2 / beta if abs(x) < beta 12 | smoothl1(x) = | 13 | | abs(x) - 0.5 * beta otherwise, 14 | where x = input - target. 15 | """ 16 | def __init__(self, beta: float = 1.0 / 9.0, code_weights: list = None): 17 | """ 18 | Args: 19 | beta: Scalar float. 20 | L1 to L2 change point. 21 | For beta values < 1e-5, L1 loss is computed. 22 | code_weights: (#codes) float list if not None. 23 | Code-wise weights. 24 | """ 25 | super(WeightedSmoothL1Loss, self).__init__() 26 | self.beta = beta 27 | if code_weights is not None: 28 | self.code_weights = np.array(code_weights, dtype=np.float32) 29 | self.code_weights = torch.from_numpy(self.code_weights).cuda() 30 | 31 | @staticmethod 32 | def smooth_l1_loss(diff, beta): 33 | if beta < 1e-5: 34 | loss = torch.abs(diff) 35 | else: 36 | n = torch.abs(diff) 37 | loss = torch.where(n < beta, 0.5 * n ** 2 / beta, n - 0.5 * beta) 38 | 39 | return loss 40 | 41 | def forward(self, input: torch.Tensor, 42 | target: torch.Tensor, weights: torch.Tensor = None): 43 | """ 44 | Args: 45 | input: (B, #anchors, #codes) float tensor. 46 | Ecoded predicted locations of objects. 47 | target: (B, #anchors, #codes) float tensor. 48 | Regression targets. 49 | weights: (B, #anchors) float tensor if not None. 50 | 51 | Returns: 52 | loss: (B, #anchors) float tensor. 53 | Weighted smooth l1 loss without reduction. 
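# Quick numeric check of WeightedSmoothL1Loss.smooth_l1_loss with the default
# beta = 1/9: residuals below beta are penalised quadratically, larger ones
# linearly, exactly as in the docstring formula above.
import torch

beta = 1.0 / 9.0
diff = torch.tensor([0.05, 0.5])      # |0.05| < beta, |0.5| >= beta
n = diff.abs()
loss = torch.where(n < beta, 0.5 * n ** 2 / beta, n - 0.5 * beta)
print(loss)                           # tensor([0.0113, 0.4444]) (approx.)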
54 | """ 55 | target = torch.where(torch.isnan(target), input, target) # ignore nan targets 56 | 57 | diff = input - target 58 | loss = self.smooth_l1_loss(diff, self.beta) 59 | 60 | # anchor-wise weighting 61 | if weights is not None: 62 | assert weights.shape[0] == loss.shape[0] and weights.shape[1] == loss.shape[1] 63 | loss = loss * weights.unsqueeze(-1) 64 | 65 | return loss 66 | 67 | 68 | class PointPillarLoss(nn.Module): 69 | def __init__(self, args): 70 | super(PointPillarLoss, self).__init__() 71 | self.reg_loss_func = WeightedSmoothL1Loss() 72 | self.alpha = 0.25 73 | self.gamma = 2.0 74 | 75 | self.cls_weight = args['cls_weight'] 76 | self.reg_coe = args['reg'] 77 | self.loss_dict = {} 78 | 79 | def forward(self, output_dict, target_dict, prefix=''): 80 | """ 81 | Parameters 82 | ---------- 83 | output_dict : dict 84 | target_dict : dict 85 | """ 86 | rm = output_dict['rm{}'.format(prefix)] 87 | psm = output_dict['psm{}'.format(prefix)] 88 | targets = target_dict['targets'] 89 | 90 | cls_preds = psm.permute(0, 2, 3, 1).contiguous() 91 | 92 | box_cls_labels = target_dict['pos_equal_one'] 93 | box_cls_labels = box_cls_labels.view(psm.shape[0], -1).contiguous() 94 | 95 | positives = box_cls_labels > 0 96 | negatives = box_cls_labels == 0 97 | negative_cls_weights = negatives * 1.0 98 | cls_weights = (negative_cls_weights + 1.0 * positives).float() 99 | reg_weights = positives.float() 100 | 101 | pos_normalizer = positives.sum(1, keepdim=True).float() 102 | reg_weights /= torch.clamp(pos_normalizer, min=1.0) 103 | cls_weights /= torch.clamp(pos_normalizer, min=1.0) 104 | cls_targets = box_cls_labels 105 | cls_targets = cls_targets.unsqueeze(dim=-1) 106 | 107 | cls_targets = cls_targets.squeeze(dim=-1) 108 | one_hot_targets = torch.zeros( 109 | *list(cls_targets.shape), 2, 110 | dtype=cls_preds.dtype, device=cls_targets.device 111 | ) 112 | one_hot_targets.scatter_(-1, cls_targets.unsqueeze(dim=-1).long(), 1.0) 113 | cls_preds = cls_preds.view(psm.shape[0], -1, 1) 114 | one_hot_targets = one_hot_targets[..., 1:] 115 | 116 | cls_loss_src = self.cls_loss_func(cls_preds, 117 | one_hot_targets, 118 | weights=cls_weights) # [N, M] 119 | cls_loss = cls_loss_src.sum() / psm.shape[0] 120 | conf_loss = cls_loss * self.cls_weight 121 | 122 | # regression 123 | rm = rm.permute(0, 2, 3, 1).contiguous() 124 | rm = rm.view(rm.size(0), -1, 7) 125 | targets = targets.view(targets.size(0), -1, 7) 126 | box_preds_sin, reg_targets_sin = self.add_sin_difference(rm, 127 | targets) 128 | loc_loss_src =\ 129 | self.reg_loss_func(box_preds_sin, 130 | reg_targets_sin, 131 | weights=reg_weights) 132 | reg_loss = loc_loss_src.sum() / rm.shape[0] 133 | reg_loss *= self.reg_coe 134 | 135 | total_loss = reg_loss + conf_loss 136 | 137 | self.loss_dict.update({'total_loss{}'.format(prefix): total_loss, 138 | 'reg_loss{}'.format(prefix): reg_loss, 139 | 'conf_loss{}'.format(prefix): conf_loss}) 140 | 141 | return total_loss 142 | 143 | def cls_loss_func(self, input: torch.Tensor, 144 | target: torch.Tensor, 145 | weights: torch.Tensor): 146 | """ 147 | Args: 148 | input: (B, #anchors, #classes) float tensor. 149 | Predicted logits for each class 150 | target: (B, #anchors, #classes) float tensor. 151 | One-hot encoded classification targets 152 | weights: (B, #anchors) float tensor. 153 | Anchor-wise weights. 154 | 155 | Returns: 156 | weighted_loss: (B, #anchors, #classes) float tensor after weighting. 
157 | """ 158 | pred_sigmoid = torch.sigmoid(input) 159 | alpha_weight = target * self.alpha + (1 - target) * (1 - self.alpha) 160 | pt = target * (1.0 - pred_sigmoid) + (1.0 - target) * pred_sigmoid 161 | focal_weight = alpha_weight * torch.pow(pt, self.gamma) 162 | 163 | bce_loss = self.sigmoid_cross_entropy_with_logits(input, target) 164 | 165 | loss = focal_weight * bce_loss 166 | 167 | if weights.shape.__len__() == 2 or \ 168 | (weights.shape.__len__() == 1 and target.shape.__len__() == 2): 169 | weights = weights.unsqueeze(-1) 170 | 171 | assert weights.shape.__len__() == loss.shape.__len__() 172 | 173 | return loss * weights 174 | 175 | @staticmethod 176 | def sigmoid_cross_entropy_with_logits(input: torch.Tensor, target: torch.Tensor): 177 | """ PyTorch Implementation for tf.nn.sigmoid_cross_entropy_with_logits: 178 | max(x, 0) - x * z + log(1 + exp(-abs(x))) in 179 | https://www.tensorflow.org/api_docs/python/tf/nn/sigmoid_cross_entropy_with_logits 180 | 181 | Args: 182 | input: (B, #anchors, #classes) float tensor. 183 | Predicted logits for each class 184 | target: (B, #anchors, #classes) float tensor. 185 | One-hot encoded classification targets 186 | 187 | Returns: 188 | loss: (B, #anchors, #classes) float tensor. 189 | Sigmoid cross entropy loss without reduction 190 | """ 191 | loss = torch.clamp(input, min=0) - input * target + \ 192 | torch.log1p(torch.exp(-torch.abs(input))) 193 | return loss 194 | 195 | @staticmethod 196 | def add_sin_difference(boxes1, boxes2, dim=6): 197 | assert dim != -1 198 | rad_pred_encoding = torch.sin(boxes1[..., dim:dim + 1]) * \ 199 | torch.cos(boxes2[..., dim:dim + 1]) 200 | rad_tg_encoding = torch.cos(boxes1[..., dim:dim + 1]) * \ 201 | torch.sin(boxes2[..., dim:dim + 1]) 202 | 203 | boxes1 = torch.cat([boxes1[..., :dim], rad_pred_encoding, 204 | boxes1[..., dim + 1:]], dim=-1) 205 | boxes2 = torch.cat([boxes2[..., :dim], rad_tg_encoding, 206 | boxes2[..., dim + 1:]], dim=-1) 207 | return boxes1, boxes2 208 | 209 | 210 | def logging(self, epoch, batch_id, batch_len, writer, pbar=None): 211 | """ 212 | Print out the loss function for current iteration. 213 | 214 | Parameters 215 | ---------- 216 | epoch : int 217 | Current epoch for training. 218 | batch_id : int 219 | The current batch. 
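# What add_sin_difference() above buys for the yaw channel (dim=6): after the
# substitution, the residual on that channel equals
# sin(t_pred)*cos(t_gt) - cos(t_pred)*sin(t_gt) = sin(t_pred - t_gt),
# so the smooth-L1 on heading becomes periodic and a 2*pi wrap costs nothing.
import math
import torch

theta_pred = torch.tensor([0.1])
theta_gt = theta_pred + 2 * math.pi           # same heading, wrapped once
encoded_diff = torch.sin(theta_pred) * torch.cos(theta_gt) - \
               torch.cos(theta_pred) * torch.sin(theta_gt)
print(encoded_diff)                           # ~0, whereas the raw difference is -2*pi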
220 | batch_len : int 221 | Total batch length in one iteration of training, 222 | writer : SummaryWriter 223 | Used to visualize on tensorboard 224 | """ 225 | total_loss = self.loss_dict['total_loss'] 226 | reg_loss = self.loss_dict['reg_loss'] 227 | conf_loss = self.loss_dict['conf_loss'] 228 | if pbar is None: 229 | print("[epoch %d][%d/%d], || Loss: %.4f || Conf Loss: %.4f" 230 | " || Loc Loss: %.4f" % ( 231 | epoch, batch_id + 1, batch_len, 232 | total_loss.item(), conf_loss.item(), reg_loss.item())) 233 | else: 234 | pbar.set_description("[epoch %d][%d/%d], || Loss: %.4f || Conf Loss: %.4f" 235 | " || Loc Loss: %.4f" % ( 236 | epoch, batch_id + 1, batch_len, 237 | total_loss.item(), conf_loss.item(), reg_loss.item())) 238 | 239 | 240 | writer.add_scalar('Regression_loss', reg_loss.item(), 241 | epoch*batch_len + batch_id) 242 | writer.add_scalar('Confidence_loss', conf_loss.item(), 243 | epoch*batch_len + batch_id) 244 | -------------------------------------------------------------------------------- /v2xvit/loss/voxel_net_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class VoxelNetLoss(nn.Module): 7 | def __init__(self, args): 8 | super(VoxelNetLoss, self).__init__() 9 | self.smoothl1loss = nn.SmoothL1Loss(size_average=False) 10 | self.alpha = args['alpha'] 11 | self.beta = args['beta'] 12 | self.reg_coe = args['reg'] 13 | self.loss_dict = {} 14 | 15 | def forward(self, output_dict, target_dict): 16 | """ 17 | Parameters 18 | ---------- 19 | output_dict : dict 20 | target_dict : dict 21 | """ 22 | rm = output_dict['rm'] 23 | psm = output_dict['psm'] 24 | 25 | pos_equal_one = target_dict['pos_equal_one'] 26 | neg_equal_one = target_dict['neg_equal_one'] 27 | targets = target_dict['targets'] 28 | 29 | p_pos = F.sigmoid(psm.permute(0, 2, 3, 1)) 30 | rm = rm.permute(0, 2, 3, 1).contiguous() 31 | rm = rm.view(rm.size(0), rm.size(1), rm.size(2), -1, 7) 32 | targets = targets.view(targets.size(0), targets.size(1), 33 | targets.size(2), -1, 7) 34 | pos_equal_one_for_reg = pos_equal_one.unsqueeze( 35 | pos_equal_one.dim()).expand(-1, -1, -1, -1, 7) 36 | 37 | rm_pos = rm * pos_equal_one_for_reg 38 | targets_pos = targets * pos_equal_one_for_reg 39 | 40 | cls_pos_loss = -pos_equal_one * torch.log(p_pos + 1e-6) 41 | cls_pos_loss = cls_pos_loss.sum() / (pos_equal_one.sum() + 1e-6) 42 | 43 | cls_neg_loss = -neg_equal_one * torch.log(1 - p_pos + 1e-6) 44 | cls_neg_loss = cls_neg_loss.sum() / (neg_equal_one.sum() + 1e-6) 45 | 46 | reg_loss = self.smoothl1loss(rm_pos, targets_pos) 47 | reg_loss = reg_loss / (pos_equal_one.sum() + 1e-6) 48 | conf_loss = self.alpha * cls_pos_loss + self.beta * cls_neg_loss 49 | 50 | total_loss = self.reg_coe * reg_loss + conf_loss 51 | 52 | self.loss_dict.update({'total_loss': total_loss, 53 | 'reg_loss': reg_loss, 54 | 'conf_loss': conf_loss}) 55 | 56 | return total_loss 57 | 58 | def logging(self, epoch, batch_id, batch_len, writer): 59 | """ 60 | Print out the loss function for current iteration. 61 | 62 | Parameters 63 | ---------- 64 | epoch : int 65 | Current epoch for training. 66 | batch_id : int 67 | The current batch. 
68 | batch_len : int 69 | Total batch length in one iteration of training, 70 | writer : SummaryWriter 71 | Used to visualize on tensorboard 72 | """ 73 | total_loss = self.loss_dict['total_loss'] 74 | reg_loss = self.loss_dict['reg_loss'] 75 | conf_loss = self.loss_dict['conf_loss'] 76 | 77 | print("[epoch %d][%d/%d], || Loss: %.4f || Conf Loss: %.4f" 78 | " || Loc Loss: %.4f" % ( 79 | epoch, batch_id + 1, batch_len, 80 | total_loss.item(), conf_loss.item(), reg_loss.item())) 81 | 82 | writer.add_scalar('Regression_loss', reg_loss.item(), 83 | epoch*batch_len + batch_id) 84 | writer.add_scalar('Confidence_loss', conf_loss.item(), 85 | epoch*batch_len + batch_id) 86 | -------------------------------------------------------------------------------- /v2xvit/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ydk122024/How2comm/8d357abdcb265ed4db5e4c6d9ee5400ea5799abe/v2xvit/models/__init__.py -------------------------------------------------------------------------------- /v2xvit/models/comm_modules/communication.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | 5 | class Communication(nn.Module): 6 | def __init__(self, args): 7 | super(Communication, self).__init__() 8 | 9 | self.smooth = False 10 | self.thre = args['thre'] 11 | self.compressed_dim = args['compressed_dim'] 12 | if 'gaussian_smooth' in args: 13 | self.smooth = True 14 | kernel_size = args['gaussian_smooth']['k_size'] 15 | c_sigma = args['gaussian_smooth']['c_sigma'] 16 | self.gaussian_filter = nn.Conv2d(1, 1, kernel_size=kernel_size, stride=1, padding=(kernel_size-1)//2) 17 | self.init_gaussian_filter(kernel_size, c_sigma) 18 | self.gaussian_filter.requires_grad = False 19 | 20 | def init_gaussian_filter(self, k_size=5, sigma=1): 21 | def _gen_gaussian_kernel(k_size=5, sigma=1): 22 | center = k_size // 2 23 | x, y = np.mgrid[0 - center : k_size - center, 0 - center : k_size - center] 24 | g = 1 / (2 * np.pi * sigma) * np.exp(-(np.square(x) + np.square(y)) / (2 * np.square(sigma))) 25 | return g 26 | gaussian_kernel = _gen_gaussian_kernel(k_size, sigma) 27 | self.gaussian_filter.weight.data = torch.Tensor(gaussian_kernel).to(self.gaussian_filter.weight.device).unsqueeze(0).unsqueeze(0) 28 | self.gaussian_filter.bias.data.zero_() 29 | 30 | def forward(self, psm): 31 | B = len(psm) 32 | _, _, H, W = psm[0].shape 33 | 34 | 35 | private_confidence_maps = [] 36 | private_communication_masks = [] 37 | communication_rates = [] 38 | 39 | for b in range(B): 40 | ori_private_communication_maps = psm[b].sigmoid().max(dim=1)[0].unsqueeze(1) 41 | 42 | if self.smooth: 43 | private_communication_maps = self.gaussian_filter(ori_private_communication_maps) 44 | else: 45 | private_communication_maps = ori_private_communication_maps 46 | private_confidence_maps.append(private_communication_maps) 47 | 48 | 49 | ones_mask = torch.ones_like(private_communication_maps).to(private_communication_maps.device) 50 | zeros_mask = torch.zeros_like(private_communication_maps).to(private_communication_maps) 51 | 52 | private_mask = torch.where(private_communication_maps > self.thre, ones_mask, zeros_mask) 53 | cav_num = private_mask.shape[0] 54 | private_rate = private_mask[1:].sum()/((cav_num-1) * H * W) 55 | 56 | private_mask_nodiag = private_mask.clone() 57 | ones_mask = torch.ones_like(private_mask).to(private_mask.device) 58 | private_mask_nodiag[::2] = ones_mask[::2] 
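# Standalone sketch of the two steps Communication.forward() applies to each
# confidence map, using the yaml settings (gaussian_smooth k_size = 5,
# c_sigma = 1.0, thre = 0.01): smooth with a fixed Gaussian kernel, then
# threshold into a binary mask whose mean over the non-ego agents is the
# communication rate.  Toy tensors only, not tied to real detections.
import numpy as np
import torch

def gen_gaussian_kernel(k_size=5, sigma=1.0):
    # same formula as _gen_gaussian_kernel() above
    center = k_size // 2
    x, y = np.mgrid[-center:k_size - center, -center:k_size - center]
    return 1 / (2 * np.pi * sigma) * np.exp(-(x ** 2 + y ** 2) / (2 * sigma ** 2))

print(gen_gaussian_kernel()[2, 2])            # peak ~0.159 = 1 / (2 * pi)

thre = 0.01
cav_num, anchors, H, W = 3, 2, 4, 4
psm = torch.randn(cav_num, anchors, H, W)     # toy classification logits
conf = psm.sigmoid().max(dim=1)[0].unsqueeze(1)
mask = (conf > thre).float()                  # 1 = this location is transmitted
rate = mask[1:].sum() / ((cav_num - 1) * H * W)
print(rate)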
59 | private_communication_masks.append(private_mask_nodiag) 60 | communication_rates.append(private_rate) 61 | 62 | communication_rates = sum(communication_rates)/B 63 | private_mask = torch.cat(private_communication_masks, dim=0) 64 | 65 | return private_mask, communication_rates, private_confidence_maps 66 | -------------------------------------------------------------------------------- /v2xvit/models/comm_modules/mutual_communication.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | import torch.nn.functional as F 5 | import torch.optim as optim 6 | import random 7 | 8 | 9 | class Channel_Request_Attention(nn.Module): 10 | def __init__(self, in_planes, ratio=16): 11 | super(Channel_Request_Attention, self).__init__() 12 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 13 | self.max_pool = nn.AdaptiveMaxPool2d(1) 14 | 15 | self.sharedMLP = nn.Sequential( 16 | nn.Conv2d(in_planes, in_planes // ratio, 1, bias=False), nn.ReLU(), 17 | nn.Conv2d(in_planes // ratio, in_planes, 1, bias=False)) 18 | self.sigmoid = nn.Sigmoid() 19 | 20 | def forward(self, x): 21 | avgout = self.sharedMLP(self.avg_pool(x)) 22 | maxout = self.sharedMLP(self.max_pool(x)) 23 | return self.sigmoid(avgout + maxout) 24 | 25 | class Spatial_Request_Attention(nn.Module): 26 | def __init__(self, kernel_size=3): 27 | super(Spatial_Request_Attention, self).__init__() 28 | assert kernel_size in (3, 7), "kernel size must be 3 or 7" 29 | padding = 3 if kernel_size == 7 else 1 30 | 31 | self.conv = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False) 32 | self.sigmoid = nn.Sigmoid() 33 | 34 | def forward(self, x): 35 | avgout = torch.mean(x, dim=1, keepdim=True) 36 | maxout, _ = torch.max(x, dim=1, keepdim=True) 37 | x = torch.cat([avgout, maxout], dim=1) 38 | x = self.conv(x) 39 | return self.sigmoid(x) 40 | 41 | 42 | class StatisticsNetwork(nn.Module): 43 | def __init__(self, img_feature_channels: int): 44 | 45 | super().__init__() 46 | self.conv1 = nn.Conv2d( 47 | in_channels=img_feature_channels, out_channels=img_feature_channels*2, kernel_size=1, stride=1 48 | ) 49 | self.conv2 = nn.Conv2d( 50 | in_channels=img_feature_channels*2, out_channels=img_feature_channels*2, kernel_size=1, stride=1 51 | ) 52 | self.conv3 = nn.Conv2d( 53 | in_channels=img_feature_channels*2, out_channels=1, kernel_size=1, stride=1) 54 | self.relu = nn.ReLU() 55 | 56 | def forward(self, concat_feature: torch.Tensor) -> torch.Tensor: 57 | x = self.conv1(concat_feature) 58 | x = self.relu(x) 59 | x = self.conv2(x) 60 | x = self.relu(x) 61 | local_statistics = self.conv3(x) 62 | return local_statistics 63 | 64 | 65 | class DeepInfoMaxLoss(nn.Module): 66 | def __init__(self, loss_coeff=1) -> None: 67 | super().__init__() 68 | self.loss_coeff = loss_coeff 69 | 70 | def __call__(self, T: torch.Tensor, T_prime: torch.Tensor) -> float: 71 | 72 | joint_expectation = (-F.softplus(-T)).mean() 73 | marginal_expectation = F.softplus(T_prime).mean() 74 | mutual_info = joint_expectation - marginal_expectation 75 | 76 | return -mutual_info*self.loss_coeff 77 | 78 | 79 | class Communication(nn.Module): 80 | def __init__(self, args, in_planes): 81 | super(Communication, self).__init__() 82 | self.channel_request = Channel_Request_Attention(in_planes) 83 | self.spatial_request = Spatial_Request_Attention() 84 | self.channel_fusion = nn.Conv2d(in_planes*2, in_planes, 1, bias=False) 85 | self.spatial_fusion = nn.Conv2d(2, 1, 1, bias=False) 86 | self.statisticsNetwork 
= StatisticsNetwork(in_planes*2) 87 | self.mutual_loss = DeepInfoMaxLoss() 88 | self.request_flag = args['request_flag'] 89 | 90 | self.smooth = False 91 | self.thre = args['thre'] 92 | if 'gaussian_smooth' in args: 93 | self.smooth = True 94 | kernel_size = args['gaussian_smooth']['k_size'] 95 | self.kernel_size = kernel_size 96 | c_sigma = args['gaussian_smooth']['c_sigma'] 97 | self.gaussian_filter = nn.Conv2d( 98 | 1, 1, kernel_size=kernel_size, stride=1, padding=(kernel_size-1)//2) 99 | self.init_gaussian_filter(kernel_size, c_sigma) 100 | self.gaussian_filter.requires_grad = False 101 | 102 | x = torch.arange(-(kernel_size - 1) // 2, (kernel_size + 1) // 2, dtype=torch.float32) 103 | d1_gaussian_filter = torch.exp(-x**2 / (2 * c_sigma**2)) 104 | d1_gaussian_filter /= d1_gaussian_filter.sum() 105 | 106 | self.d1_gaussian_filter = d1_gaussian_filter.view(1, 1, kernel_size).cuda() 107 | 108 | def init_gaussian_filter(self, k_size=5, sigma=1): 109 | def _gen_gaussian_kernel(k_size=5, sigma=1): 110 | center = k_size // 2 111 | x, y = np.mgrid[0 - center: k_size - 112 | center, 0 - center: k_size - center] 113 | g = 1 / (2 * np.pi * sigma) * np.exp(-(np.square(x) + 114 | np.square(y)) / (2 * np.square(sigma))) 115 | return g 116 | gaussian_kernel = _gen_gaussian_kernel(k_size, sigma) 117 | gaussian_kernel = torch.Tensor(gaussian_kernel).to( 118 | self.gaussian_filter.weight.device).unsqueeze(0).unsqueeze(0) 119 | self.gaussian_filter.weight.data = gaussian_kernel 120 | self.gaussian_filter.bias.data.zero_() 121 | 122 | def forward(self, feat_list,confidence_map_list=None): 123 | send_feats = [] 124 | comm_rate_list = [] 125 | sparse_mask_list = [] 126 | total_loss = torch.zeros(1).to(feat_list[0].device) 127 | for bs in range(len(feat_list)): 128 | agent_feature = feat_list[bs] 129 | cav_num, C, H, W = agent_feature.shape 130 | if cav_num == 1: 131 | send_feats.append(agent_feature) 132 | ones_mask = torch.ones(cav_num, C, H, W).to(feat_list[0].device) 133 | sparse_mask_list.append(ones_mask) 134 | continue 135 | 136 | collaborator_feature = torch.tensor([]).to(agent_feature.device) 137 | sparse_batch_mask = torch.tensor([]).to(agent_feature.device) 138 | 139 | agent_channel_attention = self.channel_request( 140 | agent_feature) 141 | agent_spatial_attention = self.spatial_request( 142 | agent_feature) 143 | agent_activation = torch.mean(agent_feature, dim=1, keepdims=True).sigmoid() 144 | agent_activation = self.gaussian_filter(agent_activation) 145 | 146 | ego_channel_request = ( 147 | 1 - agent_channel_attention[0, ]).unsqueeze(0) 148 | ego_spatial_request = ( 149 | 1 - agent_spatial_attention[0, ]).unsqueeze(0) 150 | 151 | 152 | for i in range(cav_num - 1): 153 | if self.request_flag: 154 | channel_coefficient = self.channel_fusion(torch.cat( 155 | [ego_channel_request, agent_channel_attention[i+1, ].unsqueeze(0)], dim=1)) 156 | spatial_coefficient = self.spatial_fusion(torch.cat( 157 | [ego_spatial_request, agent_spatial_attention[i+1, ].unsqueeze(0)], dim=1)) 158 | else: 159 | channel_coefficient = agent_channel_attention[i+1, ].unsqueeze( 160 | 0) 161 | spatial_coefficient = agent_spatial_attention[i+1, ].unsqueeze( 162 | 0) 163 | 164 | spatial_coefficient = spatial_coefficient.sigmoid() 165 | channel_coefficient = channel_coefficient.sigmoid() 166 | smoth_channel_coefficient = F.conv1d(channel_coefficient.reshape(1,1,C), self.d1_gaussian_filter, padding=(self.kernel_size - 1) // 2) 167 | channel_coefficient = smoth_channel_coefficient.reshape(1,C,1,1) 168 | 169 | 
spatial_coefficient = self.gaussian_filter(spatial_coefficient) 170 | sparse_matrix = channel_coefficient * spatial_coefficient 171 | temp_activation = agent_activation[i+1, ].unsqueeze(0) 172 | sparse_matrix = sparse_matrix * temp_activation 173 | 174 | if self.thre > 0: 175 | ones_mask = torch.ones_like( 176 | sparse_matrix).to(sparse_matrix.device) 177 | zeros_mask = torch.zeros_like( 178 | sparse_matrix).to(sparse_matrix.device) 179 | sparse_mask = torch.where( 180 | sparse_matrix > self.thre, ones_mask, zeros_mask) 181 | else: 182 | K = int(C * H * W * random.uniform(0, 0.3)) 183 | communication_maps = sparse_matrix.reshape(1, C * H * W) 184 | _, indices = torch.topk(communication_maps, k=K, sorted=False) 185 | communication_mask = torch.zeros_like(communication_maps).to(communication_maps.device) 186 | ones_fill = torch.ones(1, K, dtype=communication_maps.dtype, device=communication_maps.device) 187 | sparse_mask = torch.scatter(communication_mask, -1, indices, ones_fill).reshape(1, C, H, W) 188 | 189 | comm_rate = sparse_mask.sum()/(C*H*W) 190 | comm_rate_list.append(comm_rate) 191 | 192 | collaborator_feature = torch.cat( 193 | [collaborator_feature, agent_feature[i+1, ].unsqueeze(0)*sparse_mask], dim=0) 194 | sparse_batch_mask = torch.cat( 195 | [sparse_batch_mask, sparse_mask], dim=0) 196 | 197 | 198 | org_feature = agent_feature.clone() 199 | sparse_feature = torch.cat( 200 | [agent_feature[:1], collaborator_feature], dim=0) 201 | send_feats.append(sparse_feature) 202 | ego_mask = torch.ones_like(agent_feature[:1]).to( 203 | agent_feature[:1].device) 204 | sparse_batch_mask = torch.cat( 205 | [ego_mask, sparse_batch_mask], dim=0) 206 | sparse_mask_list.append(sparse_batch_mask) 207 | 208 | org_feature_prime = torch.cat( 209 | [org_feature[1:], org_feature[0].unsqueeze(0)], dim=0) 210 | local_mutual = self.statisticsNetwork( 211 | torch.cat([org_feature, sparse_feature], dim=1)) 212 | local_mutual_prime = self.statisticsNetwork( 213 | torch.cat([org_feature_prime, sparse_feature], dim=1)) 214 | loss = self.mutual_loss(local_mutual, local_mutual_prime) 215 | total_loss += loss 216 | 217 | if len(comm_rate_list) > 0: 218 | mean_rate = sum(comm_rate_list) / len(comm_rate_list) 219 | else: 220 | mean_rate = torch.tensor(0).to(feat_list[0].device) 221 | sparse_mask = torch.cat(sparse_mask_list, dim=0) 222 | 223 | return send_feats, total_loss, mean_rate, sparse_mask 224 | -------------------------------------------------------------------------------- /v2xvit/models/fuse_modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ydk122024/How2comm/8d357abdcb265ed4db5e4c6d9ee5400ea5799abe/v2xvit/models/fuse_modules/__init__.py -------------------------------------------------------------------------------- /v2xvit/models/fuse_modules/fuse_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | from einops import rearrange 5 | from v2xvit.utils.common_utils import torch_tensor_to_numpy 6 | 7 | 8 | def regroup(dense_feature, record_len, max_len): 9 | """ 10 | Regroup the data based on the record_len. 11 | Parameters 12 | ---------- 13 | dense_feature : torch.Tensor 14 | N, C, H, W 15 | record_len : list 16 | [sample1_len, sample2_len, ...] 
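# The DeepInfoMaxLoss used above is a Jensen-Shannon style mutual-information
# lower bound in the spirit of Deep InfoMax: the statistics network T scores
# matched pairs (original feature, its sparsified version), T_prime scores
# mismatched pairs, and the loss maximises E[-softplus(-T)] - E[softplus(T_prime)].
# A toy check that matched pairs scoring higher drives the loss down:
import torch
import torch.nn.functional as F

def dim_loss(T, T_prime, coeff=1.0):
    mutual_info = (-F.softplus(-T)).mean() - F.softplus(T_prime).mean()
    return -mutual_info * coeff

good = dim_loss(torch.full((4, 1), 3.0), torch.full((4, 1), -3.0))
bad = dim_loss(torch.full((4, 1), -3.0), torch.full((4, 1), 3.0))
print(good.item() < bad.item())               # True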
17 | max_len : int 18 | Maximum cav number 19 | Returns 20 | ------- 21 | regroup_feature : torch.Tensor 22 | B, L, C, H, W 23 | """ 24 | cum_sum_len = list(np.cumsum(torch_tensor_to_numpy(record_len))) 25 | split_features = torch.tensor_split(dense_feature, 26 | cum_sum_len[:-1]) 27 | regroup_features = [] 28 | mask = [] 29 | 30 | for split_feature in split_features: 31 | # M, C, H, W 32 | feature_shape = split_feature.shape 33 | 34 | # the maximum M is 5 as most 5 cavs 35 | padding_len = max_len - feature_shape[0] 36 | mask.append([1] * feature_shape[0] + [0] * padding_len) 37 | 38 | padding_tensor = torch.zeros(padding_len, feature_shape[1], 39 | feature_shape[2], feature_shape[3]) 40 | padding_tensor = padding_tensor.to(split_feature.device) 41 | 42 | split_feature = torch.cat([split_feature, padding_tensor], 43 | dim=0) 44 | 45 | # 1, 5C, H, W 46 | split_feature = split_feature.view(-1, 47 | feature_shape[2], 48 | feature_shape[3]).unsqueeze(0) 49 | regroup_features.append(split_feature) 50 | 51 | # B, 5C, H, W 52 | regroup_features = torch.cat(regroup_features, dim=0) 53 | # B, L, C, H, W 54 | regroup_features = rearrange(regroup_features, 55 | 'b (l c) h w -> b l c h w', 56 | l=max_len) 57 | mask = torch.from_numpy(np.array(mask)).to(regroup_features.device) 58 | 59 | return regroup_features, mask -------------------------------------------------------------------------------- /v2xvit/models/fuse_modules/how2comm_deformable.py: -------------------------------------------------------------------------------- 1 | from turtle import update 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | import numpy as np 6 | from torch.nn import functional as F 7 | from torch import batch_norm, einsum 8 | from einops import rearrange, repeat 9 | from icecream import ic 10 | 11 | from v2xvit.models.sub_modules.torch_transformation_utils import warp_affine_simple 12 | from v2xvit.models.comm_modules.communication import Communication 13 | from v2xvit.models.sub_modules.how2comm_preprocess import How2commPreprocess 14 | from v2xvit.models.fuse_modules.stcformer import STCFormer 15 | 16 | class How2comm(nn.Module): 17 | def __init__(self, args, args_pre): 18 | super(How2comm, self).__init__() 19 | 20 | self.max_cav = 5 21 | self.communication = False 22 | self.round = 1 23 | if 'communication' in args: 24 | self.communication = True 25 | self.naive_communication = Communication(args['communication']) 26 | if 'round' in args['communication']: 27 | self.round = args['communication']['round'] 28 | print("communication:", self.communication) 29 | self.communication_flag = args['communication_flag'] 30 | self.discrete_ratio = args['voxel_size'][0] 31 | self.downsample_rate = args['downsample_rate'] 32 | self.async_flag = True 33 | self.channel_fuse = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=7, stride=1, padding=3) 34 | 35 | self.agg_mode = args['agg_operator']['mode'] 36 | self.multi_scale = args['multi_scale'] 37 | self.how2comm = How2commPreprocess(args_pre, channel=64, delay=1) 38 | if self.multi_scale: 39 | layer_nums = args['layer_nums'] 40 | num_filters = args['num_filters'] 41 | self.num_levels = len(layer_nums) 42 | self.fuse_modules = nn.ModuleList() 43 | for idx in range(self.num_levels): 44 | if self.agg_mode == 'STCFormer': 45 | fuse_network = STCFormer( 46 | channel=num_filters[idx], args=args['temporal_fusion'], idx=idx) 47 | self.fuse_modules.append(fuse_network) 48 | 49 | def regroup(self, x, record_len): 50 | cum_sum_len = 
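# Example shapes for regroup() above: five CAV feature maps coming from two
# samples (record_len = [2, 3]) are zero-padded to max_len = 5 agents each.
# Toy sizes; assumes the v2xvit package in this repo is importable.
import torch
from v2xvit.models.fuse_modules.fuse_utils import regroup

dense_feature = torch.randn(5, 64, 100, 352)          # N, C, H, W
record_len = torch.tensor([2, 3])
features, mask = regroup(dense_feature, record_len, max_len=5)
print(features.shape)     # torch.Size([2, 5, 64, 100, 352])
print(mask)               # [[1, 1, 0, 0, 0], [1, 1, 1, 0, 0]]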
torch.cumsum(record_len, dim=0) 51 | split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) 52 | return split_x 53 | 54 | def forward(self, x, psm, record_len, pairwise_t_matrix, backbone=None, heads=None, history=None): 55 | _, C, H, W = x.shape 56 | B, L = pairwise_t_matrix.shape[:2] 57 | 58 | pairwise_t_matrix = pairwise_t_matrix[:, :, :, [ 59 | 0, 1], :][:, :, :, :, [0, 1, 3]] 60 | pairwise_t_matrix[..., 0, 1] = pairwise_t_matrix[..., 0, 1] * H / W 61 | pairwise_t_matrix[..., 1, 0] = pairwise_t_matrix[..., 1, 0] * W / H 62 | pairwise_t_matrix[..., 0, 2] = pairwise_t_matrix[..., 0, 63 | 2] / (self.downsample_rate * self.discrete_ratio * W) * 2 64 | pairwise_t_matrix[..., 1, 2] = pairwise_t_matrix[..., 1, 65 | 2] / (self.downsample_rate * self.discrete_ratio * H) * 2 66 | 67 | 68 | if history and self.async_flag: 69 | feat_final, offset_loss = self.how2comm(x, history, record_len, backbone, heads) 70 | x = feat_final 71 | else: 72 | offset_loss = torch.zeros(1).to(x.device) 73 | neighbor_psm_list = [] 74 | if history: 75 | his = history[0] 76 | else: 77 | his = x 78 | 79 | if self.multi_scale: 80 | ups = [] 81 | ups_temporal = [] 82 | ups_exclusive = [] 83 | ups_common = [] 84 | with_resnet = True if hasattr(backbone, 'resnet') else False 85 | if with_resnet: 86 | feats = backbone.resnet(x) 87 | history_feats = backbone.resnet(his) 88 | 89 | for i in range(self.num_levels): 90 | x = feats[i] if with_resnet else backbone.blocks[i](x) 91 | his = history_feats[i] if with_resnet else backbone.blocks[i](his) 92 | 93 | if i == 0: 94 | if self.communication: 95 | batch_confidence_maps = self.regroup(psm, record_len) 96 | _, _, confidence_maps = self.naive_communication(batch_confidence_maps) 97 | 98 | batch_temp_features = self.regroup(x, record_len) 99 | batch_temp_features_his = self.regroup(his, record_len) 100 | temp_list = [] 101 | temp_psm_list = [] 102 | history_list = [] 103 | for b in range(B): 104 | N = record_len[b] 105 | t_matrix = pairwise_t_matrix[b][:N, :N, :, :] 106 | temp_features = batch_temp_features[b] 107 | C, H, W = temp_features.shape[1:] 108 | neighbor_feature = warp_affine_simple(temp_features, 109 | t_matrix[0, 110 | :, :, :], 111 | (H, W)) 112 | temp_list.append(neighbor_feature) 113 | 114 | temp_features_his = batch_temp_features_his[b] 115 | C, H, W = temp_features_his.shape[1:] 116 | neighbor_feature_his = warp_affine_simple(temp_features_his, 117 | t_matrix[0, 118 | :, :, :], 119 | (H, W)) 120 | history_list.append(neighbor_feature_his) 121 | 122 | temp_psm_list.append(warp_affine_simple(confidence_maps[b], t_matrix[0, :, :, :], (H, W))) 123 | x = torch.cat(temp_list, dim=0) 124 | his = torch.cat(history_list, dim=0) 125 | if self.communication_flag: 126 | sparse_feats, commu_loss, communication_rates, sparse_history = self.how2comm.communication( 127 | x, record_len,history_list,temp_psm_list) 128 | x = F.interpolate(sparse_feats, scale_factor=1, mode='bilinear', align_corners=False) 129 | x = self.channel_fuse(x) 130 | his = F.interpolate(sparse_history, scale_factor=1, mode='bilinear', align_corners=False) 131 | his = self.channel_fuse(his) 132 | else: 133 | communication_rates = torch.tensor(0).to(x.device) 134 | commu_loss = torch.zeros(1).to(x.device) 135 | else: 136 | communication_rates = torch.tensor(0).to(x.device) 137 | 138 | batch_node_features = self.regroup(x, record_len) 139 | batch_node_features_his = self.regroup(his, record_len) 140 | 141 | x_fuse = [] 142 | x_temporal = [] 143 | x_exclusive = [] 144 | x_common = [] 145 | for b in range(B): 146 
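# How the forward() above rescales pairwise_t_matrix for warp_affine_simple():
# metric translations are mapped to the normalised [-1, 1] coordinates that
# torch.nn.functional.affine_grid expects.  With the yaml values
# (downsample_rate 1, voxel 0.4 m) and a 704-pixel-wide spatial_features map,
# the map spans 0.4 * 704 = 281.6 m, so a 140.8 m offset (half the x-range)
# becomes a normalised offset of 1.0, i.e. half of the 2-unit-wide grid.
downsample_rate, discrete_ratio, W = 1, 0.4, 704
t_x_metres = 140.8
t_x_norm = t_x_metres / (downsample_rate * discrete_ratio * W) * 2
print(t_x_norm)           # 1.0 (up to float rounding)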
| N = record_len[b] 147 | t_matrix = pairwise_t_matrix[b][:N, :N, :, :] 148 | node_features = batch_node_features[b] 149 | node_features_his = batch_node_features_his[b] 150 | if i == 0: 151 | neighbor_feature = node_features 152 | neighbor_feature_his = node_features_his 153 | neighbor_psm = warp_affine_simple( 154 | confidence_maps[b], t_matrix[0, :, :, :], (H, W)) 155 | 156 | else: 157 | C, H, W = node_features.shape[1:] 158 | neighbor_feature = warp_affine_simple(node_features, 159 | t_matrix[0, 160 | :, :, :], 161 | (H, W)) 162 | neighbor_feature_his = warp_affine_simple(node_features_his, 163 | t_matrix[0, 164 | :, :, :], 165 | (H, W)) 166 | 167 | feature_shape = neighbor_feature.shape 168 | padding_len = self.max_cav - feature_shape[0] 169 | padding_feature = torch.zeros(padding_len, feature_shape[1], 170 | feature_shape[2], feature_shape[3]) 171 | padding_feature = padding_feature.to( 172 | neighbor_feature.device) 173 | neighbor_feature = torch.cat([neighbor_feature, padding_feature], 174 | dim=0) 175 | 176 | if i == 0: 177 | padding_map = torch.zeros( 178 | padding_len, 1, feature_shape[2], feature_shape[3]) 179 | padding_map = padding_map.to(neighbor_feature.device) 180 | neighbor_psm = torch.cat( 181 | [neighbor_psm, padding_map], dim=0) 182 | neighbor_psm_list.append(neighbor_psm) 183 | 184 | if self.agg_mode == "STCFormer": 185 | fusion, output_list = self.fuse_modules[i](neighbor_feature, neighbor_psm_list[b], neighbor_feature_his, i) 186 | x_fuse.append(fusion) 187 | x_temporal.append(output_list[0]) 188 | x_exclusive.append(output_list[1]) 189 | x_common.append(output_list[2]) 190 | 191 | x_fuse = torch.stack(x_fuse) 192 | x_temporal = torch.stack(x_temporal) 193 | x_exclusive = torch.stack(x_exclusive) 194 | x_common = torch.stack(x_common) 195 | 196 | if len(backbone.deblocks) > 0: 197 | ups.append(backbone.deblocks[i](x_fuse)) 198 | ups_temporal.append(backbone.deblocks[i](x_temporal)) 199 | ups_exclusive.append(backbone.deblocks[i](x_exclusive)) 200 | ups_common.append(backbone.deblocks[i](x_common)) 201 | else: 202 | ups.append(x_fuse) 203 | 204 | if len(ups) > 1: 205 | x_fuse = torch.cat(ups, dim=1) 206 | x_temporal = torch.cat(ups_temporal, dim=1) 207 | x_exclusive = torch.cat(ups_exclusive, dim=1) 208 | x_common = torch.cat(ups_common, dim=1) 209 | elif len(ups) == 1: 210 | x_fuse = ups[0] 211 | 212 | if len(backbone.deblocks) > self.num_levels: 213 | x_fuse = backbone.deblocks[-1](x_fuse) 214 | 215 | return x_fuse, communication_rates, {}, offset_loss, commu_loss, None, [x_temporal, x_exclusive, x_common] 216 | -------------------------------------------------------------------------------- /v2xvit/models/fuse_modules/stcformer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import os 3 | from torch import nn 4 | import torch.nn.functional as F 5 | from icecream import ic 6 | import numpy as np 7 | from v2xvit.models.fuse_modules.how2comm_deformable_transformer import RPN_transformer_deformable_mtf_singlescale 8 | 9 | 10 | class ScaledDotProductAttention(nn.Module): 11 | def __init__(self, dim): 12 | super(ScaledDotProductAttention, self).__init__() 13 | self.sqrt_dim = np.sqrt(dim) 14 | 15 | def forward(self, query, key, value): 16 | score = torch.bmm(query, key.transpose(1, 2)) / self.sqrt_dim 17 | attn = F.softmax(score, -1) 18 | context = torch.bmm(attn, value) 19 | return context 20 | 21 | 22 | class TemporalAttention(nn.Module): 23 | def __init__(self, feature_dim): 24 | super(TemporalAttention, 
self).__init__() 25 | self.att = ScaledDotProductAttention(feature_dim) 26 | self.hidden_dim = feature_dim * 2 27 | self.conv_query = nn.Conv2d( 28 | feature_dim, self.hidden_dim, kernel_size=3, padding=1) 29 | self.conv_key = nn.Conv2d( 30 | feature_dim, self.hidden_dim, kernel_size=3, padding=1) 31 | self.conv_value = nn.Conv2d( 32 | feature_dim, self.hidden_dim, kernel_size=3, padding=1) 33 | self.conv_temporal_key = nn.Conv1d( 34 | self.hidden_dim, self.hidden_dim, kernel_size=1, stride=1) 35 | self.conv_temporal_value = nn.Conv1d( 36 | self.hidden_dim, self.hidden_dim, kernel_size=1, stride=1) 37 | self.pool = nn.AdaptiveAvgPool2d((1, 1)) 38 | self.conv_feat = nn.Conv2d( 39 | self.hidden_dim, feature_dim, kernel_size=3, padding=1) 40 | 41 | def forward(self, x): 42 | frame, C, H, W = x.shape 43 | ego = x[:1] 44 | query = self.conv_query(ego) 45 | query = query.view(1, self.hidden_dim, -1).permute(2, 0, 1) 46 | 47 | 48 | key = self.conv_key(x) 49 | key_avg = key 50 | value = self.conv_value(x) 51 | val_avg = value 52 | key = key.view(frame, self.hidden_dim, -1).permute(2, 0, 1) 53 | value = value.view(frame, self.hidden_dim, - 54 | 1).permute(2, 0, 1) 55 | 56 | 57 | key_avg = self.pool(key_avg).squeeze(-1).squeeze(-1) 58 | val_avg = self.pool(val_avg).squeeze(-1).squeeze(-1) 59 | key_avg = self.conv_temporal_key( 60 | key_avg.unsqueeze(0).permute(0, 2, 1)) 61 | val_avg = self.conv_temporal_value( 62 | val_avg.unsqueeze(0).permute(0, 2, 1)) 63 | key_avg = key_avg.permute(0, 2, 1) 64 | val_avg = val_avg.permute(0, 2, 1) 65 | key = key * key_avg 66 | value = value * val_avg 67 | 68 | 69 | x = self.att(query, key, value) 70 | x = x.permute(1, 2, 0).view(1, self.hidden_dim, H, W) 71 | out = self.conv_feat(x) 72 | 73 | return out 74 | 75 | 76 | class LateFusion(nn.Module): 77 | def __init__(self, channel): 78 | super(LateFusion, self).__init__() 79 | self.channel = channel 80 | self.gate_1 = nn.Conv2d( 81 | self.channel, 1, kernel_size=3, stride=1, padding=1) 82 | self.gate_2 = nn.Conv2d( 83 | self.channel, 1, kernel_size=3, stride=1, padding=1) 84 | 85 | def forward(self, exc, com): 86 | weight_1 = self.gate_1(exc) 87 | weight_2 = self.gate_2(com) 88 | weights = torch.cat([weight_1, weight_2], dim=1) 89 | weights = torch.softmax(weights, dim=1) 90 | final = weights[:, :1, :, :] * exc + \ 91 | weights[:, 1:, :, :] * com # 92 | 93 | return final 94 | 95 | 96 | class Decoupling(nn.Module): 97 | def __init__(self): 98 | super(Decoupling, self).__init__() 99 | self.exclusive_thre = 0.01 100 | self.common_thre = 0.01 101 | 102 | def forward(self, feat, confidence): 103 | 104 | ego_confi = confidence[:1] 105 | exclusive_list = [] 106 | exclusive_map_list = [ego_confi] 107 | common_list = [] 108 | common_map_list = [ego_confi] 109 | for n in range(1, feat.shape[0]): 110 | exclusive_map = (1 - ego_confi) * \ 111 | confidence[n].unsqueeze(0) # 112 | exclusive_map_list.append(exclusive_map) 113 | common_map = ego_confi * confidence[n].unsqueeze(0) 114 | common_map_list.append(common_map) 115 | ones_mask = torch.ones_like(exclusive_map).to(exclusive_map.device) 116 | zeros_mask = torch.zeros_like( 117 | exclusive_map).to(exclusive_map.device) 118 | exclusive_mask = torch.where( 119 | exclusive_map > self.exclusive_thre, ones_mask, zeros_mask) 120 | common_mask = torch.where( 121 | common_map > self.common_thre, ones_mask, zeros_mask) 122 | 123 | exclusive_list.append(feat[n].unsqueeze(0) * exclusive_mask) 124 | common_list.append(feat[n].unsqueeze(0) * common_mask) 125 | 126 | return 
torch.cat(exclusive_list, dim=0), torch.cat(common_list, dim=0), torch.cat(exclusive_map_list, dim=0), torch.cat(common_map_list, dim=0) 127 | 128 | 129 | class FeedForward(nn.Module): 130 | def __init__(self, dim, hidden_dim, dropout=0.): 131 | super().__init__() 132 | self.net = nn.Sequential( 133 | nn.Linear(dim, hidden_dim), 134 | nn.GELU(), 135 | nn.Dropout(dropout), 136 | nn.Linear(hidden_dim, dim), 137 | nn.Dropout(dropout) 138 | ) 139 | 140 | def forward(self, x): 141 | return self.net(x) 142 | 143 | 144 | class STCFormer(nn.Module): 145 | def __init__(self, channel, args, idx): 146 | super(STCFormer, self).__init__() 147 | 148 | self.decoupling = Decoupling() 149 | self.scale = [1, 0.5, 0.25] 150 | self.temporal_self_attention = TemporalAttention(channel) 151 | self.layer_norm = nn.LayerNorm( 152 | [channel, args['height'][idx], args['width'][idx]]) 153 | self.exclusive_encoder = RPN_transformer_deformable_mtf_singlescale( 154 | channel=channel, points=9) 155 | self.common_encoder = RPN_transformer_deformable_mtf_singlescale( 156 | channel=channel, points=3) 157 | self.late_fusion = LateFusion(channel=channel) 158 | self.time_embedding = nn.Linear(1, channel) 159 | 160 | def forward(self, neighbor_feat, neighbor_confidence, history_feat, level): 161 | if level > 0: 162 | neighbor_confidence = F.interpolate( 163 | neighbor_confidence, scale_factor=self.scale[level]) 164 | exclusive_feat, common_feat, exclusive_map, common_map = self.decoupling( 165 | neighbor_feat, neighbor_confidence) 166 | 167 | ego_feat = neighbor_feat[:1] 168 | history_feat = torch.cat([ego_feat, history_feat], dim=0) 169 | 170 | delay = [0.0] + [-1.0] * (history_feat.shape[0] -1) 171 | delay = torch.tensor([delay]).to(ego_feat.device) 172 | time_embed = self.time_embedding(delay[:, :, None]) 173 | time_embed = time_embed.reshape(history_feat.shape[0], -1, 1, 1) 174 | history_feat = history_feat + time_embed 175 | 176 | x = self.temporal_self_attention(history_feat) 177 | ego_feat = x 178 | temporal_feat = ego_feat 179 | 180 | exclusive_feat = torch.cat( 181 | [ego_feat, exclusive_feat], dim=0) 182 | common_feat = torch.cat([ego_feat, common_feat], dim=0) 183 | ego_exclusive_feat = self.exclusive_encoder( 184 | exclusive_feat, exclusive_map).unsqueeze(0) 185 | ego_common_feat = self.common_encoder( 186 | common_feat, common_map).unsqueeze(0) 187 | 188 | 189 | x = self.late_fusion(ego_exclusive_feat, ego_common_feat) 190 | ego_feat = x 191 | 192 | return ego_feat[0], [temporal_feat[0], ego_exclusive_feat[0], ego_common_feat[0]] 193 | -------------------------------------------------------------------------------- /v2xvit/models/point_pillar_how2comm.py: -------------------------------------------------------------------------------- 1 | from numpy import record 2 | import torch.nn as nn 3 | 4 | from v2xvit.models.sub_modules.pillar_vfe import PillarVFE 5 | from v2xvit.models.sub_modules.point_pillar_scatter import PointPillarScatter 6 | from v2xvit.models.sub_modules.base_bev_backbone import BaseBEVBackbone 7 | from v2xvit.models.sub_modules.base_bev_backbone_resnet import ResNetBEVBackbone 8 | from v2xvit.models.sub_modules.downsample_conv import DownsampleConv 9 | from v2xvit.models.sub_modules.naive_compress import NaiveCompressor 10 | from v2xvit.models.fuse_modules.how2comm_deformable import How2comm 11 | import torch 12 | from v2xvit.models.sub_modules.torch_transformation_utils import warp_affine_simple 13 | 14 | def transform_feature(feature_list, delay): 15 | return feature_list[delay] 16 | 17 | 18 | 
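# Intuition for the Decoupling module above: with ego confidence e and a
# collaborator's confidence c at the same BEV location,
# exclusive = (1 - e) * c highlights what only the collaborator sees, while
# common = e * c highlights what both agents see; each map is then thresholded
# at 0.01 to mask the collaborator's features for the two STCFormer branches.
ego_conf, collab_conf = 0.05, 0.80     # ego occluded, collaborator confident
print((1 - ego_conf) * collab_conf)    # ~0.76 -> exclusive branch dominates
print(ego_conf * collab_conf)          # ~0.04

ego_conf, collab_conf = 0.90, 0.80     # both confident about this location
print((1 - ego_conf) * collab_conf)    # ~0.08
print(ego_conf * collab_conf)          # ~0.72 -> common branch dominates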
class PointPillarHow2comm(nn.Module): 19 | def __init__(self, args): 20 | super(PointPillarHow2comm, self).__init__() 21 | 22 | self.pillar_vfe = PillarVFE(args['pillar_vfe'], 23 | num_point_features=4, 24 | voxel_size=args['voxel_size'], 25 | point_cloud_range=args['lidar_range']) 26 | self.scatter = PointPillarScatter(args['point_pillar_scatter']) 27 | if 'resnet' in args['base_bev_backbone']: 28 | self.backbone = ResNetBEVBackbone(args['base_bev_backbone'], 64) 29 | else: 30 | self.backbone = BaseBEVBackbone(args['base_bev_backbone'], 64) 31 | 32 | # used to downsample the feature map for efficient computation 33 | self.shrink_flag = False 34 | if 'shrink_header' in args: 35 | self.shrink_flag = True 36 | self.shrink_conv = DownsampleConv(args['shrink_header']) 37 | self.compression = False 38 | 39 | if args['compression'] > 0: 40 | self.compression = True 41 | self.naive_compressor = NaiveCompressor(256, args['compression']) 42 | 43 | self.dcn = False 44 | 45 | self.fusion_net = How2comm(args['fusion_args'], args) 46 | self.frame = args['fusion_args']['frame'] 47 | self.delay = 1 48 | self.discrete_ratio = args['fusion_args']['voxel_size'][0] 49 | self.downsample_rate = args['fusion_args']['downsample_rate'] 50 | self.multi_scale = args['fusion_args']['multi_scale'] 51 | 52 | self.cls_head = nn.Conv2d(128 * 2, args['anchor_number'], 53 | kernel_size=1) 54 | self.reg_head = nn.Conv2d(128 * 2, 7 * args['anchor_number'], 55 | kernel_size=1) 56 | if args['backbone_fix']: 57 | self.backbone_fix() 58 | 59 | def backbone_fix(self): 60 | """ 61 | Fix the parameters of backbone during finetune on timedelay。 62 | """ 63 | for p in self.pillar_vfe.parameters(): 64 | p.requires_grad = False 65 | 66 | for p in self.scatter.parameters(): 67 | p.requires_grad = False 68 | 69 | for p in self.backbone.parameters(): 70 | p.requires_grad = False 71 | 72 | if self.compression: 73 | for p in self.naive_compressor.parameters(): 74 | p.requires_grad = False 75 | if self.shrink_flag: 76 | for p in self.shrink_conv.parameters(): 77 | p.requires_grad = False 78 | 79 | for p in self.cls_head.parameters(): 80 | p.requires_grad = False 81 | for p in self.reg_head.parameters(): 82 | p.requires_grad = False 83 | 84 | def regroup(self, x, record_len): 85 | cum_sum_len = torch.cumsum(record_len, dim=0) 86 | split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) 87 | return split_x 88 | 89 | def forward(self, data_dict_list): 90 | batch_dict_list = [] 91 | feature_list = [] 92 | feature_2d_list = [] 93 | matrix_list = [] 94 | regroup_feature_list = [] 95 | regroup_feature_list_large = [] 96 | 97 | 98 | for origin_data in data_dict_list: 99 | data_dict = origin_data['ego'] 100 | voxel_features = data_dict['processed_lidar']['voxel_features'] 101 | voxel_coords = data_dict['processed_lidar']['voxel_coords'] 102 | voxel_num_points = data_dict['processed_lidar']['voxel_num_points'] 103 | record_len = data_dict['record_len'] 104 | 105 | pairwise_t_matrix = data_dict['pairwise_t_matrix'] 106 | batch_dict = {'voxel_features': voxel_features, 107 | 'voxel_coords': voxel_coords, 108 | 'voxel_num_points': voxel_num_points, 109 | 'record_len': record_len} 110 | # n, 4 -> n, c encoding voxel feature using point-pillar method 111 | batch_dict = self.pillar_vfe(batch_dict) 112 | # n, c -> N, C, H, W 113 | batch_dict = self.scatter(batch_dict) 114 | batch_dict = self.backbone(batch_dict) 115 | # N, C, H', W' 116 | spatial_features_2d = batch_dict['spatial_features_2d'] 117 | 118 | # downsample feature to reduce memory 119 | if 
self.shrink_flag: 120 | spatial_features_2d = self.shrink_conv(spatial_features_2d) 121 | # compressor 122 | if self.compression: 123 | spatial_features_2d = self.naive_compressor( 124 | spatial_features_2d) 125 | # dcn 126 | if self.dcn: 127 | spatial_features_2d = self.dcn_net(spatial_features_2d) 128 | 129 | batch_dict_list.append(batch_dict) 130 | spatial_features = batch_dict['spatial_features'] 131 | feature_list.append(spatial_features) 132 | feature_2d_list.append(spatial_features_2d) 133 | matrix_list.append(pairwise_t_matrix) 134 | regroup_feature_list.append(self.regroup( 135 | spatial_features_2d, record_len)) 136 | regroup_feature_list_large.append( 137 | self.regroup(spatial_features, record_len)) 138 | 139 | pairwise_t_matrix = matrix_list[0].clone().detach() 140 | 141 | 142 | history_feature = transform_feature(regroup_feature_list_large, self.delay) 143 | spatial_features = feature_list[0] 144 | spatial_features_2d = feature_2d_list[0] 145 | batch_dict = batch_dict_list[0] 146 | record_len = batch_dict['record_len'] 147 | psm_single = self.cls_head(spatial_features_2d) 148 | 149 | if self.delay == 0: 150 | fused_feature, communication_rates, result_dict, offset_loss, commu_loss, _, _ = self.fusion_net(spatial_features, psm_single, record_len,pairwise_t_matrix,self.backbone,[self.shrink_conv, self.cls_head, self.reg_head]) 151 | elif self.delay > 0: 152 | fused_feature, communication_rates, result_dict, offset_loss, commu_loss, _, _ = self.fusion_net(spatial_features, psm_single,record_len,pairwise_t_matrix,self.backbone,[self.shrink_conv, self.cls_head, self.reg_head], history=history_feature) 153 | if self.shrink_flag: 154 | fused_feature = self.shrink_conv(fused_feature) 155 | 156 | psm = self.cls_head(fused_feature) 157 | rm = self.reg_head(fused_feature) 158 | 159 | output_dict = {'psm': psm, 160 | 'rm': rm 161 | } 162 | 163 | output_dict.update(result_dict) 164 | output_dict.update({'comm_rate': communication_rates, 165 | "offset_loss": offset_loss, 166 | 'commu_loss': commu_loss 167 | }) 168 | return output_dict 169 | -------------------------------------------------------------------------------- /v2xvit/models/sub_modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ydk122024/How2comm/8d357abdcb265ed4db5e4c6d9ee5400ea5799abe/v2xvit/models/sub_modules/__init__.py -------------------------------------------------------------------------------- /v2xvit/models/sub_modules/base_bev_backbone.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | 5 | 6 | class BaseBEVBackbone(nn.Module): 7 | def __init__(self, model_cfg, input_channels): 8 | super().__init__() 9 | self.model_cfg = model_cfg 10 | 11 | if 'layer_nums' in self.model_cfg: 12 | 13 | assert len(self.model_cfg['layer_nums']) == \ 14 | len(self.model_cfg['layer_strides']) == \ 15 | len(self.model_cfg['num_filters']) 16 | 17 | layer_nums = self.model_cfg['layer_nums'] 18 | layer_strides = self.model_cfg['layer_strides'] 19 | num_filters = self.model_cfg['num_filters'] 20 | else: 21 | layer_nums = layer_strides = num_filters = [] 22 | 23 | if 'upsample_strides' in self.model_cfg: 24 | assert len(self.model_cfg['upsample_strides']) \ 25 | == len(self.model_cfg['num_upsample_filter']) 26 | 27 | num_upsample_filters = self.model_cfg['num_upsample_filter'] 28 | upsample_strides = self.model_cfg['upsample_strides'] 29 | 30 | else: 31 | 
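# Approximate tensor shapes through PointPillarHow2comm above, assuming the
# yaml settings earlier in this dump (704 x 200 pillar grid, ResNet backbone
# strides [2, 2, 2] with upsample strides [1, 2, 4] to 128 channels each,
# shrink_header to 256 channels, 2 anchors).  N is the number of CAVs across
# the batch, B the number of samples; exact values depend on the data.
shape_walk = [
    ("pillar_vfe + scatter -> spatial_features",      ("N", 64, 200, 704)),
    ("backbone, 3 levels concatenated",               ("N", 384, 100, 352)),
    ("shrink_conv -> spatial_features_2d",            ("N", 256, 100, 352)),
    ("cls_head(spatial_features_2d) -> psm_single",   ("N", 2, 100, 352)),
    ("cls_head / reg_head on fused feature",          ("B", 2, 100, 352)),  # rm has 7 * 2 = 14 channels
]
for stage, shape in shape_walk:
    print(f"{stage:48s} {shape}")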
upsample_strides = num_upsample_filters = [] 32 | 33 | num_levels = len(layer_nums) 34 | c_in_list = [input_channels, *num_filters[:-1]] 35 | 36 | self.blocks = nn.ModuleList() 37 | self.deblocks = nn.ModuleList() 38 | 39 | for idx in range(num_levels): 40 | cur_layers = [ 41 | nn.ZeroPad2d(1), 42 | nn.Conv2d( 43 | c_in_list[idx], num_filters[idx], kernel_size=3, 44 | stride=layer_strides[idx], padding=0, bias=False 45 | ), 46 | nn.BatchNorm2d(num_filters[idx], eps=1e-3, momentum=0.01), 47 | nn.ReLU() 48 | ] 49 | for k in range(layer_nums[idx]): 50 | cur_layers.extend([ 51 | nn.Conv2d(num_filters[idx], num_filters[idx], 52 | kernel_size=3, padding=1, bias=False), 53 | nn.BatchNorm2d(num_filters[idx], eps=1e-3, momentum=0.01), 54 | nn.ReLU() 55 | ]) 56 | 57 | self.blocks.append(nn.Sequential(*cur_layers)) 58 | if len(upsample_strides) > 0: 59 | stride = upsample_strides[idx] 60 | if stride >= 1: 61 | self.deblocks.append(nn.Sequential( 62 | nn.ConvTranspose2d( 63 | num_filters[idx], num_upsample_filters[idx], 64 | upsample_strides[idx], 65 | stride=upsample_strides[idx], bias=False 66 | ), 67 | nn.BatchNorm2d(num_upsample_filters[idx], 68 | eps=1e-3, momentum=0.01), 69 | nn.ReLU() 70 | )) 71 | else: 72 | stride = np.round(1 / stride).astype(np.int) 73 | self.deblocks.append(nn.Sequential( 74 | nn.Conv2d( 75 | num_filters[idx], num_upsample_filters[idx], 76 | stride, 77 | stride=stride, bias=False 78 | ), 79 | nn.BatchNorm2d(num_upsample_filters[idx], eps=1e-3, 80 | momentum=0.01), 81 | nn.ReLU() 82 | )) 83 | 84 | c_in = sum(num_upsample_filters) 85 | if len(upsample_strides) > num_levels: 86 | self.deblocks.append(nn.Sequential( 87 | nn.ConvTranspose2d(c_in, c_in, upsample_strides[-1], 88 | stride=upsample_strides[-1], bias=False), 89 | nn.BatchNorm2d(c_in, eps=1e-3, momentum=0.01), 90 | nn.ReLU(), 91 | )) 92 | 93 | self.num_bev_features = c_in 94 | 95 | def forward(self, data_dict): 96 | spatial_features = data_dict['spatial_features'] 97 | 98 | ups = [] 99 | ret_dict = {} 100 | x = spatial_features 101 | 102 | for i in range(len(self.blocks)): 103 | x = self.blocks[i](x) 104 | 105 | stride = int(spatial_features.shape[2] / x.shape[2]) 106 | ret_dict['spatial_features_%dx' % stride] = x 107 | 108 | if len(self.deblocks) > 0: 109 | ups.append(self.deblocks[i](x)) 110 | else: 111 | ups.append(x) 112 | 113 | if len(ups) > 1: 114 | x = torch.cat(ups, dim=1) 115 | elif len(ups) == 1: 116 | x = ups[0] 117 | 118 | if len(self.deblocks) > len(self.blocks): 119 | x = self.deblocks[-1](x) 120 | 121 | data_dict['spatial_features_2d'] = x 122 | return data_dict 123 | -------------------------------------------------------------------------------- /v2xvit/models/sub_modules/base_bev_backbone_resnet.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | 5 | from v2xvit.models.sub_modules.resblock import ResNetModified, BasicBlock 6 | 7 | DEBUG = False 8 | 9 | class ResNetBEVBackbone(nn.Module): 10 | def __init__(self, model_cfg, input_channels): 11 | super().__init__() 12 | self.model_cfg = model_cfg 13 | 14 | if 'layer_nums' in self.model_cfg: 15 | 16 | assert len(self.model_cfg['layer_nums']) == \ 17 | len(self.model_cfg['layer_strides']) == \ 18 | len(self.model_cfg['num_filters']) 19 | 20 | layer_nums = self.model_cfg['layer_nums'] 21 | layer_strides = self.model_cfg['layer_strides'] 22 | num_filters = self.model_cfg['num_filters'] 23 | else: 24 | layer_nums = layer_strides = num_filters = [] 25 | 26 
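# --- Illustrative sketch (not part of the original file) ----------------------
# Both BEV backbones (BaseBEVBackbone above and ResNetBEVBackbone here) parse
# the same model_cfg keys. The config values and tensor shapes below are
# hypothetical, chosen only to show how the pieces fit together.
import torch
from v2xvit.models.sub_modules.base_bev_backbone import BaseBEVBackbone

example_cfg = {
    'layer_nums': [3, 5, 8],              # conv layers per pyramid level
    'layer_strides': [2, 2, 2],           # downsampling stride of each level
    'num_filters': [64, 128, 256],        # output channels of each level
    'upsample_strides': [1, 2, 4],        # deconv strides back to one scale
    'num_upsample_filter': [128, 128, 128],
}
backbone = BaseBEVBackbone(example_cfg, input_channels=64)
dummy = {'spatial_features': torch.randn(1, 64, 200, 704)}
out = backbone(dummy)
# All three levels are upsampled to a common resolution and concatenated:
# out['spatial_features_2d'].shape == (1, 384, 100, 352)
# -------------------------------------------------------------------------------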
| if 'upsample_strides' in self.model_cfg: 27 | assert len(self.model_cfg['upsample_strides']) \ 28 | == len(self.model_cfg['num_upsample_filter']) 29 | 30 | num_upsample_filters = self.model_cfg['num_upsample_filter'] 31 | upsample_strides = self.model_cfg['upsample_strides'] 32 | 33 | else: 34 | upsample_strides = num_upsample_filters = [] 35 | 36 | self.resnet = ResNetModified(BasicBlock, 37 | layer_nums, 38 | layer_strides, 39 | num_filters) 40 | 41 | num_levels = len(layer_nums) 42 | self.num_levels = len(layer_nums) 43 | self.deblocks = nn.ModuleList() 44 | 45 | for idx in range(num_levels): 46 | if len(upsample_strides) > 0: 47 | stride = upsample_strides[idx] 48 | if stride >= 1: 49 | self.deblocks.append(nn.Sequential( 50 | nn.ConvTranspose2d( 51 | num_filters[idx], num_upsample_filters[idx], 52 | upsample_strides[idx], 53 | stride=upsample_strides[idx], bias=False 54 | ), 55 | nn.BatchNorm2d(num_upsample_filters[idx], 56 | eps=1e-3, momentum=0.01), 57 | nn.ReLU() 58 | )) 59 | else: 60 | stride = np.round(1 / stride).astype(np.int) 61 | self.deblocks.append(nn.Sequential( 62 | nn.Conv2d( 63 | num_filters[idx], num_upsample_filters[idx], 64 | stride, 65 | stride=stride, bias=False 66 | ), 67 | nn.BatchNorm2d(num_upsample_filters[idx], eps=1e-3, 68 | momentum=0.01), 69 | nn.ReLU() 70 | )) 71 | 72 | c_in = sum(num_upsample_filters) 73 | if len(upsample_strides) > num_levels: 74 | self.deblocks.append(nn.Sequential( 75 | nn.ConvTranspose2d(c_in, c_in, upsample_strides[-1], 76 | stride=upsample_strides[-1], bias=False), 77 | nn.BatchNorm2d(c_in, eps=1e-3, momentum=0.01), 78 | nn.ReLU(), 79 | )) 80 | 81 | self.num_bev_features = c_in 82 | 83 | def forward(self, data_dict): 84 | spatial_features = data_dict['spatial_features'] 85 | 86 | x = self.resnet(spatial_features) # tuple of features 87 | ups = [] 88 | 89 | for i in range(self.num_levels): 90 | if len(self.deblocks) > 0: 91 | ups.append(self.deblocks[i](x[i])) 92 | else: 93 | ups.append(x[i]) 94 | 95 | if len(ups) > 1: 96 | x = torch.cat(ups, dim=1) 97 | elif len(ups) == 1: 98 | x = ups[0] 99 | 100 | if len(self.deblocks) > self.num_levels: 101 | x = self.deblocks[-1](x) 102 | 103 | data_dict['spatial_features_2d'] = x 104 | return data_dict 105 | -------------------------------------------------------------------------------- /v2xvit/models/sub_modules/base_transformer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | from einops import rearrange 5 | 6 | 7 | class PreNorm(nn.Module): 8 | def __init__(self, dim, fn): 9 | super().__init__() 10 | self.norm = nn.LayerNorm(dim) 11 | self.fn = fn 12 | 13 | def forward(self, x, **kwargs): 14 | return self.fn(self.norm(x), **kwargs) 15 | 16 | 17 | class FeedForward(nn.Module): 18 | def __init__(self, dim, hidden_dim, dropout=0.): 19 | super().__init__() 20 | self.net = nn.Sequential( 21 | nn.Linear(dim, hidden_dim), 22 | nn.GELU(), 23 | nn.Dropout(dropout), 24 | nn.Linear(hidden_dim, dim), 25 | nn.Dropout(dropout) 26 | ) 27 | 28 | def forward(self, x): 29 | return self.net(x) 30 | 31 | 32 | class CavAttention(nn.Module): 33 | """ 34 | Vanilla CAV attention. 
35 | """ 36 | def __init__(self, dim, heads, dim_head=64, dropout=0.1): 37 | super().__init__() 38 | inner_dim = heads * dim_head 39 | 40 | self.heads = heads 41 | self.scale = dim_head ** -0.5 42 | 43 | self.attend = nn.Softmax(dim=-1) 44 | self.to_qkv = nn.Linear(dim, inner_dim * 3, bias=False) 45 | 46 | self.to_out = nn.Sequential( 47 | nn.Linear(inner_dim, dim), 48 | nn.Dropout(dropout) 49 | ) 50 | 51 | def forward(self, x, mask, prior_encoding): 52 | # x: (B, L, H, W, C) -> (B, H, W, L, C) 53 | # mask: (B, L) 54 | x = x.permute(0, 2, 3, 1, 4) 55 | # mask: (B, 1, H, W, L, 1) 56 | mask = mask.unsqueeze(1) 57 | 58 | # qkv: [(B, H, W, L, C_inner) *3] 59 | qkv = self.to_qkv(x).chunk(3, dim=-1) 60 | # q: (B, M, H, W, L, C) 61 | q, k, v = map(lambda t: rearrange(t, 'b h w l (m c) -> b m h w l c', 62 | m=self.heads), qkv) 63 | 64 | # attention, (B, M, H, W, L, L) 65 | att_map = torch.einsum('b m h w i c, b m h w j c -> b m h w i j', 66 | q, k) * self.scale 67 | # add mask 68 | att_map = att_map.masked_fill(mask == 0, -float('inf')) 69 | # softmax 70 | att_map = self.attend(att_map) 71 | 72 | # out:(B, M, H, W, L, C_head) 73 | out = torch.einsum('b m h w i j, b m h w j c -> b m h w i c', att_map, 74 | v) 75 | out = rearrange(out, 'b m h w l c -> b h w l (m c)', 76 | m=self.heads) 77 | out = self.to_out(out) 78 | # (B L H W C) 79 | out = out.permute(0, 3, 1, 2, 4) 80 | return out 81 | 82 | 83 | class BaseEncoder(nn.Module): 84 | def __init__(self, dim, depth, heads, dim_head, mlp_dim, dropout=0.): 85 | super().__init__() 86 | self.layers = nn.ModuleList([]) 87 | for _ in range(depth): 88 | self.layers.append(nn.ModuleList([ 89 | PreNorm(dim, CavAttention(dim, 90 | heads=heads, 91 | dim_head=dim_head, 92 | dropout=dropout)), 93 | PreNorm(dim, FeedForward(dim, mlp_dim, dropout=dropout)) 94 | ])) 95 | 96 | def forward(self, x, mask): 97 | for attn, ff in self.layers: 98 | x = attn(x, mask=mask) + x 99 | x = ff(x) + x 100 | return x 101 | 102 | 103 | class BaseTransformer(nn.Module): 104 | def __init__(self, args): 105 | super().__init__() 106 | 107 | dim = args['dim'] 108 | depth = args['depth'] 109 | heads = args['heads'] 110 | dim_head = args['dim_head'] 111 | mlp_dim = args['mlp_dim'] 112 | dropout = args['dropout'] 113 | max_cav = args['max_cav'] 114 | 115 | self.encoder = BaseEncoder(dim, depth, heads, dim_head, mlp_dim, 116 | dropout) 117 | 118 | def forward(self, x, mask): 119 | # B, L, H, W, C 120 | output = self.encoder(x, mask) 121 | # B, H, W, C 122 | output = output[:, 0] 123 | 124 | return output -------------------------------------------------------------------------------- /v2xvit/models/sub_modules/downsample_conv.py: -------------------------------------------------------------------------------- 1 | """ 2 | Class used to downsample features by 3*3 conv 3 | """ 4 | 5 | import torch 6 | import torch.nn as nn 7 | 8 | 9 | class DoubleConv(nn.Module): 10 | """ 11 | Double convoltuion 12 | Args: 13 | in_channels: input channel num 14 | out_channels: output channel num 15 | """ 16 | 17 | def __init__(self, in_channels, out_channels, kernel_size, 18 | stride, padding): 19 | super().__init__() 20 | self.double_conv = nn.Sequential( 21 | nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, 22 | stride=stride, padding=padding), 23 | nn.ReLU(inplace=True), 24 | nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1), 25 | nn.ReLU(inplace=True) 26 | ) 27 | 28 | def forward(self, x): 29 | return self.double_conv(x) 30 | 31 | 32 | class DownsampleConv(nn.Module): 33 | def 
__init__(self, config): 34 | super(DownsampleConv, self).__init__() 35 | self.layers = nn.ModuleList([]) 36 | input_dim = config['input_dim'] 37 | 38 | for (ksize, dim, stride, padding) in zip(config['kernal_size'], 39 | config['dim'], 40 | config['stride'], 41 | config['padding']): 42 | self.layers.append(DoubleConv(input_dim, 43 | dim, 44 | kernel_size=ksize, 45 | stride=stride, 46 | padding=padding)) 47 | input_dim = dim 48 | 49 | def forward(self, x): 50 | for i in range(len(self.layers)): 51 | x = self.layers[i](x) 52 | return x -------------------------------------------------------------------------------- /v2xvit/models/sub_modules/fuse_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | from einops import rearrange 5 | from v2xvit.utils.common_utils import torch_tensor_to_numpy 6 | 7 | 8 | def regroup(dense_feature, record_len, max_len): 9 | """ 10 | Regroup the data based on the record_len. 11 | 12 | Parameters 13 | ---------- 14 | dense_feature : torch.Tensor 15 | N, C, H, W 16 | record_len : list 17 | [sample1_len, sample2_len, ...] 18 | max_len : int 19 | Maximum cav number 20 | 21 | Returns 22 | ------- 23 | regroup_feature : torch.Tensor 24 | B, L, C, H, W 25 | """ 26 | cum_sum_len = list(np.cumsum(torch_tensor_to_numpy(record_len))) 27 | split_features = torch.tensor_split(dense_feature, 28 | cum_sum_len[:-1]) 29 | regroup_features = [] 30 | mask = [] 31 | 32 | for split_feature in split_features: 33 | # M, C, H, W 34 | feature_shape = split_feature.shape 35 | 36 | # the maximum M is 5 as most 5 cavs 37 | padding_len = max_len - feature_shape[0] 38 | mask.append([1] * feature_shape[0] + [0] * padding_len) 39 | 40 | padding_tensor = torch.zeros(padding_len, feature_shape[1], 41 | feature_shape[2], feature_shape[3]) 42 | padding_tensor = padding_tensor.to(split_feature.device) 43 | 44 | split_feature = torch.cat([split_feature, padding_tensor], 45 | dim=0) 46 | 47 | # 1, 5C, H, W 48 | split_feature = split_feature.view(-1, 49 | feature_shape[2], 50 | feature_shape[3]).unsqueeze(0) 51 | regroup_features.append(split_feature) 52 | 53 | # B, 5C, H, W 54 | regroup_features = torch.cat(regroup_features, dim=0) 55 | # B, L, C, H, W 56 | regroup_features = rearrange(regroup_features, 57 | 'b (l c) h w -> b l c h w', 58 | l=max_len) 59 | mask = torch.from_numpy(np.array(mask)).to(regroup_features.device) 60 | 61 | return regroup_features, mask 62 | -------------------------------------------------------------------------------- /v2xvit/models/sub_modules/how2comm_preprocess.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | from torch.nn import functional as F 5 | from v2xvit.models.sub_modules.feature_flow import FlowGenerator, ResNetBEVBackbone 6 | from v2xvit.models.comm_modules.mutual_communication import Communication 7 | 8 | 9 | class How2commPreprocess(nn.Module): 10 | def __init__(self, args, channel, delay): 11 | super(How2commPreprocess, self).__init__() 12 | self.flow_flag = args['flow_flag'] 13 | self.channel = channel 14 | self.frame = args['fusion_args']['frame'] 15 | self.delay = delay 16 | self.flow = FlowGenerator(args) 17 | 18 | self.commu_module = Communication( 19 | args['fusion_args']['communication'], in_planes=self.channel) 20 | 21 | def regroup(self, x, record_len): 22 | cum_sum_len = torch.cumsum(record_len, dim=0) 23 | split_x = torch.tensor_split(x, 
cum_sum_len[:-1].cpu()) 24 | return split_x 25 | 26 | def get_grid(self, flow): 27 | m, n = flow.shape[-2:] 28 | shifts_x = torch.arange( 29 | 0, n, 1, dtype=torch.float32, device=flow.device) 30 | shifts_y = torch.arange( 31 | 0, m, 1, dtype=torch.float32, device=flow.device) 32 | shifts_y, shifts_x = torch.meshgrid(shifts_y, shifts_x) 33 | 34 | grid_dst = torch.stack((shifts_x, shifts_y)).unsqueeze(0) 35 | workspace = torch.tensor( 36 | [(n - 1) / 2, (m - 1) / 2]).view(1, 2, 1, 1).to(flow.device) 37 | 38 | flow_grid = ((flow + grid_dst) / workspace - 1).permute(0, 2, 3, 1) 39 | 40 | return flow_grid 41 | 42 | def resample(self, feats, flow): 43 | flow_grid = self.get_grid(flow) 44 | warped_feats = F.grid_sample( 45 | feats, flow_grid, mode="bilinear", padding_mode="border") 46 | 47 | return warped_feats 48 | 49 | def communication(self, feats, record_len, history_list, confidence_map_list): 50 | feat_list = self.regroup(feats, record_len) 51 | sparse_feat_list, commu_loss, commu_rate, sparse_mask = self.commu_module( 52 | feat_list,confidence_map_list) 53 | sparse_feats = torch.cat(sparse_feat_list, dim=0) 54 | sparse_history_list = [] 55 | for i in range(len(sparse_feat_list)): 56 | sparse_history = torch.cat([history_list[i][:1], sparse_feat_list[i][1:]], dim=0) 57 | sparse_history_list.append(sparse_history) 58 | sparse_history = torch.cat(sparse_history_list, dim=0) 59 | return sparse_feats, commu_loss, commu_rate, sparse_history 60 | 61 | def forward(self, feat_curr, feat_history, record_len, backbone=None, heads=None): 62 | feat_curr = self.regroup(feat_curr, record_len) 63 | B = len(feat_curr) 64 | feat_list = [[] for _ in range(B)] 65 | for bs in range(B): 66 | feat_list[bs] += [feat_curr[bs], feat_history[bs]] 67 | 68 | if self.flow_flag: 69 | feat_final, offset_loss = self.flow(feat_list) 70 | else: 71 | offset_loss = torch.zeros(1).to(record_len.device) 72 | x_list = [] 73 | for bs in range(B): 74 | delayed_colla_feat = feat_list[bs][self.delay][1:] 75 | ego_feat = feat_list[bs][0][:1] 76 | x_list.append( 77 | torch.cat([ego_feat, delayed_colla_feat], dim=0)) 78 | feat_final = torch.cat(x_list, dim=0) 79 | 80 | return feat_final, offset_loss 81 | -------------------------------------------------------------------------------- /v2xvit/models/sub_modules/naive_compress.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class NaiveCompressor(nn.Module): 6 | def __init__(self, input_dim, compress_raito): 7 | super().__init__() 8 | self.encoder = nn.Sequential( 9 | nn.Conv2d(input_dim, input_dim//compress_raito, kernel_size=3, 10 | stride=1, padding=1), 11 | nn.BatchNorm2d(input_dim//compress_raito, eps=1e-3, momentum=0.01), 12 | nn.ReLU() 13 | ) 14 | self.decoder = nn.Sequential( 15 | nn.Conv2d(input_dim//compress_raito, input_dim, kernel_size=3, 16 | stride=1, padding=1), 17 | nn.BatchNorm2d(input_dim, eps=1e-3, momentum=0.01), 18 | nn.ReLU(), 19 | nn.Conv2d(input_dim, input_dim, kernel_size=3, stride=1, padding=1), 20 | nn.BatchNorm2d(input_dim, eps=1e-3, 21 | momentum=0.01), 22 | nn.ReLU() 23 | ) 24 | 25 | def forward(self, x): 26 | x = self.encoder(x) 27 | x = self.decoder(x) 28 | 29 | return x -------------------------------------------------------------------------------- /v2xvit/models/sub_modules/pillar_vfe.py: -------------------------------------------------------------------------------- 1 | """ 2 | Pillar VFE, credits to OpenPCDet. 
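# (Descriptive note, not part of the original file.) PillarVFE below augments
# every point in a pillar with its offset from the pillar's point mean
# (f_cluster) and from the pillar's grid-cell center (f_center), optionally
# appends the point's Euclidean distance, masks out padded points, and then
# max-pools the PFN layer outputs over the points of each pillar, yielding one
# feature vector per pillar ('pillar_features').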
3 | """ 4 | 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | 10 | class PFNLayer(nn.Module): 11 | def __init__(self, 12 | in_channels, 13 | out_channels, 14 | use_norm=True, 15 | last_layer=False): 16 | super().__init__() 17 | 18 | self.last_vfe = last_layer 19 | self.use_norm = use_norm 20 | if not self.last_vfe: 21 | out_channels = out_channels // 2 22 | 23 | if self.use_norm: 24 | self.linear = nn.Linear(in_channels, out_channels, bias=False) 25 | self.norm = nn.BatchNorm1d(out_channels, eps=1e-3, momentum=0.01) 26 | else: 27 | self.linear = nn.Linear(in_channels, out_channels, bias=True) 28 | 29 | self.part = 50000 30 | 31 | def forward(self, inputs): 32 | if inputs.shape[0] > self.part: 33 | # nn.Linear performs randomly when batch size is too large 34 | num_parts = inputs.shape[0] // self.part 35 | part_linear_out = [self.linear( 36 | inputs[num_part * self.part:(num_part + 1) * self.part]) 37 | for num_part in range(num_parts + 1)] 38 | x = torch.cat(part_linear_out, dim=0) 39 | else: 40 | x = self.linear(inputs) 41 | torch.backends.cudnn.enabled = False 42 | x = self.norm(x.permute(0, 2, 1)).permute(0, 2, 43 | 1) if self.use_norm else x 44 | torch.backends.cudnn.enabled = True 45 | x = F.relu(x) 46 | x_max = torch.max(x, dim=1, keepdim=True)[0] 47 | 48 | if self.last_vfe: 49 | return x_max 50 | else: 51 | x_repeat = x_max.repeat(1, inputs.shape[1], 1) 52 | x_concatenated = torch.cat([x, x_repeat], dim=2) 53 | return x_concatenated 54 | 55 | 56 | class PillarVFE(nn.Module): 57 | def __init__(self, model_cfg, num_point_features, voxel_size, 58 | point_cloud_range): 59 | super().__init__() 60 | self.model_cfg = model_cfg 61 | 62 | self.use_norm = self.model_cfg['use_norm'] 63 | self.with_distance = self.model_cfg['with_distance'] 64 | 65 | self.use_absolute_xyz = self.model_cfg['use_absolute_xyz'] 66 | num_point_features += 6 if self.use_absolute_xyz else 3 67 | if self.with_distance: 68 | num_point_features += 1 69 | 70 | self.num_filters = self.model_cfg['num_filters'] 71 | assert len(self.num_filters) > 0 72 | num_filters = [num_point_features] + list(self.num_filters) 73 | 74 | pfn_layers = [] 75 | for i in range(len(num_filters) - 1): 76 | in_filters = num_filters[i] 77 | out_filters = num_filters[i + 1] 78 | pfn_layers.append( 79 | PFNLayer(in_filters, out_filters, self.use_norm, 80 | last_layer=(i >= len(num_filters) - 2)) 81 | ) 82 | self.pfn_layers = nn.ModuleList(pfn_layers) 83 | 84 | self.voxel_x = voxel_size[0] 85 | self.voxel_y = voxel_size[1] 86 | self.voxel_z = voxel_size[2] 87 | self.x_offset = self.voxel_x / 2 + point_cloud_range[0] 88 | self.y_offset = self.voxel_y / 2 + point_cloud_range[1] 89 | self.z_offset = self.voxel_z / 2 + point_cloud_range[2] 90 | 91 | def get_output_feature_dim(self): 92 | return self.num_filters[-1] 93 | 94 | @staticmethod 95 | def get_paddings_indicator(actual_num, max_num, axis=0): 96 | actual_num = torch.unsqueeze(actual_num, axis + 1) 97 | max_num_shape = [1] * len(actual_num.shape) 98 | max_num_shape[axis + 1] = -1 99 | max_num = torch.arange(max_num, 100 | dtype=torch.int, 101 | device=actual_num.device).view(max_num_shape) 102 | paddings_indicator = actual_num.int() > max_num 103 | return paddings_indicator 104 | 105 | def forward(self, batch_dict): 106 | 107 | voxel_features, voxel_num_points, coords = \ 108 | batch_dict['voxel_features'], batch_dict['voxel_num_points'], \ 109 | batch_dict['voxel_coords'] 110 | points_mean = \ 111 | voxel_features[:, :, :3].sum(dim=1, keepdim=True) / \ 112 | 
voxel_num_points.type_as(voxel_features).view(-1, 1, 1) 113 | f_cluster = voxel_features[:, :, :3] - points_mean 114 | 115 | f_center = torch.zeros_like(voxel_features[:, :, :3]) 116 | f_center[:, :, 0] = voxel_features[:, :, 0] - ( 117 | coords[:, 3].to(voxel_features.dtype).unsqueeze( 118 | 1) * self.voxel_x + self.x_offset) 119 | f_center[:, :, 1] = voxel_features[:, :, 1] - ( 120 | coords[:, 2].to(voxel_features.dtype).unsqueeze( 121 | 1) * self.voxel_y + self.y_offset) 122 | f_center[:, :, 2] = voxel_features[:, :, 2] - ( 123 | coords[:, 1].to(voxel_features.dtype).unsqueeze( 124 | 1) * self.voxel_z + self.z_offset) 125 | 126 | if self.use_absolute_xyz: 127 | features = [voxel_features, f_cluster, f_center] 128 | else: 129 | features = [voxel_features[..., 3:], f_cluster, f_center] 130 | 131 | if self.with_distance: 132 | points_dist = torch.norm(voxel_features[:, :, :3], 2, 2, 133 | keepdim=True) 134 | features.append(points_dist) 135 | features = torch.cat(features, dim=-1) 136 | 137 | voxel_count = features.shape[1] 138 | mask = self.get_paddings_indicator(voxel_num_points, voxel_count, 139 | axis=0) 140 | mask = torch.unsqueeze(mask, -1).type_as(voxel_features) 141 | features *= mask 142 | for pfn in self.pfn_layers: 143 | features = pfn(features) 144 | features = features.squeeze() 145 | batch_dict['pillar_features'] = features 146 | return batch_dict 147 | -------------------------------------------------------------------------------- /v2xvit/models/sub_modules/point_pillar_scatter.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class PointPillarScatter(nn.Module): 6 | def __init__(self, model_cfg): 7 | super().__init__() 8 | 9 | self.model_cfg = model_cfg 10 | self.num_bev_features = self.model_cfg['num_features'] 11 | self.nx, self.ny, self.nz = model_cfg['grid_size'] 12 | assert self.nz == 1 13 | 14 | def forward(self, batch_dict): 15 | pillar_features, coords = batch_dict['pillar_features'], batch_dict[ 16 | 'voxel_coords'] 17 | batch_spatial_features = [] 18 | batch_size = coords[:, 0].max().int().item() + 1 19 | 20 | for batch_idx in range(batch_size): 21 | spatial_feature = torch.zeros( 22 | self.num_bev_features, 23 | self.nz * self.nx * self.ny, 24 | dtype=pillar_features.dtype, 25 | device=pillar_features.device) 26 | 27 | batch_mask = coords[:, 0] == batch_idx 28 | this_coords = coords[batch_mask, :] 29 | 30 | indices = this_coords[:, 1] + \ 31 | this_coords[:, 2] * self.nx + \ 32 | this_coords[:, 3] 33 | indices = indices.type(torch.long) 34 | 35 | pillars = pillar_features[batch_mask, :] 36 | pillars = pillars.t() 37 | spatial_feature[:, indices] = pillars 38 | batch_spatial_features.append(spatial_feature) 39 | 40 | batch_spatial_features = \ 41 | torch.stack(batch_spatial_features, 0) 42 | batch_spatial_features = \ 43 | batch_spatial_features.view(batch_size, self.num_bev_features * 44 | self.nz, self.ny, self.nx) 45 | batch_dict['spatial_features'] = batch_spatial_features 46 | 47 | return batch_dict 48 | 49 | -------------------------------------------------------------------------------- /v2xvit/models/sub_modules/self_attn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | 7 | class ScaledDotProductAttention(nn.Module): 8 | """ 9 | Scaled Dot-Product Attention proposed in "Attention Is All You Need" 10 | Compute the dot 
products of the query with all keys, divide each by sqrt(dim), 11 | and apply a softmax function to obtain the weights on the values 12 | Args: dim, mask 13 | dim (int): dimention of attention 14 | mask (torch.Tensor): tensor containing indices to be masked 15 | Inputs: query, key, value, mask 16 | - **query** (batch, q_len, d_model): tensor containing projection vector for decoder. 17 | - **key** (batch, k_len, d_model): tensor containing projection vector for encoder. 18 | - **value** (batch, v_len, d_model): tensor containing features of the encoded input sequence. 19 | - **mask** (-): tensor containing indices to be masked 20 | Returns: context, attn 21 | - **context**: tensor containing the context vector from attention mechanism. 22 | - **attn**: tensor containing the attention (alignment) from the encoder outputs. 23 | """ 24 | 25 | def __init__(self, dim): 26 | super(ScaledDotProductAttention, self).__init__() 27 | self.sqrt_dim = np.sqrt(dim) 28 | 29 | def forward(self, query, key, value): 30 | score = torch.bmm(query, key.transpose(1, 2)) / self.sqrt_dim 31 | attn = F.softmax(score, -1) 32 | context = torch.bmm(attn, value) 33 | return context 34 | 35 | 36 | class AttFusion(nn.Module): 37 | def __init__(self, feature_dim): 38 | super(AttFusion, self).__init__() 39 | self.att = ScaledDotProductAttention(feature_dim) 40 | 41 | def forward(self, x, record_len): 42 | split_x = self.regroup(x, record_len) 43 | batch_size = len(record_len) 44 | C, W, H = split_x[0].shape[1:] 45 | out = [] 46 | for xx in split_x: 47 | cav_num = xx.shape[0] 48 | query = xx[0,:].unsqueeze(0) 49 | query = query.view(1,C,-1).permute(2, 0, 1) 50 | key = xx.view(cav_num, C, -1).permute(2, 0, 1) 51 | value = xx.view(cav_num, C, -1).permute(2, 0, 1) 52 | h = self.att(query, key, value) 53 | h = h.permute(1, 2, 0).view(1, C, W, H)[0, ...].unsqueeze(0) 54 | out.append(h) 55 | return torch.cat(out, dim=0) 56 | 57 | def regroup(self, x, record_len): 58 | cum_sum_len = torch.cumsum(record_len, dim=0) 59 | split_x = torch.tensor_split(x, cum_sum_len[:-1].cpu()) 60 | return split_x -------------------------------------------------------------------------------- /v2xvit/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ydk122024/How2comm/8d357abdcb265ed4db5e4c6d9ee5400ea5799abe/v2xvit/tools/__init__.py -------------------------------------------------------------------------------- /v2xvit/tools/debug_utils.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import torch 4 | from torch.utils.data import DataLoader 5 | 6 | import v2xvit.hypes_yaml.yaml_utils as yaml_utils 7 | from v2xvit.tools import train_utils 8 | from v2xvit.data_utils.datasets import build_dataset 9 | from v2xvit.visualization import vis_utils 10 | 11 | 12 | def test_parser(): 13 | parser = argparse.ArgumentParser(description="synthetic data generation") 14 | parser.add_argument('--model_dir', type=str, required=True, 15 | help='Continued training path') 16 | parser.add_argument('--fusion_method', type=str, default='late', 17 | help='late, early or intermediate') 18 | opt = parser.parse_args() 19 | return opt 20 | 21 | 22 | def test_bev_post_processing(): 23 | opt = test_parser() 24 | assert opt.fusion_method in ['late', 'early', 'intermediate'] 25 | 26 | hypes = yaml_utils.load_yaml(None, opt) 27 | 28 | print('Dataset Building') 29 | opencood_dataset = build_dataset(hypes, visualize=True, train=False) 30 | 
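# (Descriptive note, not part of the original file.) The loop below does not
# use the network's predictions at all: the ground-truth label map is packed
# into output_dict ('cls'/'reg') and pushed through the BEV post-processor, so
# the rendered boxes are decoded ground truth. It serves as a sanity check of
# the post-processing and visualization pipeline, e.g. (path hypothetical):
#   python v2xvit/tools/debug_utils.py --model_dir v2xvit/logs/<run_dir> --fusion_method late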
data_loader = DataLoader(opencood_dataset, 31 | batch_size=1, 32 | num_workers=0, 33 | collate_fn=opencood_dataset.collate_batch_test, 34 | shuffle=False, 35 | pin_memory=False, 36 | drop_last=False) 37 | 38 | print('Creating Model') 39 | model = train_utils.create_model(hypes) 40 | # we assume gpu is necessary 41 | if torch.cuda.is_available(): 42 | model.cuda() 43 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 44 | 45 | print('Loading Model from checkpoint') 46 | saved_path = opt.model_dir 47 | _, model = train_utils.load_saved_model(saved_path, model) 48 | model.eval() 49 | for i, batch_data in enumerate(data_loader): 50 | batch_data = train_utils.to_device(batch_data, device) 51 | label_map = batch_data["ego"]["label_dict"]["label_map"] 52 | output_dict = { 53 | "cls": label_map[:, 0, :, :], 54 | "reg": label_map[:, 1:, :, :] 55 | } 56 | gt_box_tensor, _ = opencood_dataset.post_processor.post_process_debug( 57 | batch_data["ego"], output_dict) 58 | vis_utils.visualize_single_sample_output_bev(gt_box_tensor, 59 | batch_data['ego'][ 60 | 'origin_lidar'].squeeze( 61 | 0), 62 | opencood_dataset) 63 | 64 | 65 | if __name__ == '__main__': 66 | test_bev_post_processing() 67 | -------------------------------------------------------------------------------- /v2xvit/tools/inference.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import time 4 | from collections import OrderedDict 5 | 6 | import torch 7 | import os 8 | import open3d as o3d 9 | from torch.utils.data import DataLoader 10 | 11 | import v2xvit.hypes_yaml.yaml_utils as yaml_utils 12 | from v2xvit.tools import train_utils, infrence_utils 13 | from v2xvit.data_utils.datasets import build_dataset 14 | from v2xvit.visualization import vis_utils 15 | from v2xvit.utils import eval_utils 16 | import torch.multiprocessing 17 | torch.multiprocessing.set_sharing_strategy('file_system') 18 | 19 | 20 | def test_parser(): 21 | parser = argparse.ArgumentParser(description="synthetic data generation") 22 | parser.add_argument('--model_dir', type=str, required=True, 23 | help='Continued training path') 24 | parser.add_argument('--fusion_method', required=False, type=str, 25 | default='intermediate_with_comm', 26 | help='late, early or intermediate') 27 | parser.add_argument('--show_vis', action='store_true', 28 | help='whether to show image visualization result') 29 | parser.add_argument('--show_sequence', action='store_true', 30 | help='whether to show video visualization result.' 
31 | 'it can note be set true with show_vis together ') 32 | parser.add_argument('--eval_epoch', type=str, default=14, 33 | help='Set the checkpoint') 34 | parser.add_argument('--save_vis', action='store_true', 35 | help='whether to save visualization result') 36 | parser.add_argument('--save_npy', action='store_true', 37 | help='whether to save prediction and gt result' 38 | 'in npy file') 39 | parser.add_argument('--comm_thre', type=float, default=None, 40 | help='Communication confidence threshold') 41 | parser.add_argument('--score_thre', type=float, default=None, 42 | help='Confidence score threshold') 43 | parser.add_argument('--xyz_std', type=float, default=None, 44 | help='position error') 45 | parser.add_argument('--ryp_std', type=float, default=None, 46 | help='rotation error') 47 | opt = parser.parse_args() 48 | return opt 49 | 50 | 51 | def main(): 52 | opt = test_parser() 53 | assert opt.fusion_method in ['late', 'early', 'intermediate',"intermediate_with_comm"] 54 | assert not (opt.show_vis and opt.show_sequence), \ 55 | 'you can only visualize ' \ 56 | 'the results in single ' \ 57 | 'image mode or video mode' 58 | 59 | hypes = yaml_utils.load_yaml(None, opt) 60 | if opt.comm_thre is not None: 61 | hypes['model']['args']['fusion_args']['communication']['thre'] = opt.comm_thre 62 | if opt.score_thre is not None: 63 | hypes['postprocess']['target_args']['score_threshold'] = opt.score_thre 64 | score_threshold = hypes['postprocess']['target_args']['score_threshold'] 65 | if opt.xyz_std is not None: 66 | hypes['wild_setting']['xyz_std'] = opt.xyz_std 67 | if opt.ryp_std is not None: 68 | hypes['wild_setting']['ryp_std'] = opt.ryp_std 69 | 70 | print('Dataset Building') 71 | opencood_dataset = build_dataset(hypes, visualize=True, train=False) 72 | data_loader = DataLoader(opencood_dataset, 73 | batch_size=1, 74 | num_workers=10, 75 | collate_fn=opencood_dataset.collate_batch_test, 76 | shuffle=False, 77 | pin_memory=False, 78 | drop_last=False) 79 | 80 | print('Creating Model') 81 | model = train_utils.create_model(hypes) 82 | # we assume gpu is necessary 83 | if torch.cuda.is_available(): 84 | model.cuda() 85 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 86 | 87 | print('Loading Model from checkpoint') 88 | saved_path = opt.model_dir 89 | last_epoch = train_utils.findLastCheckpoint(saved_path) 90 | if opt.eval_epoch is None: 91 | epoch_id_list = [last_epoch] 92 | else: 93 | epoch_id_list = [opt.eval_epoch] 94 | 95 | for epoch_id in epoch_id_list: 96 | epoch_id, model = train_utils.load_saved_model(saved_path, model, epoch_id) 97 | model.eval() 98 | 99 | # Create the dictionary for evaluation 100 | result_stat = {0.3: {'tp': [], 'fp': [], 'gt': 0}, 101 | 0.5: {'tp': [], 'fp': [], 'gt': 0}, 102 | 0.7: {'tp': [], 'fp': [], 'gt': 0}} 103 | 104 | total_comm_rates = [] 105 | for i, batch_data_list in enumerate(data_loader): 106 | print("{}".format(i)) 107 | with torch.no_grad(): 108 | torch.cuda.synchronize() 109 | batch_data = batch_data_list[0] 110 | batch_data = train_utils.to_device(batch_data, device) 111 | batch_data_list = train_utils.to_device(batch_data_list, device) 112 | if opt.fusion_method == 'late': 113 | pred_box_tensor, pred_score, gt_box_tensor = \ 114 | infrence_utils.inference_late_fusion(batch_data, 115 | model, 116 | opencood_dataset) 117 | elif opt.fusion_method == 'early': 118 | pred_box_tensor, pred_score, gt_box_tensor = \ 119 | infrence_utils.inference_early_fusion(batch_data, 120 | model, 121 | opencood_dataset) 122 | elif 
opt.fusion_method == 'intermediate': 123 | pred_box_tensor, pred_score, gt_box_tensor = \ 124 | infrence_utils.inference_intermediate_fusion(batch_data_list, 125 | model, 126 | opencood_dataset) 127 | elif opt.fusion_method == 'intermediate_with_comm': 128 | pred_box_tensor, pred_score, gt_box_tensor, comm_rates = \ 129 | infrence_utils.inference_intermediate_fusion_withcomm(batch_data_list, 130 | model, 131 | opencood_dataset) 132 | total_comm_rates.append(comm_rates) 133 | else: 134 | raise NotImplementedError('Only early, late and intermediate' 135 | 'fusion is supported.') 136 | eval_utils.caluclate_tp_fp(pred_box_tensor, 137 | pred_score, 138 | gt_box_tensor, 139 | result_stat, 140 | 0.3) 141 | eval_utils.caluclate_tp_fp(pred_box_tensor, 142 | pred_score, 143 | gt_box_tensor, 144 | result_stat, 145 | 0.5) 146 | eval_utils.caluclate_tp_fp(pred_box_tensor, 147 | pred_score, 148 | gt_box_tensor, 149 | result_stat, 150 | 0.7) 151 | if opt.save_npy: 152 | npy_save_path = os.path.join(opt.model_dir, 'npy') 153 | if not os.path.exists(npy_save_path): 154 | os.makedirs(npy_save_path) 155 | infrence_utils.save_prediction_gt(pred_box_tensor, 156 | gt_box_tensor, 157 | batch_data['ego'][ 158 | 'origin_lidar'][0], 159 | i, 160 | npy_save_path) 161 | 162 | if opt.show_vis or opt.save_vis: 163 | vis_save_path = '' 164 | if opt.save_vis: 165 | vis_save_path = os.path.join(opt.model_dir, 'vis') 166 | if not os.path.exists(vis_save_path): 167 | os.makedirs(vis_save_path) 168 | vis_save_path = os.path.join(vis_save_path, '%05d.png' % i) 169 | 170 | opencood_dataset.visualize_result(pred_box_tensor, 171 | gt_box_tensor, 172 | batch_data['ego'][ 173 | 'origin_lidar'][0], 174 | opt.show_vis, 175 | vis_save_path, 176 | dataset=opencood_dataset) 177 | if len(total_comm_rates) > 0: 178 | comm_rates = (sum(total_comm_rates)/len(total_comm_rates)).item() 179 | else: 180 | comm_rates = 0 181 | ap_30, ap_50, ap_70 = eval_utils.eval_final_results(result_stat, opt.model_dir) 182 | current_time = time.ctime() 183 | 184 | with open(os.path.join(saved_path, 'result.txt'), 'a+') as f: 185 | msg = 'Epoch: {} | AP @0.3: {:.04f} | AP @0.5: {:.04f} | AP @0.7: {:.04f} | comm_rate: {:.06f}\n'.format(epoch_id, ap_30, ap_50, ap_70, comm_rates) 186 | if opt.comm_thre is not None: 187 | msg = 'Epoch: {} | AP @0.3: {:.04f} | AP @0.5: {:.04f} | AP @0.7: {:.04f} | comm_rate: {:.06f} | comm_thre: {:.04f} | score_threshold: {:.02f} | xyz_std: {:.01f} | ryp_std: {:.01f} | time: {}\n'.format(epoch_id, ap_30, ap_50, ap_70, comm_rates, opt.comm_thre,score_threshold,opt.xyz_std,opt.ryp_std,current_time) 188 | f.write(msg) 189 | print(msg) 190 | 191 | if __name__ == '__main__': 192 | main() -------------------------------------------------------------------------------- /v2xvit/tools/infrence_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import OrderedDict 3 | 4 | import numpy as np 5 | import torch 6 | 7 | from v2xvit.utils.common_utils import torch_tensor_to_numpy 8 | 9 | 10 | def inference_late_fusion(batch_data, model, dataset): 11 | """ 12 | Model inference for late fusion. 13 | 14 | Parameters 15 | ---------- 16 | batch_data : dict 17 | model : opencood.object 18 | dataset : opencood.LateFusionDataset 19 | 20 | Returns 21 | ------- 22 | pred_box_tensor : torch.Tensor 23 | The tensor of prediction bounding box after NMS. 24 | gt_box_tensor : torch.Tensor 25 | The tensor of gt bounding box. 
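# (Illustrative usage note, not part of the original file.) A typical,
# hypothetical invocation of the evaluation script above; --model_dir is the
# run folder created by train.py, which holds config.yaml and the
# net_epoch*.pth checkpoints:
#   python v2xvit/tools/inference.py \
#       --model_dir v2xvit/logs/<run_dir> \
#       --fusion_method intermediate_with_comm \
#       --eval_epoch 14 --save_npy
# AP@0.3/0.5/0.7 and the average communication rate are appended to result.txt
# inside --model_dir.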
26 | """ 27 | output_dict = OrderedDict() 28 | 29 | for cav_id, cav_content in batch_data.items(): 30 | output_dict[cav_id] = model(cav_content) 31 | 32 | pred_box_tensor, pred_score, gt_box_tensor = \ 33 | dataset.post_process(batch_data, 34 | output_dict) 35 | 36 | return pred_box_tensor, pred_score, gt_box_tensor 37 | 38 | 39 | def inference_early_fusion(batch_data, model, dataset): 40 | """ 41 | Model inference for early fusion. 42 | 43 | Parameters 44 | ---------- 45 | batch_data : dict 46 | model : opencood.object 47 | dataset : opencood.EarlyFusionDataset 48 | 49 | Returns 50 | ------- 51 | pred_box_tensor : torch.Tensor 52 | The tensor of prediction bounding box after NMS. 53 | gt_box_tensor : torch.Tensor 54 | The tensor of gt bounding box. 55 | """ 56 | output_dict = OrderedDict() 57 | cav_content = batch_data 58 | 59 | output_dict['ego'] = model(cav_content) 60 | 61 | pred_box_tensor, pred_score, gt_box_tensor = \ 62 | dataset.post_process(batch_data[0], 63 | output_dict) 64 | 65 | return pred_box_tensor, pred_score, gt_box_tensor 66 | 67 | def inference_intermediate_fusion_withcomm(batch_data_list, model, dataset, tail=""): 68 | """ 69 | Model inference for early fusion. 70 | 71 | Parameters 72 | ---------- 73 | batch_data : dict 74 | model : opencood.object 75 | dataset : opencood.EarlyFusionDataset 76 | 77 | Returns 78 | ------- 79 | pred_box_tensor : torch.Tensor 80 | The tensor of prediction bounding box after NMS. 81 | gt_box_tensor : torch.Tensor 82 | The tensor of gt bounding box. 83 | """ 84 | output_dict = OrderedDict() 85 | batch_data = batch_data_list[0] 86 | 87 | output_dict['ego'] = model(batch_data_list) 88 | 89 | pred_box_tensor, pred_score, gt_box_tensor = \ 90 | dataset.post_process(batch_data, 91 | output_dict) 92 | comm_rates = output_dict['ego']['comm_rate'] 93 | return pred_box_tensor, pred_score, gt_box_tensor, comm_rates 94 | 95 | 96 | def inference_intermediate_fusion(batch_data, model, dataset): 97 | """ 98 | Model inference for early fusion. 99 | 100 | Parameters 101 | ---------- 102 | batch_data : dict 103 | model : opencood.object 104 | dataset : opencood.EarlyFusionDataset 105 | 106 | Returns 107 | ------- 108 | pred_box_tensor : torch.Tensor 109 | The tensor of prediction bounding box after NMS. 110 | gt_box_tensor : torch.Tensor 111 | The tensor of gt bounding box. 112 | """ 113 | return inference_early_fusion(batch_data, model, dataset) 114 | 115 | 116 | def save_prediction_gt(pred_tensor, gt_tensor, pcd, timestamp, save_path): 117 | """ 118 | Save prediction and gt tensor to txt file. 
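# (Descriptive note, not part of the original file.) Despite the wording above,
# save_prediction_gt below writes NumPy files rather than txt: '%04d_pcd.npy',
# '%04d_pred.npy' and '%04d_gt.npy' are saved into save_path for each
# evaluated frame index.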
119 | """ 120 | pred_np = torch_tensor_to_numpy(pred_tensor) 121 | gt_np = torch_tensor_to_numpy(gt_tensor) 122 | pcd_np = torch_tensor_to_numpy(pcd) 123 | 124 | np.save(os.path.join(save_path, '%04d_pcd.npy' % timestamp), pcd_np) 125 | np.save(os.path.join(save_path, '%04d_pred.npy' % timestamp), pred_np) 126 | np.save(os.path.join(save_path, '%04d_gt.npy' % timestamp), gt_np) 127 | -------------------------------------------------------------------------------- /v2xvit/tools/multi_gpu_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.distributed as dist 4 | 5 | 6 | def get_dist_info(): 7 | if dist.is_available() and dist.is_initialized(): 8 | rank = dist.get_rank() 9 | world_size = dist.get_world_size() 10 | else: 11 | rank = 0 12 | world_size = 1 13 | return rank, world_size 14 | 15 | 16 | def init_distributed_mode(args): 17 | if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ: 18 | args.rank = int(os.environ["RANK"]) 19 | args.world_size = int(os.environ['WORLD_SIZE']) 20 | args.gpu = int(os.environ['LOCAL_RANK']) 21 | elif 'SLURM_PROCID' in os.environ: 22 | args.rank = int(os.environ['SLURM_PROCID']) 23 | args.gpu = args.rank % torch.cuda.device_count() 24 | else: 25 | print('Not using distributed mode') 26 | args.distributed = False 27 | return 28 | 29 | args.distributed = True 30 | 31 | torch.cuda.set_device(args.gpu) 32 | args.dist_backend = 'nccl' 33 | print('| distributed init (rank {}): {}'.format( 34 | args.rank, args.dist_url), flush=True) 35 | torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url, 36 | world_size=args.world_size, rank=args.rank) 37 | torch.distributed.barrier() 38 | setup_for_distributed(args.rank == 0) 39 | 40 | 41 | def setup_for_distributed(is_master): 42 | """ 43 | This function disables printing when not in master process 44 | """ 45 | import builtins as __builtin__ 46 | builtin_print = __builtin__.print 47 | 48 | def print(*args, **kwargs): 49 | force = kwargs.pop('force', False) 50 | if is_master or force: 51 | builtin_print(*args, **kwargs) 52 | 53 | __builtin__.print = print -------------------------------------------------------------------------------- /v2xvit/tools/train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os,sys,random 3 | import statistics 4 | 5 | import torch 6 | import os,time 7 | torch.autograd.set_detect_anomaly(True) 8 | import tqdm 9 | from torch.utils.data import DataLoader, DistributedSampler 10 | from tensorboardX import SummaryWriter 11 | 12 | import v2xvit.hypes_yaml.yaml_utils as yaml_utils 13 | from v2xvit.tools import train_utils,infrence_utils 14 | from v2xvit.data_utils.datasets import build_dataset 15 | from v2xvit.tools import multi_gpu_utils 16 | 17 | 18 | def train_parser(): 19 | parser = argparse.ArgumentParser(description="synthetic data generation") 20 | parser.add_argument("--hypes_yaml", type=str, required=True, 21 | help='data generation yaml file needed ') 22 | parser.add_argument('--model_dir', default='', 23 | help='Continued training path') 24 | parser.add_argument("--half", action='store_true', help="whether train with half precision") 25 | parser.add_argument('--dist_url', default='env://', 26 | help='url used to set up distributed training') 27 | opt = parser.parse_args() 28 | return opt 29 | 30 | def main(): 31 | opt = train_parser() 32 | hypes = yaml_utils.load_yaml(opt.hypes_yaml, opt) 33 | 
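# (Illustrative usage note, not part of the original file.)
# init_distributed_mode() below only switches DDP on when RANK / WORLD_SIZE /
# LOCAL_RANK are present in the environment, which torchrun sets automatically.
# Hypothetical launch commands for this script:
#   # single GPU, no DDP
#   python v2xvit/tools/train.py --hypes_yaml v2xvit/hypes_yaml/how2comm/v2xset_how2comm_stcformer.yaml
#   # 2 GPUs with DDP
#   torchrun --nproc_per_node=2 v2xvit/tools/train.py \
#       --hypes_yaml v2xvit/hypes_yaml/how2comm/v2xset_how2comm_stcformer.yaml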
multi_gpu_utils.init_distributed_mode(opt) 34 | 35 | print('-----------------Dataset Building------------------') 36 | opencood_train_dataset = build_dataset(hypes, visualize=False, train=True) 37 | opencood_validate_dataset = build_dataset(hypes, 38 | visualize=False, 39 | train=False) 40 | if opt.distributed: 41 | sampler_train = DistributedSampler(opencood_train_dataset) 42 | sampler_val = DistributedSampler(opencood_validate_dataset, 43 | shuffle=False) 44 | 45 | batch_sampler_train = torch.utils.data.BatchSampler( 46 | sampler_train, hypes['train_params']['batch_size'], drop_last=True) 47 | 48 | train_loader = DataLoader(opencood_train_dataset, 49 | batch_sampler=batch_sampler_train, 50 | num_workers=8, 51 | collate_fn=opencood_train_dataset.collate_batch_train) 52 | val_loader = DataLoader(opencood_validate_dataset, 53 | sampler=sampler_val, 54 | num_workers=8, 55 | collate_fn=opencood_train_dataset.collate_batch_train, 56 | drop_last=False) 57 | else: 58 | train_loader = DataLoader(opencood_train_dataset, 59 | batch_size=hypes['train_params']['batch_size'], 60 | num_workers=8, 61 | collate_fn=opencood_train_dataset.collate_batch_train, 62 | shuffle=True, 63 | pin_memory=False, 64 | drop_last=True) 65 | val_loader = DataLoader(opencood_validate_dataset, 66 | batch_size=hypes['train_params']['batch_size'], 67 | num_workers=8, 68 | collate_fn=opencood_train_dataset.collate_batch_train, 69 | shuffle=False, 70 | pin_memory=False, 71 | drop_last=True) 72 | 73 | print('---------------Creating Model------------------') 74 | model = train_utils.create_model(hypes) 75 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 76 | 77 | # if we want to train from last checkpoint. 78 | if opt.model_dir: 79 | saved_path = opt.model_dir 80 | init_epoch, model = train_utils.load_saved_model(saved_path, model) 81 | 82 | else: 83 | init_epoch = 0 84 | # if we train the model from scratch, we need to create a folder 85 | # to save the model, 86 | saved_path = train_utils.setup_train(hypes) 87 | 88 | # we assume gpu is necessary 89 | if torch.cuda.is_available(): 90 | model.to(device) 91 | model_without_ddp = model 92 | 93 | if opt.distributed: 94 | model = \ 95 | torch.nn.parallel.DistributedDataParallel(model, 96 | device_ids=[opt.gpu], 97 | find_unused_parameters=True) 98 | model_without_ddp = model.module 99 | 100 | # define the loss 101 | criterion = train_utils.create_loss(hypes) 102 | 103 | # optimizer setup 104 | # optimizer = train_utils.setup_optimizer(hypes, model) 105 | optimizer = train_utils.setup_optimizer(hypes, model_without_ddp) 106 | # lr scheduler setup 107 | num_steps = len(train_loader) 108 | scheduler = train_utils.setup_lr_schedular(hypes, optimizer, num_steps) 109 | 110 | # record training 111 | writer = SummaryWriter(saved_path) 112 | 113 | # half precision training 114 | if opt.half: 115 | scaler = torch.cuda.amp.GradScaler() 116 | 117 | print('Training start') 118 | epoches = hypes['train_params']['epoches'] 119 | # used to help schedule learning rate 120 | for epoch in range(init_epoch, max(epoches, init_epoch)): 121 | if hypes['lr_scheduler']['core_method'] != 'cosineannealwarm': 122 | scheduler.step(epoch) 123 | if hypes['lr_scheduler']['core_method'] == 'cosineannealwarm': 124 | scheduler.step_update(epoch * num_steps + 0) 125 | for param_group in optimizer.param_groups: 126 | print('learning rate %.7f' % param_group["lr"]) 127 | 128 | if opt.distributed: 129 | sampler_train.set_epoch(epoch) 130 | 131 | pbar2 = tqdm.tqdm(total=len(train_loader), leave=True) 
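# (Descriptive note, not part of the original file.) The block below is a
# consistency filter: each entry of batch_data_list holds one temporal frame of
# the same scene, and the whole batch is skipped whenever the number of
# collaborating agents (record_len) differs across those frames, presumably
# because the temporal fusion expects a fixed agent count per sample.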
132 | record_len_list = [] 133 | for i, batch_data_list in enumerate(train_loader): 134 | for v in batch_data_list: 135 | record_len_list.append(v['ego']['record_len'][0].item()) 136 | if len(set(record_len_list)) != 1: 137 | record_len_list = [] 138 | continue 139 | print(record_len_list) 140 | record_len_list = [] 141 | # the model will be evaluation mode during validation 142 | model.train() 143 | model.zero_grad() 144 | optimizer.zero_grad() 145 | 146 | batch_data = batch_data_list[0] 147 | 148 | batch_data_list = train_utils.to_device(batch_data_list, device) 149 | batch_data = train_utils.to_device(batch_data, device) 150 | 151 | # case1 : late fusion train --> only ego needed 152 | # case2 : early fusion train --> all data projected to ego 153 | # case3 : intermediate fusion --> ['ego']['processed_lidar'] 154 | # becomes a list, which containing all data from other cavs 155 | # as well 156 | if not opt.half: 157 | ouput_dict = model(batch_data_list) 158 | final_loss = criterion(ouput_dict, 159 | batch_data['ego']['label_dict']) 160 | final_loss += ouput_dict["offset_loss"][0] + ouput_dict["commu_loss"][0] 161 | else: 162 | with torch.cuda.amp.autocast(): 163 | ouput_dict = model(batch_data_list) 164 | # first argument is always your output dictionary, 165 | # second argument is always your label dictionary. 166 | final_loss = criterion(ouput_dict, 167 | batch_data['ego']['label_dict']) 168 | final_loss += ouput_dict["offset_loss"][0] + ouput_dict["commu_loss"][0] 169 | criterion.logging(epoch, i, len(train_loader), writer) 170 | pbar2.update(1) 171 | time.sleep(0.001) 172 | # back-propagation 173 | if not opt.half: 174 | final_loss.backward() 175 | optimizer.step() 176 | else: 177 | scaler.scale(final_loss).backward() 178 | scaler.step(optimizer) 179 | scaler.update() 180 | 181 | if epoch % hypes['train_params']['save_freq'] == 0: 182 | torch.save(model.state_dict(), 183 | os.path.join(saved_path, 184 | 'net_epoch%d.pth' % (epoch + 1))) 185 | 186 | print('Training Finished, checkpoints saved to %s' % saved_path) 187 | torch.cuda.empty_cache() 188 | 189 | if __name__ == '__main__': 190 | main() 191 | -------------------------------------------------------------------------------- /v2xvit/tools/train_utils.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import importlib 3 | import yaml 4 | import os 5 | import re 6 | from datetime import datetime 7 | 8 | import torch 9 | import torch.optim as optim 10 | 11 | def findLastCheckpoint(save_dir): 12 | file_list = glob.glob(os.path.join(save_dir, '*epoch*.pth')) 13 | if file_list: 14 | epochs_exist = [] 15 | for file_ in file_list: 16 | result = re.findall(".*epoch(.*).pth.*", file_) 17 | epochs_exist.append(int(result[0])) 18 | initial_epoch_ = max(epochs_exist) 19 | else: 20 | initial_epoch_ = 0 21 | return initial_epoch_ 22 | 23 | 24 | def load_saved_model(saved_path, model, epoch=None): 25 | """ 26 | Load saved model if exiseted 27 | 28 | Parameters 29 | __________ 30 | saved_path : str 31 | model saved path 32 | model : opencood object 33 | The model instance. 34 | 35 | Returns 36 | ------- 37 | model : opencood object 38 | The model instance loaded pretrained params. 
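# (Descriptive note, not part of the original file.) Checkpoint resolution used
# below: if net_latest.pth exists it is always preferred and the returned epoch
# is pinned to 100; otherwise the requested epoch (or, by default, the highest
# net_epoch<N>.pth found by findLastCheckpoint) is loaded, 'module.' prefixes
# from DataParallel are stripped, and parameters with mismatched shapes or
# missing keys are skipped instead of raising.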
39 | """ 40 | assert os.path.exists(saved_path), '{} not found'.format(saved_path) 41 | 42 | if os.path.exists(os.path.join(saved_path, 'net_latest.pth')): 43 | model.load_state_dict(torch.load( 44 | os.path.join(saved_path, 45 | 'net_latest.pth'))) 46 | return 100, model 47 | else: 48 | if epoch is None: 49 | initial_epoch = findLastCheckpoint(saved_path) 50 | else: 51 | initial_epoch = int(epoch) 52 | 53 | if initial_epoch > 0: 54 | print('resuming by loading epoch %d' % initial_epoch) 55 | 56 | state_dict_ = torch.load(os.path.join(saved_path, 'net_epoch%d.pth' % initial_epoch), map_location="cuda:0") 57 | state_dict = {} 58 | # convert data_parallal to model 59 | for k in state_dict_: 60 | if k.startswith('module') and not k.startswith('module_list'): 61 | state_dict[k[7:]] = state_dict_[k] 62 | else: 63 | state_dict[k] = state_dict_[k] 64 | 65 | model_state_dict = model.state_dict() 66 | 67 | for k in state_dict: 68 | if k in model_state_dict: 69 | if state_dict[k].shape != model_state_dict[k].shape: 70 | print('Skip loading parameter {}, required shape{}, ' \ 71 | 'loaded shape{}.'.format( 72 | k, model_state_dict[k].shape, state_dict[k].shape)) 73 | state_dict[k] = model_state_dict[k] 74 | else: 75 | print('Drop parameter {}.'.format(k)) 76 | for k in model_state_dict: 77 | if not (k in state_dict): 78 | print('No param {}.'.format(k)) 79 | state_dict[k] = model_state_dict[k] 80 | model.load_state_dict(state_dict, strict=False) 81 | return initial_epoch, model 82 | 83 | 84 | def setup_train(hypes): 85 | """ 86 | Create folder for saved model based on current timestep and model name 87 | 88 | Parameters 89 | ---------- 90 | hypes: dict 91 | Config yaml dictionary for training: 92 | """ 93 | model_name = hypes['name'] 94 | current_time = datetime.now() 95 | 96 | folder_name = current_time.strftime("_%Y_%m_%d_%H_%M_%S") 97 | folder_name = model_name + folder_name 98 | 99 | current_path = os.path.dirname(__file__) 100 | current_path = os.path.join(current_path, '../logs') 101 | 102 | full_path = os.path.join(current_path, folder_name) 103 | 104 | if not os.path.exists(full_path): 105 | os.makedirs(full_path) 106 | # save the yaml file 107 | save_name = os.path.join(full_path, 'config.yaml') 108 | with open(save_name, 'w') as outfile: 109 | yaml.dump(hypes, outfile) 110 | 111 | return full_path 112 | 113 | 114 | def create_model(hypes): 115 | """ 116 | Import the module "models/[model_name].py 117 | 118 | Parameters 119 | __________ 120 | hypes : dict 121 | Dictionary containing parameters. 122 | 123 | Returns 124 | ------- 125 | model : opencood,object 126 | Model object. 127 | """ 128 | backbone_name = hypes['model']['core_method'] 129 | backbone_config = hypes['model']['args'] 130 | 131 | model_filename = "v2xvit.models." + backbone_name 132 | model_lib = importlib.import_module(model_filename) 133 | model = None 134 | target_model_name = backbone_name.replace('_', '') 135 | 136 | for name, cls in model_lib.__dict__.items(): 137 | if name.lower() == target_model_name.lower(): 138 | model = cls 139 | 140 | if model is None: 141 | print('backbone not found in models folder. Please make sure you ' 142 | 'have a python file named %s and has a class ' 143 | 'called %s ignoring upper/lower case' % (model_filename, 144 | target_model_name)) 145 | exit(0) 146 | instance = model(backbone_config) 147 | return instance 148 | 149 | 150 | def create_loss(hypes): 151 | """ 152 | Create the loss function based on the given loss name. 
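# (Descriptive note, not part of the original file.) create_model above and
# create_loss below resolve classes purely by naming convention: the
# core_method string names a module under v2xvit.models / v2xvit.loss, and the
# class whose lower-cased name equals the core_method with underscores removed
# is instantiated. For example, core_method 'point_pillar_how2comm' imports
# v2xvit.models.point_pillar_how2comm and builds PointPillarHow2comm with
# hypes['model']['args'].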
153 | 154 | Parameters 155 | ---------- 156 | hypes : dict 157 | Configuration params for training. 158 | Returns 159 | ------- 160 | criterion : opencood.object 161 | The loss function. 162 | """ 163 | loss_func_name = hypes['loss']['core_method'] 164 | loss_func_config = hypes['loss']['args'] 165 | 166 | loss_filename = "v2xvit.loss." + loss_func_name 167 | loss_lib = importlib.import_module(loss_filename) 168 | loss_func = None 169 | target_loss_name = loss_func_name.replace('_', '') 170 | 171 | for name, lfunc in loss_lib.__dict__.items(): 172 | if name.lower() == target_loss_name.lower(): 173 | loss_func = lfunc 174 | 175 | if loss_func is None: 176 | print('loss function not found in loss folder. Please make sure you ' 177 | 'have a python file named %s and has a class ' 178 | 'called %s ignoring upper/lower case' % (loss_filename, 179 | target_loss_name)) 180 | exit(0) 181 | 182 | criterion = loss_func(loss_func_config) 183 | return criterion 184 | 185 | 186 | def setup_optimizer(hypes, model): 187 | """ 188 | Create optimizer corresponding to the yaml file 189 | 190 | Parameters 191 | ---------- 192 | hypes : dict 193 | The training configurations. 194 | model : opencood model 195 | The pytorch model 196 | """ 197 | method_dict = hypes['optimizer'] 198 | optimizer_method = getattr(optim, method_dict['core_method'], None) 199 | if not optimizer_method: 200 | raise ValueError('{} is not supported'.format(method_dict['name'])) 201 | if 'args' in method_dict: 202 | return optimizer_method(filter(lambda p: p.requires_grad, 203 | model.parameters()), 204 | lr=method_dict['lr'], 205 | **method_dict['args']) 206 | else: 207 | return optimizer_method(filter(lambda p: p.requires_grad, 208 | model.parameters()), 209 | lr=method_dict['lr']) 210 | 211 | 212 | def setup_lr_schedular(hypes, optimizer, init_epoch=None): 213 | """ 214 | Set up the learning rate schedular. 215 | 216 | Parameters 217 | ---------- 218 | hypes : dict 219 | The training configurations. 
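# --- Illustrative config sketch (not part of the original file) ---------------
# setup_optimizer (above) and setup_lr_schedular (here) read blocks like the
# following from the training yaml; the key names follow the code, the values
# are hypothetical:
#   optimizer:
#     core_method: Adam          # any class name from torch.optim
#     lr: 0.001
#     args:                      # optional, forwarded as keyword arguments
#       weight_decay: 1.0e-4
#   lr_scheduler:
#     core_method: multistep     # 'step', 'multistep', anything else -> exponential
#     step_size: [10, 20]        # milestones for 'multistep', int for 'step'
#     gamma: 0.1
# -------------------------------------------------------------------------------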
220 | 221 | optimizer : torch.optimizer 222 | """ 223 | lr_schedule_config = hypes['lr_scheduler'] 224 | last_epoch = init_epoch if init_epoch is not None else 0 225 | 226 | 227 | if lr_schedule_config['core_method'] == 'step': 228 | from torch.optim.lr_scheduler import StepLR 229 | step_size = lr_schedule_config['step_size'] 230 | gamma = lr_schedule_config['gamma'] 231 | scheduler = StepLR(optimizer, step_size=step_size, gamma=gamma) 232 | 233 | elif lr_schedule_config['core_method'] == 'multistep': 234 | from torch.optim.lr_scheduler import MultiStepLR 235 | milestones = lr_schedule_config['step_size'] 236 | gamma = lr_schedule_config['gamma'] 237 | scheduler = MultiStepLR(optimizer, 238 | milestones=milestones, 239 | gamma=gamma) 240 | 241 | else: 242 | from torch.optim.lr_scheduler import ExponentialLR 243 | gamma = lr_schedule_config['gamma'] 244 | scheduler = ExponentialLR(optimizer, gamma) 245 | 246 | for _ in range(last_epoch): 247 | scheduler.step() 248 | 249 | return scheduler 250 | 251 | 252 | def to_device(inputs, device): 253 | if isinstance(inputs, list): 254 | return [to_device(x, device) for x in inputs] 255 | elif isinstance(inputs, dict): 256 | return {k: to_device(v, device) for k, v in inputs.items()} 257 | else: 258 | if isinstance(inputs, int) or isinstance(inputs, float) \ 259 | or isinstance(inputs, str): 260 | return inputs 261 | return inputs.to(device) 262 | -------------------------------------------------------------------------------- /v2xvit/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ydk122024/How2comm/8d357abdcb265ed4db5e4c6d9ee5400ea5799abe/v2xvit/utils/__init__.py -------------------------------------------------------------------------------- /v2xvit/utils/box_overlaps.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | from cython.parallel import prange, parallel 11 | 12 | 13 | DTYPE = np.float32 14 | ctypedef float DTYPE_t 15 | 16 | 17 | def bbox_overlaps( 18 | np.ndarray[DTYPE_t, ndim=2] boxes, 19 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 20 | """ 21 | Parameters 22 | ---------- 23 | boxes: (N, 4) ndarray of float 24 | query_boxes: (K, 4) ndarray of float 25 | Returns 26 | ------- 27 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 28 | """ 29 | cdef unsigned int N = boxes.shape[0] 30 | cdef unsigned int K = query_boxes.shape[0] 31 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 32 | cdef DTYPE_t iw, ih, box_area 33 | cdef DTYPE_t ua 34 | cdef unsigned int k, n 35 | for k in range(K): 36 | box_area = ( 37 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 38 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 39 | ) 40 | for n in range(N): 41 | iw = ( 42 | min(boxes[n, 2], query_boxes[k, 2]) - 43 | max(boxes[n, 0], query_boxes[k, 0]) + 1 44 | ) 45 | if iw > 0: 46 | ih = ( 47 | min(boxes[n, 3], query_boxes[k, 3]) - 48 | max(boxes[n, 1], query_boxes[k, 1]) + 1 49 | ) 50 | if ih > 0: 51 | ua = float( 52 | (boxes[n, 2] - boxes[n, 0] + 1) * 53 | (boxes[n, 3] - boxes[n, 1] + 1) + 54 | box_area - iw * ih 55 | ) 56 | overlaps[n, k] = iw * ih / ua 57 | return overlaps 58 | 59 
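# --- Illustrative usage sketch (not part of the original file) ----------------
# bbox_overlaps above takes float32 [x1, y1, x2, y2] boxes and returns the
# (N, K) IoU matrix (with the +1 pixel convention). The extension must be
# compiled first via the setup.py shipped next to this file; the build command
# and import path below are assumptions:
#   python v2xvit/utils/setup.py build_ext --inplace
import numpy as np
from v2xvit.utils.box_overlaps import bbox_overlaps   # assumed import path

boxes = np.array([[0, 0, 10, 10]], dtype=np.float32)
query = np.array([[5, 5, 15, 15], [20, 20, 30, 30]], dtype=np.float32)
iou = bbox_overlaps(boxes, query)   # shape (1, 2); the second query box gives 0
# -------------------------------------------------------------------------------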
| def bbox_intersections( 60 | np.ndarray[DTYPE_t, ndim=2] boxes, 61 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 62 | """ 63 | For each query box compute the intersection ratio covered by boxes 64 | ---------- 65 | Parameters 66 | ---------- 67 | boxes: (N, 4) ndarray of float 68 | query_boxes: (K, 4) ndarray of float 69 | Returns 70 | ------- 71 | overlaps: (N, K) ndarray of intersec between boxes and query_boxes 72 | """ 73 | cdef unsigned int N = boxes.shape[0] 74 | cdef unsigned int K = query_boxes.shape[0] 75 | cdef np.ndarray[DTYPE_t, ndim=2] intersec = np.zeros((N, K), dtype=DTYPE) 76 | cdef DTYPE_t iw, ih, box_area 77 | cdef DTYPE_t ua 78 | cdef unsigned int k, n 79 | for k in range(K): 80 | box_area = ( 81 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 82 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 83 | ) 84 | for n in range(N): 85 | iw = ( 86 | min(boxes[n, 2], query_boxes[k, 2]) - 87 | max(boxes[n, 0], query_boxes[k, 0]) + 1 88 | ) 89 | if iw > 0: 90 | ih = ( 91 | min(boxes[n, 3], query_boxes[k, 3]) - 92 | max(boxes[n, 1], query_boxes[k, 1]) + 1 93 | ) 94 | if ih > 0: 95 | intersec[n, k] = iw * ih / box_area 96 | return intersec 97 | 98 | # Compute bounding box voting 99 | def box_vote( 100 | np.ndarray[float, ndim=2] dets_NMS, 101 | np.ndarray[float, ndim=2] dets_all): 102 | cdef np.ndarray[float, ndim=2] dets_voted = np.zeros((dets_NMS.shape[0], dets_NMS.shape[1]), dtype=np.float32) 103 | cdef unsigned int N = dets_NMS.shape[0] 104 | cdef unsigned int M = dets_all.shape[0] 105 | 106 | cdef np.ndarray[float, ndim=1] det 107 | cdef np.ndarray[float, ndim=1] acc_box 108 | cdef float acc_score 109 | 110 | cdef np.ndarray[float, ndim=1] det2 111 | cdef float bi0, bi1, bit2, bi3 112 | cdef float iw, ih, ua 113 | 114 | cdef float thresh=0.5 115 | 116 | for i in range(N): 117 | det = dets_NMS[i, :] 118 | acc_box = np.zeros((4), dtype=np.float32) 119 | acc_score = 0.0 120 | 121 | for m in range(M): 122 | det2 = dets_all[m, :] 123 | 124 | bi0 = max(det[0], det2[0]) 125 | bi1 = max(det[1], det2[1]) 126 | bi2 = min(det[2], det2[2]) 127 | bi3 = min(det[3], det2[3]) 128 | 129 | iw = bi2 - bi0 + 1 130 | ih = bi3 - bi1 + 1 131 | 132 | if not (iw > 0 and ih > 0): 133 | continue 134 | 135 | ua = (det[2] - det[0] + 1) * (det[3] - det[1] + 1) + (det2[2] - det2[0] + 1) * (det2[3] - det2[1] + 1) - iw * ih 136 | ov = iw * ih / ua 137 | 138 | if (ov < thresh): 139 | continue 140 | 141 | acc_box += det2[4] * det2[0:4] 142 | acc_score += det2[4] 143 | 144 | dets_voted[i][0:4] = acc_box / acc_score 145 | dets_voted[i][4] = det[4] # Keep the original score 146 | 147 | return dets_voted 148 | -------------------------------------------------------------------------------- /v2xvit/utils/common_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Common utilities 3 | """ 4 | 5 | import numpy as np 6 | import torch 7 | from shapely.geometry import Polygon 8 | 9 | 10 | def check_numpy_to_torch(x): 11 | if isinstance(x, np.ndarray): 12 | return torch.from_numpy(x).float(), True 13 | return x, False 14 | 15 | 16 | def check_contain_nan(x): 17 | if isinstance(x, dict): 18 | return any(check_contain_nan(v) for k, v in x.items()) 19 | if isinstance(x, list): 20 | return any(check_contain_nan(itm) for itm in x) 21 | if isinstance(x, int) or isinstance(x, float): 22 | return False 23 | if isinstance(x, np.ndarray): 24 | return np.any(np.isnan(x)) 25 | return torch.any(x.isnan()).detach().cpu().item() 26 | 27 | 28 | def rotate_points_along_z(points, angle): 29 | 
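The small helpers above (check_numpy_to_torch, check_contain_nan) are what allow rotate_points_along_z, whose signature appears just above, to accept either numpy arrays or torch tensors and to hand back the same type it was given. A short sketch with made-up values:

import numpy as np
import torch
from v2xvit.utils.common_utils import (check_numpy_to_torch, check_contain_nan,
                                       rotate_points_along_z)

pts = np.random.rand(2, 100, 4).astype(np.float32)      # (B, N, 3 + C) point clouds
angles = np.array([0.0, np.pi / 2], dtype=np.float32)   # one yaw angle (radians) per batch entry
rotated = rotate_points_along_z(pts, angles)             # numpy in -> numpy out
_, was_numpy = check_numpy_to_torch(pts)                 # was_numpy is True
print(check_contain_nan({'points': pts,
                         'flow': torch.tensor([float('nan')])}))  # True: NaN in a nested tensor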
""" 30 | Args: 31 | points: (B, N, 3 + C) 32 | angle: (B), radians, angle along z-axis, angle increases x ==> y 33 | Returns: 34 | 35 | """ 36 | points, is_numpy = check_numpy_to_torch(points) 37 | angle, _ = check_numpy_to_torch(angle) 38 | 39 | cosa = torch.cos(angle) 40 | sina = torch.sin(angle) 41 | zeros = angle.new_zeros(points.shape[0]) 42 | ones = angle.new_ones(points.shape[0]) 43 | rot_matrix = torch.stack(( 44 | cosa, sina, zeros, 45 | -sina, cosa, zeros, 46 | zeros, zeros, ones 47 | ), dim=1).view(-1, 3, 3).float() 48 | points_rot = torch.matmul(points[:, :, 0:3].float(), rot_matrix) 49 | points_rot = torch.cat((points_rot, points[:, :, 3:]), dim=-1) 50 | return points_rot.numpy() if is_numpy else points_rot 51 | 52 | 53 | def rotate_points_along_z_2d(points, angle): 54 | """ 55 | Rorate the points along z-axis. 56 | Parameters 57 | ---------- 58 | points : torch.Tensor / np.ndarray 59 | (N, 2). 60 | angle : torch.Tensor / np.ndarray 61 | (N,) 62 | 63 | Returns 64 | ------- 65 | points_rot : torch.Tensor / np.ndarray 66 | Rorated points with shape (N, 2) 67 | 68 | """ 69 | points, is_numpy = check_numpy_to_torch(points) 70 | angle, _ = check_numpy_to_torch(angle) 71 | cosa = torch.cos(angle) 72 | sina = torch.sin(angle) 73 | # (N, 2, 2) 74 | rot_matrix = torch.stack((cosa, sina, -sina, cosa), dim=1).view(-1, 2, 75 | 2).float() 76 | points_rot = torch.einsum("ik, ikj->ij", points.float(), rot_matrix) 77 | return points_rot.numpy() if is_numpy else points_rot 78 | 79 | 80 | def remove_ego_from_objects(objects, ego_id): 81 | """ 82 | Avoid adding ego vehicle to the object dictionary. 83 | 84 | Parameters 85 | ---------- 86 | objects : dict 87 | The dictionary contained all objects. 88 | 89 | ego_id : int 90 | Ego id. 91 | """ 92 | if ego_id in objects: 93 | del objects[ego_id] 94 | 95 | 96 | def retrieve_ego_id(base_data_dict): 97 | """ 98 | Retrieve the ego vehicle id from sample(origin format). 99 | 100 | Parameters 101 | ---------- 102 | base_data_dict : dict 103 | Data sample in origin format. 104 | 105 | Returns 106 | ------- 107 | ego_id : str 108 | The id of ego vehicle. 109 | """ 110 | ego_id = None 111 | 112 | for cav_id, cav_content in base_data_dict.items(): 113 | if cav_content['ego']: 114 | ego_id = cav_id 115 | break 116 | return ego_id 117 | 118 | 119 | def compute_iou(box, boxes): 120 | """ 121 | Compute iou between box and boxes list 122 | Parameters 123 | ---------- 124 | box : shapely.geometry.Polygon 125 | Bounding box Polygon. 126 | 127 | boxes : list 128 | List of shapely.geometry.Polygon. 129 | 130 | Returns 131 | ------- 132 | iou : np.ndarray 133 | Array of iou between box and boxes. 134 | 135 | """ 136 | # Calculate intersection areas 137 | iou = [box.intersection(b).area / box.union(b).area for b in boxes] 138 | 139 | return np.array(iou, dtype=np.float32) 140 | 141 | 142 | def convert_format(boxes_array): 143 | """ 144 | Convert boxes array to shapely.geometry.Polygon format. 145 | Parameters 146 | ---------- 147 | boxes_array : np.ndarray 148 | (N, 4, 2) or (N, 8, 3). 149 | 150 | Returns 151 | ------- 152 | list of converted shapely.geometry.Polygon object. 153 | 154 | """ 155 | polygons = [Polygon([(box[i, 0], box[i, 1]) for i in range(4)]) for box in 156 | boxes_array] 157 | return np.array(polygons) 158 | 159 | 160 | def torch_tensor_to_numpy(torch_tensor): 161 | """ 162 | Convert a torch tensor to numpy. 163 | 164 | Parameters 165 | ---------- 166 | torch_tensor : torch.Tensor 167 | 168 | Returns 169 | ------- 170 | A numpy array. 
171 | """ 172 | return torch_tensor.numpy() if not torch_tensor.is_cuda else \ 173 | torch_tensor.cpu().detach().numpy() 174 | -------------------------------------------------------------------------------- /v2xvit/utils/eval_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | import torch 5 | 6 | from v2xvit.utils import common_utils 7 | from v2xvit.hypes_yaml import yaml_utils 8 | 9 | 10 | def voc_ap(rec, prec): 11 | """ 12 | VOC 2010 Average Precision. 13 | """ 14 | rec.insert(0, 0.0) 15 | rec.append(1.0) 16 | mrec = rec[:] 17 | 18 | prec.insert(0, 0.0) 19 | prec.append(0.0) 20 | mpre = prec[:] 21 | 22 | for i in range(len(mpre) - 2, -1, -1): 23 | mpre[i] = max(mpre[i], mpre[i + 1]) 24 | 25 | i_list = [] 26 | for i in range(1, len(mrec)): 27 | if mrec[i] != mrec[i - 1]: 28 | i_list.append(i) 29 | 30 | ap = 0.0 31 | for i in i_list: 32 | ap += ((mrec[i] - mrec[i - 1]) * mpre[i]) 33 | return ap, mrec, mpre 34 | 35 | 36 | def caluclate_tp_fp(det_boxes, det_score, gt_boxes, result_stat, iou_thresh): 37 | """ 38 | Calculate the true positive and false positive numbers of the current 39 | frames. 40 | 41 | Parameters 42 | ---------- 43 | det_boxes : torch.Tensor 44 | The detection bounding box, shape (N, 8, 3) or (N, 4, 2). 45 | det_score :torch.Tensor 46 | The confidence score for each preditect bounding box. 47 | gt_boxes : torch.Tensor 48 | The groundtruth bounding box. 49 | result_stat: dict 50 | A dictionary contains fp, tp and gt number. 51 | iou_thresh : float 52 | The iou thresh. 53 | """ 54 | # fp, tp and gt in the current frame 55 | fp = [] 56 | tp = [] 57 | gt = gt_boxes.shape[0] 58 | if det_boxes is not None: 59 | # convert bounding boxes to numpy array 60 | det_boxes = common_utils.torch_tensor_to_numpy(det_boxes) 61 | det_score = common_utils.torch_tensor_to_numpy(det_score) 62 | gt_boxes = common_utils.torch_tensor_to_numpy(gt_boxes) 63 | 64 | # sort the prediction bounding box by score 65 | score_order_descend = np.argsort(-det_score) 66 | det_polygon_list = list(common_utils.convert_format(det_boxes)) 67 | gt_polygon_list = list(common_utils.convert_format(gt_boxes)) 68 | 69 | # match prediction and gt bounding box 70 | for i in range(score_order_descend.shape[0]): 71 | det_polygon = det_polygon_list[score_order_descend[i]] 72 | ious = common_utils.compute_iou(det_polygon, gt_polygon_list) 73 | 74 | if len(gt_polygon_list) == 0 or np.max(ious) < iou_thresh: 75 | fp.append(1) 76 | tp.append(0) 77 | continue 78 | 79 | fp.append(0) 80 | tp.append(1) 81 | 82 | gt_index = np.argmax(ious) 83 | gt_polygon_list.pop(gt_index) 84 | 85 | result_stat[iou_thresh]['fp'] += fp 86 | result_stat[iou_thresh]['tp'] += tp 87 | result_stat[iou_thresh]['gt'] += gt 88 | 89 | 90 | def calculate_ap(result_stat, iou): 91 | """ 92 | Calculate the average precision and recall, and save them into a txt. 93 | 94 | Parameters 95 | ---------- 96 | result_stat : dict 97 | A dictionary contains fp, tp and gt number. 
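The evaluation path ties the polygon helpers convert_format and compute_iou from common_utils.py above to the per-frame caluclate_tp_fp routine: it is called once per frame and per IoU threshold, accumulating into a shared result_stat dictionary that eval_final_results (further below in this file) reduces to AP numbers. A minimal single-frame sketch; the boxes and the local save path are made up:

import torch
from v2xvit.utils.eval_utils import caluclate_tp_fp, eval_final_results

result_stat = {t: {'tp': [], 'fp': [], 'gt': 0} for t in (0.3, 0.5, 0.7)}

det_boxes = torch.tensor([[[0., 0.], [2., 0.], [2., 2.], [0., 2.]]])  # (1, 4, 2) prediction
det_score = torch.tensor([0.9])
gt_boxes = torch.tensor([[[0., 0.], [2., 0.], [2., 2.], [0., 2.]]])   # perfect match

for t in (0.3, 0.5, 0.7):
    caluclate_tp_fp(det_boxes, det_score, gt_boxes, result_stat, t)

ap_30, ap_50, ap_70 = eval_final_results(result_stat, save_path='.')  # also writes ./eval.yaml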
98 | iou : float 99 | """ 100 | iou_5 = result_stat[iou] 101 | 102 | fp = iou_5['fp'] 103 | tp = iou_5['tp'] 104 | assert len(fp) == len(tp) 105 | 106 | gt_total = iou_5['gt'] 107 | 108 | cumsum = 0 109 | for idx, val in enumerate(fp): 110 | fp[idx] += cumsum 111 | cumsum += val 112 | 113 | cumsum = 0 114 | for idx, val in enumerate(tp): 115 | tp[idx] += cumsum 116 | cumsum += val 117 | 118 | rec = tp[:] 119 | for idx, val in enumerate(tp): 120 | rec[idx] = float(tp[idx]) / gt_total 121 | 122 | prec = tp[:] 123 | for idx, val in enumerate(tp): 124 | prec[idx] = float(tp[idx]) / (fp[idx] + tp[idx]) 125 | 126 | ap, mrec, mprec = voc_ap(rec[:], prec[:]) 127 | 128 | return ap, mrec, mprec 129 | 130 | 131 | def eval_final_results(result_stat, save_path): 132 | dump_dict = {} 133 | 134 | ap_30, mrec_30, mpre_30 = calculate_ap(result_stat, 0.30) 135 | ap_50, mrec_50, mpre_50 = calculate_ap(result_stat, 0.50) 136 | ap_70, mrec_70, mpre_70 = calculate_ap(result_stat, 0.70) 137 | 138 | dump_dict.update({'ap30': ap_30, 139 | 'ap_50': ap_50, 140 | 'ap_70': ap_70, 141 | 'mpre_50': mpre_50, 142 | 'mrec_50': mrec_50, 143 | 'mpre_70': mpre_70, 144 | 'mrec_70': mrec_70, 145 | }) 146 | yaml_utils.save_yaml(dump_dict, os.path.join(save_path, 'eval.yaml')) 147 | 148 | print('The Average Precision at IOU 0.3 is %.2f, ' 149 | 'The Average Precision at IOU 0.5 is %.2f, ' 150 | 'The Average Precision at IOU 0.7 is %.2f' % (ap_30, ap_50, ap_70)) 151 | return ap_30,ap_50,ap_70 152 | -------------------------------------------------------------------------------- /v2xvit/utils/pcd_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility functions related to point cloud 3 | """ 4 | 5 | import open3d as o3d 6 | import numpy as np 7 | 8 | 9 | def pcd_to_np(pcd_file): 10 | """ 11 | Read pcd and return numpy array. 12 | 13 | Parameters 14 | ---------- 15 | pcd_file : str 16 | The pcd file that contains the point cloud. 17 | 18 | Returns 19 | ------- 20 | pcd : o3d.PointCloud 21 | PointCloud object, used for visualization 22 | pcd_np : np.ndarray 23 | The lidar data in numpy format, shape:(n, 4) 24 | 25 | """ 26 | pcd = o3d.io.read_point_cloud(pcd_file) 27 | 28 | xyz = np.asarray(pcd.points) 29 | # we save the intensity in the first channel 30 | intensity = np.expand_dims(np.asarray(pcd.colors)[:, 0], -1) 31 | pcd_np = np.hstack((xyz, intensity)) 32 | 33 | return np.asarray(pcd_np, dtype=np.float32) 34 | 35 | 36 | def mask_points_by_range(points, limit_range): 37 | """ 38 | Remove the lidar points out of the boundary. 39 | 40 | Parameters 41 | ---------- 42 | points : np.ndarray 43 | Lidar points under lidar sensor coordinate system. 44 | 45 | limit_range : list 46 | [x_min, y_min, z_min, x_max, y_max, z_max] 47 | 48 | Returns 49 | ------- 50 | points : np.ndarray 51 | Filtered lidar points. 52 | """ 53 | 54 | mask = (points[:, 0] > limit_range[0]) & (points[:, 0] < limit_range[3])\ 55 | & (points[:, 1] > limit_range[1]) & ( 56 | points[:, 1] < limit_range[4]) \ 57 | & (points[:, 2] > limit_range[2]) & ( 58 | points[:, 2] < limit_range[5]) 59 | 60 | points = points[mask] 61 | 62 | return points 63 | 64 | 65 | def mask_ego_points(points): 66 | """ 67 | Remove the lidar points of the ego vehicle itself. 68 | 69 | Parameters 70 | ---------- 71 | points : np.ndarray 72 | Lidar points under lidar sensor coordinate system. 73 | 74 | Returns 75 | ------- 76 | points : np.ndarray 77 | Filtered lidar points. 
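A typical preprocessing chain built from the helpers above: load a .pcd file, crop it to the detection range, then drop the ego vehicle's own returns with mask_ego_points (defined next). The file name and range values below are placeholders, not taken from the original code.

from v2xvit.utils.pcd_utils import (pcd_to_np, mask_points_by_range,
                                    mask_ego_points)

lidar = pcd_to_np('000068.pcd')                     # (n, 4): x, y, z, intensity
cav_lidar_range = [-140.8, -40, -3, 140.8, 40, 1]   # [x_min, y_min, z_min, x_max, y_max, z_max], illustrative
lidar = mask_points_by_range(lidar, cav_lidar_range)
lidar = mask_ego_points(lidar)                      # remove hits on the ego body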
78 | """ 79 | mask = (points[:, 0] >= -1.95) & (points[:, 0] <= 2.95) \ 80 | & (points[:, 1] >= -1.1) & (points[:, 1] <= 1.1) 81 | points = points[np.logical_not(mask)] 82 | 83 | return points 84 | 85 | 86 | def shuffle_points(points): 87 | shuffle_idx = np.random.permutation(points.shape[0]) 88 | points = points[shuffle_idx] 89 | 90 | return points 91 | 92 | 93 | def lidar_project(lidar_data, extrinsic): 94 | """ 95 | Given the extrinsic matrix, project lidar data to another space. 96 | 97 | Parameters 98 | ---------- 99 | lidar_data : np.ndarray 100 | Lidar data, shape: (n, 4) 101 | 102 | extrinsic : np.ndarray 103 | Extrinsic matrix, shape: (4, 4) 104 | 105 | Returns 106 | ------- 107 | projected_lidar : np.ndarray 108 | Projected lida data, shape: (n, 4) 109 | """ 110 | 111 | lidar_xyz = lidar_data[:, :3].T 112 | # (3, n) -> (4, n), homogeneous transformation 113 | lidar_xyz = np.r_[lidar_xyz, [np.ones(lidar_xyz.shape[1])]] 114 | lidar_int = lidar_data[:, 3] 115 | 116 | # transform to ego vehicle space, (3, n) 117 | project_lidar_xyz = np.dot(extrinsic, lidar_xyz)[:3, :] 118 | # (n, 3) 119 | project_lidar_xyz = project_lidar_xyz.T 120 | # concatenate the intensity with xyz, (n, 4) 121 | projected_lidar = np.hstack((project_lidar_xyz, 122 | np.expand_dims(lidar_int, -1))) 123 | 124 | return projected_lidar 125 | 126 | 127 | def projected_lidar_stack(projected_lidar_list): 128 | """ 129 | Stack all projected lidar together. 130 | 131 | Parameters 132 | ---------- 133 | projected_lidar_list : list 134 | The list containing all projected lidar. 135 | 136 | Returns 137 | ------- 138 | stack_lidar : np.ndarray 139 | Stack all projected lidar data together. 140 | """ 141 | stack_lidar = [] 142 | for lidar_data in projected_lidar_list: 143 | stack_lidar.append(lidar_data) 144 | 145 | return np.vstack(stack_lidar) 146 | 147 | 148 | def downsample_lidar(pcd_np, num): 149 | """ 150 | Downsample the lidar points to a certain number. 151 | 152 | Parameters 153 | ---------- 154 | pcd_np : np.ndarray 155 | The lidar points, (n, 4). 156 | 157 | num : int 158 | The downsample target number. 159 | 160 | Returns 161 | ------- 162 | pcd_np : np.ndarray 163 | The downsampled lidar points. 164 | """ 165 | assert pcd_np.shape[0] >= num 166 | 167 | selected_index = np.random.choice((pcd_np.shape[0]), 168 | num, 169 | replace=False) 170 | pcd_np = pcd_np[selected_index] 171 | 172 | return pcd_np 173 | 174 | 175 | def downsample_lidar_minimum(pcd_np_list): 176 | """ 177 | Given a list of pcd, find the minimum number and downsample all 178 | point clouds to the minimum number. 179 | 180 | Parameters 181 | ---------- 182 | pcd_np_list : list 183 | A list of pcd numpy array(n, 4). 184 | Returns 185 | ------- 186 | pcd_np_list : list 187 | Downsampled point clouds. 
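lidar_project and projected_lidar_stack above are usually combined with x1_to_x2 from transformation_utils.py (listed further below) to merge a neighbouring CAV's cloud into the ego frame. The poses and point counts here are made up:

import numpy as np
from v2xvit.utils.pcd_utils import lidar_project, projected_lidar_stack
from v2xvit.utils.transformation_utils import x1_to_x2

ego_pose = [10.0, 2.0, 0.3, 0.0, 90.0, 0.0]          # [x, y, z, roll, yaw, pitch]
cav_pose = [25.0, 5.0, 0.3, 0.0, 88.0, 0.0]
cav_to_ego = x1_to_x2(cav_pose, ego_pose)            # (4, 4) transform: cav lidar -> ego frame

ego_lidar = np.random.rand(1000, 4).astype(np.float32)
cav_lidar = np.random.rand(800, 4).astype(np.float32)
cav_in_ego = lidar_project(cav_lidar, cav_to_ego)
merged = projected_lidar_stack([ego_lidar, cav_in_ego])  # (1800, 4) stacked cloud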
188 | """ 189 | minimum = np.Inf 190 | 191 | for i in range(len(pcd_np_list)): 192 | num = pcd_np_list[i].shape[0] 193 | minimum = num if minimum > num else minimum 194 | 195 | for (i, pcd_np) in enumerate(pcd_np_list): 196 | pcd_np_list[i] = downsample_lidar(pcd_np, minimum) 197 | 198 | return pcd_np_list 199 | -------------------------------------------------------------------------------- /v2xvit/utils/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from Cython.Build import cythonize 3 | import numpy 4 | setup( 5 | name='box overlaps', 6 | ext_modules=cythonize('v2xvit/utils/box_overlaps.pyx'), 7 | include_dirs=[numpy.get_include()] 8 | ) -------------------------------------------------------------------------------- /v2xvit/utils/transformation_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Transformation utils 3 | """ 4 | 5 | import numpy as np 6 | 7 | 8 | def x_to_world(pose): 9 | """ 10 | The transformation matrix from x-coordinate system to carla world system 11 | 12 | Parameters 13 | ---------- 14 | pose : list 15 | [x, y, z, roll, yaw, pitch] 16 | 17 | Returns 18 | ------- 19 | matrix : np.ndarray 20 | The transformation matrix. 21 | """ 22 | x, y, z, roll, yaw, pitch = pose[:] 23 | 24 | # used for rotation matrix 25 | c_y = np.cos(np.radians(yaw)) 26 | s_y = np.sin(np.radians(yaw)) 27 | c_r = np.cos(np.radians(roll)) 28 | s_r = np.sin(np.radians(roll)) 29 | c_p = np.cos(np.radians(pitch)) 30 | s_p = np.sin(np.radians(pitch)) 31 | 32 | matrix = np.identity(4) 33 | # translation matrix 34 | matrix[0, 3] = x 35 | matrix[1, 3] = y 36 | matrix[2, 3] = z 37 | 38 | # rotation matrix 39 | matrix[0, 0] = c_p * c_y 40 | matrix[0, 1] = c_y * s_p * s_r - s_y * c_r 41 | matrix[0, 2] = -c_y * s_p * c_r - s_y * s_r 42 | matrix[1, 0] = s_y * c_p 43 | matrix[1, 1] = s_y * s_p * s_r + c_y * c_r 44 | matrix[1, 2] = -s_y * s_p * c_r + c_y * s_r 45 | matrix[2, 0] = s_p 46 | matrix[2, 1] = -c_p * s_r 47 | matrix[2, 2] = c_p * c_r 48 | 49 | return matrix 50 | 51 | 52 | def x1_to_x2(x1, x2): 53 | """ 54 | Transformation matrix from x1 to x2. 55 | 56 | Parameters 57 | ---------- 58 | x1 : list 59 | The pose of x1 under world coordinates. 60 | x2 : list 61 | The pose of x2 under world coordinates. 62 | 63 | Returns 64 | ------- 65 | transformation_matrix : np.ndarray 66 | The transformation matrix. 67 | 68 | """ 69 | x1_to_world = x_to_world(x1) 70 | x2_to_world = x_to_world(x2) 71 | world_to_x2 = np.linalg.inv(x2_to_world) 72 | 73 | transformation_matrix = np.dot(world_to_x2, x1_to_world) 74 | return transformation_matrix 75 | 76 | 77 | def dist_to_continuous(p_dist, displacement_dist, res, downsample_rate): 78 | """ 79 | Convert points discretized format to continuous space for BEV representation. 80 | Parameters 81 | ---------- 82 | p_dist : numpy.array 83 | Points in discretized coorindates. 84 | 85 | displacement_dist : numpy.array 86 | Discretized coordinates of bottom left origin. 87 | 88 | res : float 89 | Discretization resolution. 90 | 91 | downsample_rate : int 92 | Dowmsamping rate. 93 | 94 | Returns 95 | ------- 96 | p_continuous : numpy.array 97 | Points in continuous coorindates. 
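The setup script above is normally run from the repository root, e.g. python v2xvit/utils/setup.py build_ext --inplace, since the .pyx path it cythonizes is relative to the working directory. A quick sanity check for the pose transforms just defined; the pose values are arbitrary:

import numpy as np
from v2xvit.utils.transformation_utils import x_to_world, x1_to_x2

pose = [12.0, -4.5, 0.3, 0.0, 35.0, 0.0]   # [x, y, z, roll, yaw, pitch], angles in degrees
T = x_to_world(pose)                        # 4x4 pose-to-world matrix
assert np.allclose(x1_to_x2(pose, pose), np.eye(4), atol=1e-6)  # self-transform is identity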
98 | 99 | """ 100 | p_dist = np.copy(p_dist) 101 | p_dist = p_dist + displacement_dist 102 | p_continuous = p_dist * res * downsample_rate 103 | return p_continuous 104 | -------------------------------------------------------------------------------- /v2xvit/version.py: -------------------------------------------------------------------------------- 1 | """Specifies the current version number of v2xvit.""" 2 | 3 | __version__ = "0.1.0" 4 | -------------------------------------------------------------------------------- /v2xvit/visualization/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ydk122024/How2comm/8d357abdcb265ed4db5e4c6d9ee5400ea5799abe/v2xvit/visualization/__init__.py -------------------------------------------------------------------------------- /v2xvit/visualization/pinhole_param.json: -------------------------------------------------------------------------------- 1 | { 2 | "class_name" : "PinholeCameraParameters", 3 | "extrinsic" : 4 | [ 5 | 1.0, 6 | -0.0, 7 | -0.0, 8 | 0.0, 9 | 0.0, 10 | -1.0, 11 | -0.0, 12 | 0.0, 13 | 0.0, 14 | -0.0, 15 | -1.0, 16 | 0.0, 17 | 14.870189666748047, 18 | 0.0001621246337890625, 19 | 141.0903074604017, 20 | 1.0 21 | ], 22 | "intrinsic" : 23 | { 24 | "height" : 1025, 25 | "intrinsic_matrix" : 26 | [ 27 | 887.67603887904966, 28 | 0.0, 29 | 0.0, 30 | 0.0, 31 | 887.67603887904966, 32 | 0.0, 33 | 926.0, 34 | 512.0, 35 | 1.0 36 | ], 37 | "width" : 1853 38 | }, 39 | "version_major" : 1, 40 | "version_minor" : 0 41 | } -------------------------------------------------------------------------------- /v2xvit/visualization/vis_data_sequence.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | from torch.utils.data import DataLoader 4 | 5 | from v2xvit.hypes_yaml.yaml_utils import load_yaml 6 | from v2xvit.visualization import vis_utils 7 | from v2xvit.data_utils.datasets.early_fusion_vis_dataset import \ 8 | EarlyFusionVisDataset 9 | 10 | 11 | def vis_parser(): 12 | parser = argparse.ArgumentParser(description="data visualization") 13 | parser.add_argument('--color_mode', type=str, default="intensity", 14 | help='lidar color rendering mode, e.g. intensity,' 15 | 'z-value or constant.') 16 | opt = parser.parse_args() 17 | return opt 18 | 19 | 20 | if __name__ == '__main__': 21 | current_path = os.path.dirname(os.path.realpath(__file__)) 22 | params = load_yaml(os.path.join(current_path, 23 | '../hypes_yaml/visualization.yaml')) 24 | 25 | opencda_dataset = EarlyFusionVisDataset(params, visualize=True, 26 | train=False) 27 | data_loader = DataLoader(opencda_dataset, batch_size=1, num_workers=8, 28 | collate_fn=opencda_dataset.collate_batch_train, 29 | shuffle=False, 30 | pin_memory=False) 31 | 32 | opt = vis_parser() 33 | vis_utils.visualize_sequence_dataloader(data_loader, 34 | params['postprocess']['order'], 35 | color_mode=opt.color_mode) 36 | --------------------------------------------------------------------------------
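The visualization entry point above can be launched, for example, as python v2xvit/visualization/vis_data_sequence.py --color_mode z-value (the other modes per its help string are intensity and constant). Note that it loads ../hypes_yaml/visualization.yaml relative to its own location, i.e. v2xvit/hypes_yaml/visualization.yaml, which is not part of this listing and must supply the dataset paths and the postprocess order the visualizer expects.

Finally, a small numeric example for dist_to_continuous from transformation_utils.py above, since its docstring alone is terse; all values are made up:

import numpy as np
from v2xvit.utils.transformation_utils import dist_to_continuous

cells = np.array([[10, 4], [37, 12]])   # discretised BEV cell indices
origin = np.array([-100, -40])          # discretised bottom-left origin, illustrative
points = dist_to_continuous(cells, origin, res=0.4, downsample_rate=2)
# equivalent to (cells + origin) * 0.4 * 2, i.e. metric x/y coordinates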