├── F2BEV_code
├── .DS_Store
├── F2BEV
│   ├── .DS_Store
│   ├── pre_computation
│   │   ├── .DS_Store
│   │   ├── unity_data
│   │   │   ├── bev_mask.npy
│   │   │   └── reference_points_cam.npy
│   │   ├── forward_looking_camera_model
│   │   │   ├── data
│   │   │   │   ├── bev
│   │   │   │   │   └── 0.png
│   │   │   │   ├── front
│   │   │   │   │   └── 0.png
│   │   │   │   ├── left
│   │   │   │   │   └── 0.png
│   │   │   │   ├── rear
│   │   │   │   │   └── 0.png
│   │   │   │   ├── right
│   │   │   │   │   └── 0.png
│   │   │   │   └── seg
│   │   │   │   │   ├── BEV_0_seg.png
│   │   │   │   │   ├── left_0_seg.png
│   │   │   │   │   ├── rear_0_seg.png
│   │   │   │   │   ├── front_0_seg.png
│   │   │   │   │   └── right_0_seg.png
│   │   │   ├── masks
│   │   │   │   ├── front.npy
│   │   │   │   ├── left.npy
│   │   │   │   ├── rear.npy
│   │   │   │   └── right.npy
│   │   │   └── flcw_unity.yml
│   │   └── computeNormalizedReferencePoints.py
│   ├── bblocks
│   │   ├── __pycache__
│   │   │   ├── ffn.cpython-37.pyc
│   │   │   ├── bifpn.cpython-37.pyc
│   │   │   ├── backbone.cpython-37.pyc
│   │   │   ├── cnndecoder.cpython-37.pyc
│   │   │   ├── bifpn_configs.cpython-37.pyc
│   │   │   ├── backbone_bifpn.cpython-37.pyc
│   │   │   ├── bevformer_block.cpython-37.pyc
│   │   │   ├── encoder_height.cpython-37.pyc
│   │   │   ├── deformable_attention.cpython-37.pyc
│   │   │   ├── mask_head_decoder_seg.cpython-37.pyc
│   │   │   ├── positional_encoding.cpython-37.pyc
│   │   │   ├── mask_head_decoder_htseg.cpython-37.pyc
│   │   │   ├── mask_head_pansegformer.cpython-37.pyc
│   │   │   ├── spatial_cross_attention.cpython-37.pyc
│   │   │   ├── temporal_self_attention.cpython-37.pyc
│   │   │   ├── mask_head_decoder_height.cpython-37.pyc
│   │   │   └── deformable_attention_function.cpython-37.pyc
│   │   ├── ffn.py
│   │   ├── backbone_bifpn.py
│   │   ├── mask_head_decoder_height.py
│   │   ├── mask_head_decoder_seg.py
│   │   ├── backbone.py
│   │   ├── bevformer_block.py
│   │   ├── mask_head_decoder_htseg.py
│   │   ├── positional_encoding.py
│   │   ├── deformable_attention_function.py
│   │   ├── spatial_cross_attention.py
│   │   ├── cnndecoder.py
│   │   ├── bifpn_configs.py
│   │   └── deformable_attention.py
│   ├── losses
│   │   ├── __pycache__
│   │   │   ├── focal.cpython-37.pyc
│   │   │   ├── functional.cpython-37.pyc
│   │   │   └── smoothness.cpython-37.pyc
│   │   ├── smoothness.py
│   │   └── focal.py
│   ├── model_f2bev_conv_st_seg.py
│   ├── model_f2bev_conv_st_height.py
│   ├── model_f2bev_attn_st_seg.py
│   ├── model_f2bev_attn_st_height.py
│   ├── model_f2bev_conv_mt.py
│   ├── model_f2bev_attn_mt.py
│   ├── test_f2bev_conv_st_height.py
│   ├── test_f2bev_attn_st_height.py
│   ├── test_f2bev_conv_st_seg.py
│   ├── test_f2bev_attn_st_seg.py
│   ├── loader_multi_task.py
│   ├── test_loader_multi_task.py
│   ├── test_f2bev_attn_mt.py
│   ├── test_f2bev_conv_mt.py
│   ├── loader_single_task.py
│   ├── test_loader_single_task.py
│   ├── train_f2bev_conv_st_height.py
│   ├── train_f2bev_attn_st_height.py
│   ├── train_f2bev_conv_st_seg.py
│   └── train_f2bev_attn_st_seg.py
├── README.md
└── f2bev_conda_env.yml
├── README.md
└── FB-SSEM_dataset
    └── README.md
/F2BEV_code/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/.DS_Store -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/.DS_Store -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/pre_computation/.DS_Store: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/pre_computation/.DS_Store -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/__pycache__/ffn.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/bblocks/__pycache__/ffn.cpython-37.pyc -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/__pycache__/bifpn.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/bblocks/__pycache__/bifpn.cpython-37.pyc -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/losses/__pycache__/focal.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/losses/__pycache__/focal.cpython-37.pyc -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/pre_computation/unity_data/bev_mask.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/pre_computation/unity_data/bev_mask.npy -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/__pycache__/backbone.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/bblocks/__pycache__/backbone.cpython-37.pyc -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/__pycache__/cnndecoder.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/bblocks/__pycache__/cnndecoder.cpython-37.pyc -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/losses/__pycache__/functional.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/losses/__pycache__/functional.cpython-37.pyc -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/losses/__pycache__/smoothness.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/losses/__pycache__/smoothness.cpython-37.pyc -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/__pycache__/bifpn_configs.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/bblocks/__pycache__/bifpn_configs.cpython-37.pyc -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/__pycache__/backbone_bifpn.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/bblocks/__pycache__/backbone_bifpn.cpython-37.pyc -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/__pycache__/bevformer_block.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/bblocks/__pycache__/bevformer_block.cpython-37.pyc -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/__pycache__/encoder_height.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/bblocks/__pycache__/encoder_height.cpython-37.pyc -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/pre_computation/unity_data/reference_points_cam.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/pre_computation/unity_data/reference_points_cam.npy -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/__pycache__/deformable_attention.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/bblocks/__pycache__/deformable_attention.cpython-37.pyc -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/__pycache__/mask_head_decoder_seg.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/bblocks/__pycache__/mask_head_decoder_seg.cpython-37.pyc -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/__pycache__/positional_encoding.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/bblocks/__pycache__/positional_encoding.cpython-37.pyc -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/__pycache__/mask_head_decoder_htseg.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/bblocks/__pycache__/mask_head_decoder_htseg.cpython-37.pyc -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/__pycache__/mask_head_pansegformer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/bblocks/__pycache__/mask_head_pansegformer.cpython-37.pyc -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/__pycache__/spatial_cross_attention.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/bblocks/__pycache__/spatial_cross_attention.cpython-37.pyc 
-------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/__pycache__/temporal_self_attention.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/bblocks/__pycache__/temporal_self_attention.cpython-37.pyc -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/__pycache__/mask_head_decoder_height.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/bblocks/__pycache__/mask_head_decoder_height.cpython-37.pyc -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/data/bev/0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/data/bev/0.png -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/data/front/0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/data/front/0.png -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/data/left/0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/data/left/0.png -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/data/rear/0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/data/rear/0.png -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/data/right/0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/data/right/0.png -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/masks/front.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/masks/front.npy -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/masks/left.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/masks/left.npy -------------------------------------------------------------------------------- 
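The precomputed files listed here (```unity_data/bev_mask.npy```, ```unity_data/reference_points_cam.npy```, and the per-camera ```masks/*.npy``` files) are plain NumPy arrays associated with the offline reference-point pre-computation for the distortion-aware spatial cross attention (see the ```F2BEV_code``` README further below). The following is a minimal inspection sketch, not part of the original repository: the relative paths are taken from the listing above, and because the array shapes and dtypes are not documented here, the snippet only loads each file and reports what it finds.

```python
# Illustrative sketch (not from the repository): inspect the precomputed .npy
# artifacts referenced by the F2BEV pre-computation step.
import numpy as np

paths = [
    "pre_computation/unity_data/bev_mask.npy",
    "pre_computation/unity_data/reference_points_cam.npy",
    "pre_computation/forward_looking_camera_model/masks/front.npy",
    "pre_computation/forward_looking_camera_model/masks/left.npy",
    "pre_computation/forward_looking_camera_model/masks/rear.npy",
    "pre_computation/forward_looking_camera_model/masks/right.npy",
]

for p in paths:
    arr = np.load(p)
    print(f"{p}: shape={arr.shape}, dtype={arr.dtype}")
```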
/F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/masks/rear.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/masks/rear.npy -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/masks/right.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/masks/right.npy -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/__pycache__/deformable_attention_function.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/bblocks/__pycache__/deformable_attention_function.cpython-37.pyc -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/data/seg/BEV_0_seg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/data/seg/BEV_0_seg.png -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/data/seg/left_0_seg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/data/seg/left_0_seg.png -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/data/seg/rear_0_seg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/data/seg/rear_0_seg.png -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/data/seg/front_0_seg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/data/seg/front_0_seg.png -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/data/seg/right_0_seg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/data/seg/right_0_seg.png -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/flcw_unity.yml: -------------------------------------------------------------------------------- 1 | %YAML:1.0 2 | --- 3 | K: !!opencv-matrix 4 | rows: 3 5 | cols: 3 6 | dt: d 7 | data: [ 659.9565405462982, -2.8848508379788056, 634.6329612029243, 0.0, 625.1032520893773, 544.7433055928482, 0., 0., 1. 
] 8 | D: !!opencv-matrix 9 | rows: 1 10 | cols: 4 11 | dt: d 12 | data: [ -0.2900269437421997, 0.11089496468175668, -0.0003222479159157141, 0.0029110573007121382] 13 | xi: !!opencv-matrix 14 | rows: 1 15 | cols: 1 16 | dt: d 17 | data: [ 1.0866311153248236 ] 18 | board_width: 9 19 | board_height: 6 20 | square_size: 2.4229999631643295e-02 21 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/ffn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue Aug 30 17:10:39 2022 5 | 6 | @author: Ekta 7 | 8 | """ 9 | 10 | import torch 11 | import torch.nn as nn 12 | 13 | class FFN(nn.Module): 14 | def __init__(self): 15 | super(FFN, self).__init__() 16 | self.layers = nn.Sequential( 17 | nn.Linear(in_features = 256, out_features = 512, bias=True), 18 | nn.ReLU(inplace=True), 19 | nn.Dropout(p=0.2,inplace=False), 20 | nn.Linear(in_features = 512, out_features = 256, bias=True), 21 | nn.Dropout(p=0.2,inplace=False)) 22 | 23 | def forward(self,x): 24 | x = self.layers(x) 25 | return x 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # F2BEV 2 | 3 | F2BEV is a network for Bird's Eye View (BEV) generation from surround-view fisheye camera images for automated driving. 4 | 5 | Please navigate to the ```F2BEV_code``` folder in this repository for details. 6 | 7 | 8 | # FB-SSEM-dataset 9 | 10 | The FB-SSEM dataset is a synthetic dataset consisting of surround-view fisheye camera images and BEV maps from simulated sequences of ego car motion. Please navigate to the ```FB-SSEM_dataset``` folder in this repository for details. 11 | 12 | ## Citation 13 | If you find our dataset or code beneficial, please cite the [F2BEV paper](https://arxiv.org/abs/2303.03651). 14 | 15 | ```bash 16 | @article{samani2023f2bev, 17 | title={F2BEV: Bird's Eye View Generation from Surround-View Fisheye Camera Images for Automated Driving}, 18 | author={Samani, Ekta U and Tao, Feng and Dasari, Harshavardhan R and Ding, Sihao and Banerjee, Ashis G}, 19 | journal={arXiv preprint arXiv:2303.03651}, 20 | year={2023}} 21 | ``` 22 | 23 | 24 | ## Contact 25 | Harshavardhan R.
Dasari 26 | mail : harshavardhan.reddy.dasari@volvocars.com 27 | Ekta Samani 28 | mail : eusamani@gmail.com -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/backbone_bifpn.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Wed Dec 28 10:51:01 2022 3 | 4 | @author: Ekta 5 | """ 6 | 7 | 8 | import torch 9 | import torch.nn as nn 10 | from collections import OrderedDict 11 | import timm 12 | from typing import Callable 13 | from .bifpn import BiFpn 14 | #from functools import partial 15 | #from torchvision.ops.feature_pyramid_network import FeaturePyramidNetwork 16 | 17 | def get_feature_info(backbone): 18 | if isinstance(backbone.feature_info, Callable): 19 | # old accessor for timm versions <= 0.1.30, efficientnet and mobilenetv3 and related nets only 20 | feature_info = [dict(num_chs=f['num_chs'], reduction=f['reduction']) 21 | for i, f in enumerate(backbone.feature_info())] 22 | else: 23 | # new feature info accessor, timm >= 0.2, all models supported 24 | feature_info = backbone.feature_info.get_dicts(keys=['num_chs', 'reduction']) 25 | return feature_info 26 | 27 | 28 | 29 | class ResNet34BiFPN(nn.Module): 30 | def __init__(self): 31 | super(ResNet34BiFPN,self).__init__() 32 | self.backbone = timm.create_model( 33 | 'resnet34', features_only=True, 34 | out_indices= (1, 2, 3, 4), 35 | pretrained=True) 36 | feature_info = get_feature_info(self.backbone) 37 | self.fpn = BiFpn(feature_info) 38 | 39 | 40 | def forward(self, x): 41 | x = self.backbone(x) 42 | x = self.fpn(x) 43 | return x 44 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/mask_head_decoder_height.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue Jan 17 12:06:48 2023 5 | 6 | @author: Ekta 7 | """ 8 | 9 | import torch 10 | import torch.nn as nn 11 | from bblocks.mask_head_pansegformer import MaskHead 12 | 13 | class MaskHeadDecoder(nn.Module): 14 | def __init__(self): 15 | super().__init__() 16 | self.bev_h = 50 17 | self.bev_w = 50 18 | self.num_stuff_classes = 3 19 | self.embed_dims = 256 20 | self.stuff_query = nn.Embedding(self.num_stuff_classes, 21 | self.embed_dims * 2) 22 | self.stuff_mask_head = MaskHead(num_decoder_layers=3,self_attn=True) 23 | 24 | 25 | # self._reset_parameters() 26 | 27 | # def _reset_parameters(self): 28 | 29 | 30 | def forward(self,bev_embed): 31 | stuff_query, stuff_query_pos = torch.split(self.stuff_query.weight,self.embed_dims,dim=1) 32 | bs = bev_embed.shape[0] 33 | stuff_query_pos = stuff_query_pos.unsqueeze(0).expand(bs, -1, -1) 34 | stuff_query = stuff_query.unsqueeze(0).expand(bs, -1, -1) 35 | hw_lvl = torch.tensor([[self.bev_h, self.bev_w]], device=stuff_query.device) 36 | 37 | attn, masks, inter_query = self.stuff_mask_head(bev_embed,None,None,stuff_query, None, stuff_query_pos, 38 | hw_lvl) 39 | 40 | mask_stuff = attn.squeeze(-1) 41 | 42 | mask_stuff = mask_stuff.reshape(bs, self.num_stuff_classes, hw_lvl[0][0],hw_lvl[0][1]) 43 | 44 | inter_masks = [m.squeeze(-1).reshape(bs, self.num_stuff_classes, hw_lvl[0][0],hw_lvl[0][1]) for m in masks] 45 | 46 | return mask_stuff, inter_masks 47 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/mask_head_decoder_seg.py: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue Jan 17 12:06:48 2023 5 | 6 | @author: Ekta 7 | """ 8 | 9 | import torch 10 | import torch.nn as nn 11 | from bblocks.mask_head_pansegformer import MaskHead 12 | 13 | class MaskHeadDecoder(nn.Module): 14 | def __init__(self): 15 | super().__init__() 16 | self.bev_h = 50 17 | self.bev_w = 50 18 | self.num_stuff_classes = 5 19 | self.embed_dims = 256 20 | self.stuff_query = nn.Embedding(self.num_stuff_classes, 21 | self.embed_dims * 2) 22 | self.stuff_mask_head = MaskHead(num_decoder_layers=3,self_attn=True) 23 | 24 | 25 | # self._reset_parameters() 26 | 27 | # def _reset_parameters(self): 28 | 29 | 30 | def forward(self,bev_embed): 31 | stuff_query, stuff_query_pos = torch.split(self.stuff_query.weight,self.embed_dims,dim=1) 32 | bs = bev_embed.shape[0] 33 | stuff_query_pos = stuff_query_pos.unsqueeze(0).expand(bs, -1, -1) 34 | stuff_query = stuff_query.unsqueeze(0).expand(bs, -1, -1) 35 | hw_lvl = torch.tensor([[self.bev_h, self.bev_w]], device=stuff_query.device) 36 | 37 | attn, masks, inter_query = self.stuff_mask_head(bev_embed,None,None,stuff_query, None, stuff_query_pos, 38 | hw_lvl) 39 | 40 | mask_stuff = attn.squeeze(-1) 41 | 42 | mask_stuff = mask_stuff.reshape(bs, self.num_stuff_classes, hw_lvl[0][0],hw_lvl[0][1]) 43 | 44 | inter_masks = [m.squeeze(-1).reshape(bs, self.num_stuff_classes, hw_lvl[0][0],hw_lvl[0][1]) for m in masks] 45 | 46 | return mask_stuff, inter_masks 47 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/backbone.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Mon Aug 29 15:19:36 2022 5 | 6 | @author: Ekta 7 | """ 8 | ## From: https://pytorch.org/vision/stable/feature_extraction.html 9 | import torch 10 | from torchvision.models import resnet50,resnet34 ,ResNet50_Weights, resnet34, ResNet34_Weights 11 | from torchvision.models.feature_extraction import create_feature_extractor 12 | from torchvision.models.detection.backbone_utils import LastLevelMaxPool 13 | from torchvision.ops.feature_pyramid_network import FeaturePyramidNetwork 14 | 15 | 16 | 17 | class Resnet34WithFPN(torch.nn.Module): 18 | def __init__(self): 19 | super(Resnet34WithFPN, self).__init__() 20 | # Get a resnet50 backbone 21 | #m = resnet50() 22 | #m = resnet50(weights=ResNet50_Weights.DEFAULT) 23 | m = resnet34(weights=ResNet34_Weights.DEFAULT) 24 | #m = resnet34() 25 | # Extract 4 main layers (note: MaskRCNN needs this particular name 26 | # mapping for return nodes) 27 | # print(resnet34) 28 | self.body = create_feature_extractor( 29 | m, return_nodes={f'layer{k}': str(v) 30 | for v, k in enumerate([1, 2, 3, 4])}) 31 | inp = torch.randn(1, 3, 540, 640) 32 | with torch.no_grad(): 33 | out = self.body(inp) 34 | in_channels_list = [o.shape[1] for o in out.values()] 35 | #print(in_channels_list) 36 | # # Build FPN 37 | self.out_channels = 256 38 | # self.fpn = FeaturePyramidNetwork( 39 | # in_channels_list, out_channels=self.out_channels, 40 | # extra_blocks=LastLevelMaxPool()) 41 | self.fpn = FeaturePyramidNetwork( 42 | in_channels_list, out_channels=self.out_channels) 43 | def forward(self, x): 44 | x = self.body(x) 45 | #print(x.keys(),x['0'].shape) 46 | x = self.fpn(x) 47 | return x 48 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/losses/smoothness.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Wed Nov 23 16:45:22 2022 5 | 6 | @author: Ekta 7 | """ 8 | 9 | 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | 14 | class MonodepthLoss(nn.modules.Module): 15 | def __init__(self): 16 | super(MonodepthLoss, self).__init__() 17 | 18 | def gradient_x(self,img): 19 | # Pad input to keep output size consistent 20 | img = F.pad(img, (0, 1, 0, 0), mode="replicate") 21 | gx = img[:, :, :, :-1] - img[:, :, :, 1:] # NCHW 22 | #print(gx) 23 | return gx 24 | 25 | def gradient_y(self,img): 26 | # Pad input to keep output size consistent 27 | img = F.pad(img, (0, 0, 0, 1), mode="replicate") 28 | gy = img[:, :, :-1, :] - img[:, :, 1:, :] # NCHW 29 | return gy 30 | 31 | def disp_smoothness_fn(self,disp, img): 32 | disp_gradients_x = self.gradient_x(disp) 33 | disp_gradients_y = self.gradient_y(disp) 34 | 35 | #print(torch.unique(torch.isnan(disp_gradients_x))) 36 | #print(torch.unique(torch.isnan(disp_gradients_y))) 37 | 38 | image_gradients_x = self.gradient_x(img) 39 | image_gradients_y = self.gradient_y(img) 40 | 41 | weight_x = torch.exp(-torch.mean(torch.abs(image_gradients_x), 1, keepdim=True)) 42 | weight_y = torch.exp(-torch.mean(torch.abs(image_gradients_y), 1, keepdim=True)) 43 | 44 | smoothness_x = disp_gradients_x * weight_x 45 | smoothness_y = disp_gradients_y * weight_y 46 | 47 | 48 | return torch.abs(smoothness_x) + torch.abs(smoothness_y) 49 | 50 | 51 | def forward(self, height, seg): 52 | disp = 1/height 53 | #print(torch.unique(torch.isnan(disp))) 54 | disp_smoothness = self.disp_smoothness_fn(disp, seg) 55 | 56 | loss = torch.mean(torch.abs(disp_smoothness)) 57 | 58 | return loss 59 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/model_f2bev_conv_st_seg.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Jan 5 14:35:41 2023 5 | 6 | @author: Ekta 7 | """ 8 | 9 | 10 | import torch 11 | import torch.nn as nn 12 | 13 | import numpy as np 14 | 15 | from bblocks.backbone_bifpn import ResNet34BiFPN 16 | from bblocks.encoder_height import EncoderFLCW 17 | 18 | from bblocks.cnndecoder import DecoderCup, SegmentationHead, HeightHead, HeightMulticlassHead 19 | 20 | class FisheyeBEVFormer(nn.Module): 21 | def __init__(self): 22 | super(FisheyeBEVFormer, self).__init__() 23 | self.backbone = ResNet34BiFPN() 24 | 25 | self.encoder = EncoderFLCW() 26 | self.multiscale = True#False 27 | self.decoder = DecoderCup() 28 | self.segmentation_head = SegmentationHead() 29 | 30 | def forward(self,front,left,rear,right,can_buses,prev_bev=None): 31 | 32 | f_f = self.backbone(front) 33 | f_l = self.backbone(left) 34 | f_re = self.backbone(rear) 35 | f_r = self.backbone(right) 36 | 37 | if self.multiscale: 38 | level0 = torch.cat((f_f[0].unsqueeze(0),f_l[0].unsqueeze(0),f_re[0].unsqueeze(0),f_r[0].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 39 | level1 = torch.cat((f_f[1].unsqueeze(0),f_l[1].unsqueeze(0),f_re[1].unsqueeze(0),f_r[1].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 40 | level2 = torch.cat((f_f[2].unsqueeze(0),f_l[2].unsqueeze(0),f_re[2].unsqueeze(0),f_r[2].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 41 | level3 = torch.cat((f_f[3].unsqueeze(0),f_l[3].unsqueeze(0),f_re[3].unsqueeze(0),f_r[3].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 42 | mlvl_feats = 
[level0,level1,level2,level3] 43 | else: 44 | 45 | level0 = torch.cat((f_f[0].unsqueeze(0),f_l[0].unsqueeze(0),f_re[0].unsqueeze(0),f_r[0].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 46 | mlvl_feats = level0.unsqueeze(0) 47 | bevfeatures = self.encoder(mlvl_feats,can_buses,prev_bev) 48 | 49 | decoded = self.decoder(bevfeatures) 50 | 51 | output = self.segmentation_head(decoded) 52 | 53 | return output,bevfeatures 54 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/model_f2bev_conv_st_height.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Jan 5 14:35:41 2023 5 | 6 | @author: Ekta 7 | """ 8 | 9 | 10 | import torch 11 | import torch.nn as nn 12 | 13 | import numpy as np 14 | 15 | from bblocks.backbone_bifpn import ResNet34BiFPN 16 | from bblocks.encoder_height import EncoderFLCW 17 | 18 | from bblocks.cnndecoder import DecoderCup, SegmentationHead, HeightHead, HeightMulticlassHead 19 | 20 | class FisheyeBEVFormer(nn.Module): 21 | def __init__(self): 22 | super(FisheyeBEVFormer, self).__init__() 23 | self.backbone = ResNet34BiFPN() 24 | self.encoder = EncoderFLCW() 25 | self.multiscale = True#False 26 | self.decoder = DecoderCup() 27 | self.height_multiclass_head = HeightMulticlassHead() 28 | 29 | def forward(self,front,left,rear,right,can_buses,prev_bev=None): 30 | 31 | 32 | f_f = self.backbone(front) 33 | f_l = self.backbone(left) 34 | f_re = self.backbone(rear) 35 | f_r = self.backbone(right) 36 | 37 | if self.multiscale: 38 | level0 = torch.cat((f_f[0].unsqueeze(0),f_l[0].unsqueeze(0),f_re[0].unsqueeze(0),f_r[0].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 39 | level1 = torch.cat((f_f[1].unsqueeze(0),f_l[1].unsqueeze(0),f_re[1].unsqueeze(0),f_r[1].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 40 | level2 = torch.cat((f_f[2].unsqueeze(0),f_l[2].unsqueeze(0),f_re[2].unsqueeze(0),f_r[2].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 41 | level3 = torch.cat((f_f[3].unsqueeze(0),f_l[3].unsqueeze(0),f_re[3].unsqueeze(0),f_r[3].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 42 | mlvl_feats = [level0,level1,level2,level3] 43 | else: 44 | 45 | level0 = torch.cat((f_f[0].unsqueeze(0),f_l[0].unsqueeze(0),f_re[0].unsqueeze(0),f_r[0].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 46 | mlvl_feats = level0.unsqueeze(0) 47 | bevfeatures = self.encoder(mlvl_feats,can_buses,prev_bev) 48 | 49 | decoded = self.decoder(bevfeatures) 50 | output = self.height_multiclass_head(decoded) 51 | 52 | return output,bevfeatures 53 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/model_f2bev_attn_st_seg.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Mon Jan 9 17:07:00 2023 5 | 6 | @author: Ekta 7 | """ 8 | 9 | import torch 10 | import torch.nn as nn 11 | 12 | from bblocks.backbone import Resnet34WithFPN 13 | 14 | from bblocks.encoder_height import EncoderFLCW 15 | from bblocks.mask_head_decoder_seg import MaskHeadDecoder 16 | 17 | class FisheyeBEVFormer(nn.Module): 18 | def __init__(self): 19 | super(FisheyeBEVFormer, self).__init__() 20 | self.backbone = Resnet34WithFPN() 21 | 22 | self.encoder = EncoderFLCW() 23 | self.multiscale = True #False 24 | self.decoder = MaskHeadDecoder() 25 | self.deep_supervision = True 26 | 27 | def forward(self,front,left,rear,right,can_buses,prev_bev=None): 28 | 29 | 30 | f_f = 
self.backbone(front) 31 | f_l = self.backbone(left) 32 | f_re = self.backbone(rear) 33 | f_r = self.backbone(right) 34 | 35 | if self.multiscale: 36 | level0 = torch.cat((f_f['0'].unsqueeze(0),f_l['0'].unsqueeze(0),f_re['0'].unsqueeze(0),f_r['0'].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 37 | level1 = torch.cat((f_f['1'].unsqueeze(0),f_l['1'].unsqueeze(0),f_re['1'].unsqueeze(0),f_r['1'].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 38 | level2 = torch.cat((f_f['2'].unsqueeze(0),f_l['2'].unsqueeze(0),f_re['2'].unsqueeze(0),f_r['2'].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 39 | level3 = torch.cat((f_f['3'].unsqueeze(0),f_l['3'].unsqueeze(0),f_re['3'].unsqueeze(0),f_r['3'].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 40 | mlvl_feats = [level0,level1,level2,level3] 41 | 42 | else: 43 | 44 | level0 = torch.cat((f_f[0].unsqueeze(0),f_l[0].unsqueeze(0),f_re[0].unsqueeze(0),f_r[0].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 45 | mlvl_feats = level0.unsqueeze(0) 46 | 47 | bevfeatures = self.encoder(mlvl_feats,can_buses,prev_bev) 48 | output, inter_masks = self.decoder(bevfeatures) 49 | 50 | if self.deep_supervision: 51 | return output, inter_masks, bevfeatures 52 | else: 53 | return output,bevfeatures 54 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/model_f2bev_attn_st_height.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Mon Jan 9 17:07:00 2023 5 | 6 | @author: Ekta 7 | """ 8 | 9 | import torch 10 | import torch.nn as nn 11 | 12 | from bblocks.backbone import Resnet34WithFPN 13 | 14 | from bblocks.encoder_height import EncoderFLCW 15 | from bblocks.mask_head_decoder_height import MaskHeadDecoder 16 | 17 | class FisheyeBEVFormer(nn.Module): 18 | def __init__(self): 19 | super(FisheyeBEVFormer, self).__init__() 20 | self.backbone = Resnet34WithFPN() 21 | 22 | self.encoder = EncoderFLCW() 23 | self.multiscale = True #False 24 | self.decoder = MaskHeadDecoder() 25 | self.deep_supervision = True 26 | 27 | def forward(self,front,left,rear,right,can_buses,prev_bev=None): 28 | 29 | 30 | f_f = self.backbone(front) 31 | f_l = self.backbone(left) 32 | f_re = self.backbone(rear) 33 | f_r = self.backbone(right) 34 | 35 | if self.multiscale: 36 | level0 = torch.cat((f_f['0'].unsqueeze(0),f_l['0'].unsqueeze(0),f_re['0'].unsqueeze(0),f_r['0'].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 37 | level1 = torch.cat((f_f['1'].unsqueeze(0),f_l['1'].unsqueeze(0),f_re['1'].unsqueeze(0),f_r['1'].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 38 | level2 = torch.cat((f_f['2'].unsqueeze(0),f_l['2'].unsqueeze(0),f_re['2'].unsqueeze(0),f_r['2'].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 39 | level3 = torch.cat((f_f['3'].unsqueeze(0),f_l['3'].unsqueeze(0),f_re['3'].unsqueeze(0),f_r['3'].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 40 | mlvl_feats = [level0,level1,level2,level3] 41 | 42 | else: 43 | 44 | level0 = torch.cat((f_f[0].unsqueeze(0),f_l[0].unsqueeze(0),f_re[0].unsqueeze(0),f_r[0].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 45 | mlvl_feats = level0.unsqueeze(0) 46 | 47 | bevfeatures = self.encoder(mlvl_feats,can_buses,prev_bev) 48 | output, inter_masks = self.decoder(bevfeatures) 49 | 50 | if self.deep_supervision: 51 | return output, inter_masks, bevfeatures 52 | else: 53 | return output,bevfeatures 54 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/model_f2bev_conv_mt.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Fri Jan 6 10:45:19 2023 5 | 6 | @author: Ekta 7 | """ 8 | 9 | import torch 10 | import torch.nn as nn 11 | 12 | import numpy as np 13 | 14 | from bblocks.backbone_bifpn import ResNet34BiFPN 15 | 16 | from bblocks.encoder_height import EncoderFLCW 17 | 18 | 19 | from bblocks.cnndecoder import DecoderCup, SegmentationHead, HeightHead, HeightMulticlassHead 20 | 21 | class FisheyeBEVFormer(nn.Module): 22 | def __init__(self): 23 | super(FisheyeBEVFormer, self).__init__() 24 | self.backbone = ResNet34BiFPN() 25 | 26 | self.encoder = EncoderFLCW() 27 | self.multiscale = True #False 28 | self.decoder = DecoderCup() 29 | self.segmentation_head = SegmentationHead(out_channels=5) 30 | self.height_multiclass_head = HeightMulticlassHead(out_channels=3) 31 | 32 | def forward(self,front,left,rear,right,can_buses,prev_bev=None): 33 | 34 | f_f = self.backbone(front) 35 | f_l = self.backbone(left) 36 | f_re = self.backbone(rear) 37 | f_r = self.backbone(right) 38 | 39 | if self.multiscale: 40 | level0 = torch.cat((f_f[0].unsqueeze(0),f_l[0].unsqueeze(0),f_re[0].unsqueeze(0),f_r[0].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 41 | level1 = torch.cat((f_f[1].unsqueeze(0),f_l[1].unsqueeze(0),f_re[1].unsqueeze(0),f_r[1].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 42 | level2 = torch.cat((f_f[2].unsqueeze(0),f_l[2].unsqueeze(0),f_re[2].unsqueeze(0),f_r[2].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 43 | level3 = torch.cat((f_f[3].unsqueeze(0),f_l[3].unsqueeze(0),f_re[3].unsqueeze(0),f_r[3].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 44 | mlvl_feats = [level0,level1,level2,level3] 45 | else: 46 | 47 | 48 | level0 = torch.cat((f_f[0].unsqueeze(0),f_l[0].unsqueeze(0),f_re[0].unsqueeze(0),f_r[0].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 49 | mlvl_feats = level0.unsqueeze(0) 50 | 51 | bevfeatures = self.encoder(mlvl_feats,can_buses,prev_bev) 52 | 53 | decoded = self.decoder(bevfeatures) 54 | 55 | soutput = self.segmentation_head(decoded) 56 | houtput = self.height_multiclass_head(decoded) 57 | 58 | return soutput,houtput,bevfeatures 59 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/model_f2bev_attn_mt.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Feb 23 12:03:40 2023 5 | 6 | @author: Ekta 7 | """ 8 | 9 | import torch 10 | import torch.nn as nn 11 | 12 | from bblocks.backbone import Resnet34WithFPN 13 | 14 | from bblocks.encoder_height import EncoderFLCW 15 | from bblocks.mask_head_decoder_htseg import MaskHeadDecoderHt, MaskHeadDecoderSeg 16 | 17 | 18 | class FisheyeBEVFormer(nn.Module): 19 | def __init__(self): 20 | super(FisheyeBEVFormer, self).__init__() 21 | self.backbone = Resnet34WithFPN() 22 | 23 | self.encoder = EncoderFLCW() 24 | self.multiscale = True #False 25 | self.segdecoder = MaskHeadDecoderSeg() 26 | self.htdecoder = MaskHeadDecoderHt() 27 | self.deep_supervision = True 28 | 29 | def forward(self,front,left,rear,right,can_buses,prev_bev=None): 30 | 31 | 32 | f_f = self.backbone(front) 33 | f_l = self.backbone(left) 34 | f_re = self.backbone(rear) 35 | f_r = self.backbone(right) 36 | 37 | if self.multiscale: 38 | level0 = torch.cat((f_f['0'].unsqueeze(0),f_l['0'].unsqueeze(0),f_re['0'].unsqueeze(0),f_r['0'].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 39 | level1 = 
torch.cat((f_f['1'].unsqueeze(0),f_l['1'].unsqueeze(0),f_re['1'].unsqueeze(0),f_r['1'].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 40 | level2 = torch.cat((f_f['2'].unsqueeze(0),f_l['2'].unsqueeze(0),f_re['2'].unsqueeze(0),f_r['2'].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 41 | level3 = torch.cat((f_f['3'].unsqueeze(0),f_l['3'].unsqueeze(0),f_re['3'].unsqueeze(0),f_r['3'].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 42 | mlvl_feats = [level0,level1,level2,level3] 43 | 44 | else: 45 | 46 | level0 = torch.cat((f_f[0].unsqueeze(0),f_l[0].unsqueeze(0),f_re[0].unsqueeze(0),f_r[0].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 47 | mlvl_feats = level0.unsqueeze(0) 48 | 49 | bevfeatures = self.encoder(mlvl_feats,can_buses,prev_bev) 50 | seg_output, seg_inter_masks = self.segdecoder(bevfeatures) 51 | ht_output, ht_inter_masks = self.htdecoder(bevfeatures) 52 | 53 | 54 | if self.deep_supervision: 55 | return seg_output, seg_inter_masks, ht_output, ht_inter_masks, bevfeatures 56 | else: 57 | return seg_output, ht_output, bevfeatures 58 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/bevformer_block.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue Aug 30 16:47:35 2022 5 | 6 | @author: Ekta 7 | """ 8 | 9 | import torch 10 | import torch.nn as nn 11 | from bblocks.spatial_cross_attention import SpatialCrossAttention 12 | from bblocks.temporal_self_attention import TemporalSelfAttention 13 | from bblocks.ffn import FFN 14 | 15 | class BEVFormerBlock(nn.Module): 16 | def __init__(self): 17 | super(BEVFormerBlock, self).__init__() 18 | self.norm = nn.LayerNorm((256,), eps=1e-05, elementwise_affine=True) 19 | self.bev_h = 50 20 | self.bev_w = 50 21 | self.ffn = FFN() 22 | self.sca = SpatialCrossAttention() 23 | self.tsa = TemporalSelfAttention() 24 | 25 | def forward(self,bev_query,key,value,bev_pos,spatial_shapes,level_start_index,reference_points_cam,bev_mask,ref_2d,prev_bev=None): 26 | bs = bev_query.size(0) 27 | #print(spatial_shapes) 28 | #print(reference_points_cam.shape) 29 | 30 | # x = self.sca(query = bev_query, key = key, value = value, query_pos = bev_pos, 31 | # spatial_shapes = spatial_shapes, level_start_index = level_start_index, 32 | # reference_points_cam = reference_points_cam,bev_mask = bev_mask ) 33 | 34 | batch_reference_points_cam = reference_points_cam.repeat(1,bs,1,1,1) 35 | batch_bev_mask = bev_mask.repeat(1,bs,1,1) 36 | 37 | ##add temporal here 38 | x = self.tsa(query = bev_query, key = prev_bev, value = prev_bev, query_pos = bev_pos, 39 | reference_points = ref_2d, spatial_shapes=torch.tensor([[self.bev_h, self.bev_w]], device=bev_query.device), 40 | level_start_index=torch.tensor([0], device=bev_query.device)) 41 | x = self.norm(x) 42 | x = self.sca(query = x, key = key, value = value, query_pos = bev_pos, 43 | spatial_shapes = spatial_shapes, level_start_index = level_start_index, 44 | reference_points_cam = batch_reference_points_cam,bev_mask = batch_bev_mask ) 45 | #print(x.shape) 46 | x = self.norm(x) 47 | x = self.ffn(x) 48 | x = self.norm(x) 49 | 50 | 51 | return x 52 | 53 | 54 | # x = self.sca(query = bev_query, key = key, value = value, residual = None, query_pos = bev_pos, 55 | # key_padding_mask = None, reference_points = None, spatial_shapes = spatial_shapes, level_start_index = level_start_index, 56 | # reference_points_cam = reference_points_cam,bev_mask =bev_mask ) 57 | 58 | 59 | 
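```BEVFormerBlock.forward``` above takes, among other inputs, the 2D BEV reference points ```ref_2d``` that its temporal self-attention consumes over the 50x50 BEV query grid. How that grid is built is not shown in this file, so the following is a minimal, self-contained sketch of the conventional BEVFormer-style construction (normalized cell-center coordinates); it is a sketch under that assumption, not code from this repository.

```python
# Illustrative sketch (not from the repository): build normalized 2D reference
# points for a bev_h x bev_w query grid, shaped (bs, bev_h*bev_w, 1, 2) with
# (x, y) in [0, 1], as typically fed to BEVFormer-style temporal self-attention.
import torch


def get_bev_reference_points_2d(bev_h, bev_w, bs=1, device="cpu", dtype=torch.float32):
    ref_y, ref_x = torch.meshgrid(
        torch.linspace(0.5, bev_h - 0.5, bev_h, dtype=dtype, device=device),
        torch.linspace(0.5, bev_w - 0.5, bev_w, dtype=dtype, device=device),
        indexing="ij",
    )
    ref_y = ref_y.reshape(-1) / bev_h  # normalize row coordinates to [0, 1]
    ref_x = ref_x.reshape(-1) / bev_w  # normalize column coordinates to [0, 1]
    ref_2d = torch.stack((ref_x, ref_y), dim=-1)       # (bev_h*bev_w, 2)
    return ref_2d[None].repeat(bs, 1, 1).unsqueeze(2)  # (bs, bev_h*bev_w, 1, 2)


ref_2d = get_bev_reference_points_2d(50, 50, bs=2)  # torch.Size([2, 2500, 1, 2])
```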
-------------------------------------------------------------------------------- /FB-SSEM_dataset/README.md: -------------------------------------------------------------------------------- 1 | # FB-SSEM-dataset 2 | 3 | The FB-SSEM dataset is a synthetic dataset consisting of surround-view fisheye camera images and BEV maps from simulated sequences of ego car motion. 4 | 5 | ## About 6 | We use the Unity game engine to simulate a parking lot environment for our dataset. The parking lot consists of parked cars/trucks, buses, electric vehicle (EV) charging stations of varying dimensions, and large containers of varying heights (on the boundaries). All the vehicles in the parking lot, except the ego car, are static. For the ego car, we use a forward-looking wide camera to simulate its four surround-view fisheye cameras. Our dataset consists of 20 sequences of ego car motion through the parking lot environment. Each sequence represents a different parking lot setup, i.e., different placement of all the vehicles in the lot and ground textures. Each sequence consists of 1000 samples; each sample consists of RGB images from the four car-mounted fisheye cameras (i.e., front, left, rear, and right cameras) and the BEV camera. Corresponding semantic segmentation maps for all five views and normalized height maps for the BEV are also generated. In addition, ego-motion information (3D rotation and translation) corresponding to every sample is obtained. We consider five semantic classes for the BEV segmentation map: car (ego car and parked cars/trucks), bus, EV charger, ground, and a non-driveable area. 7 | 8 | [F2BEV: Bird's Eye View Generation from Surround-View Fisheye Camera Images for Automated Driving](https://arxiv.org/abs/2303.03651) 9 | 10 | ## Dataset 11 | Links to download the FB-SSEM dataset are below. There are 12000 files per image sequence, as described [here](https://fb-ssem.s3.us-west-2.amazonaws.com/README.pdf). 12 | 13 | * [Link to download data](https://fb-ssem.s3.us-west-2.amazonaws.com/index.html) 14 | 15 | ## Camera calibration parameters 16 | * [Camera intrinsics](https://fb-ssem.s3.us-west-2.amazonaws.com/CameraCalibrationParameters/camera_intrinsics.yml) 17 | * [Camera positions for extrinsics](https://fb-ssem.s3.us-west-2.amazonaws.com/CameraCalibrationParameters/camera_positions_for_extrinsics.txt) 18 | ## Legal notice 19 | * Volvo Cars Technology USA LLC is the sole and exclusive owner of this dataset. 20 | * The dataset is licensed under [CC BY-SA 4.0 21 | ](https://creativecommons.org/licenses/by-sa/4.0/legalcode.en) 22 | * Any public use, distribution, or display of this dataset must contain this notice in its entirety. 23 | 24 | ## Privacy 25 | Volvo Cars takes reasonable care to remove or hide personal data. 26 | 27 | ## Public Distribution 28 | When using the FB-SSEM dataset for public distribution, we would be glad if you cite our [paper](https://arxiv.org/abs/2303.03651). Please cite the following: 29 | 30 | ``` 31 | @article{samani2023f2bev, 32 | title={F2BEV: Bird's Eye View Generation from Surround-View Fisheye Camera Images for Automated Driving}, 33 | author={Samani, Ekta U and Tao, Feng and Dasari, Harshavardhan R and Ding, Sihao and Banerjee, Ashis G}, 34 | journal={arXiv preprint arXiv:2303.03651}, 35 | year={2023}} 36 | ``` 37 | 38 | ## Contact 39 | Harshavardhan R.
Dasari 40 | mail : harshavardhan.reddy.dasari@volvocars.com 41 | Ekta Samani 42 | mail : eusamani@gmail.com 43 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/mask_head_decoder_htseg.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Feb 23 12:19:10 2023 5 | 6 | @author: Ekta 7 | """ 8 | 9 | 10 | import torch 11 | import torch.nn as nn 12 | from bblocks.mask_head_pansegformer import MaskHead 13 | 14 | class MaskHeadDecoderSeg(nn.Module): 15 | def __init__(self): 16 | super().__init__() 17 | self.bev_h = 50 18 | self.bev_w = 50 19 | self.num_stuff_classes = 5 20 | self.embed_dims = 256 21 | self.stuff_query = nn.Embedding(self.num_stuff_classes, 22 | self.embed_dims * 2) 23 | self.stuff_mask_head = MaskHead(num_decoder_layers=3,self_attn=True) 24 | 25 | 26 | # self._reset_parameters() 27 | 28 | # def _reset_parameters(self): 29 | 30 | 31 | def forward(self,bev_embed): 32 | stuff_query, stuff_query_pos = torch.split(self.stuff_query.weight,self.embed_dims,dim=1) 33 | bs = bev_embed.shape[0] 34 | stuff_query_pos = stuff_query_pos.unsqueeze(0).expand(bs, -1, -1) 35 | stuff_query = stuff_query.unsqueeze(0).expand(bs, -1, -1) 36 | hw_lvl = torch.tensor([[self.bev_h, self.bev_w]], device=stuff_query.device) 37 | 38 | attn, masks, inter_query = self.stuff_mask_head(bev_embed,None,None,stuff_query, None, stuff_query_pos, 39 | hw_lvl) 40 | 41 | mask_stuff = attn.squeeze(-1) 42 | 43 | mask_stuff = mask_stuff.reshape(bs, self.num_stuff_classes, hw_lvl[0][0],hw_lvl[0][1]) 44 | 45 | inter_masks = [m.squeeze(-1).reshape(bs, self.num_stuff_classes, hw_lvl[0][0],hw_lvl[0][1]) for m in masks] 46 | 47 | return mask_stuff, inter_masks 48 | 49 | 50 | class MaskHeadDecoderHt(nn.Module): 51 | def __init__(self): 52 | super().__init__() 53 | self.bev_h = 50 54 | self.bev_w = 50 55 | self.num_stuff_classes = 3 56 | self.embed_dims = 256 57 | self.stuff_query = nn.Embedding(self.num_stuff_classes, 58 | self.embed_dims * 2) 59 | self.stuff_mask_head = MaskHead(num_decoder_layers=3,self_attn=True) 60 | 61 | 62 | # self._reset_parameters() 63 | 64 | # def _reset_parameters(self): 65 | 66 | 67 | def forward(self,bev_embed): 68 | stuff_query, stuff_query_pos = torch.split(self.stuff_query.weight,self.embed_dims,dim=1) 69 | bs = bev_embed.shape[0] 70 | stuff_query_pos = stuff_query_pos.unsqueeze(0).expand(bs, -1, -1) 71 | stuff_query = stuff_query.unsqueeze(0).expand(bs, -1, -1) 72 | hw_lvl = torch.tensor([[self.bev_h, self.bev_w]], device=stuff_query.device) 73 | 74 | attn, masks, inter_query = self.stuff_mask_head(bev_embed,None,None,stuff_query, None, stuff_query_pos, 75 | hw_lvl) 76 | 77 | mask_stuff = attn.squeeze(-1) 78 | 79 | mask_stuff = mask_stuff.reshape(bs, self.num_stuff_classes, hw_lvl[0][0],hw_lvl[0][1]) 80 | 81 | inter_masks = [m.squeeze(-1).reshape(bs, self.num_stuff_classes, hw_lvl[0][0],hw_lvl[0][1]) for m in masks] 82 | 83 | return mask_stuff, inter_masks 84 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/losses/focal.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | from typing import Optional 3 | import torch 4 | from torch import Tensor, nn 5 | from torch.nn.modules.loss import _Loss 6 | 7 | from .functional import focal_loss_with_logits, softmax_focal_loss_with_logits 8 | 9 | 10 | #__all__ = 
["CrossEntropyFocalLoss", "BinaryFocalLoss"] 11 | 12 | 13 | class BinaryFocalLoss(nn.Module): 14 | def __init__( 15 | self, 16 | alpha: Optional[float] = None, 17 | gamma: float = 2.0, 18 | ignore_index: Optional[int] = None, 19 | reduction: str = "mean", 20 | normalized: bool = False, 21 | reduced_threshold: Optional[float] = None, 22 | activation: str = "sigmoid", 23 | softmax_dim: Optional[int] = None, 24 | ): 25 | """ 26 | :param alpha: Prior probability of having positive value in target. 27 | :param gamma: Power factor for dampening weight (focal strength). 28 | :param ignore_index: If not None, targets may contain values to be ignored. 29 | Target values equal to ignore_index will be ignored from loss computation. 30 | :param reduced: Switch to reduced focal loss. Note, when using this mode you should use `reduction="sum"`. 31 | :param activation: Either `sigmoid` or `softmax`. If `softmax` is used, `softmax_dim` must be also specified. 32 | """ 33 | super().__init__() 34 | self.focal_loss_fn = partial( 35 | focal_loss_with_logits, 36 | alpha=alpha, 37 | gamma=gamma, 38 | reduced_threshold=reduced_threshold, 39 | reduction=reduction, 40 | normalized=normalized, 41 | ignore_index=ignore_index, 42 | activation=activation, 43 | softmax_dim=softmax_dim, 44 | ) 45 | 46 | def forward(self, inputs: Tensor, targets: Tensor) -> Tensor: 47 | """Compute focal loss for binary classification problem.""" 48 | loss = self.focal_loss_fn(inputs, targets) 49 | return loss 50 | 51 | 52 | class CrossEntropyFocalLoss(nn.Module): 53 | """ 54 | Focal loss for multi-class problem. It uses softmax to compute the focal term instead of sigmoid as in 55 | the original paper. This loss expects target labels to have one dimension less (like in nn.CrossEntropyLoss). 56 | """ 57 | 58 | def __init__( 59 | self, 60 | gamma: float = 2.0, 61 | reduction: str = "mean", 62 | normalized: bool = False, 63 | reduced_threshold: Optional[float] = None, 64 | ignore_index: int = -100, 65 | ): 66 | """ 67 | :param alpha: 68 | :param gamma: 69 | :param ignore_index: If not None, targets with given index are ignored 70 | :param reduced_threshold: A threshold factor for computing reduced focal loss 71 | """ 72 | super().__init__() 73 | self.gamma = gamma 74 | self.reduction = reduction 75 | self.reduced_threshold = reduced_threshold 76 | self.normalized = normalized 77 | self.ignore_index = ignore_index 78 | 79 | def forward(self, inputs: Tensor, targets: Tensor) -> Tensor: 80 | return softmax_focal_loss_with_logits( 81 | inputs, 82 | targets, 83 | gamma=self.gamma, 84 | reduction=self.reduction, 85 | normalized=self.normalized, 86 | reduced_threshold=self.reduced_threshold, 87 | ignore_index=self.ignore_index, 88 | ) 89 | 90 | 91 | -------------------------------------------------------------------------------- /F2BEV_code/README.md: -------------------------------------------------------------------------------- 1 | # F2BEV: Bird's Eye View Generation from Surround-View Fisheye Camera Images for Automated Driving 2 | 3 | 4 | ## Requirements 5 | 6 | Package requirements are included in the ```f2bev_conda_env.yml``` file. A conda virtual environment can be created using this file as follows: 7 | 8 | ```bash 9 | conda env create --file f2bev_conda_env.yml 10 | ``` 11 | 12 | 13 | ## Data 14 | 15 | Download the FB-SSEM dataset from [here](https://github.com/volvo-cars/FB-SSEM-dataset). In particular, download ```.zip``` files corresponding to all twenty sequences, unzip them, and place them in a single folder named ```data```.
This folder must be placed inside the ```F2BEV``` folder generated from cloning this repository. 16 | 17 | ## Compute Reference Points 18 | 19 | A part of the reference point computation for the distortion-aware spatial cross attention in the network can be done offline to save on training time. 20 | 21 | Run the following commands from within the ```F2BEV``` folder. 22 | 23 | ```bash 24 | cd pre_computation 25 | python3 computeNormalizedReferencePoints.py 26 | cd ../ 27 | ``` 28 | The outputs of this code are already placed in the ```unity_data``` folder inside the ```pre_computation``` folder for convenience. 29 | 30 | ## Training and Testing F2BEV 31 | 32 | Use the ```train_*.py``` scripts to train an F2BEV network and the ```test_*.py``` scripts to test a trained F2BEV network. 33 | 34 | For example, to train an F2BEV network to generate (only) a discretized BEV height map using an attention-based task-specific head, where the height of every pixel is classified into one of three classes (below car bumper, above car height, or car height), run the following. The training log will be saved in ```traininglog_f2bev_attn_st_height.out```. 35 | 36 | ```bash 37 | nohup python3 -u train_f2bev_attn_st_height.py > traininglog_f2bev_attn_st_height.out & 38 | ``` 39 | 40 | To test a trained F2BEV network, run the corresponding ```test_*.py``` script. 41 | For example, to test the model trained above, run the following. 42 | 43 | ```bash 44 | python3 test_f2bev_attn_st_height.py 45 | ``` 46 | 47 | Training and test scripts for all the model types discussed in the [F2BEV paper](https://arxiv.org/abs/2303.03651) are included in this repository. They are as follows: 48 | 49 | 50 | | Model | Description | 51 | | ------ | ------ | 52 | | f2bev_attn_st_height | To generate discretized BEV height maps (alone) using an attention-based task-specific head, where the height of every pixel is classified into one of three classes | 53 | | f2bev_attn_st_seg | To generate BEV semantic segmentation maps (alone) using an attention-based task-specific head | 54 | | f2bev_attn_mt | To generate discretized BEV height maps and BEV semantic segmentation maps simultaneously using attention-based task-specific heads | 55 | | f2bev_conv_st_height | To generate discretized BEV height maps (alone) using a convolution-based task-specific head, where the height of every pixel is classified into one of three classes | 56 | | f2bev_conv_st_seg | To generate BEV semantic segmentation maps (alone) using a convolution-based task-specific head | 57 | | f2bev_conv_mt | To generate discretized BEV height maps and BEV semantic segmentation maps simultaneously using convolution-based task-specific heads | 58 | 59 | ## Citation 60 | If you find our code beneficial, please cite the [F2BEV paper](https://arxiv.org/abs/2303.03651). 61 | 62 | ```bash 63 | @article{samani2023f2bev, 64 | title={F2BEV: Bird's Eye View Generation from Surround-View Fisheye Camera Images for Automated Driving}, 65 | author={Samani, Ekta U and Tao, Feng and Dasari, Harshavardhan R and Ding, Sihao and Banerjee, Ashis G}, 66 | journal={arXiv preprint arXiv:2303.03651}, 67 | year={2023}} 68 | ``` 69 | 70 | 71 | ## Contact 72 | Harshavardhan R.
Dasari 73 | mail : harshavardhan.reddy.dasari@volvocars.com 74 | Ekta Samani 75 | mail : eusamani@gmail.com 76 | 77 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/positional_encoding.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue Aug 30 15:32:38 2022 5 | 6 | @author: Ekta 7 | """ 8 | 9 | 10 | import torch 11 | import torch.nn as nn 12 | 13 | 14 | ### from: https://mmdetection.readthedocs.io/en/latest/_modules/mmdet/models/utils/positional_encoding.html 15 | 16 | class LearnedPositionalEncoding(nn.Module): 17 | """Position embedding with learnable embedding weights. 18 | 19 | Args: 20 | num_feats (int): The feature dimension for each position 21 | along x-axis or y-axis. The final returned dimension for 22 | each position is 2 times of this value. 23 | row_num_embed (int, optional): The dictionary size of row embeddings. 24 | Default 50. 25 | col_num_embed (int, optional): The dictionary size of col embeddings. 26 | Default 50. 27 | init_cfg (dict or list[dict], optional): Initialization config dict. 28 | """ 29 | 30 | def __init__(self, 31 | num_feats, 32 | row_num_embed=50, 33 | col_num_embed=50): 34 | super(LearnedPositionalEncoding, self).__init__() 35 | self.row_embed = nn.Embedding(row_num_embed, num_feats) 36 | self.col_embed = nn.Embedding(col_num_embed, num_feats) 37 | self.num_feats = num_feats 38 | self.row_num_embed = row_num_embed 39 | self.col_num_embed = col_num_embed 40 | 41 | def forward(self, mask): 42 | """Forward function for `LearnedPositionalEncoding`. 43 | 44 | Args: 45 | mask (Tensor): ByteTensor mask. Non-zero values representing 46 | ignored positions, while zero values means valid positions 47 | for this image. Shape [bs, h, w]. 48 | 49 | Returns: 50 | pos (Tensor): Returned position embedding with shape 51 | [bs, num_feats*2, h, w]. 52 | """ 53 | h, w = mask.shape[-2:] 54 | x = torch.arange(w, device=mask.device) 55 | y = torch.arange(h, device=mask.device) 56 | x_embed = self.col_embed(x) 57 | y_embed = self.row_embed(y) 58 | pos = torch.cat((x_embed.unsqueeze(0).repeat(h, 1, 1), y_embed.unsqueeze(1).repeat(1, w, 1)), 59 | dim=-1).permute(2, 0, 1).unsqueeze(0).repeat(mask.shape[0], 1, 1, 1) 60 | return pos 61 | 62 | 63 | # import torch 64 | # import torch.nn as nn 65 | # from typing import Optional 66 | # from torch import Tensor 67 | 68 | # ### https://github.com/fundamentalvision/Deformable-DETR/blob/11169a60c33333af00a4849f1808023eba96a931/models/position_encoding.py 69 | 70 | # class NestedTensor(object): 71 | # def __init__(self, tensors, mask: Optional[Tensor]): 72 | # self.tensors = tensors 73 | # self.mask = mask 74 | 75 | # def to(self, device, non_blocking=False): 76 | # # type: (Device) -> NestedTensor # noqa 77 | # cast_tensor = self.tensors.to(device, non_blocking=non_blocking) 78 | # mask = self.mask 79 | # if mask is not None: 80 | # assert mask is not None 81 | # cast_mask = mask.to(device, non_blocking=non_blocking) 82 | # else: 83 | # cast_mask = None 84 | # return NestedTensor(cast_tensor, cast_mask) 85 | 86 | # class PositionEmbeddingLearned(nn.Module): 87 | # """ 88 | # Absolute pos embedding, learned. 
89 | # """ 90 | # def __init__(self, num_pos_feats=256): 91 | # super().__init__() 92 | # self.row_embed = nn.Embedding(50, num_pos_feats) 93 | # self.col_embed = nn.Embedding(50, num_pos_feats) 94 | # self.reset_parameters() 95 | 96 | # def reset_parameters(self): 97 | # nn.init.uniform_(self.row_embed.weight) 98 | # nn.init.uniform_(self.col_embed.weight) 99 | 100 | # def forward(self, tensor_list: NestedTensor): 101 | # x = tensor_list.tensors 102 | # h, w = x.shape[-2:] 103 | # i = torch.arange(w, device=x.device) 104 | # j = torch.arange(h, device=x.device) 105 | # x_emb = self.col_embed(i) 106 | # y_emb = self.row_embed(j) 107 | # pos = torch.cat([ 108 | # x_emb.unsqueeze(0).repeat(h, 1, 1), 109 | # y_emb.unsqueeze(1).repeat(1, w, 1), 110 | # ], dim=-1).permute(2, 0, 1).unsqueeze(0).repeat(x.shape[0], 1, 1, 1) 111 | # return pos -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/deformable_attention_function.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Aug 25 14:12:46 2022 5 | 6 | @author: Ekta 7 | """ 8 | from __future__ import absolute_import 9 | from __future__ import print_function 10 | from __future__ import division 11 | 12 | import torch 13 | import torch.nn.functional as F 14 | from torch.autograd import Function 15 | from torch.autograd.function import once_differentiable 16 | from torch.cuda.amp import custom_bwd, custom_fwd 17 | 18 | #import MultiScaleDeformableAttention as MSDA ##TODO: Installation for this comes from DETR repo -- need to build sth to get this 19 | 20 | # class MSDeformAttnFunction(Function): 21 | # @staticmethod 22 | # @custom_fwd(cast_inputs=torch.float16) 23 | # def forward(ctx, value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, im2col_step): 24 | # """GPU version of multi-scale deformable attention. 25 | # Args: 26 | # value (Tensor): The value has shape 27 | # (bs, num_keys, mum_heads, embed_dims//num_heads) 28 | # value_spatial_shapes (Tensor): Spatial shape of 29 | # each feature map, has shape (num_levels, 2), 30 | # last dimension 2 represent (h, w) 31 | # sampling_locations (Tensor): The location of sampling points, 32 | # has shape 33 | # (bs ,num_queries, num_heads, num_levels, num_points, 2), 34 | # the last dimension 2 represent (x, y). 35 | # attention_weights (Tensor): The weight of sampling points used 36 | # when calculate the attention, has shape 37 | # (bs ,num_queries, num_heads, num_levels, num_points), 38 | # im2col_step (Tensor): The step used in image to column. 39 | # Returns: 40 | # Tensor: has shape (N, Len_q, d_model) 41 | # """ 42 | 43 | # ctx.im2col_step = im2col_step 44 | # #print(type(value),type(value_spatial_shapes),type(value_level_start_index),type(sampling_locations),type(attention_weights),type(ctx.im2col_step)) 45 | 46 | # output = MSDA.ms_deform_attn_forward( 47 | # value = value, value_spatial_shapes=value_spatial_shapes, value_level_start_index=value_level_start_index, sampling_locations=sampling_locations, attention_weights=attention_weights, im2col_step = ctx.im2col_step) 48 | # ctx.save_for_backward(value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights) 49 | # return output 50 | 51 | # @staticmethod 52 | # @once_differentiable 53 | # @custom_bwd 54 | # def backward(ctx, grad_output): 55 | # """GPU version of backward function. 
56 | # Args: 57 | # grad_output (Tensor): Gradient 58 | # of output tensor of forward. 59 | # Returns: 60 | # Tuple[Tensor]: Gradient 61 | # of input tensors in forward. 62 | # """ 63 | # value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights = ctx.saved_tensors 64 | # grad_value = torch.zeros_like(value) 65 | # grad_sampling_loc = torch.zeros_like(sampling_locations) 66 | # grad_attn_weight = torch.zeros_like(attention_weights) 67 | 68 | # MSDA.ms_deform_attn_backward(value,value_spatial_shapes,value_level_start_index,sampling_locations,attention_weights,grad_output.contiguous(),grad_value,grad_sampling_loc,grad_attn_weight,im2col_step=ctx.im2col_step) 69 | 70 | # return grad_value, None, None, \ 71 | # grad_sampling_loc, grad_attn_weight, None 72 | 73 | 74 | def ms_deform_attn_core_pytorch(value, value_spatial_shapes, sampling_locations, attention_weights): 75 | # for debug and test only, 76 | # need to use cuda version instead 77 | N_, S_, M_, D_ = value.shape 78 | #print(value.shape) 79 | _, Lq_, M_, L_, P_, _ = sampling_locations.shape 80 | value_list = value.split([H_ * W_ for H_, W_ in value_spatial_shapes], dim=1) 81 | sampling_grids = 2 * sampling_locations - 1 82 | sampling_value_list = [] 83 | for lid_, (H_, W_) in enumerate(value_spatial_shapes): 84 | # N_, H_*W_, M_, D_ -> N_, H_*W_, M_*D_ -> N_, M_*D_, H_*W_ -> N_*M_, D_, H_, W_ 85 | value_l_ = value_list[lid_].flatten(2).transpose(1, 2).reshape(N_*M_, D_, H_, W_) 86 | # N_, Lq_, M_, P_, 2 -> N_, M_, Lq_, P_, 2 -> N_*M_, Lq_, P_, 2 87 | sampling_grid_l_ = sampling_grids[:, :, :, lid_].transpose(1, 2).flatten(0, 1) 88 | # N_*M_, D_, Lq_, P_ 89 | sampling_value_l_ = F.grid_sample(value_l_, sampling_grid_l_, 90 | mode='bilinear', padding_mode='zeros', align_corners=False) 91 | sampling_value_list.append(sampling_value_l_) 92 | # (N_, Lq_, M_, L_, P_) -> (N_, M_, Lq_, L_, P_) -> (N_, M_, 1, Lq_, L_*P_) 93 | attention_weights = attention_weights.transpose(1, 2).reshape(N_*M_, 1, Lq_, L_*P_) 94 | output = (torch.stack(sampling_value_list, dim=-2).flatten(-2) * attention_weights).sum(-1).view(N_, M_*D_, Lq_) 95 | return output.transpose(1, 2).contiguous() 96 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/test_f2bev_conv_st_height.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Dec 22 11:47:19 2022 5 | 6 | @author: Ekta 7 | """ 8 | import torch,time,ntpath #cv2 9 | from torch import nn, optim 10 | import numpy as np 11 | from test_loader_single_task import UnityImageDataset 12 | import os,fnmatch 13 | from torch.utils.data import DataLoader 14 | from model_f2bev_conv_st_height import FisheyeBEVFormer 15 | import torchvision.transforms as T 16 | from torchmetrics.functional import jaccard_index 17 | 18 | 19 | def numpy_sigmoid(x): 20 | return 1/(1 + np.exp(-x)) 21 | 22 | if not os.path.exists('./predictions/'): 23 | os.makedirs('./predictions/') 24 | 25 | if not os.path.exists('./predictions/f2bev_conv_st_height/'): 26 | os.makedirs('./predictions/f2bev_conv_st_height/') 27 | if not os.path.exists('./predictions/f2bev_conv_st_height/bevfeatures'): 28 | os.makedirs('./predictions/f2bev_conv_st_height/features') 29 | if not os.path.exists('./predictions/f2bev_conv_st_height/predfull/'): 30 | os.makedirs('./predictions/f2bev_conv_st_height/predfull/') 31 | if not os.path.exists('./predictions/f2bev_conv_st_height/predfull/ce/'): 32 
| os.makedirs('./predictions/f2bev_conv_st_height/predfull/ce/') 33 | 34 | num_data_sequences = 20 35 | 36 | 37 | bev_dirs = ['./data/images'+str(i)+'/test/depth' for i in range(num_data_sequences)] 38 | front_dirs = ['./data/images'+str(i)+'/test/rgb/front' for i in range(num_data_sequences)] 39 | left_dirs = ['./data/images'+str(i)+'/test/rgb/left' for i in range(num_data_sequences)] 40 | rear_dirs = ['./data/images'+str(i)+'/test/rgb/rear' for i in range(num_data_sequences)] 41 | right_dirs = ['./data/images'+str(i)+'/test/rgb/right' for i in range(num_data_sequences)] 42 | config_dirs = ['./data/images'+str(i)+'/test/cameraconfig' for i in range(num_data_sequences)] 43 | 44 | 45 | seq_len = 1 46 | 47 | image_lists = [] 48 | 49 | datalengths = [] 50 | 51 | for bev_dir in bev_dirs: 52 | names = fnmatch.filter(os.listdir(bev_dir), '*.png') 53 | 54 | files = [] 55 | for name in names: 56 | files.append(os.path.splitext(ntpath.basename(name))[0]) 57 | 58 | filelist = sorted(files,key=int) 59 | 60 | image_lists.append([f + '.png' for f in filelist]) 61 | datalengths.append(len(names)) 62 | 63 | 64 | 65 | transforms = torch.nn.Sequential(T.Resize((540,640)),) 66 | target_transforms = torch.nn.Sequential(T.Grayscale(num_output_channels=1)) 67 | 68 | test_data = UnityImageDataset(bev_dirs = bev_dirs,front_dirs=front_dirs,left_dirs=left_dirs,rear_dirs=rear_dirs,right_dirs=right_dirs,image_lists=image_lists,config_dirs=config_dirs,seq_len= seq_len,datalengths = datalengths,num_data_sequences = num_data_sequences, 69 | transform = transforms, target_transform= target_transforms) 70 | 71 | 72 | test_dataloader = DataLoader(test_data, batch_size=1, shuffle=False) 73 | 74 | 75 | device = "cuda" if torch.cuda.is_available() else "cpu" 76 | #device = "cpu" 77 | print(f"Using {device} device") 78 | 79 | model = FisheyeBEVFormer().to(device) 80 | 81 | checkpoint = torch.load('./f2bev_conv_st_height.pt') 82 | model.load_state_dict(checkpoint['model_state_dict']) 83 | 84 | 85 | 86 | def test_temporal(test_dataloader,seq_len,model,loss_fn,image_list): 87 | num_batches = len(test_dataloader) 88 | model.eval() 89 | test_loss = 0 90 | test_iou = 0 91 | with torch.no_grad(): 92 | for batch_idx, (dataseq, targetseq) in enumerate(test_dataloader): 93 | inp_img_seq, can_buses_seq = dataseq 94 | bs = targetseq[0].shape[0] 95 | for ctr in range(seq_len): 96 | front = inp_img_seq[ctr][0] 97 | left = inp_img_seq[ctr][1] 98 | rear = inp_img_seq[ctr][2] 99 | right = inp_img_seq[ctr][3] 100 | 101 | 102 | 103 | target = targetseq[ctr] 104 | front = front.to(device) 105 | left = left.to(device) 106 | rear = rear.to(device) 107 | right = right.to(device) 108 | 109 | target = torch.squeeze(target,dim=1) 110 | idx2 = torch.where(target <= 0.35) 111 | idx0 = torch.where(target >= 0.69) 112 | target[target >= 0] = 1 113 | target[idx2] = 2 114 | target[idx0] = 0 115 | 116 | target = target.to(torch.int64).to(device) 117 | can_buses = can_buses_seq[ctr] 118 | 119 | 120 | if batch_idx == 0: 121 | prev_bev = None 122 | 123 | else: 124 | prev_bev = torch.Tensor([np.load('./predictions/f2bev_conv_st_height/bevfeatures/'+image_list[batch_idx-1].split('.')[0]+'.npy')]).to(device) 125 | pred, for_prev_bev = model(front,left,rear,right, list(can_buses),prev_bev) 126 | 127 | for i,p in enumerate(for_prev_bev): 128 | np.save('./predictions/f2bev_conv_st_height/bevfeatures/'+image_list[batch_idx].split('.')[0]+'.npy',p.detach().cpu().numpy()) 129 | for i,p in enumerate(pred): 130 | 
np.save('./predictions/f2bev_conv_st_height/predfull/focal/'+image_list[batch_idx].split('.')[0]+'.npy',p.detach().cpu().numpy()) 131 | test_loss += loss_fn(pred, target).item() 132 | test_iou += jaccard_index(pred,target.to(pred.device),num_classes=3,average='none') 133 | 134 | test_loss/= num_batches*seq_len 135 | test_iou /= num_batches 136 | print(f"Test Error: Avg loss: {test_loss:>8f} \n") 137 | print(test_iou) 138 | return test_loss 139 | 140 | all_images = [] 141 | for test_list in image_lists: 142 | all_images = all_images + test_list 143 | 144 | loss = nn.CrossEntropyLoss() #nn.MSELoss() #BinaryFocalLoss(alpha=0.25,gamma=2) 145 | test_temporal(test_dataloader,seq_len,model,loss,all_images) 146 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/test_f2bev_attn_st_height.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Dec 22 11:47:19 2022 5 | 6 | @author: Ekta 7 | """ 8 | import os 9 | os.environ["CUDA_VISIBLE_DEVICES"]="1" 10 | import torch,time,ntpath #cv2 11 | from torch import nn, optim 12 | import numpy as np 13 | from test_loader_single_task import UnityImageDataset 14 | import fnmatch 15 | from torch.utils.data import DataLoader 16 | from model_f2bev_attn_st_height import FisheyeBEVFormer 17 | import torchvision.transforms as T 18 | from torchmetrics.functional import jaccard_index 19 | 20 | 21 | def numpy_sigmoid(x): 22 | return 1/(1 + np.exp(-x)) 23 | 24 | if not os.path.exists('./predictions/'): 25 | os.makedirs('./predictions/') 26 | if not os.path.exists('./predictions/f2bev_attn_st_height/'): 27 | os.makedirs('./predictions/f2bev_attn_st_height/') 28 | if not os.path.exists('./predictions/f2bev_attn_st_height/bevfeatures'): 29 | os.makedirs('./predictions/f2bev_attn_st_height/features') 30 | if not os.path.exists('./predictions/f2bev_attn_st_height/predfull/'): 31 | os.makedirs('./predictions/f2bev_attn_st_height/predfull/') 32 | if not os.path.exists('./predictions/f2bev_attn_st_height/predfull/ce/'): 33 | os.makedirs('./predictions/f2bev_attn_st_height/predfull/ce/') 34 | 35 | 36 | num_data_sequences = 20 37 | 38 | bev_dirs = ['./data/images'+str(i)+'/test/depth' for i in range(num_data_sequences)] 39 | front_dirs = ['./data/images'+str(i)+'/test/rgb/front' for i in range(num_data_sequences)] 40 | left_dirs = ['./data/images'+str(i)+'/test/rgb/left' for i in range(num_data_sequences)] 41 | rear_dirs = ['./data/images'+str(i)+'/test/rgb/rear' for i in range(num_data_sequences)] 42 | right_dirs = ['./data/images'+str(i)+'/test/rgb/right' for i in range(num_data_sequences)] 43 | config_dirs = ['./data/images'+str(i)+'/test/cameraconfig' for i in range(num_data_sequences)] 44 | 45 | 46 | seq_len = 1 47 | 48 | image_lists = [] 49 | 50 | datalengths = [] 51 | 52 | for bev_dir in bev_dirs: 53 | names = fnmatch.filter(os.listdir(bev_dir), '*.png') 54 | 55 | files = [] 56 | for name in names: 57 | files.append(os.path.splitext(ntpath.basename(name))[0]) 58 | 59 | filelist = sorted(files,key=int) 60 | 61 | image_lists.append([f + '.png' for f in filelist]) 62 | datalengths.append(len(names)) 63 | 64 | 65 | 66 | transforms = torch.nn.Sequential(T.Resize((540,640)),) 67 | target_transforms = torch.nn.Sequential(T.Resize((50,50)),T.Grayscale(num_output_channels=1)) 68 | 69 | test_data = UnityImageDataset(bev_dirs = 
bev_dirs,front_dirs=front_dirs,left_dirs=left_dirs,rear_dirs=rear_dirs,right_dirs=right_dirs,image_lists=image_lists,config_dirs=config_dirs,seq_len= seq_len,datalengths = datalengths,num_data_sequences = num_data_sequences, 70 | transform = transforms, target_transform= target_transforms) 71 | 72 | 73 | test_dataloader = DataLoader(test_data, batch_size=1, shuffle=False) 74 | 75 | 76 | device = "cuda" if torch.cuda.is_available() else "cpu" 77 | #device = "cpu" 78 | print(f"Using {device} device") 79 | 80 | model = FisheyeBEVFormer().to(device) 81 | 82 | checkpoint = torch.load('./f2bev_attn_st_height.pt') 83 | model.load_state_dict(checkpoint['model_state_dict']) 84 | 85 | 86 | 87 | def test_temporal(test_dataloader,seq_len,model,loss_fn,image_list): 88 | num_batches = len(test_dataloader) 89 | model.eval() 90 | test_loss = 0 91 | test_iou = 0 92 | with torch.no_grad(): 93 | for batch_idx, (dataseq, targetseq) in enumerate(test_dataloader): 94 | inp_img_seq, can_buses_seq = dataseq 95 | bs = targetseq[0].shape[0] 96 | for ctr in range(seq_len): 97 | front = inp_img_seq[ctr][0] 98 | left = inp_img_seq[ctr][1] 99 | rear = inp_img_seq[ctr][2] 100 | right = inp_img_seq[ctr][3] 101 | 102 | 103 | 104 | target = targetseq[ctr] 105 | front = front.to(device) 106 | left = left.to(device) 107 | rear = rear.to(device) 108 | right = right.to(device) 109 | 110 | target = torch.squeeze(target,dim=1) 111 | idx2 = torch.where(target <= 0.35) 112 | idx0 = torch.where(target >= 0.69) 113 | target[target >= 0] = 1 114 | target[idx2] = 2 115 | target[idx0] = 0 116 | 117 | target = target.to(torch.int64).to(device) 118 | can_buses = can_buses_seq[ctr] 119 | 120 | 121 | if batch_idx == 0: 122 | prev_bev = None 123 | 124 | else: 125 | prev_bev = torch.Tensor([np.load('./predictions/f2bev_attn_st_height/bevfeatures/'+image_list[batch_idx-1].split('.')[0]+'.npy')]).to(device) 126 | pred, _, for_prev_bev = model(front,left,rear,right, list(can_buses),prev_bev) 127 | 128 | for i,p in enumerate(for_prev_bev): 129 | np.save('./predictions/f2bev_attn_st_height/bevfeatures/'+image_list[batch_idx].split('.')[0]+'.npy',p.detach().cpu().numpy()) 130 | for i,p in enumerate(pred): 131 | np.save('./predictions/f2bev_attn_st_height/predfull/ce/'+image_list[batch_idx].split('.')[0]+'.npy',p.detach().cpu().numpy()) 132 | test_loss += loss_fn(pred, target).item() 133 | test_iou += jaccard_index(pred,target.to(pred.device),num_classes=3,average='none') 134 | 135 | test_loss/= num_batches*seq_len 136 | test_iou /= num_batches 137 | print(f"Test Error: Avg loss: {test_loss:>8f} \n") 138 | print(test_iou) 139 | return test_loss 140 | 141 | all_images = [] 142 | for test_list in image_lists: 143 | all_images = all_images + test_list 144 | 145 | loss = nn.CrossEntropyLoss() #nn.MSELoss() #BinaryFocalLoss(alpha=0.25,gamma=2) 146 | test_temporal(test_dataloader,seq_len,model,loss,all_images) 147 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/test_f2bev_conv_st_seg.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Dec 22 11:47:19 2022 5 | 6 | @author: Ekta 7 | """ 8 | import torch,time,ntpath #cv2 9 | from torch import nn, optim 10 | import numpy as np 11 | from test_loader_single_task import UnityImageDataset 12 | import os,fnmatch 13 | from torch.utils.data import DataLoader 14 | from model_f2bev_conv_st_seg import FisheyeBEVFormer 15 | import torchvision.transforms 
as T 16 | from torchmetrics.functional import jaccard_index 17 | 18 | 19 | def numpy_sigmoid(x): 20 | return 1/(1 + np.exp(-x)) 21 | 22 | if not os.path.exists('./predictions/'): 23 | os.makedirs('./predictions/') 24 | if not os.path.exists('./predictions/f2bev_conv_st_seg/'): 25 | os.makedirs('./predictions/f2bev_conv_st_seg/') 26 | if not os.path.exists('./predictions/f2bev_conv_st_seg/bevfeatures'): 27 | os.makedirs('./predictions/f2bev_conv_st_seg/features') 28 | if not os.path.exists('./predictions/f2bev_conv_st_seg/predfull/'): 29 | os.makedirs('./predictions/f2bev_conv_st_seg/predfull/') 30 | if not os.path.exists('./predictions/f2bev_conv_st_seg/predfull/ce/'): 31 | os.makedirs('./predictions/f2bev_conv_st_seg/predfull/ce/') 32 | 33 | num_data_sequences = 20 34 | bev_dirs = ['./data/images'+str(i)+'/test/seg/bev' for i in range(num_data_sequences)] 35 | front_dirs = ['./data/images'+str(i)+'/test/rgb/front' for i in range(num_data_sequences)] 36 | left_dirs = ['./data/images'+str(i)+'/test/rgb/left' for i in range(num_data_sequences)] 37 | rear_dirs = ['./data/images'+str(i)+'/test/rgb/rear' for i in range(num_data_sequences)] 38 | right_dirs = ['./data/images'+str(i)+'/test/rgb/right' for i in range(num_data_sequences)] 39 | config_dirs = ['./data/images'+str(i)+'/test/cameraconfig' for i in range(num_data_sequences)] 40 | 41 | 42 | 43 | seq_len = 1 44 | 45 | image_lists = [] 46 | 47 | datalengths = [] 48 | 49 | for bev_dir in bev_dirs: 50 | names = fnmatch.filter(os.listdir(bev_dir), '*.png') 51 | 52 | files = [] 53 | for name in names: 54 | files.append(os.path.splitext(ntpath.basename(name))[0]) 55 | 56 | filelist = sorted(files,key=int) 57 | 58 | image_lists.append([f + '.png' for f in filelist]) 59 | datalengths.append(len(names)) 60 | 61 | 62 | 63 | transforms = torch.nn.Sequential(T.Resize((540,640)),) 64 | target_transforms = torch.nn.Sequential(T.Grayscale(num_output_channels=1)) 65 | 66 | test_data = UnityImageDataset(bev_dirs = bev_dirs,front_dirs=front_dirs,left_dirs=left_dirs,rear_dirs=rear_dirs,right_dirs=right_dirs,image_lists=image_lists,config_dirs=config_dirs,seq_len= seq_len,datalengths = datalengths,num_data_sequences = num_data_sequences, 67 | transform = transforms, target_transform= target_transforms) 68 | 69 | 70 | test_dataloader = DataLoader(test_data, batch_size=1, shuffle=False) 71 | 72 | 73 | device = "cuda" if torch.cuda.is_available() else "cpu" 74 | #device = "cpu" 75 | print(f"Using {device} device") 76 | 77 | model = FisheyeBEVFormer().to(device) 78 | 79 | checkpoint = torch.load('./f2bev_conv_st_seg.pt') 80 | 81 | model.load_state_dict(checkpoint['model_state_dict']) 82 | 83 | 84 | 85 | def test_temporal(test_dataloader,seq_len,model,loss_fn,image_list): 86 | num_batches = len(test_dataloader) 87 | model.eval() 88 | test_loss = 0 89 | test_iou = 0 90 | with torch.no_grad(): 91 | for batch_idx, (dataseq, targetseq) in enumerate(test_dataloader): 92 | inp_img_seq, can_buses_seq = dataseq 93 | bs = targetseq[0].shape[0] 94 | for ctr in range(seq_len): 95 | front = inp_img_seq[ctr][0] 96 | left = inp_img_seq[ctr][1] 97 | rear = inp_img_seq[ctr][2] 98 | right = inp_img_seq[ctr][3] 99 | 100 | 101 | 102 | target = targetseq[ctr] 103 | front = front.to(device) 104 | left = left.to(device) 105 | rear = rear.to(device) 106 | right = right.to(device) 107 | 108 | target = torch.squeeze(target,dim=1) 109 | idx0 = torch.where(target <= 0.02) 110 | target[idx0] = 10 111 | idx1 = torch.where(target <= 0.07) 112 | target[idx1] = 11 113 | idx2 = torch.where(target 
<= 0.22) 114 | target[idx2] = 12 115 | idx3 = torch.where(target <= 0.60) 116 | target[idx3] = 13 117 | idx4 = torch.where(target <= 1) 118 | target[idx4] = 14 119 | target = target - 10 120 | target = target.to(torch.int64).to(device) 121 | 122 | can_buses = can_buses_seq[ctr] 123 | 124 | 125 | if batch_idx == 0: 126 | prev_bev = None 127 | 128 | else: 129 | prev_bev = torch.Tensor([np.load('./predictions/f2bev_conv_st_seg/bevfeatures/'+image_list[batch_idx-1].split('.')[0]+'.npy')]).to(device) 130 | pred, for_prev_bev = model(front,left,rear,right, list(can_buses),prev_bev) 131 | 132 | 133 | for i,p in enumerate(for_prev_bev): 134 | np.save('./predictions/f2bev_conv_st_seg/bevfeatures/'+image_list[batch_idx].split('.')[0]+'.npy',p.detach().cpu().numpy()) 135 | for i,p in enumerate(pred): 136 | np.save('./predictions/f2bev_conv_st_seg/predfull/focal/'+image_list[batch_idx].split('.')[0]+'.npy',p.detach().cpu().numpy()) 137 | test_loss += loss_fn(pred, target).item() 138 | test_iou += jaccard_index(pred,target.to(pred.device),num_classes=5,average='none') 139 | 140 | test_loss/= num_batches*seq_len 141 | test_iou /= num_batches 142 | print(f"Test Error: Avg loss: {test_loss:>8f} \n") 143 | print(test_iou) 144 | return test_loss 145 | 146 | all_images = [] 147 | for test_list in image_lists: 148 | all_images = all_images + test_list 149 | 150 | loss = nn.CrossEntropyLoss() #nn.MSELoss() #BinaryFocalLoss(alpha=0.25,gamma=2) 151 | test_temporal(test_dataloader,seq_len,model,loss,all_images) 152 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/test_f2bev_attn_st_seg.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Dec 22 11:47:19 2022 5 | 6 | @author: Ekta 7 | """ 8 | 9 | import os 10 | os.environ["CUDA_VISIBLE_DEVICES"]="0" 11 | import torch,time,ntpath #cv2 12 | from torch import nn, optim 13 | import numpy as np 14 | from test_loader_single_task import UnityImageDataset 15 | import fnmatch 16 | from torch.utils.data import DataLoader 17 | from model_f2bev_attn_st_seg import FisheyeBEVFormer 18 | 19 | import torchvision.transforms as T 20 | from torchmetrics.functional import jaccard_index 21 | 22 | 23 | 24 | def numpy_sigmoid(x): 25 | return 1/(1 + np.exp(-x)) 26 | 27 | if not os.path.exists('./predictions/'): 28 | os.makedirs('./predictions/') 29 | 30 | if not os.path.exists('./predictions/f2bev_attn_st_seg/'): 31 | os.makedirs('./predictions/f2bev_attn_st_seg/') 32 | if not os.path.exists('./predictions/f2bev_attn_st_seg/bevfeatures'): 33 | os.makedirs('./predictions/f2bev_attn_st_seg/features') 34 | if not os.path.exists('./predictions/f2bev_attn_st_seg/predfull/'): 35 | os.makedirs('./predictions/f2bev_attn_st_seg/predfull/') 36 | if not os.path.exists('./predictions/f2bev_attn_st_seg/predfull/ce/'): 37 | os.makedirs('./predictions/f2bev_attn_st_seg/predfull/ce/') 38 | 39 | num_data_sequences = 20 40 | 41 | bev_dirs = ['./data/images'+str(i)+'/test/seg/bev' for i in range(num_data_sequences)] 42 | front_dirs = ['./data/images'+str(i)+'/test/rgb/front' for i in range(num_data_sequences)] 43 | left_dirs = ['./data/images'+str(i)+'/test/rgb/left' for i in range(num_data_sequences)] 44 | rear_dirs = ['./data/images'+str(i)+'/test/rgb/rear' for i in range(num_data_sequences)] 45 | right_dirs = ['./data/images'+str(i)+'/test/rgb/right' for i in range(num_data_sequences)] 46 | config_dirs = 
['./data/images'+str(i)+'/test/cameraconfig' for i in range(num_data_sequences)] 47 | 48 | 49 | seq_len = 1 50 | 51 | image_lists = [] 52 | 53 | datalengths = [] 54 | 55 | for bev_dir in bev_dirs: 56 | names = fnmatch.filter(os.listdir(bev_dir), '*.png') 57 | 58 | files = [] 59 | for name in names: 60 | files.append(os.path.splitext(ntpath.basename(name))[0]) 61 | 62 | filelist = sorted(files,key=int) 63 | 64 | image_lists.append([f + '.png' for f in filelist]) 65 | datalengths.append(len(names)) 66 | 67 | 68 | 69 | transforms = torch.nn.Sequential(T.Resize((540,640)),) 70 | target_transforms = torch.nn.Sequential(T.Resize((50,50)),T.Grayscale(num_output_channels=1)) 71 | 72 | test_data = UnityImageDataset(bev_dirs = bev_dirs,front_dirs=front_dirs,left_dirs=left_dirs,rear_dirs=rear_dirs,right_dirs=right_dirs,image_lists=image_lists,config_dirs=config_dirs,seq_len= seq_len,datalengths = datalengths,num_data_sequences = num_data_sequences, 73 | transform = transforms, target_transform= target_transforms) 74 | 75 | 76 | test_dataloader = DataLoader(test_data, batch_size=1, shuffle=False) 77 | 78 | 79 | device = "cuda" if torch.cuda.is_available() else "cpu" 80 | #device = "cpu" 81 | print(f"Using {device} device") 82 | 83 | model = FisheyeBEVFormer().to(device) 84 | 85 | checkpoint = torch.load('./f2bev_attn_st_seg.pt') 86 | model.load_state_dict(checkpoint['model_state_dict']) 87 | 88 | 89 | 90 | def test_temporal(test_dataloader,seq_len,model,loss_fn,image_list): 91 | num_batches = len(test_dataloader) 92 | model.eval() 93 | test_loss = 0 94 | test_iou = 0 95 | with torch.no_grad(): 96 | for batch_idx, (dataseq, targetseq) in enumerate(test_dataloader): 97 | inp_img_seq, can_buses_seq = dataseq 98 | bs = targetseq[0].shape[0] 99 | for ctr in range(seq_len): 100 | front = inp_img_seq[ctr][0] 101 | left = inp_img_seq[ctr][1] 102 | rear = inp_img_seq[ctr][2] 103 | right = inp_img_seq[ctr][3] 104 | 105 | 106 | 107 | target = targetseq[ctr] 108 | front = front.to(device) 109 | left = left.to(device) 110 | rear = rear.to(device) 111 | right = right.to(device) 112 | 113 | target = torch.squeeze(target,dim=1) 114 | #print(torch.unique(starget)) 115 | idx0 = torch.where(target <= 0.02) 116 | target[idx0] = 10 117 | idx1 = torch.where(target <= 0.07) 118 | target[idx1] = 11 119 | idx2 = torch.where(target <= 0.22) 120 | target[idx2] = 12 121 | idx3 = torch.where(target <= 0.60) 122 | target[idx3] = 13 123 | idx4 = torch.where(target <= 1) 124 | target[idx4] = 14 125 | target = target - 10 126 | target = target.to(torch.int64).to(device) 127 | 128 | can_buses = can_buses_seq[ctr] 129 | 130 | 131 | if batch_idx == 0: 132 | prev_bev = None 133 | 134 | else: 135 | prev_bev = torch.Tensor([np.load('./predictions/f2bev_attn_st_seg/bevfeatures/'+image_list[batch_idx-1].split('.')[0]+'.npy')]).to(device) 136 | pred, _, for_prev_bev = model(front,left,rear,right, list(can_buses),prev_bev) 137 | 138 | 139 | for i,p in enumerate(for_prev_bev): 140 | np.save('./predictions/f2bev_attn_st_seg/bevfeatures/'+image_list[batch_idx].split('.')[0]+'.npy',p.detach().cpu().numpy()) 141 | for i,p in enumerate(pred): 142 | np.save('./predictions/f2bev_attn_st_seg/predfull/ce/'+image_list[batch_idx].split('.')[0]+'.npy',p.detach().cpu().numpy()) 143 | test_loss += loss_fn(pred, target).item() 144 | test_iou += jaccard_index(pred,target.to(pred.device),num_classes=5,average='none') 145 | 146 | test_loss/= num_batches*seq_len 147 | test_iou /= num_batches 148 | print(f"Test Error: Avg loss: {test_loss:>8f} \n") 149 | 
print(test_iou) 150 | return test_loss 151 | 152 | all_images = [] 153 | for test_list in image_lists: 154 | all_images = all_images + test_list 155 | 156 | loss = nn.CrossEntropyLoss() #nn.MSELoss() #BinaryFocalLoss(alpha=0.25,gamma=2) 157 | test_temporal(test_dataloader,seq_len,model,loss,all_images) 158 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/loader_multi_task.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Fri Jan 6 10:09:43 2023 5 | 6 | @author: Ekta 7 | """ 8 | 9 | import torch 10 | from torch.utils.data import Dataset 11 | #from torchvision import datasets 12 | #from torchvision.transforms import ToTensor 13 | #import matplotlib.pyplot as plt 14 | #from torch.utils.data import DataLoader 15 | import numpy as np 16 | import os#,fnmatch 17 | from torchvision.io import read_image 18 | import random 19 | class UnityImageDataset(Dataset): 20 | def __init__(self, bev_dirs, bev_depth_dirs, front_dirs, left_dirs, rear_dirs, right_dirs, image_lists, config_dirs, seq_len, datalengths, num_data_sequences, transform=None, target_transform=None): 21 | self.bev_dirs = bev_dirs 22 | self.bev_depth_dirs = bev_depth_dirs 23 | self.front_dirs = front_dirs 24 | self.left_dirs = left_dirs 25 | self.rear_dirs = rear_dirs 26 | self.right_dirs = right_dirs 27 | self.image_lists = image_lists 28 | self.config_dirs = config_dirs 29 | self.transform = transform 30 | self.target_transform = target_transform 31 | self.seq_len = seq_len 32 | self.datalengths = datalengths 33 | self.num_data_sequences = num_data_sequences 34 | 35 | def __len__(self): 36 | total = 0 37 | for count in self.datalengths: 38 | total = total + count 39 | return total 40 | 41 | def find_which_sequence(self,idx): 42 | 43 | eff_data_lens = [x for x in self.datalengths] 44 | 45 | 46 | currptr = 0 47 | nextptr = eff_data_lens[0] 48 | 49 | for i in range(self.num_data_sequences): 50 | if i == 0: 51 | currptr = 0 52 | nextptr = eff_data_lens[0] 53 | 54 | if idx > currptr -1 and idx < nextptr: 55 | seq_idx = 0 56 | else: 57 | currptr = sum(eff_data_lens[:i]) 58 | nextptr = sum(eff_data_lens[:i+1]) 59 | if idx > currptr -1 and idx < nextptr: 60 | seq_idx = i 61 | 62 | 63 | 64 | return seq_idx 65 | 66 | def get_id_in_seq(self,seq_idx,idx): 67 | eff_data_lens = [x for x in self.datalengths] 68 | 69 | 70 | if seq_idx == 0: 71 | subtract = 0 72 | else: 73 | subtract = sum(eff_data_lens[:seq_idx]) 74 | return idx - subtract 75 | 76 | def read_config_for_bevposrot(self,configdir,filename): 77 | with open(os.path.join(configdir, filename)) as f: 78 | lines = f.readlines() 79 | 80 | bpos = [float(lines[5].split(',')[1]),float(lines[5].split(',')[2]),float(lines[5].split(',')[3])] 81 | brot = [float(lines[5].split(',')[4]),float(lines[5].split(',')[5]),float(lines[5].split(',')[6])] 82 | return [bpos,brot] 83 | 84 | 85 | def __getitem__(self, idx): 86 | 87 | seq_idx = self.find_which_sequence(idx) 88 | 89 | bev_dir = self.bev_dirs[seq_idx] 90 | bev_depth_dir = self.bev_depth_dirs[seq_idx] 91 | image_list = self.image_lists[seq_idx] 92 | front_dir = self.front_dirs[seq_idx] 93 | left_dir = self.left_dirs[seq_idx] 94 | rear_dir = self.rear_dirs[seq_idx] 95 | right_dir = self.right_dirs[seq_idx] 96 | config_dir = self.config_dirs[seq_idx] 97 | 98 | idinseq = self.get_id_in_seq(seq_idx,idx) 99 | 100 | return_images_tensor = [] 101 | return_starget = [] 102 | return_htarget = [] 103 | 
return_can_bus = [] 104 | ##first image 105 | 106 | index_list = list(range(idinseq-self.seq_len, idinseq)) 107 | random.shuffle(index_list) 108 | index_list = sorted(index_list[1:]) 109 | index_list.append(idinseq) 110 | 111 | for idxctr,cidx in enumerate(index_list): 112 | cidx = max(0, cidx) 113 | 114 | star_path = os.path.join(bev_dir, image_list[cidx]) 115 | star = read_image(star_path)[:,100:~99,100:~99] 116 | star = torch.mul(star.float(),1/255) 117 | 118 | htar_path = os.path.join(bev_depth_dir, image_list[cidx]) 119 | htar = read_image(htar_path)[:,100:~99,100:~99] 120 | htar = torch.mul(htar.float(),1/255) 121 | if self.target_transform: 122 | star = self.target_transform(star) 123 | htar = self.target_transform(htar) 124 | inp = [] 125 | for cam_views in [front_dir, left_dir, rear_dir, right_dir]: 126 | img_path = os.path.join(cam_views, image_list[cidx]) 127 | image = read_image(img_path) 128 | image = torch.mul(image.float(),1/255) 129 | # if self.transform: 130 | # image = self.transform(image) 131 | inp.append(image) 132 | 133 | [bpos,brot] = self.read_config_for_bevposrot(config_dir,image_list[cidx].split('.')[0]+'.txt') 134 | 135 | 136 | can_bus = np.zeros((5,)) 137 | if idxctr == 0: 138 | #pos 139 | can_bus[0] = 0 140 | can_bus[1] = 0 141 | can_bus[2] = 0 142 | #angle 143 | can_bus[3] = brot[1] #(90 - bevrot[num,1])/180*np.pi ##ego_angle is kept unchanged .. i.e. no delta ##before that 270 - 144 | can_bus[4] = 0 145 | 146 | else: 147 | can_bus[0] = bpos[0] - return_can_bus[idxctr-1][0] 148 | can_bus[1] = bpos[2] - return_can_bus[idxctr-1][2] 149 | can_bus[2] = bpos[1] - return_can_bus[idxctr-1][1] 150 | can_bus[3] = brot[1] 151 | 152 | can_bus[4] = brot[1] - return_can_bus[idxctr-1][3] 153 | 154 | 155 | return_images_tensor.append(torch.stack(inp)) 156 | return_starget.append(star) 157 | return_htarget.append(htar) 158 | return_can_bus.append(can_bus) 159 | 160 | 161 | if self.transform: 162 | return_images_tensor = self.transform(torch.cat(return_images_tensor, dim=0)) 163 | 164 | #return_images = [list(torch.split(x, 4)) for x in list(torch.split(return_images, self.seq_len))] #because 4 camera views 165 | return_images = [] 166 | for frameidx in range(self.seq_len): 167 | inp = [] 168 | for camnum in range(4): #4 cam views 169 | inp.append(return_images_tensor[(4*frameidx) + camnum, :,:,:]) 170 | return_images.append(inp) 171 | 172 | #return torch.cat((inp[0],inp[1],inp[2],inp[3]),axis=0), tar 173 | return [return_images,return_can_bus], [return_starget,return_htarget] 174 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/test_loader_multi_task.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Fri Jan 6 10:09:43 2023 5 | 6 | @author: Ekta 7 | """ 8 | 9 | import torch 10 | from torch.utils.data import Dataset 11 | #from torchvision import datasets 12 | #from torchvision.transforms import ToTensor 13 | #import matplotlib.pyplot as plt 14 | #from torch.utils.data import DataLoader 15 | import numpy as np 16 | import os#,fnmatch 17 | from torchvision.io import read_image 18 | import random 19 | class UnityImageDataset(Dataset): 20 | def __init__(self, bev_dirs, bev_depth_dirs, front_dirs, left_dirs, rear_dirs, right_dirs, image_lists, config_dirs, seq_len, datalengths, num_data_sequences, transform=None, target_transform=None): 21 | self.bev_dirs = bev_dirs 22 | self.bev_depth_dirs = bev_depth_dirs 23 | 
self.front_dirs = front_dirs 24 | self.left_dirs = left_dirs 25 | self.rear_dirs = rear_dirs 26 | self.right_dirs = right_dirs 27 | self.image_lists = image_lists 28 | self.config_dirs = config_dirs 29 | self.transform = transform 30 | self.target_transform = target_transform 31 | self.seq_len = seq_len 32 | self.datalengths = datalengths 33 | self.num_data_sequences = num_data_sequences 34 | 35 | def __len__(self): 36 | total = 0 37 | for count in self.datalengths: 38 | total = total + count 39 | return total 40 | 41 | def find_which_sequence(self,idx): 42 | 43 | eff_data_lens = [x for x in self.datalengths] 44 | 45 | 46 | currptr = 0 47 | nextptr = eff_data_lens[0] 48 | 49 | for i in range(self.num_data_sequences): 50 | if i == 0: 51 | currptr = 0 52 | nextptr = eff_data_lens[0] 53 | 54 | if idx > currptr -1 and idx < nextptr: 55 | seq_idx = 0 56 | else: 57 | currptr = sum(eff_data_lens[:i]) 58 | nextptr = sum(eff_data_lens[:i+1]) 59 | if idx > currptr -1 and idx < nextptr: 60 | seq_idx = i 61 | 62 | 63 | 64 | return seq_idx 65 | 66 | def get_id_in_seq(self,seq_idx,idx): 67 | eff_data_lens = [x for x in self.datalengths] 68 | 69 | 70 | if seq_idx == 0: 71 | subtract = 0 72 | else: 73 | subtract = sum(eff_data_lens[:seq_idx]) 74 | return idx - subtract 75 | 76 | def read_config_for_bevposrot(self,configdir,filename): 77 | with open(os.path.join(configdir, filename)) as f: 78 | lines = f.readlines() 79 | 80 | bpos = [float(lines[5].split(',')[1]),float(lines[5].split(',')[2]),float(lines[5].split(',')[3])] 81 | brot = [float(lines[5].split(',')[4]),float(lines[5].split(',')[5]),float(lines[5].split(',')[6])] 82 | return [bpos,brot] 83 | 84 | 85 | def __getitem__(self, idx): 86 | 87 | seq_idx = self.find_which_sequence(idx) 88 | 89 | bev_dir = self.bev_dirs[seq_idx] 90 | bev_depth_dir = self.bev_depth_dirs[seq_idx] 91 | image_list = self.image_lists[seq_idx] 92 | front_dir = self.front_dirs[seq_idx] 93 | left_dir = self.left_dirs[seq_idx] 94 | rear_dir = self.rear_dirs[seq_idx] 95 | right_dir = self.right_dirs[seq_idx] 96 | config_dir = self.config_dirs[seq_idx] 97 | 98 | idinseq = self.get_id_in_seq(seq_idx,idx) 99 | 100 | return_images_tensor = [] 101 | return_starget = [] 102 | return_htarget = [] 103 | return_can_bus = [] 104 | ##first image 105 | 106 | index_list = list(range(idinseq-self.seq_len, idinseq)) 107 | random.shuffle(index_list) 108 | index_list = sorted(index_list[1:]) 109 | index_list.append(idinseq) 110 | 111 | for idxctr,cidx in enumerate(index_list): 112 | cidx = max(0, cidx) 113 | 114 | star_path = os.path.join(bev_dir, image_list[cidx]) 115 | star = read_image(star_path)[:,100:~99,100:~99] 116 | star = torch.mul(star.float(),1/255) 117 | 118 | htar_path = os.path.join(bev_depth_dir, image_list[cidx]) 119 | htar = read_image(htar_path)[:,100:~99,100:~99] 120 | htar = torch.mul(htar.float(),1/255) 121 | if self.target_transform: 122 | star = self.target_transform(star) 123 | htar = self.target_transform(htar) 124 | inp = [] 125 | for cam_views in [front_dir, left_dir, rear_dir, right_dir]: 126 | img_path = os.path.join(cam_views, image_list[cidx]) 127 | image = read_image(img_path) 128 | image = torch.mul(image.float(),1/255) 129 | # if self.transform: 130 | # image = self.transform(image) 131 | inp.append(image) 132 | 133 | [bpos,brot] = self.read_config_for_bevposrot(config_dir,image_list[cidx].split('.')[0]+'.txt') 134 | 135 | 136 | can_bus = np.zeros((5,)) 137 | if cidx == 0: 138 | #pos 139 | can_bus[0] = 0 140 | can_bus[1] = 0 141 | can_bus[2] = 0 142 | #angle 143 
| can_bus[3] = brot[1] #(90 - bevrot[num,1])/180*np.pi ##ego_angle is kept unchanged .. i.e. no delta ##before that 270 - 144 | can_bus[4] = 0 145 | 146 | else: 147 | [prev_bpos,prev_brot] = self.read_config_for_bevposrot(config_dir,image_list[cidx-1].split('.')[0]+'.txt') 148 | 149 | can_bus[0] = bpos[0] - prev_bpos[0] 150 | can_bus[1] = bpos[2] - prev_bpos[2] 151 | can_bus[2] = bpos[1] - prev_bpos[1] 152 | can_bus[3] = brot[1] 153 | 154 | can_bus[4] = brot[1] - prev_brot[1] 155 | 156 | 157 | return_images_tensor.append(torch.stack(inp)) 158 | return_starget.append(star) 159 | return_htarget.append(htar) 160 | return_can_bus.append(can_bus) 161 | 162 | 163 | if self.transform: 164 | return_images_tensor = self.transform(torch.cat(return_images_tensor, dim=0)) 165 | 166 | #return_images = [list(torch.split(x, 4)) for x in list(torch.split(return_images, self.seq_len))] #because 4 camera views 167 | return_images = [] 168 | for frameidx in range(self.seq_len): 169 | inp = [] 170 | for camnum in range(4): #4 cam views 171 | inp.append(return_images_tensor[(4*frameidx) + camnum, :,:,:]) 172 | return_images.append(inp) 173 | 174 | #return torch.cat((inp[0],inp[1],inp[2],inp[3]),axis=0), tar 175 | return [return_images,return_can_bus], [return_starget,return_htarget] 176 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/spatial_cross_attention.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue Aug 30 16:51:07 2022 5 | 6 | @author: Ekta 7 | """ 8 | 9 | import torch 10 | import torch.nn as nn 11 | 12 | 13 | from bblocks.deformable_attention import MSDeformAttn3D 14 | 15 | from torch.nn.init import xavier_uniform_, constant_ 16 | 17 | class SpatialCrossAttention(nn.Module): 18 | """An attention module used in BEVFormer. 19 | Args: 20 | embed_dims (int): The embedding dimension of Attention. 21 | Default: 256. 22 | num_cams (int): The number of cameras 23 | dropout (float): A Dropout layer on `inp_residual`. 24 | Default: 0.. 25 | init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization. 26 | Default: None. 27 | deformable_attention: (dict): The config for the deformable attention used in SCA. 28 | """ 29 | 30 | def __init__(self,embed_dims=256,num_cams=4,dropout=0.2): 31 | super(SpatialCrossAttention, self).__init__() 32 | 33 | self.dropout = nn.Dropout(dropout) 34 | self.deformable_attention = MSDeformAttn3D() 35 | self.embed_dims = embed_dims 36 | self.num_cams = num_cams 37 | self.output_proj = nn.Linear(embed_dims, embed_dims) 38 | self.init_weight() 39 | 40 | def init_weight(self): 41 | """Default initialization for Parameters of Module.""" 42 | xavier_uniform_(self.output_proj.weight.data) 43 | constant_(self.output_proj.bias.data, 0.) ##done 44 | 45 | 46 | 47 | #@force_fp32(apply_to=('query', 'key', 'value', 'query_pos', 'reference_points_cam')) ##TODO: is this mandatory figure it out 48 | def forward(self, 49 | query, 50 | key, 51 | value, 52 | residual=None, 53 | query_pos=None, 54 | key_padding_mask=None, 55 | reference_points=None, 56 | spatial_shapes=None, 57 | reference_points_cam=None, 58 | bev_mask=None, 59 | level_start_index=None, 60 | **kwargs): 61 | """Forward Function of Detr3DCrossAtten. 62 | Args: 63 | query (Tensor): Query of Transformer with shape 64 | (num_query, bs, embed_dims). 65 | key (Tensor): The key tensor with shape 66 | `(num_key, bs, embed_dims)`. 
67 | value (Tensor): The value tensor with shape 68 | `(num_key, bs, embed_dims)`. (B, N, C, H, W) 69 | residual (Tensor): The tensor used for addition, with the 70 | same shape as `x`. Default None. If None, `x` will be used. 71 | query_pos (Tensor): The positional encoding for `query`. 72 | Default: None. 73 | key_pos (Tensor): The positional encoding for `key`. Default 74 | None. 75 | reference_points (Tensor): The normalized reference 76 | points with shape (bs, num_query, 4), 77 | all elements is range in [0, 1], top-left (0,0), 78 | bottom-right (1, 1), including padding area. 79 | or (N, Length_{query}, num_levels, 4), add 80 | additional two dimensions is (w, h) to 81 | form reference boxes. 82 | key_padding_mask (Tensor): ByteTensor for `query`, with 83 | shape [bs, num_key]. 84 | spatial_shapes (Tensor): Spatial shape of features in 85 | different level. With shape (num_levels, 2), 86 | last dimension represent (h, w). 87 | level_start_index (Tensor): The start index of each level. 88 | A tensor has shape (num_levels) and can be represented 89 | as [0, h_0*w_0, h_0*w_0+h_1*w_1, ...]. 90 | Returns: 91 | Tensor: forwarded results with shape [num_query, bs, embed_dims]. 92 | """ 93 | 94 | if key is None: 95 | key = query 96 | if value is None: 97 | value = key 98 | 99 | if residual is None: 100 | inp_residual = query 101 | slots = torch.zeros_like(query) 102 | if query_pos is not None: 103 | query = query + query_pos 104 | #query shape is 2,2500,256 105 | bs, num_query, _ = query.size() 106 | #reference_points_cam size is 6 2 2500 4 2 107 | #bev mask size is 6 2 2500 4 108 | D = reference_points_cam.size(3) 109 | #print(D) 110 | indexes = [] 111 | for i, mask_per_img in enumerate(bev_mask): 112 | index_query_per_img = mask_per_img[0].sum(-1).nonzero().squeeze(-1) 113 | indexes.append(index_query_per_img) 114 | max_len = max([len(each) for each in indexes]) 115 | #print([len(each) for each in indexes]) 116 | # each camera only interacts with its corresponding BEV queries. This step can greatly save GPU memory. 
117 | queries_rebatch = query.new_zeros( 118 | [bs, self.num_cams, max_len, self.embed_dims]) 119 | reference_points_rebatch = reference_points_cam.new_zeros( 120 | [bs, self.num_cams, max_len, D, 2]) 121 | 122 | #print(queries_rebatch.shape) 123 | #print(reference_points_rebatch.shape) 124 | #queries rebatch 2,6,sth,256 125 | #reference points rebatch 2,6,sth,4,2 126 | for j in range(bs): 127 | for i, reference_points_per_img in enumerate(reference_points_cam): 128 | index_query_per_img = indexes[i] 129 | queries_rebatch[j, i, :len(index_query_per_img)] = query[j, index_query_per_img] 130 | reference_points_rebatch[j, i, :len(index_query_per_img)] = reference_points_per_img[j, index_query_per_img] 131 | #print(torch.unique(reference_points_per_img[j, index_query_per_img])) 132 | 133 | num_cams, l, bs, embed_dims = key.shape 134 | 135 | key = key.permute(2, 0, 1, 3).reshape( 136 | bs * self.num_cams, l, self.embed_dims) 137 | value = value.permute(2, 0, 1, 3).reshape( 138 | bs * self.num_cams, l, self.embed_dims) 139 | 140 | queries = self.deformable_attention(query=queries_rebatch.view(bs*self.num_cams, max_len, self.embed_dims), query_pos = query_pos, 141 | reference_points=reference_points_rebatch.view(bs*self.num_cams, max_len, D, 2), input_flatten = value, 142 | input_spatial_shapes=spatial_shapes, input_level_start_index=level_start_index).view(bs, self.num_cams, max_len, self.embed_dims) 143 | for j in range(bs): 144 | for i, index_query_per_img in enumerate(indexes): 145 | slots[j, index_query_per_img] += queries[j, i, :len(index_query_per_img)] 146 | 147 | count = bev_mask.sum(-1) > 0 148 | count = count.permute(1, 2, 0).sum(-1) 149 | count = torch.clamp(count, min=1.0) 150 | slots = slots / count[..., None] 151 | slots = self.output_proj(slots) 152 | 153 | return self.dropout(slots) + inp_residual 154 | 155 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/cnndecoder.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Wed Aug 31 11:12:33 2022 5 | 6 | @author: Ekta 7 | """ 8 | 9 | import torch 10 | import torch.nn as nn 11 | import numpy as np 12 | 13 | class HeightMulticlassHead(nn.Sequential): 14 | 15 | def __init__(self, in_channels=16, out_channels=3, kernel_size=3, upsampling=1): 16 | conv2d = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, padding=kernel_size // 2) 17 | 18 | upsampling = nn.UpsamplingBilinear2d(scale_factor=upsampling) if upsampling > 1 else nn.Identity() 19 | #softmax = nn.Softmax() 20 | super().__init__(conv2d,upsampling) 21 | 22 | 23 | class HeightHead(nn.Sequential): 24 | 25 | def __init__(self, in_channels=16, out_channels=1, kernel_size=3, upsampling=1): 26 | conv2d = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, padding=kernel_size // 2) 27 | 28 | upsampling = nn.UpsamplingBilinear2d(scale_factor=upsampling) if upsampling > 1 else nn.Identity() 29 | relu = nn.ReLU() 30 | super().__init__(conv2d,relu,upsampling) 31 | 32 | class SegmentationHead(nn.Sequential): 33 | 34 | def __init__(self, in_channels=16, out_channels=5, kernel_size=3, upsampling=1): 35 | conv2d = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, padding=kernel_size // 2) 36 | sigmoid = nn.Sigmoid() 37 | upsampling = nn.UpsamplingBilinear2d(scale_factor=upsampling) if upsampling > 1 else nn.Identity() 38 | super().__init__(conv2d,upsampling) 39 | #super().__init__(conv2d, 
sigmoid,upsampling) 40 | 41 | 42 | class Conv2dReLU(nn.Sequential): 43 | def __init__( 44 | self, 45 | in_channels, 46 | out_channels, 47 | kernel_size, 48 | padding=0, 49 | stride=1, 50 | use_batchnorm=True, 51 | ): 52 | conv = nn.Conv2d( 53 | in_channels, 54 | out_channels, 55 | kernel_size, 56 | stride=stride, 57 | padding=padding, 58 | bias=not (use_batchnorm), 59 | ) 60 | relu = nn.ReLU(inplace=True) 61 | 62 | bn = nn.BatchNorm2d(out_channels) 63 | 64 | super(Conv2dReLU, self).__init__(conv, bn, relu) 65 | 66 | 67 | class DecoderBlock(nn.Module): 68 | def __init__( 69 | self, 70 | in_channels, 71 | out_channels, 72 | skip_channels=0, 73 | use_batchnorm=True, 74 | ): 75 | super().__init__() 76 | self.conv1 = Conv2dReLU( 77 | in_channels + skip_channels, 78 | out_channels, 79 | kernel_size=3, 80 | padding=1, 81 | use_batchnorm=use_batchnorm, 82 | ) 83 | self.conv2 = Conv2dReLU( 84 | out_channels, 85 | out_channels, 86 | kernel_size=3, 87 | padding=1, 88 | use_batchnorm=use_batchnorm, 89 | ) 90 | self.up = nn.UpsamplingBilinear2d(scale_factor=2) 91 | self.dropout = nn.Dropout(p=0.2) #new addition by me 92 | 93 | def forward(self, x, skip=None): 94 | x = self.up(x) 95 | if skip is not None: 96 | x = torch.cat([x, skip], dim=1) 97 | x = self.conv1(x) 98 | x = self.dropout(x) 99 | x = self.conv2(x) 100 | return x 101 | 102 | 103 | class DecoderCup(nn.Module): 104 | def __init__(self): 105 | super().__init__() 106 | self.hidden_size = 256 107 | self.decoder_channels = (128,64,16) 108 | self.head_channels = 512 109 | self.n_skip = 0 110 | self.skip_channels = [256,64,16] ##dummy 111 | self.conv_more = Conv2dReLU( 112 | self.hidden_size, 113 | self.head_channels, 114 | kernel_size=3, 115 | padding=1, 116 | use_batchnorm=True, 117 | ) 118 | decoder_channels = self.decoder_channels 119 | head_channels = self.head_channels 120 | in_channels = [head_channels] + list(decoder_channels[:-1]) 121 | out_channels = decoder_channels 122 | 123 | if self.n_skip != 0: 124 | skip_channels = self.skip_channels 125 | for i in range(4-self.n_skip): # re-select the skip channels according to n_skip 126 | skip_channels[3-i]=0 127 | 128 | else: 129 | skip_channels=[0,0,0] 130 | 131 | blocks = [ 132 | DecoderBlock(in_ch, out_ch, sk_ch) for in_ch, out_ch, sk_ch in zip(in_channels, out_channels, skip_channels) 133 | ] 134 | self.blocks = nn.ModuleList(blocks) 135 | 136 | def forward(self, hidden_states, features=None): 137 | B, n_patch, hidden = hidden_states.size() # reshape from (B, n_patch, hidden) to (B, h, w, hidden) 138 | #print(B, n_patch,hidden) 139 | h, w = int(np.sqrt(n_patch)), int(np.sqrt(n_patch)) 140 | x = hidden_states.permute(0, 2, 1) 141 | x = x.contiguous().view(B, hidden, h, w) 142 | x = self.conv_more(x) 143 | #print(x.shape) 144 | for i, decoder_block in enumerate(self.blocks): 145 | if features is not None: 146 | skip = features[i] if (i < self.n_skip) else None 147 | else: 148 | skip = None 149 | x = decoder_block(x, skip=skip) 150 | #print(x.shape) 151 | return x 152 | 153 | class UpSampleBlock(nn.Module): 154 | def __init__( 155 | self, 156 | in_channels, 157 | out_channels, 158 | use_batchnorm=True, 159 | ): 160 | super().__init__() 161 | self.conv1 = Conv2dReLU( 162 | in_channels, 163 | out_channels, 164 | kernel_size=3, 165 | padding=1, 166 | use_batchnorm=use_batchnorm, 167 | ) 168 | self.conv2 = Conv2dReLU( 169 | out_channels, 170 | out_channels, 171 | kernel_size=1, 172 | padding=0, 173 | use_batchnorm=use_batchnorm, 174 | ) 175 | self.up = nn.UpsamplingBilinear2d(scale_factor=2) 176 | 
177 | 178 | def forward(self, x, skip=None): 179 | x = self.conv1(x) 180 | x = self.conv2(x) 181 | x = self.up(x) 182 | return x 183 | 184 | class BEVUpSample(nn.Module): 185 | def __init__(self): 186 | super().__init__() 187 | in_channels = [256,128,64] 188 | out_channels = [128,64,16] 189 | blocks = [ 190 | UpSampleBlock(in_ch, out_ch) for in_ch, out_ch in zip(in_channels, out_channels) 191 | ] 192 | 193 | self.blocks = nn.ModuleList(blocks) 194 | self.dropout = nn.Dropout(p=0.2) 195 | self.final_head = HeightMulticlassHead() 196 | 197 | def forward(self,hidden_states,features=None): 198 | B, n_patch, hidden = hidden_states.size() # reshape from (B, n_patch, hidden) to (B, h, w, hidden) 199 | #print(B, n_patch,hidden) 200 | h, w = int(np.sqrt(n_patch)), int(np.sqrt(n_patch)) 201 | x = hidden_states.permute(0, 2, 1) 202 | x = x.contiguous().view(B, hidden, h, w) 203 | for i, up_block in enumerate(self.blocks): 204 | x = up_block(x) 205 | 206 | x = self.dropout(x) 207 | x = self.final_head(x) 208 | 209 | return x 210 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/test_f2bev_attn_mt.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Feb 23 14:05:34 2023 5 | 6 | @author: Ekta 7 | """ 8 | 9 | 10 | import torch,time,ntpath #cv2 11 | from torch import nn, optim 12 | import numpy as np 13 | from test_loader_multi_task import UnityImageDataset 14 | import os,fnmatch 15 | from torch.utils.data import DataLoader 16 | from model_f2bev_attn_mt import FisheyeBEVFormer 17 | import torchvision.transforms as T 18 | from torchmetrics.functional import jaccard_index 19 | from losses.focal import BinaryFocalLoss 20 | 21 | 22 | def numpy_sigmoid(x): 23 | return 1/(1 + np.exp(-x)) 24 | 25 | if not os.path.exists('./predictions/'): 26 | os.makedirs('./predictions/') 27 | 28 | if not os.path.exists('./predictions/f2bev_attn_mt/'): 29 | os.makedirs('./predictions/f2bev_attn_mt/') 30 | if not os.path.exists('./predictions/f2bev_attn_mt/bevfeatures'): 31 | os.makedirs('./predictions/f2bev_attn_mt/features') 32 | if not os.path.exists('./predictions/f2bev_attn_mt/predfull/'): 33 | os.makedirs('./predictions/f2bev_attn_mt/predfull/') 34 | if not os.path.exists('./predictions/f2bev_attn_mt/predfull/ce/'): 35 | os.makedirs('./predictions/f2bev_attn_mt/predfull/ce/') 36 | 37 | 38 | num_data_sequences = 1 39 | 40 | 41 | bev_dirs = ['./data/images'+str(i)+'/test/seg/bev' for i in range(num_data_sequences)] 42 | bev_depth_dirs = ['./data/images'+str(i)+'/test/depth' for i in range(num_data_sequences)] 43 | front_dirs = ['./data/images'+str(i)+'/test/rgb/front' for i in range(num_data_sequences)] 44 | left_dirs = ['./data/images'+str(i)+'/test/rgb/left' for i in range(num_data_sequences)] 45 | rear_dirs = ['./data/images'+str(i)+'/test/rgb/rear' for i in range(num_data_sequences)] 46 | right_dirs = ['./data/images'+str(i)+'/test/rgb/right' for i in range(num_data_sequences)] 47 | config_dirs = ['./data/images'+str(i)+'/test/cameraconfig' for i in range(num_data_sequences)] 48 | 49 | 50 | seq_len = 1 51 | 52 | image_lists = [] 53 | 54 | datalengths = [] 55 | 56 | for bev_dir in bev_dirs: 57 | names = fnmatch.filter(os.listdir(bev_dir), '*.png') 58 | 59 | files = [] 60 | for name in names: 61 | files.append(os.path.splitext(ntpath.basename(name))[0]) 62 | 63 | filelist = sorted(files,key=int) 64 | 65 | image_lists.append([f + '.png' for f in filelist]) 66 | 
datalengths.append(len(names)) 67 | 68 | 69 | 70 | transforms = torch.nn.Sequential(T.Resize((540,640)),) 71 | target_transforms = torch.nn.Sequential(T.Grayscale(num_output_channels=1)) 72 | 73 | test_data = UnityImageDataset(bev_dirs = bev_dirs, bev_depth_dirs = bev_depth_dirs, front_dirs=front_dirs,left_dirs=left_dirs,rear_dirs=rear_dirs,right_dirs=right_dirs,image_lists=image_lists,config_dirs=config_dirs,seq_len= seq_len,datalengths = datalengths,num_data_sequences = num_data_sequences, 74 | transform = transforms, target_transform= target_transforms) 75 | 76 | 77 | test_dataloader = DataLoader(test_data, batch_size=1, shuffle=False) 78 | 79 | 80 | device = "cuda" if torch.cuda.is_available() else "cpu" 81 | print(f"Using {device} device") 82 | 83 | model = FisheyeBEVFormer().to(device) 84 | 85 | checkpoint = torch.load('./f2bev_attn_mt.pt') 86 | 87 | model.load_state_dict(checkpoint['model_state_dict']) 88 | 89 | 90 | 91 | def test_temporal(test_dataloader,seq_len,model,loss_fn,image_list): 92 | num_batches = len(test_dataloader) 93 | model.eval() 94 | stest_loss = 0 95 | stest_iou = 0 96 | htest_loss = 0 97 | htest_iou = 0 98 | with torch.no_grad(): 99 | for batch_idx, (dataseq, targetseq) in enumerate(test_dataloader): 100 | inp_img_seq, can_buses_seq = dataseq 101 | stargetseq,htargetseq = targetseq 102 | bs = stargetseq[0].shape[0] 103 | for ctr in range(seq_len): 104 | front = inp_img_seq[ctr][0] 105 | left = inp_img_seq[ctr][1] 106 | rear = inp_img_seq[ctr][2] 107 | right = inp_img_seq[ctr][3] 108 | 109 | 110 | 111 | starget = stargetseq[ctr] 112 | htarget = htargetseq[ctr] 113 | 114 | front = front.to(device) 115 | left = left.to(device) 116 | rear = rear.to(device) 117 | right = right.to(device) 118 | 119 | starget = torch.squeeze(starget,dim=1) 120 | idx0 = torch.where(starget <= 0.02) 121 | starget[idx0] = 10 122 | idx1 = torch.where(starget <= 0.07) 123 | starget[idx1] = 11 124 | idx2 = torch.where(starget <= 0.22) 125 | starget[idx2] = 12 126 | idx3 = torch.where(starget <= 0.60) 127 | starget[idx3] = 13 128 | idx4 = torch.where(starget <=1) 129 | starget[idx4] = 14 130 | starget = starget - 10 131 | starget = starget.to(torch.int64).to(device) 132 | 133 | 134 | 135 | htarget = torch.squeeze(htarget,dim=1) 136 | idx2 = torch.where(htarget <= 0.35) 137 | idx0 = torch.where(htarget >= 0.69) 138 | htarget[htarget >= 0] = 1 139 | htarget[idx2] = 2 140 | htarget[idx0] = 0 141 | htarget = htarget.to(torch.int64).to(device) 142 | 143 | 144 | can_buses = can_buses_seq[ctr] 145 | 146 | 147 | if batch_idx == 0: 148 | prev_bev = None 149 | 150 | else: 151 | prev_bev = torch.Tensor([np.load('./predictions/f2bev_attn_mt/bevfeatures/'+image_list[batch_idx-1].split('.')[0]+'.npy')]).to(device) 152 | spred,_, hpred, _ ,for_prev_bev = model(front,left,rear,right, list(can_buses),prev_bev) 153 | 154 | for i,p in enumerate(for_prev_bev): 155 | np.save('./predictions/f2bev_attn_mt/bevfeatures/'+image_list[batch_idx].split('.')[0]+'.npy',p.detach().cpu().numpy()) 156 | for i,p in enumerate(hpred): 157 | np.save('./predictions/f2bev_attn_mt/predfull/ce/'+image_list[batch_idx].split('.')[0]+'h.npy',p.detach().cpu().numpy()) 158 | for i,p in enumerate(spred): 159 | np.save('./predictions/f2bev_attn_mt/predfull/ce/'+image_list[batch_idx].split('.')[0]+'s.npy',p.detach().cpu().numpy()) 160 | 161 | stest_loss += loss_fn(spred, starget).item() 162 | stest_iou += jaccard_index(spred,starget.to(spred.device),num_classes=5,average='none') 163 | htest_loss += loss_fn(hpred, htarget).item() 164 | 
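                    # per-class IoU (average='none') is accumulated per frame here and divided by the number of batches after the loop, so the printed segmentation/height values are per-class means over the test set.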
htest_iou += jaccard_index(hpred,htarget.to(hpred.device),num_classes=3,average='none') 165 | 166 | 167 | htest_loss/= num_batches*seq_len 168 | htest_iou /= num_batches 169 | stest_loss/= num_batches*seq_len 170 | stest_iou /= num_batches 171 | 172 | print(f"Test Error: Avg seg loss: {stest_loss:>8f} \n") 173 | print(f"Test Error: Avg ht loss: {htest_loss:>8f} \n") 174 | 175 | print("Segmentation: ", stest_iou) 176 | print("Height: ", htest_iou) 177 | return stest_loss,htest_loss 178 | 179 | all_images = [] 180 | for test_list in image_lists: 181 | all_images = all_images + test_list 182 | 183 | loss = nn.CrossEntropyLoss() #nn.MSELoss() #BinaryFocalLoss(alpha=0.25,gamma=2) 184 | test_temporal(test_dataloader,seq_len,model,loss,all_images) 185 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/test_f2bev_conv_mt.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Dec 22 11:47:19 2022 5 | 6 | @author: Ekta 7 | """ 8 | import torch,time,ntpath #cv2 9 | from torch import nn, optim 10 | import numpy as np 11 | from test_loader_multi_task import UnityImageDataset 12 | import os,fnmatch 13 | from torch.utils.data import DataLoader 14 | from model_f2bev_conv_mt import FisheyeBEVFormer 15 | import torchvision.transforms as T 16 | from torchmetrics.functional import jaccard_index 17 | from losses.focal import BinaryFocalLoss 18 | 19 | 20 | def numpy_sigmoid(x): 21 | return 1/(1 + np.exp(-x)) 22 | 23 | if not os.path.exists('./predictions/'): 24 | os.makedirs('./predictions/') 25 | 26 | if not os.path.exists('./predictions/f2bev_conv_mt/'): 27 | os.makedirs('./predictions/f2bev_conv_mt/') 28 | if not os.path.exists('./predictions/f2bev_conv_mt/bevfeatures'): 29 | os.makedirs('./predictions/f2bev_conv_mt/bevfeatures') 30 | if not os.path.exists('./predictions/f2bev_conv_mt/predfull/'): 31 | os.makedirs('./predictions/f2bev_conv_mt/predfull/') 32 | if not os.path.exists('./predictions/f2bev_conv_mt/predfull/ce/'): 33 | os.makedirs('./predictions/f2bev_conv_mt/predfull/ce/') 34 | 35 | 36 | num_data_sequences = 20 37 | 38 | bev_dirs = ['./data/images'+str(i)+'/test/seg/bev' for i in range(num_data_sequences)] 39 | bev_depth_dirs = ['./data/images'+str(i)+'/test/depth' for i in range(num_data_sequences)] 40 | front_dirs = ['./data/images'+str(i)+'/test/rgb/front' for i in range(num_data_sequences)] 41 | left_dirs = ['./data/images'+str(i)+'/test/rgb/left' for i in range(num_data_sequences)] 42 | rear_dirs = ['./data/images'+str(i)+'/test/rgb/rear' for i in range(num_data_sequences)] 43 | right_dirs = ['./data/images'+str(i)+'/test/rgb/right' for i in range(num_data_sequences)] 44 | config_dirs = ['./data/images'+str(i)+'/test/cameraconfig' for i in range(num_data_sequences)] 45 | 46 | 47 | seq_len = 1 48 | 49 | image_lists = [] 50 | 51 | datalengths = [] 52 | 53 | for bev_dir in bev_dirs: 54 | names = fnmatch.filter(os.listdir(bev_dir), '*.png') 55 | 56 | files = [] 57 | for name in names: 58 | files.append(os.path.splitext(ntpath.basename(name))[0]) 59 | 60 | filelist = sorted(files,key=int) 61 | 62 | image_lists.append([f + '.png' for f in filelist]) 63 | datalengths.append(len(names)) 64 | 65 | 66 | 67 | transforms = torch.nn.Sequential(T.Resize((540,640)),) 68 | target_transforms = torch.nn.Sequential(T.Grayscale(num_output_channels=1)) 69 | 70 | test_data = UnityImageDataset(bev_dirs = bev_dirs, bev_depth_dirs = bev_depth_dirs,
front_dirs=front_dirs,left_dirs=left_dirs,rear_dirs=rear_dirs,right_dirs=right_dirs,image_lists=image_lists,config_dirs=config_dirs,seq_len= seq_len,datalengths = datalengths,num_data_sequences = num_data_sequences, 71 | transform = transforms, target_transform= target_transforms) 72 | 73 | 74 | test_dataloader = DataLoader(test_data, batch_size=1, shuffle=False) 75 | 76 | 77 | device = "cuda" if torch.cuda.is_available() else "cpu" 78 | #device = "cpu" 79 | print(f"Using {device} device") 80 | 81 | model = FisheyeBEVFormer().to(device) 82 | 83 | checkpoint = torch.load('./f2bev_conv_mt.pt') 84 | 85 | model.load_state_dict(checkpoint['model_state_dict']) 86 | 87 | 88 | 89 | def test_temporal(test_dataloader,seq_len,model,loss_fn,image_list): 90 | num_batches = len(test_dataloader) 91 | model.eval() 92 | stest_loss = 0 93 | stest_iou = 0 94 | htest_loss = 0 95 | htest_iou = 0 96 | with torch.no_grad(): 97 | for batch_idx, (dataseq, targetseq) in enumerate(test_dataloader): 98 | inp_img_seq, can_buses_seq = dataseq 99 | stargetseq,htargetseq = targetseq 100 | bs = stargetseq[0].shape[0] 101 | for ctr in range(seq_len): 102 | front = inp_img_seq[ctr][0] 103 | left = inp_img_seq[ctr][1] 104 | rear = inp_img_seq[ctr][2] 105 | right = inp_img_seq[ctr][3] 106 | 107 | 108 | 109 | starget = stargetseq[ctr] 110 | htarget = htargetseq[ctr] 111 | 112 | front = front.to(device) 113 | left = left.to(device) 114 | rear = rear.to(device) 115 | right = right.to(device) 116 | 117 | starget = torch.squeeze(starget,dim=1) 118 | idx0 = torch.where(starget <= 0.02) 119 | starget[idx0] = 10 120 | idx1 = torch.where(starget <= 0.07) 121 | starget[idx1] = 11 122 | idx2 = torch.where(starget <= 0.22) 123 | starget[idx2] = 12 124 | idx3 = torch.where(starget <= 0.60) 125 | starget[idx3] = 13 126 | idx4 = torch.where(starget <=1) 127 | starget[idx4] = 14 128 | starget = starget - 10 129 | starget = starget.to(torch.int64).to(device) 130 | 131 | 132 | 133 | htarget = torch.squeeze(htarget,dim=1) 134 | idx2 = torch.where(htarget <= 0.35) 135 | idx0 = torch.where(htarget >= 0.69) 136 | htarget[htarget >= 0] = 1 137 | htarget[idx2] = 2 138 | htarget[idx0] = 0 139 | htarget = htarget.to(torch.int64).to(device) 140 | 141 | 142 | can_buses = can_buses_seq[ctr] 143 | 144 | 145 | if batch_idx == 0: 146 | prev_bev = None 147 | 148 | else: 149 | prev_bev = torch.Tensor([np.load('./predictions/f2bev_conv_mt/bevfeatures/'+image_list[batch_idx-1].split('.')[0]+'.npy')]).to(device) 150 | spred, hpred, for_prev_bev = model(front,left,rear,right, list(can_buses),prev_bev) 151 | 152 | 153 | for i,p in enumerate(for_prev_bev): 154 | np.save('./predictions/f2bev_conv_mt/bevfeatures/'+image_list[batch_idx].split('.')[0]+'.npy',p.detach().cpu().numpy()) 155 | for i,p in enumerate(hpred): 156 | np.save('./predictions/f2bev_conv_mt/predfull/ce/'+image_list[batch_idx].split('.')[0]+'h.npy',p.detach().cpu().numpy()) 157 | for i,p in enumerate(spred): 158 | np.save('./predictions/f2bev_conv_mt/predfull/ce/'+image_list[batch_idx].split('.')[0]+'s.npy',p.detach().cpu().numpy()) 159 | 160 | stest_loss += loss_fn(spred, starget).item() 161 | stest_iou += jaccard_index(spred,starget.to(spred.device),num_classes=5,average='none') 162 | htest_loss += loss_fn(hpred, htarget).item() 163 | htest_iou += jaccard_index(hpred,htarget.to(hpred.device),num_classes=3,average='none') 164 | 165 | 166 | htest_loss/= num_batches*seq_len 167 | htest_iou /= num_batches 168 | stest_loss/= num_batches*seq_len 169 | stest_iou /= num_batches 170 | 171 | print(f"Test 
Error: Avg seg loss: {stest_loss:>8f} \n") 172 | print(f"Test Error: Avg ht loss: {htest_loss:>8f} \n") 173 | 174 | print("Segmentation: ", stest_iou) 175 | print("Height: ", htest_iou) 176 | return stest_loss,htest_loss 177 | 178 | all_images = [] 179 | for test_list in image_lists: 180 | all_images = all_images + test_list 181 | 182 | loss = nn.CrossEntropyLoss() #nn.MSELoss() #BinaryFocalLoss(alpha=0.25,gamma=2) 183 | test_temporal(test_dataloader,seq_len,model,loss,all_images) 184 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/bifpn_configs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | Created on Wed Dec 28 11:35:28 2022 6 | 7 | @author: Ekta 8 | """ 9 | 10 | import itertools 11 | 12 | from omegaconf import OmegaConf 13 | 14 | 15 | def bifpn_config(min_level, max_level, weight_method=None): 16 | """BiFPN config. 17 | Adapted from https://github.com/google/automl/blob/56815c9986ffd4b508fe1d68508e268d129715c1/efficientdet/keras/fpn_configs.py 18 | """ 19 | p = OmegaConf.create() 20 | weight_method = weight_method or 'fastattn' 21 | 22 | num_levels = max_level - min_level + 1 23 | node_ids = {min_level + i: [i] for i in range(num_levels)} 24 | 25 | level_last_id = lambda level: node_ids[level][-1] 26 | level_all_ids = lambda level: node_ids[level] 27 | id_cnt = itertools.count(num_levels) 28 | 29 | p.nodes = [] 30 | for i in range(max_level - 1, min_level - 1, -1): 31 | # top-down path. 32 | p.nodes.append({ 33 | 'feat_level': i, 34 | 'inputs_offsets': [level_last_id(i), level_last_id(i + 1)], 35 | 'weight_method': weight_method, 36 | }) 37 | node_ids[i].append(next(id_cnt)) 38 | 39 | for i in range(min_level + 1, max_level + 1): 40 | # bottom-up path. 41 | p.nodes.append({ 42 | 'feat_level': i, 43 | 'inputs_offsets': level_all_ids(i) + [level_last_id(i - 1)], 44 | 'weight_method': weight_method, 45 | }) 46 | node_ids[i].append(next(id_cnt)) 47 | return p 48 | 49 | 50 | def panfpn_config(min_level, max_level, weight_method=None): 51 | """PAN FPN config. 52 | 53 | This defines FPN layout from Path Aggregation Networks as an alternate to 54 | BiFPN, it does not implement the full PAN spec. 55 | 56 | Paper: https://arxiv.org/abs/1803.01534 57 | """ 58 | p = OmegaConf.create() 59 | weight_method = weight_method or 'fastattn' 60 | 61 | num_levels = max_level - min_level + 1 62 | node_ids = {min_level + i: [i] for i in range(num_levels)} 63 | level_last_id = lambda level: node_ids[level][-1] 64 | id_cnt = itertools.count(num_levels) 65 | 66 | p.nodes = [] 67 | for i in range(max_level, min_level - 1, -1): 68 | # top-down path. 69 | offsets = [level_last_id(i), level_last_id(i + 1)] if i != max_level else [level_last_id(i)] 70 | p.nodes.append({ 71 | 'feat_level': i, 72 | 'inputs_offsets': offsets, 73 | 'weight_method': weight_method, 74 | }) 75 | node_ids[i].append(next(id_cnt)) 76 | 77 | for i in range(min_level, max_level + 1): 78 | # bottom-up path. 79 | offsets = [level_last_id(i), level_last_id(i - 1)] if i != min_level else [level_last_id(i)] 80 | p.nodes.append({ 81 | 'feat_level': i, 82 | 'inputs_offsets': offsets, 83 | 'weight_method': weight_method, 84 | }) 85 | node_ids[i].append(next(id_cnt)) 86 | 87 | return p 88 | 89 | 90 | def qufpn_config(min_level, max_level, weight_method=None): 91 | """A dynamic quad fpn config that can adapt to different min/max levels. 
92 | 93 | It extends the idea of BiFPN, and has four paths: 94 | (up_down -> bottom_up) + (bottom_up -> up_down). 95 | 96 | Paper: https://ieeexplore.ieee.org/document/9225379 97 | Ref code: From contribution to TF EfficientDet 98 | https://github.com/google/automl/blob/eb74c6739382e9444817d2ad97c4582dbe9a9020/efficientdet/keras/fpn_configs.py 99 | """ 100 | p = OmegaConf.create() 101 | weight_method = weight_method or 'fastattn' 102 | quad_method = 'fastattn' 103 | num_levels = max_level - min_level + 1 104 | node_ids = {min_level + i: [i] for i in range(num_levels)} 105 | level_last_id = lambda level: node_ids[level][-1] 106 | level_all_ids = lambda level: node_ids[level] 107 | level_first_id = lambda level: node_ids[level][0] 108 | id_cnt = itertools.count(num_levels) 109 | 110 | p.nodes = [] 111 | for i in range(max_level - 1, min_level - 1, -1): 112 | # top-down path 1. 113 | p.nodes.append({ 114 | 'feat_level': i, 115 | 'inputs_offsets': [level_last_id(i), level_last_id(i + 1)], 116 | 'weight_method': weight_method 117 | }) 118 | node_ids[i].append(next(id_cnt)) 119 | node_ids[max_level].append(node_ids[max_level][-1]) 120 | 121 | for i in range(min_level + 1, max_level): 122 | # bottom-up path 2. 123 | p.nodes.append({ 124 | 'feat_level': i, 125 | 'inputs_offsets': level_all_ids(i) + [level_last_id(i - 1)], 126 | 'weight_method': weight_method 127 | }) 128 | node_ids[i].append(next(id_cnt)) 129 | 130 | i = max_level 131 | p.nodes.append({ 132 | 'feat_level': i, 133 | 'inputs_offsets': [level_first_id(i)] + [level_last_id(i - 1)], 134 | 'weight_method': weight_method 135 | }) 136 | node_ids[i].append(next(id_cnt)) 137 | node_ids[min_level].append(node_ids[min_level][-1]) 138 | 139 | for i in range(min_level + 1, max_level + 1, 1): 140 | # bottom-up path 3. 141 | p.nodes.append({ 142 | 'feat_level': i, 143 | 'inputs_offsets': [ 144 | level_first_id(i), level_last_id(i - 1) if i != min_level + 1 else level_first_id(i - 1)], 145 | 'weight_method': weight_method 146 | }) 147 | node_ids[i].append(next(id_cnt)) 148 | node_ids[min_level].append(node_ids[min_level][-1]) 149 | 150 | for i in range(max_level - 1, min_level, -1): 151 | # top-down path 4. 152 | p.nodes.append({ 153 | 'feat_level': i, 154 | 'inputs_offsets': [node_ids[i][0]] + [node_ids[i][-1]] + [level_last_id(i + 1)], 155 | 'weight_method': weight_method 156 | }) 157 | node_ids[i].append(next(id_cnt)) 158 | i = min_level 159 | p.nodes.append({ 160 | 'feat_level': i, 161 | 'inputs_offsets': [node_ids[i][0]] + [level_last_id(i + 1)], 162 | 'weight_method': weight_method 163 | }) 164 | node_ids[i].append(next(id_cnt)) 165 | node_ids[max_level].append(node_ids[max_level][-1]) 166 | 167 | # NOTE: the order of the quad path is reversed from the original, my code expects the output of 168 | # each FPN repeat to be same as input from backbone, in order of increasing reductions 169 | for i in range(min_level, max_level + 1): 170 | # quad-add path. 
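        # node_ids[i][2] and node_ids[i][4] hold each level's most recent feature id after the second and fourth traversal passes (boundary levels carry duplicated ids so every level ends up with five entries); this final node fuses those two maps, one ending bottom-up and one ending top-down.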
171 | p.nodes.append({ 172 | 'feat_level': i, 173 | 'inputs_offsets': [node_ids[i][2], node_ids[i][4]], 174 | 'weight_method': quad_method 175 | }) 176 | node_ids[i].append(next(id_cnt)) 177 | 178 | return p 179 | 180 | 181 | def get_fpn_config(fpn_name, min_level=3, max_level=7): 182 | if not fpn_name: 183 | fpn_name = 'bifpn_fa' 184 | name_to_config = { 185 | 'bifpn_sum': bifpn_config(min_level=min_level, max_level=max_level, weight_method='sum'), 186 | 'bifpn_attn': bifpn_config(min_level=min_level, max_level=max_level, weight_method='attn'), 187 | 'bifpn_fa': bifpn_config(min_level=min_level, max_level=max_level, weight_method='fastattn'), 188 | 'pan_sum': panfpn_config(min_level=min_level, max_level=max_level, weight_method='sum'), 189 | 'pan_fa': panfpn_config(min_level=min_level, max_level=max_level, weight_method='fastattn'), 190 | 'qufpn_sum': qufpn_config(min_level=min_level, max_level=max_level, weight_method='sum'), 191 | 'qufpn_fa': qufpn_config(min_level=min_level, max_level=max_level, weight_method='fastattn'), 192 | } 193 | return name_to_config[fpn_name] 194 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/loader_single_task.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Wed Dec 21 12:15:36 2022 5 | 6 | @author: Ekta 7 | """ 8 | 9 | import torch 10 | from torch.utils.data import Dataset 11 | #from torchvision import datasets 12 | #from torchvision.transforms import ToTensor 13 | #import matplotlib.pyplot as plt 14 | #from torch.utils.data import DataLoader 15 | import numpy as np 16 | import os#,fnmatch 17 | from torchvision.io import read_image 18 | import random 19 | class UnityImageDataset(Dataset): 20 | def __init__(self, bev_dirs, front_dirs, left_dirs, rear_dirs, right_dirs, image_lists, config_dirs, seq_len, datalengths, num_data_sequences, transform=None, target_transform=None): 21 | self.bev_dirs = bev_dirs 22 | self.front_dirs = front_dirs 23 | self.left_dirs = left_dirs 24 | self.rear_dirs = rear_dirs 25 | self.right_dirs = right_dirs 26 | self.image_lists = image_lists 27 | self.config_dirs = config_dirs 28 | self.transform = transform 29 | self.target_transform = target_transform 30 | self.seq_len = seq_len 31 | self.datalengths = datalengths 32 | self.num_data_sequences = num_data_sequences 33 | 34 | def __len__(self): 35 | total = 0 36 | for count in self.datalengths: 37 | total = total + count 38 | return total 39 | 40 | def find_which_sequence(self,idx): 41 | 42 | eff_data_lens = [x for x in self.datalengths] 43 | 44 | # seq_idx = 0 45 | # if idx > -1 and idx < eff_data_lens[0]: 46 | # seq_idx = 0 47 | # elif idx > eff_data_lens[0] -1 and idx < (eff_data_lens[0] + eff_data_lens[1]) : 48 | # seq_idx = 1 49 | # elif idx > (eff_data_lens[0] + eff_data_lens[1]) - 1 and idx < (eff_data_lens[0] + eff_data_lens[1] + eff_data_lens[2]) : 50 | # seq_idx = 2 51 | # elif idx > (eff_data_lens[0] + eff_data_lens[1] + eff_data_lens[2]) - 1 and idx < (eff_data_lens[0] + eff_data_lens[1] + eff_data_lens[2] + eff_data_lens[3]): 52 | # seq_idx = 3 53 | # else: 54 | # raise NotImplementedError 55 | 56 | currptr = 0 57 | nextptr = eff_data_lens[0] 58 | 59 | for i in range(self.num_data_sequences): 60 | if i == 0: 61 | currptr = 0 62 | nextptr = eff_data_lens[0] 63 | 64 | if idx > currptr -1 and idx < nextptr: 65 | seq_idx = 0 66 | else: 67 | currptr = sum(eff_data_lens[:i]) 68 | nextptr = 
sum(eff_data_lens[:i+1]) 69 | if idx > currptr -1 and idx < nextptr: 70 | seq_idx = i 71 | 72 | 73 | # if idx > -1 and idx < eff_data_lens[0]: 74 | # seq_idx = 0 75 | # elif idx > eff_data_lens[0] -1 and idx < (eff_data_lens[0] + eff_data_lens[1]) : 76 | # seq_idx = 1 77 | # elif idx > (eff_data_lens[0] + eff_data_lens[1]) - 1 and idx < (eff_data_lens[0] + eff_data_lens[1] + eff_data_lens[2]) : 78 | # seq_idx = 2 79 | # elif idx > (eff_data_lens[0] + eff_data_lens[1] + eff_data_lens[2]) - 1 and idx < (eff_data_lens[0] + eff_data_lens[1] + eff_data_lens[2] + eff_data_lens[3]): 80 | # seq_idx = 3 81 | # else: 82 | # raise NotImplementedError 83 | 84 | 85 | #print(idx, seq_idx) 86 | return seq_idx 87 | 88 | def get_id_in_seq(self,seq_idx,idx): 89 | eff_data_lens = [x for x in self.datalengths] 90 | 91 | # if seq_idx == 0: 92 | # subtract = 0 93 | # elif seq_idx == 1: 94 | # subtract = eff_data_lens[0] 95 | # elif seq_idx == 2: 96 | # subtract = eff_data_lens[0] + eff_data_lens[1] 97 | # elif seq_idx == 3: 98 | # subtract = eff_data_lens[0] + eff_data_lens[1] + eff_data_lens[2] 99 | 100 | if seq_idx == 0: 101 | subtract = 0 102 | else: 103 | subtract = sum(eff_data_lens[:seq_idx]) 104 | return idx - subtract 105 | 106 | def read_config_for_bevposrot(self,configdir,filename): 107 | with open(os.path.join(configdir, filename)) as f: 108 | lines = f.readlines() 109 | 110 | bpos = [float(lines[5].split(',')[1]),float(lines[5].split(',')[2]),float(lines[5].split(',')[3])] 111 | brot = [float(lines[5].split(',')[4]),float(lines[5].split(',')[5]),float(lines[5].split(',')[6])] 112 | return [bpos,brot] 113 | 114 | 115 | def __getitem__(self, idx): 116 | 117 | seq_idx = self.find_which_sequence(idx) 118 | 119 | bev_dir = self.bev_dirs[seq_idx] 120 | image_list = self.image_lists[seq_idx] 121 | front_dir = self.front_dirs[seq_idx] 122 | left_dir = self.left_dirs[seq_idx] 123 | rear_dir = self.rear_dirs[seq_idx] 124 | right_dir = self.right_dirs[seq_idx] 125 | config_dir = self.config_dirs[seq_idx] 126 | 127 | idinseq = self.get_id_in_seq(seq_idx,idx) 128 | 129 | return_images_tensor = [] 130 | return_target = [] 131 | return_can_bus = [] 132 | ##first image 133 | 134 | index_list = list(range(idinseq-self.seq_len, idinseq)) 135 | random.shuffle(index_list) 136 | index_list = sorted(index_list[1:]) 137 | index_list.append(idinseq) 138 | 139 | for idxctr,cidx in enumerate(index_list): 140 | cidx = max(0, cidx) 141 | tar_path = os.path.join(bev_dir, image_list[cidx]) 142 | tar = read_image(tar_path)[:,100:~99,100:~99] 143 | tar = torch.mul(tar.float(),1/255) 144 | if self.target_transform: 145 | tar = self.target_transform(tar) 146 | inp = [] 147 | for cam_views in [front_dir, left_dir, rear_dir, right_dir]: 148 | img_path = os.path.join(cam_views, image_list[cidx]) 149 | image = read_image(img_path) 150 | if image.shape[0] == 4: 151 | image = image[0:3,:,:] 152 | image = torch.mul(image.float(),1/255) 153 | # if self.transform: 154 | # image = self.transform(image) 155 | inp.append(image) 156 | 157 | [bpos,brot] = self.read_config_for_bevposrot(config_dir,image_list[cidx].split('.')[0]+'.txt') 158 | 159 | 160 | can_bus = np.zeros((5,)) 161 | if idxctr == 0: 162 | #pos 163 | can_bus[0] = 0 164 | can_bus[1] = 0 165 | can_bus[2] = 0 166 | #angle 167 | can_bus[3] = brot[1] #(90 - bevrot[num,1])/180*np.pi ##ego_angle is kept unchanged .. i.e. 
no delta ##before that 270 - 168 | can_bus[4] = 0 169 | 170 | else: 171 | can_bus[0] = bpos[0] - return_can_bus[idxctr-1][0] 172 | can_bus[1] = bpos[2] - return_can_bus[idxctr-1][2] 173 | can_bus[2] = bpos[1] - return_can_bus[idxctr-1][1] 174 | can_bus[3] = brot[1] 175 | 176 | can_bus[4] = brot[1] - return_can_bus[idxctr-1][3] 177 | 178 | 179 | return_images_tensor.append(torch.stack(inp)) 180 | return_target.append(tar) 181 | return_can_bus.append(can_bus) 182 | 183 | 184 | if self.transform: 185 | return_images_tensor = self.transform(torch.cat(return_images_tensor, dim=0)) 186 | 187 | #return_images = [list(torch.split(x, 4)) for x in list(torch.split(return_images, self.seq_len))] #because 4 camera views 188 | return_images = [] 189 | for frameidx in range(self.seq_len): 190 | inp = [] 191 | for camnum in range(4): #4 cam views 192 | inp.append(return_images_tensor[(4*frameidx) + camnum, :,:,:]) 193 | return_images.append(inp) 194 | 195 | #return torch.cat((inp[0],inp[1],inp[2],inp[3]),axis=0), tar 196 | return [return_images,return_can_bus], return_target 197 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/test_loader_single_task.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Dec 22 11:53:01 2022 5 | 6 | @author: Ekta 7 | """ 8 | 9 | 10 | import torch 11 | from torch.utils.data import Dataset 12 | #from torchvision import datasets 13 | #from torchvision.transforms import ToTensor 14 | #import matplotlib.pyplot as plt 15 | #from torch.utils.data import DataLoader 16 | import numpy as np 17 | import os#,fnmatch 18 | from torchvision.io import read_image 19 | import random 20 | 21 | ##this will have seq len = 1 22 | 23 | class UnityImageDataset(Dataset): 24 | def __init__(self, bev_dirs, front_dirs, left_dirs, rear_dirs, right_dirs, image_lists, config_dirs, seq_len, datalengths, num_data_sequences, transform=None, target_transform=None): 25 | self.bev_dirs = bev_dirs 26 | self.front_dirs = front_dirs 27 | self.left_dirs = left_dirs 28 | self.rear_dirs = rear_dirs 29 | self.right_dirs = right_dirs 30 | self.image_lists = image_lists 31 | self.config_dirs = config_dirs 32 | self.transform = transform 33 | self.target_transform = target_transform 34 | self.seq_len = seq_len 35 | self.datalengths = datalengths 36 | self.num_data_sequences = num_data_sequences 37 | 38 | def __len__(self): 39 | total = 0 40 | for count in self.datalengths: 41 | total = total + count 42 | return total 43 | 44 | def find_which_sequence(self,idx): 45 | 46 | eff_data_lens = [x for x in self.datalengths] 47 | 48 | # seq_idx = 0 49 | # if idx > -1 and idx < eff_data_lens[0]: 50 | # seq_idx = 0 51 | # elif idx > eff_data_lens[0] -1 and idx < (eff_data_lens[0] + eff_data_lens[1]) : 52 | # seq_idx = 1 53 | # elif idx > (eff_data_lens[0] + eff_data_lens[1]) - 1 and idx < (eff_data_lens[0] + eff_data_lens[1] + eff_data_lens[2]) : 54 | # seq_idx = 2 55 | # elif idx > (eff_data_lens[0] + eff_data_lens[1] + eff_data_lens[2]) - 1 and idx < (eff_data_lens[0] + eff_data_lens[1] + eff_data_lens[2] + eff_data_lens[3]): 56 | # seq_idx = 3 57 | # else: 58 | # raise NotImplementedError 59 | 60 | currptr = 0 61 | nextptr = eff_data_lens[0] 62 | 63 | for i in range(self.num_data_sequences): 64 | if i == 0: 65 | currptr = 0 66 | nextptr = eff_data_lens[0] 67 | 68 | if idx > currptr -1 and idx < nextptr: 69 | seq_idx = 0 70 | else: 71 | currptr = 
sum(eff_data_lens[:i]) 72 | nextptr = sum(eff_data_lens[:i+1]) 73 | if idx > currptr -1 and idx < nextptr: 74 | seq_idx = i 75 | 76 | 77 | # if idx > -1 and idx < eff_data_lens[0]: 78 | # seq_idx = 0 79 | # elif idx > eff_data_lens[0] -1 and idx < (eff_data_lens[0] + eff_data_lens[1]) : 80 | # seq_idx = 1 81 | # elif idx > (eff_data_lens[0] + eff_data_lens[1]) - 1 and idx < (eff_data_lens[0] + eff_data_lens[1] + eff_data_lens[2]) : 82 | # seq_idx = 2 83 | # elif idx > (eff_data_lens[0] + eff_data_lens[1] + eff_data_lens[2]) - 1 and idx < (eff_data_lens[0] + eff_data_lens[1] + eff_data_lens[2] + eff_data_lens[3]): 84 | # seq_idx = 3 85 | # else: 86 | # raise NotImplementedError 87 | 88 | 89 | #print(idx, seq_idx) 90 | return seq_idx 91 | 92 | def get_id_in_seq(self,seq_idx,idx): 93 | eff_data_lens = [x for x in self.datalengths] 94 | 95 | # if seq_idx == 0: 96 | # subtract = 0 97 | # elif seq_idx == 1: 98 | # subtract = eff_data_lens[0] 99 | # elif seq_idx == 2: 100 | # subtract = eff_data_lens[0] + eff_data_lens[1] 101 | # elif seq_idx == 3: 102 | # subtract = eff_data_lens[0] + eff_data_lens[1] + eff_data_lens[2] 103 | 104 | if seq_idx == 0: 105 | subtract = 0 106 | else: 107 | subtract = sum(eff_data_lens[:seq_idx]) 108 | return idx - subtract 109 | 110 | def read_config_for_bevposrot(self,configdir,filename): 111 | with open(os.path.join(configdir, filename)) as f: 112 | lines = f.readlines() 113 | 114 | bpos = [float(lines[5].split(',')[1]),float(lines[5].split(',')[2]),float(lines[5].split(',')[3])] 115 | brot = [float(lines[5].split(',')[4]),float(lines[5].split(',')[5]),float(lines[5].split(',')[6])] 116 | return [bpos,brot] 117 | 118 | 119 | def __getitem__(self, idx): 120 | 121 | seq_idx = self.find_which_sequence(idx) 122 | 123 | bev_dir = self.bev_dirs[seq_idx] 124 | image_list = self.image_lists[seq_idx] 125 | front_dir = self.front_dirs[seq_idx] 126 | left_dir = self.left_dirs[seq_idx] 127 | rear_dir = self.rear_dirs[seq_idx] 128 | right_dir = self.right_dirs[seq_idx] 129 | config_dir = self.config_dirs[seq_idx] 130 | 131 | idinseq = self.get_id_in_seq(seq_idx,idx) 132 | 133 | return_images_tensor = [] 134 | return_target = [] 135 | return_can_bus = [] 136 | ##first image 137 | 138 | index_list = list(range(idinseq-self.seq_len, idinseq)) 139 | random.shuffle(index_list) 140 | index_list = sorted(index_list[1:]) 141 | index_list.append(idinseq) 142 | 143 | for idxctr,cidx in enumerate(index_list): 144 | cidx = max(0, cidx) 145 | tar_path = os.path.join(bev_dir, image_list[cidx]) 146 | tar = read_image(tar_path)[:,100:~99,100:~99] 147 | tar = torch.mul(tar.float(),1/255) 148 | if self.target_transform: 149 | tar = self.target_transform(tar) 150 | inp = [] 151 | for cam_views in [front_dir, left_dir, rear_dir, right_dir]: 152 | img_path = os.path.join(cam_views, image_list[cidx]) 153 | image = read_image(img_path) 154 | image = torch.mul(image.float(),1/255) 155 | # if self.transform: 156 | # image = self.transform(image) 157 | inp.append(image) 158 | 159 | [bpos,brot] = self.read_config_for_bevposrot(config_dir,image_list[cidx].split('.')[0]+'.txt') 160 | 161 | can_bus = np.zeros((5,)) 162 | if cidx == 0: 163 | #pos 164 | can_bus[0] = 0 165 | can_bus[1] = 0 166 | can_bus[2] = 0 167 | #angle 168 | can_bus[3] = brot[1] #(90 - bevrot[num,1])/180*np.pi ##ego_angle is kept unchanged .. i.e. 
no delta ##before that 270 - 169 | can_bus[4] = 0 170 | 171 | else: 172 | [prev_bpos,prev_brot] = self.read_config_for_bevposrot(config_dir,image_list[cidx-1].split('.')[0]+'.txt') 173 | 174 | can_bus[0] = bpos[0] - prev_bpos[0] 175 | can_bus[1] = bpos[2] - prev_bpos[2] 176 | can_bus[2] = bpos[1] - prev_bpos[1] 177 | can_bus[3] = brot[1] 178 | 179 | can_bus[4] = brot[1] - prev_brot[1] 180 | 181 | return_images_tensor.append(torch.stack(inp)) 182 | return_target.append(tar) 183 | return_can_bus.append(can_bus) 184 | 185 | 186 | if self.transform: 187 | return_images_tensor = self.transform(torch.cat(return_images_tensor, dim=0)) 188 | 189 | #return_images = [list(torch.split(x, 4)) for x in list(torch.split(return_images, self.seq_len))] #because 4 camera views 190 | return_images = [] 191 | for frameidx in range(self.seq_len): 192 | inp = [] 193 | for camnum in range(4): #4 cam views 194 | inp.append(return_images_tensor[(4*frameidx) + camnum, :,:,:]) 195 | return_images.append(inp) 196 | 197 | #return torch.cat((inp[0],inp[1],inp[2],inp[3]),axis=0), tar 198 | return [return_images,return_can_bus], return_target 199 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/pre_computation/computeNormalizedReferencePoints.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Feb 2 10:27:02 2023 5 | 6 | @author: smartslab 7 | """ 8 | 9 | import cv2,fnmatch,os 10 | import numpy as np 11 | import matplotlib.pyplot as plt 12 | from scipy.spatial.transform import Rotation as R 13 | 14 | 15 | 16 | files = fnmatch.filter(os.listdir('./forward_looking_camera_model/data/bev'),'*.png') 17 | 18 | 19 | 20 | def load_yaml(filename): 21 | content = cv2.FileStorage(filename, cv2.FILE_STORAGE_READ) 22 | K = content.getNode("K").mat() 23 | D = content.getNode("D").mat() 24 | xi = content.getNode("xi").mat() 25 | return K,D,xi 26 | 27 | 28 | def convertPoseFromUnityToOpenCV(pose): 29 | opencvpose = {} 30 | for key,value in pose.items(): 31 | # ======================= UnityToOpenCV: Flip y axis ======================== 32 | opencvpose[key] = [[-1,1,-1]*value[0] , [1,-1,1]*value[1]] 33 | # =========================================================================== 34 | return opencvpose 35 | 36 | 37 | def convertOpenCVPoseToRvecTvec(opencvpose): 38 | extrinsics = {} 39 | for key,value in opencvpose.items(): 40 | rot = value[0] 41 | # ====================== Use "ZYX" for extrinsic rotation =================== 42 | intermediater = R.from_euler('ZYX',[rot[2], rot[1], rot[0]]) 43 | # =========================================================================== 44 | rotmat = intermediater.as_matrix() 45 | # ========================= cam2world -> world2cam ========================== 46 | ''' 47 | cam2world: R, t 48 | world2cam: R', t' 49 | R' = R.T 50 | t' = - R.T @ t 51 | ''' 52 | rvec,_ = cv2.Rodrigues(rotmat.T) 53 | tvec = -rotmat.T @ value[1].reshape(3,1) 54 | # =========================================================================== 55 | extrinsics[key] = [rvec,tvec] 56 | return extrinsics 57 | 58 | 59 | def computeRealWorldLocationOfBEVPixels(h,w, resolution,height_anchors,scale,offset): 60 | z = np.arange(0, h, 1) 61 | x = np.arange(0, w, 1) 62 | zprime = ((offset/scale)+h/2-z)*resolution[1] # offset for unity...unity origin at mid of the car rear and on the ground 63 | # ============================================================================== 64 | 
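    # BEV grid indices are converted to metric world coordinates here: z (above) runs forward from the Unity origin at the middle of the rear wheel axis on the ground, x (below) runs laterally from the grid centre, and both are scaled by the metres-per-pixel resolution.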
xprime = (x-w/2)*resolution[0] 65 | # ============================================================================== 66 | 67 | worldpoints = [] 68 | bevpointsforworldpoints = [] 69 | for j,tz in enumerate(zprime): 70 | for i,tx in enumerate(xprime): 71 | for ty in height_anchors: 72 | worldpoints.append([tx,-ty,tz]) # because opencv right handed coordinate system has x,z same as unity but y is downward 73 | bevpointsforworldpoints.append([x[i],z[j]]) 74 | 75 | return np.asarray(worldpoints),np.asarray(bevpointsforworldpoints) 76 | 77 | 78 | 79 | def getValidProjectPoints(imgpoints,validfisheyemask): 80 | validpointidxes = [] 81 | validimgpoints = [] 82 | inpfisheyeshape = validfisheyemask.shape 83 | imgpoints = imgpoints.astype(int) 84 | for i in range(len(imgpoints)): 85 | loc = imgpoints[i,:] 86 | if loc[0] > 0 and loc[0] < inpfisheyeshape[1]: 87 | if loc[1] > 0 and loc[1] < inpfisheyeshape[0]: 88 | if validfisheyemask[loc[1],loc[0]] == 255: 89 | validimgpoints.append(loc) 90 | validpointidxes.append(i) 91 | return np.asarray(validimgpoints).astype(int),validpointidxes 92 | 93 | 94 | 95 | 96 | unitypose = {} 97 | unitypose['front'] = [(np.pi/180) *np.asarray([26,0,0]),np.asarray([0,0.406,3.873])] ##xyz as per unity 98 | unitypose['left'] = [(np.pi/180) *np.asarray([0,-90,0]),np.asarray([-1.024,0.8,2.053])] 99 | unitypose['rear'] = [(np.pi/180) *np.asarray([3,180,0]),np.asarray([0.132,0.744,-1.001])] 100 | unitypose['right'] = [(np.pi/180) *np.asarray([0,90,0]),np.asarray([1.015,0.801,2.04])] 101 | 102 | opencvpose = convertPoseFromUnityToOpenCV(unitypose) 103 | extrinsics = convertOpenCVPoseToRvecTvec(opencvpose) 104 | 105 | for file in files[0:1]: 106 | 107 | 108 | unity_offset_for_orgin = 33 #pixels 56 is ffset for unity...unity origin at mid of the car rear wheel axis and on the ground 109 | #this 56 is considering 600*600 BEV. If I resize it this will change. 
If I crop it, it will not change 110 | 111 | batch_size = 1 112 | bev_h = 400 #height of the Unity generated bev i consider (if i crop then consider cropped size) 113 | bev_w = 400 #width of the Unity generated bev i consider (if i crop then consider cropped size) 114 | 115 | bh = 50 116 | bw = 50 117 | bev_scale = int(bev_h/bh) ##600/50 118 | bevformer_bev_size = (bh,bw) 119 | 120 | 121 | K,D,xi = load_yaml('./forward_looking_camera_model/flcw_unity.yml') 122 | #resolution = bev_scale*np.asarray([0.036,0.042]) 123 | resolution = bev_scale*np.asarray([2/48,2/48]) ## resolution: 48 pixels is 2m 124 | bev_mask = [] 125 | 126 | height_anchors = [0-0.377, 0.25 -0.377,1.8 - 0.377] #[0, 0.25,1.8] #[0, 1.5, 3, 4.5] # in meters 127 | 128 | bev_mask = [] 129 | reference_points_cam = [] 130 | for camtype in ['front','left','rear','right']: 131 | 132 | img = cv2.imread('./forward_looking_camera_model/data/'+camtype+'/'+file) 133 | imgrgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 134 | h,w,_ = img.shape 135 | validfisheyemask = np.load('./forward_looking_camera_model/masks/'+camtype+'.npy') 136 | 137 | if camtype == 'front' or camtype=='rear': 138 | validfisheyemask = np.flip(validfisheyemask) 139 | 140 | view_bev_mask = [] 141 | view_reference_points_cam = [] 142 | 143 | for height in height_anchors: 144 | 145 | worldpoints,bevptsforworldpoints = computeRealWorldLocationOfBEVPixels(bh,bw, resolution,[height],bev_scale,unity_offset_for_orgin) 146 | worldpoints = np.expand_dims(worldpoints, 0) 147 | 148 | imgpoints,_ = cv2.omnidir.projectPoints(worldpoints, extrinsics[camtype][0], extrinsics[camtype][1], K, xi[0,0], D) 149 | validimgpoints,valididxes = getValidProjectPoints(imgpoints[0,:,:], validfisheyemask) 150 | 151 | ## new addition 152 | filtered_valididxes = [] 153 | for idx in valididxes: 154 | 155 | bloc = bevptsforworldpoints[idx,:] 156 | #print(bloc) 157 | if (camtype == 'front' and bloc[1] < 25) or (camtype == 'left' and bloc[0] < 25) or (camtype == 'rear' and bloc[1] > 25) or (camtype == 'right' and bloc[0] > 25): 158 | filtered_valididxes.append(idx) 159 | 160 | 161 | curr_bev_mask = np.zeros((bh*bw,)) 162 | 163 | for fidx in filtered_valididxes: 164 | curr_bev_mask[fidx] = 1 165 | 166 | imgpoints[0,:,0] = imgpoints[0,:,0].astype(float)/w ##normalize 167 | imgpoints[0,:,1] = imgpoints[0,:,1].astype(float)/h ##normalize 168 | 169 | plt.figure() 170 | plt.imshow(np.reshape(curr_bev_mask,(50,50))) 171 | 172 | view_bev_mask.append(curr_bev_mask) 173 | view_reference_points_cam.append(imgpoints[0,:,:]) 174 | 175 | view_bev_mask = np.expand_dims(np.transpose(np.asarray(view_bev_mask)), axis = 0) 176 | view_reference_points_cam = np.transpose(np.asarray(view_reference_points_cam)) 177 | view_reference_points_cam = np.transpose(view_reference_points_cam,[1,2,0]) 178 | view_reference_points_cam = np.expand_dims(view_reference_points_cam, axis = 0) 179 | 180 | bev_mask.append(view_bev_mask) 181 | reference_points_cam.append(view_reference_points_cam) 182 | 183 | bev_mask = np.tile(np.asarray(bev_mask),(1,batch_size,1,1)) 184 | reference_points_cam = np.tile(np.asarray(reference_points_cam),(1,batch_size,1,1,1)) 185 | 186 | np.save('./unity_data/bev_mask.npy',np.array(bev_mask,dtype=bool)) 187 | np.save('./unity_data/reference_points_cam.npy',reference_points_cam) 188 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/train_f2bev_conv_st_height.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 
2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue Dec 13 16:44:04 2022 5 | 6 | @author: Ekta 7 | """ 8 | 9 | 10 | import torch,ntpath,random 11 | from torch import nn, optim 12 | import numpy as np 13 | from loader_single_task import UnityImageDataset 14 | 15 | import os,fnmatch 16 | from torch.utils.data import DataLoader 17 | from model_f2bev_conv_st_height import FisheyeBEVFormer 18 | import torchvision.transforms as T 19 | from losses.focal import BinaryFocalLoss, CrossEntropyFocalLoss #FocalLoss 20 | 21 | def numpy_sigmoid(x): 22 | return 1/(1 + np.exp(-x)) 23 | 24 | 25 | 26 | def gamma_correction(image): 27 | gamma = random.choice([0.8,1.0,1.2,1.4]) 28 | return T.functional.adjust_gamma(image,gamma,gain = 1) 29 | 30 | def plt_pred_image(pred): 31 | p = pred.detach().cpu().numpy() 32 | #zero = numpy_sigmoid(p[0,:,:]) 33 | one = numpy_sigmoid(p[1,:,:]) 34 | two = numpy_sigmoid(p[2,:,:]) 35 | 36 | show = np.zeros((p.shape[1],p.shape[2])) 37 | show[np.where(one > 0.5)] = 1 38 | show[np.where(two > 0.5)] = 2 39 | 40 | return show 41 | 42 | 43 | num_data_sequences = 20 44 | 45 | 46 | bev_dirs = ['./data/images'+str(i)+'/train/depth' for i in range(num_data_sequences)] 47 | front_dirs = ['./data/images'+str(i)+'/train/rgb/front' for i in range(num_data_sequences)] 48 | left_dirs = ['./data/images'+str(i)+'/train/rgb/left' for i in range(num_data_sequences)] 49 | rear_dirs = ['./data/images'+str(i)+'/train/rgb/rear' for i in range(num_data_sequences)] 50 | right_dirs = ['./data/images'+str(i)+'/train/rgb/right' for i in range(num_data_sequences)] 51 | config_dirs = ['./data/images'+str(i)+'/train/cameraconfig' for i in range(num_data_sequences)] 52 | 53 | seq_len = 3 54 | 55 | image_lists = [] 56 | datalengths = [] 57 | 58 | for bev_dir in bev_dirs: 59 | names = fnmatch.filter(os.listdir(bev_dir), '*.png') 60 | 61 | files = [] 62 | for name in names: 63 | files.append(os.path.splitext(ntpath.basename(name))[0]) 64 | 65 | filelist = sorted(files,key=int) 66 | 67 | image_lists.append([f + '.png' for f in filelist]) 68 | datalengths.append(len(names)) 69 | 70 | vtransforms = T.Compose([T.Resize((540,640))]) #,GammaCorrectionTransform()) 71 | 72 | transforms = T.Compose([T.Resize((540,640)), T.ColorJitter(brightness=.3, hue=.3, contrast = 0.2, saturation = 0.3), T.Lambda(gamma_correction) ]) #,GammaCorrectionTransform()) 73 | 74 | target_transforms = T.Compose([T.Grayscale(num_output_channels=1)]) 75 | 76 | training_data = UnityImageDataset(bev_dirs = bev_dirs,front_dirs=front_dirs,left_dirs=left_dirs,rear_dirs=rear_dirs,right_dirs=right_dirs,image_lists=image_lists,config_dirs=config_dirs,seq_len= seq_len,datalengths = datalengths,num_data_sequences = num_data_sequences, transform = transforms,target_transform=target_transforms) 77 | 78 | 79 | vbev_dirs = ['./data/images'+str(i)+'/val/depth' for i in range(num_data_sequences)] 80 | vfront_dirs = ['./data/images'+str(i)+'/val/rgb/front' for i in range(num_data_sequences)] 81 | vleft_dirs = ['./data/images'+str(i)+'/val/rgb/left' for i in range(num_data_sequences)] 82 | vrear_dirs = ['./data/images'+str(i)+'/val/rgb/rear' for i in range(num_data_sequences)] 83 | vright_dirs = ['./data/images'+str(i)+'/val/rgb/right' for i in range(num_data_sequences)] 84 | vconfig_dirs = ['./data/images'+str(i)+'/val/cameraconfig' for i in range(num_data_sequences)] 85 | 86 | vimage_lists = [] 87 | vdatalengths = [] 88 | 89 | for vbev_dir in vbev_dirs: 90 | vnames = fnmatch.filter(os.listdir(vbev_dir), '*.png') 91 | 92 | vfiles = [] 93 | for vname in vnames: 
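        # keep only the filename stem so the validation frames can be sorted numerically below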
94 | vfiles.append(os.path.splitext(ntpath.basename(vname))[0]) 95 | 96 | vfilelist = sorted(vfiles,key=int) 97 | 98 | vimage_lists.append([f + '.png' for f in vfilelist]) 99 | vdatalengths.append(len(vnames)) 100 | 101 | val_data = UnityImageDataset(bev_dirs = vbev_dirs,front_dirs=vfront_dirs,left_dirs=vleft_dirs,rear_dirs=vrear_dirs,right_dirs=vright_dirs,image_lists=vimage_lists,config_dirs=vconfig_dirs,seq_len= seq_len,datalengths = vdatalengths, num_data_sequences = num_data_sequences, transform = vtransforms,target_transform=target_transforms) 102 | 103 | 104 | train_dataloader = DataLoader(training_data, batch_size = 1, shuffle=True) 105 | val_dataloader = DataLoader(val_data, batch_size=1, shuffle=False) 106 | 107 | 108 | 109 | random_seed = 1 # or any of your favorite number 110 | torch.manual_seed(random_seed) 111 | torch.cuda.manual_seed(random_seed) 112 | torch.backends.cudnn.deterministic = True 113 | torch.backends.cudnn.benchmark = False 114 | #model = FisheyeBEVFormer().to(device) 115 | 116 | 117 | 118 | 119 | def train(epoch,model,train_dataloader,seq_len, criterion,optimizer): 120 | 121 | model.train() 122 | 123 | train_loss = 0 124 | num_batches = len(train_dataloader) 125 | # with torch.autograd.detect_anomaly(): 126 | for batch_idx, (dataseq, targetseq) in enumerate(train_dataloader): 127 | 128 | inp_img_seq, can_buses_seq = dataseq 129 | #print(len(inp_img_seq)) 130 | bs = targetseq[0].shape[0] 131 | for ctr in range(seq_len): 132 | front = inp_img_seq[ctr][0] 133 | left = inp_img_seq[ctr][1] 134 | rear = inp_img_seq[ctr][2] 135 | right = inp_img_seq[ctr][3] 136 | 137 | target = targetseq[ctr] 138 | 139 | front = front.to(device) 140 | left = left.to(device) 141 | rear = rear.to(device) 142 | right = right.to(device) 143 | 144 | 145 | target = torch.squeeze(target,dim=1) 146 | idx2 = torch.where(target <= 0.35) 147 | idx0 = torch.where(target >= 0.69) 148 | target[target >= 0] = 1 149 | target[idx2] = 2 150 | target[idx0] = 0 151 | 152 | 153 | target = target.to(torch.int64).to(device) 154 | can_buses = can_buses_seq[ctr] 155 | 156 | if ctr == 0: 157 | prev_bev = None 158 | 159 | optimizer.zero_grad() 160 | output, prev_bev_org = model(front,left,rear,right, list(can_buses),prev_bev) 161 | 162 | prev_bev = prev_bev_org.detach() 163 | 164 | loss = criterion(output, target) 165 | 166 | loss.backward() 167 | optimizer.step() 168 | 169 | train_loss += loss.data 170 | print('Train Epoch: {} [{}/{} Srno: {} ({:.0f}%)]\tLoss: {:.6f}'.format( 171 | epoch, (batch_idx + 1) * 1, len(train_dataloader), ctr, 172 | 100. 
* (batch_idx + 1) / len(train_dataloader), loss.data)) 173 | 174 | train_loss/= num_batches*seq_len 175 | 176 | print(f"Train Error: Avg loss: {train_loss:>8f} \n") 177 | 178 | return train_loss 179 | 180 | 181 | def val(epoch,test_dataloader,seq_len,model,loss_fn): 182 | num_batches = len(test_dataloader) 183 | model.eval() 184 | test_loss = 0 185 | with torch.no_grad(): 186 | for batch_idx, (dataseq, targetseq) in enumerate(test_dataloader): 187 | inp_img_seq, can_buses_seq = dataseq 188 | bs = targetseq[0].shape[0] 189 | for ctr in range(seq_len): 190 | front = inp_img_seq[ctr][0] 191 | left = inp_img_seq[ctr][1] 192 | rear = inp_img_seq[ctr][2] 193 | right = inp_img_seq[ctr][3] 194 | 195 | target = targetseq[ctr] 196 | 197 | front = front.to(device) 198 | left = left.to(device) 199 | rear = rear.to(device) 200 | right = right.to(device) 201 | 202 | target = torch.squeeze(target,dim=1) 203 | #print(torch.sort(target)) 204 | #target = (target > 0.5).float().to(device) 205 | idx2 = torch.where(target <= 0.35) 206 | idx0 = torch.where(target >= 0.69) 207 | target[target >= 0] = 1 208 | target[idx2] = 2 209 | target[idx0] = 0 210 | 211 | target = target.to(torch.int64).to(device) 212 | can_buses = can_buses_seq[ctr] 213 | 214 | if ctr == 0: 215 | prev_bev = None 216 | pred, prev_bev = model(front,left,rear,right, list(can_buses),prev_bev) 217 | test_loss += loss_fn(pred, target).item() 218 | 219 | test_loss/= num_batches*seq_len 220 | 221 | print(f"Test Error: Avg loss: {test_loss:>8f} \n") 222 | 223 | return test_loss 224 | 225 | 226 | device = "cuda" if torch.cuda.is_available() else "cpu" 227 | #device = "cpu" 228 | print(f"Using {device} device") 229 | 230 | model = FisheyeBEVFormer().to(device) 231 | 232 | optimizer = optim.AdamW(model.parameters(), lr=0.0002) 233 | criterionCE = nn.CrossEntropyLoss() 234 | criterionFocal = CrossEntropyFocalLoss() 235 | 236 | n_epochs = 5 237 | 238 | PATH = './f2bev_conv_st_height.pt' 239 | min_val_loss = np.inf 240 | 241 | for epoch in range(n_epochs): 242 | train_loss = train(epoch,model,train_dataloader,seq_len,criterionFocal,optimizer) 243 | val_loss = val(epoch,val_dataloader,seq_len,model,criterionFocal) 244 | 245 | 246 | if val_loss < min_val_loss: 247 | min_val_loss = val_loss 248 | torch.save({'epoch' : epoch, 'model_state_dict': model.state_dict(), 'optimizer_state_dict' : optimizer.state_dict(), 'loss': val_loss}, PATH) 249 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/train_f2bev_attn_st_height.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue Dec 13 16:44:04 2022 5 | 6 | @author: Ekta 7 | """ 8 | 9 | import os 10 | os.environ["CUDA_VISIBLE_DEVICES"]="1" 11 | import torch,ntpath,random 12 | from torch import nn, optim 13 | import numpy as np 14 | from loader_single_task import UnityImageDataset 15 | 16 | import fnmatch 17 | from torch.utils.data import DataLoader 18 | from model_f2bev_attn_st_height import FisheyeBEVFormer 19 | import torchvision.transforms as T 20 | from losses.focal import BinaryFocalLoss, CrossEntropyFocalLoss #FocalLoss 21 | 22 | def numpy_sigmoid(x): 23 | return 1/(1 + np.exp(-x)) 24 | 25 | 26 | 27 | def gamma_correction(image): 28 | gamma = random.choice([0.8,1.0,1.2,1.4]) 29 | return T.functional.adjust_gamma(image,gamma,gain = 1) 30 | 31 | def plt_pred_image(pred): 32 | p = pred.detach().cpu().numpy() 33 | one = numpy_sigmoid(p[1,:,:]) 34 | two = 
numpy_sigmoid(p[2,:,:]) 35 | 36 | show = np.zeros((p.shape[1],p.shape[2])) 37 | show[np.where(one > 0.5)] = 1 38 | show[np.where(two > 0.5)] = 2 39 | 40 | return show 41 | 42 | 43 | num_data_sequences = 20 44 | 45 | 46 | 47 | bev_dirs = ['./data/images'+str(i)+'/train/depth' for i in range(num_data_sequences)] 48 | front_dirs = ['./data/images'+str(i)+'/train/rgb/front' for i in range(num_data_sequences)] 49 | left_dirs = ['./data/images'+str(i)+'/train/rgb/left' for i in range(num_data_sequences)] 50 | rear_dirs = ['./data/images'+str(i)+'/train/rgb/rear' for i in range(num_data_sequences)] 51 | right_dirs = ['./data/images'+str(i)+'/train/rgb/right' for i in range(num_data_sequences)] 52 | config_dirs = ['./data/images'+str(i)+'/train/cameraconfig' for i in range(num_data_sequences)] 53 | 54 | seq_len = 3 55 | 56 | image_lists = [] 57 | datalengths = [] 58 | 59 | for bev_dir in bev_dirs: 60 | names = fnmatch.filter(os.listdir(bev_dir), '*.png') 61 | 62 | files = [] 63 | for name in names: 64 | files.append(os.path.splitext(ntpath.basename(name))[0]) 65 | 66 | filelist = sorted(files,key=int) 67 | 68 | image_lists.append([f + '.png' for f in filelist]) 69 | datalengths.append(len(names)) 70 | 71 | vtransforms = T.Compose([T.Resize((540,640))]) #,GammaCorrectionTransform()) 72 | 73 | transforms = T.Compose([T.Resize((540,640)), T.ColorJitter(brightness=.3, hue=.3, contrast = 0.2, saturation = 0.3), T.Lambda(gamma_correction) ]) #,GammaCorrectionTransform()) 74 | target_transforms = T.Compose([T.Resize((50,50)),T.Grayscale(num_output_channels=1)]) 75 | 76 | 77 | training_data = UnityImageDataset(bev_dirs = bev_dirs,front_dirs=front_dirs,left_dirs=left_dirs,rear_dirs=rear_dirs,right_dirs=right_dirs,image_lists=image_lists,config_dirs=config_dirs,seq_len= seq_len,datalengths = datalengths,num_data_sequences = num_data_sequences, transform = transforms,target_transform=target_transforms) 78 | 79 | 80 | 81 | vbev_dirs = ['./data/images'+str(i)+'/val/depth' for i in range(num_data_sequences)] 82 | vfront_dirs = ['./data/images'+str(i)+'/val/rgb/front' for i in range(num_data_sequences)] 83 | vleft_dirs = ['./data/images'+str(i)+'/val/rgb/left' for i in range(num_data_sequences)] 84 | vrear_dirs = ['./data/images'+str(i)+'/val/rgb/rear' for i in range(num_data_sequences)] 85 | vright_dirs = ['./data/images'+str(i)+'/val/rgb/right' for i in range(num_data_sequences)] 86 | vconfig_dirs = ['./data/images'+str(i)+'/val/cameraconfig' for i in range(num_data_sequences)] 87 | 88 | vimage_lists = [] 89 | vdatalengths = [] 90 | 91 | for vbev_dir in vbev_dirs: 92 | vnames = fnmatch.filter(os.listdir(vbev_dir), '*.png') 93 | 94 | vfiles = [] 95 | for vname in vnames: 96 | vfiles.append(os.path.splitext(ntpath.basename(vname))[0]) 97 | 98 | vfilelist = sorted(vfiles,key=int) 99 | 100 | vimage_lists.append([f + '.png' for f in vfilelist]) 101 | vdatalengths.append(len(vnames)) 102 | 103 | val_data = UnityImageDataset(bev_dirs = vbev_dirs,front_dirs=vfront_dirs,left_dirs=vleft_dirs,rear_dirs=vrear_dirs,right_dirs=vright_dirs,image_lists=vimage_lists,config_dirs=vconfig_dirs,seq_len= seq_len,datalengths = vdatalengths, num_data_sequences = num_data_sequences, transform = vtransforms,target_transform=target_transforms) 104 | 105 | 106 | train_dataloader = DataLoader(training_data, batch_size = 2, shuffle=True) 107 | val_dataloader = DataLoader(val_data, batch_size=2, shuffle=False) 108 | 109 | 110 | 111 | random_seed = 1 # or any of your favorite number 112 | torch.manual_seed(random_seed) 113 | 
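# the CUDA seed and the cuDNN deterministic/benchmark flags below are pinned together with the CPU seed above so that repeated training runs are reproducible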
torch.cuda.manual_seed(random_seed) 114 | torch.backends.cudnn.deterministic = True 115 | torch.backends.cudnn.benchmark = False 116 | 117 | 118 | 119 | 120 | def train(epoch,model,train_dataloader,seq_len, criterion,optimizer): 121 | 122 | model.train() 123 | 124 | train_loss = 0 125 | num_batches = len(train_dataloader) 126 | # with torch.autograd.detect_anomaly(): 127 | for batch_idx, (dataseq, targetseq) in enumerate(train_dataloader): 128 | 129 | inp_img_seq, can_buses_seq = dataseq 130 | bs = targetseq[0].shape[0] 131 | for ctr in range(seq_len): 132 | front = inp_img_seq[ctr][0] 133 | left = inp_img_seq[ctr][1] 134 | rear = inp_img_seq[ctr][2] 135 | right = inp_img_seq[ctr][3] 136 | 137 | target = targetseq[ctr] 138 | 139 | front = front.to(device) 140 | left = left.to(device) 141 | rear = rear.to(device) 142 | right = right.to(device) 143 | 144 | 145 | 146 | target = torch.squeeze(target,dim=1) 147 | idx2 = torch.where(target <= 0.35) 148 | idx0 = torch.where(target >= 0.69) 149 | target[target >= 0] = 1 150 | target[idx2] = 2 151 | target[idx0] = 0 152 | 153 | target = target.to(torch.int64).to(device) 154 | can_buses = can_buses_seq[ctr] 155 | 156 | if ctr == 0: 157 | prev_bev = None 158 | 159 | optimizer.zero_grad() 160 | output, inter_outputs, prev_bev_org = model(front,left,rear,right, list(can_buses),prev_bev) 161 | 162 | prev_bev = prev_bev_org.detach() 163 | 164 | loss = criterion(output, target) 165 | 166 | for inter_stage in inter_outputs: 167 | loss += criterion(inter_stage,target) 168 | 169 | loss.backward() 170 | optimizer.step() 171 | 172 | train_loss += loss.data 173 | print('Train Epoch: {} [{}/{} Srno: {} ({:.0f}%)]\tLoss: {:.6f}'.format( 174 | epoch, (batch_idx + 1) * 1, len(train_dataloader), ctr, 175 | 100. * (batch_idx + 1) / len(train_dataloader), loss.data)) 176 | 177 | train_loss/= num_batches*seq_len 178 | 179 | print(f"Train Error: Avg loss: {train_loss:>8f} \n") 180 | 181 | return train_loss 182 | 183 | 184 | def val(epoch,test_dataloader,seq_len,model,loss_fn): 185 | num_batches = len(test_dataloader) 186 | model.eval() 187 | test_loss = 0 188 | with torch.no_grad(): 189 | for batch_idx, (dataseq, targetseq) in enumerate(test_dataloader): 190 | inp_img_seq, can_buses_seq = dataseq 191 | bs = targetseq[0].shape[0] 192 | for ctr in range(seq_len): 193 | front = inp_img_seq[ctr][0] 194 | left = inp_img_seq[ctr][1] 195 | rear = inp_img_seq[ctr][2] 196 | right = inp_img_seq[ctr][3] 197 | 198 | target = targetseq[ctr] 199 | 200 | front = front.to(device) 201 | left = left.to(device) 202 | rear = rear.to(device) 203 | right = right.to(device) 204 | 205 | target = torch.squeeze(target,dim=1) 206 | idx2 = torch.where(target <= 0.35) 207 | idx0 = torch.where(target >= 0.69) 208 | target[target >= 0] = 1 209 | target[idx2] = 2 210 | target[idx0] = 0 211 | 212 | target = target.to(torch.int64).to(device) 213 | can_buses = can_buses_seq[ctr] 214 | 215 | if ctr == 0: 216 | prev_bev = None 217 | pred, inter_pred, prev_bev = model(front,left,rear,right, list(can_buses),prev_bev) 218 | 219 | test_loss += loss_fn(pred, target).item() 220 | 221 | test_loss/= num_batches*seq_len 222 | 223 | print(f"Test Error: Avg loss: {test_loss:>8f} \n") 224 | 225 | return test_loss 226 | 227 | 228 | device = "cuda" if torch.cuda.is_available() else "cpu" 229 | print(f"Using {device} device") 230 | 231 | model = FisheyeBEVFormer().to(device) 232 | 233 | optimizer = optim.AdamW(model.parameters(), lr=0.0002) 234 | criterionCE = nn.CrossEntropyLoss() 235 | criterionFocal = 
CrossEntropyFocalLoss() 236 | 237 | n_epochs = 1 238 | 239 | PATH = './f2bev_attn_st_height.pt' 240 | min_val_loss = np.inf 241 | min_epoch = n_epochs 242 | for epoch in range(n_epochs): 243 | train_loss = train(epoch,model,train_dataloader,seq_len,criterionCE,optimizer) 244 | val_loss = val(epoch,val_dataloader,seq_len,model,criterionCE) 245 | 246 | if val_loss < min_val_loss: 247 | min_epoch = epoch 248 | min_val_loss = val_loss 249 | torch.save({'epoch' : epoch, 'model_state_dict': model.state_dict(), 'optimizer_state_dict' : optimizer.state_dict(), 'loss': val_loss}, PATH) 250 | else: 251 | if epoch > min_epoch: 252 | break 253 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/train_f2bev_conv_st_seg.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue Dec 13 16:44:04 2022 5 | 6 | @author: Ekta 7 | """ 8 | 9 | 10 | import torch,ntpath,random 11 | from torch import nn, optim 12 | import numpy as np 13 | from loader_single_task import UnityImageDataset 14 | 15 | import os,fnmatch 16 | from torch.utils.data import DataLoader 17 | from model_f2bev_conv_st_seg import FisheyeBEVFormer 18 | import torchvision.transforms as T 19 | from losses.focal import BinaryFocalLoss, CrossEntropyFocalLoss #FocalLoss 20 | 21 | def numpy_sigmoid(x): 22 | return 1/(1 + np.exp(-x)) 23 | 24 | 25 | 26 | def gamma_correction(image): 27 | gamma = random.choice([0.8,1.0,1.2,1.4]) 28 | return T.functional.adjust_gamma(image,gamma,gain = 1) 29 | 30 | def plt_pred_image(pred): 31 | p = pred.detach().cpu().numpy() 32 | #zero = numpy_sigmoid(p[0,:,:]) 33 | one = numpy_sigmoid(p[1,:,:]) 34 | two = numpy_sigmoid(p[2,:,:]) 35 | 36 | show = np.zeros((p.shape[1],p.shape[2])) 37 | show[np.where(one > 0.5)] = 1 38 | show[np.where(two > 0.5)] = 2 39 | 40 | return show 41 | 42 | 43 | num_data_sequences = 20 44 | 45 | 46 | 47 | bev_dirs = ['./data/images'+str(i)+'/train/seg/bev' for i in range(num_data_sequences)] 48 | front_dirs = ['./data/images'+str(i)+'/train/rgb/front' for i in range(num_data_sequences)] 49 | left_dirs = ['./data/images'+str(i)+'/train/rgb/left' for i in range(num_data_sequences)] 50 | rear_dirs = ['./data/images'+str(i)+'/train/rgb/rear' for i in range(num_data_sequences)] 51 | right_dirs = ['./data/images'+str(i)+'/train/rgb/right' for i in range(num_data_sequences)] 52 | config_dirs = ['./data/images'+str(i)+'/train/cameraconfig' for i in range(num_data_sequences)] 53 | 54 | seq_len = 3 55 | 56 | image_lists = [] 57 | datalengths = [] 58 | 59 | for bev_dir in bev_dirs: 60 | names = fnmatch.filter(os.listdir(bev_dir), '*.png') 61 | 62 | files = [] 63 | for name in names: 64 | files.append(os.path.splitext(ntpath.basename(name))[0]) 65 | 66 | filelist = sorted(files,key=int) 67 | 68 | image_lists.append([f + '.png' for f in filelist]) 69 | datalengths.append(len(names)) 70 | 71 | vtransforms = T.Compose([T.Resize((540,640))]) #,GammaCorrectionTransform()) 72 | 73 | transforms = T.Compose([T.Resize((540,640)), T.ColorJitter(brightness=.3, hue=.3, contrast = 0.2, saturation = 0.3), T.Lambda(gamma_correction) ]) #,GammaCorrectionTransform()) 74 | 75 | target_transforms = T.Compose([T.Grayscale(num_output_channels=1)]) 76 | 77 | training_data = UnityImageDataset(bev_dirs = bev_dirs,front_dirs=front_dirs,left_dirs=left_dirs,rear_dirs=rear_dirs,right_dirs=right_dirs,image_lists=image_lists,config_dirs=config_dirs,seq_len= seq_len,datalengths = 
datalengths,num_data_sequences = num_data_sequences, transform = transforms,target_transform=target_transforms) 78 | 79 | 80 | 81 | 82 | 83 | 84 | vbev_dirs = ['./data/images'+str(i)+'/val/seg/bev' for i in range(num_data_sequences)] 85 | vfront_dirs = ['./data/images'+str(i)+'/val/rgb/front' for i in range(num_data_sequences)] 86 | vleft_dirs = ['./data/images'+str(i)+'/val/rgb/left' for i in range(num_data_sequences)] 87 | vrear_dirs = ['./data/images'+str(i)+'/val/rgb/rear' for i in range(num_data_sequences)] 88 | vright_dirs = ['./data/images'+str(i)+'/val/rgb/right' for i in range(num_data_sequences)] 89 | vconfig_dirs = ['./data/images'+str(i)+'/val/cameraconfig' for i in range(num_data_sequences)] 90 | vimage_lists = [] 91 | vdatalengths = [] 92 | 93 | for vbev_dir in vbev_dirs: 94 | vnames = fnmatch.filter(os.listdir(vbev_dir), '*.png') 95 | 96 | vfiles = [] 97 | for vname in vnames: 98 | vfiles.append(os.path.splitext(ntpath.basename(vname))[0]) 99 | 100 | vfilelist = sorted(vfiles,key=int) 101 | 102 | vimage_lists.append([f + '.png' for f in vfilelist]) 103 | vdatalengths.append(len(vnames)) 104 | 105 | val_data = UnityImageDataset(bev_dirs = vbev_dirs,front_dirs=vfront_dirs,left_dirs=vleft_dirs,rear_dirs=vrear_dirs,right_dirs=vright_dirs,image_lists=vimage_lists,config_dirs=vconfig_dirs,seq_len= seq_len,datalengths = vdatalengths, num_data_sequences = num_data_sequences, transform = vtransforms,target_transform=target_transforms) 106 | 107 | 108 | train_dataloader = DataLoader(training_data, batch_size = 1, shuffle=True) 109 | val_dataloader = DataLoader(val_data, batch_size=1, shuffle=False) 110 | 111 | 112 | 113 | random_seed = 1 # or any of your favorite number 114 | torch.manual_seed(random_seed) 115 | torch.cuda.manual_seed(random_seed) 116 | torch.backends.cudnn.deterministic = True 117 | torch.backends.cudnn.benchmark = False 118 | 119 | 120 | 121 | 122 | def train(epoch,model,train_dataloader,seq_len, criterion,optimizer): 123 | 124 | model.train() 125 | 126 | train_loss = 0 127 | num_batches = len(train_dataloader) 128 | # with torch.autograd.detect_anomaly(): 129 | for batch_idx, (dataseq, targetseq) in enumerate(train_dataloader): 130 | 131 | inp_img_seq, can_buses_seq = dataseq 132 | #print(len(inp_img_seq)) 133 | bs = targetseq[0].shape[0] 134 | for ctr in range(seq_len): 135 | front = inp_img_seq[ctr][0] 136 | left = inp_img_seq[ctr][1] 137 | rear = inp_img_seq[ctr][2] 138 | right = inp_img_seq[ctr][3] 139 | 140 | target = targetseq[ctr] 141 | 142 | front = front.to(device) 143 | left = left.to(device) 144 | rear = rear.to(device) 145 | right = right.to(device) 146 | 147 | 148 | target = torch.squeeze(target,dim=1) 149 | idx0 = torch.where(target <= 0.02) 150 | target[idx0] = 10 151 | idx1 = torch.where(target <= 0.07) 152 | target[idx1] = 11 153 | idx2 = torch.where(target <= 0.22) 154 | target[idx2] = 12 155 | idx3 = torch.where(target <= 0.60) 156 | target[idx3] = 13 157 | idx4 = torch.where(target <= 1) 158 | target[idx4] = 14 159 | target = target - 10 160 | target = target.to(torch.int64).to(device) 161 | 162 | can_buses = can_buses_seq[ctr] 163 | 164 | if ctr == 0: 165 | prev_bev = None 166 | 167 | optimizer.zero_grad() 168 | output, prev_bev_org = model(front,left,rear,right, list(can_buses),prev_bev) 169 | 170 | prev_bev = prev_bev_org.detach() 171 | 172 | loss = criterion(output, target) 173 | 174 | loss.backward() 175 | optimizer.step() 176 | 177 | train_loss += loss.data 178 | print('Train Epoch: {} [{}/{} Srno: {} ({:.0f}%)]\tLoss: {:.6f}'.format( 179 
| epoch, (batch_idx + 1) * 1, len(train_dataloader), ctr, 180 | 100. * (batch_idx + 1) / len(train_dataloader), loss.data)) 181 | 182 | train_loss/= num_batches*seq_len 183 | 184 | print(f"Train Error: Avg loss: {train_loss:>8f} \n") 185 | 186 | return train_loss 187 | 188 | 189 | def val(epoch,test_dataloader,seq_len,model,loss_fn): 190 | num_batches = len(test_dataloader) 191 | model.eval() 192 | test_loss = 0 193 | with torch.no_grad(): 194 | for batch_idx, (dataseq, targetseq) in enumerate(test_dataloader): 195 | inp_img_seq, can_buses_seq = dataseq 196 | bs = targetseq[0].shape[0] 197 | for ctr in range(seq_len): 198 | front = inp_img_seq[ctr][0] 199 | left = inp_img_seq[ctr][1] 200 | rear = inp_img_seq[ctr][2] 201 | right = inp_img_seq[ctr][3] 202 | 203 | target = targetseq[ctr] 204 | 205 | front = front.to(device) 206 | left = left.to(device) 207 | rear = rear.to(device) 208 | right = right.to(device) 209 | 210 | target = torch.squeeze(target,dim=1) 211 | idx0 = torch.where(target <= 0.02) 212 | target[idx0] = 10 213 | idx1 = torch.where(target <= 0.07) 214 | target[idx1] = 11 215 | idx2 = torch.where(target <= 0.22) 216 | target[idx2] = 12 217 | idx3 = torch.where(target <= 0.60) 218 | target[idx3] = 13 219 | idx4 = torch.where(target <= 1) 220 | target[idx4] = 14 221 | target = target - 10 222 | target = target.to(torch.int64).to(device) 223 | 224 | 225 | 226 | can_buses = can_buses_seq[ctr] 227 | 228 | if ctr == 0: 229 | prev_bev = None 230 | pred, prev_bev = model(front,left,rear,right, list(can_buses),prev_bev) 231 | 232 | 233 | test_loss += loss_fn(pred, target).item() 234 | 235 | test_loss/= num_batches*seq_len 236 | 237 | print(f"Test Error: Avg loss: {test_loss:>8f} \n") 238 | 239 | return test_loss 240 | 241 | 242 | device = "cuda" if torch.cuda.is_available() else "cpu" 243 | #device = "cpu" 244 | print(f"Using {device} device") 245 | 246 | model = FisheyeBEVFormer().to(device) 247 | 248 | optimizer = optim.AdamW(model.parameters(), lr=0.0002) 249 | criterionCE = nn.CrossEntropyLoss() 250 | criterionFocal = CrossEntropyFocalLoss() 251 | 252 | n_epochs = 5 253 | 254 | PATH = './f2bev_conv_st_seg.pt' 255 | min_val_loss = np.inf 256 | 257 | for epoch in range(n_epochs): 258 | train_loss = train(epoch,model,train_dataloader,seq_len,criterionFocal,optimizer) 259 | val_loss = val(epoch,val_dataloader,seq_len,model,criterionFocal) 260 | 261 | 262 | if val_loss < min_val_loss: 263 | min_val_loss = val_loss 264 | torch.save({'epoch' : epoch, 'model_state_dict': model.state_dict(), 'optimizer_state_dict' : optimizer.state_dict(), 'loss': val_loss}, PATH) 265 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/deformable_attention.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Aug 25 11:09:49 2022 5 | 6 | @author: Ekta 7 | """ 8 | 9 | # ------------------------------------------------------------------------------------------------# ------------------------------------------------------------------------------------------------ 10 | # Modified from https://raw.githubusercontent.com/fundamentalvision/Deformable-DETR/ 11 | # ------------------------------------------------------------------------------------------------ 12 | 13 | from __future__ import absolute_import 14 | from __future__ import print_function 15 | from __future__ import division 16 | 17 | import warnings 18 | import math 19 | 20 | import torch 
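# MSDeformAttn3D below adapts the multi-scale deformable attention module of Deformable-DETR;
# its forward() extends the reference-point handling so that each BEV query can carry several
# Z anchors (reference points projected at different heights), which is what the spatial
# cross-attention in this repository feeds it.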
21 | from torch import nn 22 | import torch.nn.functional as F 23 | from torch.nn.init import xavier_uniform_, constant_ 24 | from bblocks.deformable_attention_function import ms_deform_attn_core_pytorch #,MSDeformAttnFunction 25 | 26 | #from bblocks.multi_scale_deformable_attn_function import MultiScaleDeformableAttnFunction_fp32 27 | 28 | 29 | def _is_power_of_2(n): 30 | if (not isinstance(n, int)) or (n < 0): 31 | raise ValueError("invalid input for _is_power_of_2: {} (type: {})".format(n, type(n))) 32 | return (n & (n-1) == 0) and n != 0 #checked and same 33 | 34 | 35 | class MSDeformAttn3D(nn.Module): 36 | def __init__(self, d_model=256, n_levels=4, n_heads=8, n_points=6, batch_first= True): 37 | """ 38 | Multi-Scale Deformable Attention Module 39 | :param d_model hidden dimension 40 | :param n_levels number of feature levels 41 | :param n_heads number of attention heads 42 | :param n_points number of sampling points per attention head per feature level 43 | """ 44 | super().__init__() 45 | if d_model % n_heads != 0: 46 | raise ValueError('d_model must be divisible by n_heads, but got {} and {}'.format(d_model, n_heads)) 47 | _d_per_head = d_model // n_heads 48 | # you'd better set _d_per_head to a power of 2 which is more efficient in our CUDA implementation 49 | if not _is_power_of_2(_d_per_head): 50 | warnings.warn("You'd better set d_model in MSDeformAttn to make the dimension of each attention head a power of 2 " 51 | "which is more efficient in our CUDA implementation.") 52 | 53 | self.im2col_step = 64 54 | 55 | self.d_model = d_model #bevformer calls this embed_dims 56 | self.n_levels = n_levels 57 | self.n_heads = n_heads 58 | self.n_points = n_points 59 | 60 | self.sampling_offsets = nn.Linear(d_model, n_heads * n_levels * n_points * 2) 61 | self.attention_weights = nn.Linear(d_model, n_heads * n_levels * n_points) 62 | self.value_proj = nn.Linear(d_model, d_model) 63 | #self.output_proj = nn.Linear(d_model, d_model) ##TODO: this is new in my implementation 64 | self.batch_first = True 65 | self._reset_parameters() 66 | 67 | def _reset_parameters(self): 68 | constant_(self.sampling_offsets.weight.data, 0.) 69 | thetas = torch.arange(self.n_heads, dtype=torch.float32) * (2.0 * math.pi / self.n_heads) 70 | grid_init = torch.stack([thetas.cos(), thetas.sin()], -1) 71 | grid_init = (grid_init / grid_init.abs().max(-1, keepdim=True)[0]).view(self.n_heads, 1, 1, 2).repeat(1, self.n_levels, self.n_points, 1) 72 | for i in range(self.n_points): 73 | grid_init[:, :, i, :] *= i + 1 74 | with torch.no_grad(): 75 | self.sampling_offsets.bias = nn.Parameter(grid_init.view(-1)) 76 | constant_(self.attention_weights.weight.data, 0.) 77 | constant_(self.attention_weights.bias.data, 0.) 78 | xavier_uniform_(self.value_proj.weight.data) 79 | constant_(self.value_proj.bias.data, 0.) 80 | #xavier_uniform_(self.output_proj.weight.data) 81 | #constant_(self.output_proj.bias.data, 0.) 
##these are all mostly doing the same thing; calculated guess 82 | 83 | 84 | def forward(self, query, query_pos, reference_points, input_flatten, input_spatial_shapes, input_level_start_index, input_padding_mask=None): 85 | """ 86 | :param query (N, Length_{query}, C) 87 | :param reference_points (N, Length_{query}, n_levels, 2), range in [0, 1], top-left (0,0), bottom-right (1, 1), including padding area 88 | or (N, Length_{query}, n_levels, 4), add additional (w, h) to form reference boxes 89 | :param input_flatten (N, \sum_{l=0}^{L-1} H_l \cdot W_l, C) 90 | :param input_spatial_shapes (n_levels, 2), [(H_0, W_0), (H_1, W_1), ..., (H_{L-1}, W_{L-1})] 91 | :param input_level_start_index (n_levels, ), [0, H_0*W_0, H_0*W_0+H_1*W_1, H_0*W_0+H_1*W_1+H_2*W_2, ..., H_0*W_0+H_1*W_1+...+H_{L-1}*W_{L-1}] 92 | :param input_padding_mask (N, \sum_{l=0}^{L-1} H_l \cdot W_l), True for padding elements, False for non-padding elements 93 | 94 | :return output (N, Length_{query}, C) 95 | """ 96 | 97 | #######bev former has this 98 | 99 | if input_flatten is None: 100 | input_flatten = query 101 | # if query_pos is not None: 102 | # query = query + query_pos ##I think BEVformer had this but it's an error 103 | 104 | 105 | if not self.batch_first: 106 | # change to (bs, num_query ,embed_dims) 107 | query = query.permute(1, 0, 2) 108 | input_flatten = input_flatten.permute(1, 0, 2) 109 | 110 | ################## 111 | N, Len_q, _ = query.shape ##capital N is batch size, i.e. bs in BEVformer 112 | N, Len_in, _ = input_flatten.shape 113 | assert (input_spatial_shapes[:, 0] * input_spatial_shapes[:, 1]).sum() == Len_in 114 | 115 | value = self.value_proj(input_flatten) 116 | if input_padding_mask is not None: ##TODO: Figure out the deal with masks 117 | value = value.masked_fill(input_padding_mask[..., None], float(0)) 118 | value = value.view(N, Len_in, self.n_heads, self.d_model // self.n_heads) 119 | sampling_offsets = self.sampling_offsets(query).view(N, Len_q, self.n_heads, self.n_levels, self.n_points, 2) 120 | attention_weights = self.attention_weights(query).view(N, Len_q, self.n_heads, self.n_levels * self.n_points) 121 | attention_weights = F.softmax(attention_weights, -1).view(N, Len_q, self.n_heads, self.n_levels, self.n_points) 122 | 123 | #print(attention_weights.shape) 124 | 125 | if reference_points.shape[-1] == 2: 126 | 127 | """ 128 | For each BEV query, there are `num_Z_anchors` points in 3D space at different heights. 129 | After projecting, each BEV query has `num_Z_anchors` reference points in each 2D image. 130 | For each reference point, we sample `num_points` sampling points. 131 | For `num_Z_anchors` reference points, this gives `num_points * num_Z_anchors` sampling points overall. 
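Illustrative example (assumed values, not fixed by this module): with the default n_heads=8, n_levels=4, n_points=6 and num_Z_anchors=2, sampling_offsets of shape (N, Len_q, 8, 4, 6, 2) are regrouped below into (N, Len_q, 8, 4, 3, 2, 2), i.e. 3 offsets around each of the 2 projected Z-anchor reference points, added to the broadcast reference points, and then flattened back to (N, Len_q, 8, 4, 6, 2) before ms_deform_attn_core_pytorch is called.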
132 | """ 133 | 134 | offset_normalizer = torch.stack([input_spatial_shapes[..., 1], input_spatial_shapes[..., 0]], -1) 135 | #print(offset_normalizer) 136 | 137 | ##added by me : this is where sampling points are obtained in a manner that works with SCA 138 | N, Len_q, num_Z_anchors,xy = reference_points.shape 139 | #print(num_Z_anchors) 140 | reference_points = reference_points[:, :, None, None, None, :, :] 141 | #print(reference_points.shape) 142 | #print(sampling_offsets.shape) 143 | sampling_offsets = sampling_offsets / \ 144 | offset_normalizer[None, None, None, :, None, :] 145 | #print(sampling_offsets.shape) 146 | N, Len_q, n_heads,n_levels,num_all_points,xy = sampling_offsets.shape 147 | #print(num_all_points) 148 | #print(num_Z_anchors) 149 | sampling_offsets = sampling_offsets.view( 150 | N, Len_q, n_heads, n_levels, num_all_points // num_Z_anchors, num_Z_anchors, xy) 151 | 152 | 153 | sampling_locations = reference_points + sampling_offsets 154 | N, Len_q,n_heads,n_levels, n_points,num_Z_anchors,xy = sampling_locations.shape 155 | 156 | 157 | assert num_all_points == n_points*num_Z_anchors 158 | 159 | sampling_locations = sampling_locations.view(N,Len_q,n_heads,n_levels,num_all_points,xy) 160 | 161 | ## commented by me: this is original Deformable attention 162 | # sampling_locations = reference_points[:, :, None, :, None, :] \ 163 | # + sampling_offsets / offset_normalizer[None, None, None, :, None, :] 164 | elif reference_points.shape[-1] == 4: 165 | ## commented by me: this is original Deformable attention 166 | # sampling_locations = reference_points[:, :, None, :, None, :2] \ 167 | # + sampling_offsets / self.n_points * reference_points[:, :, None, :, None, 2:] * 0.5 168 | assert False 169 | else: 170 | raise ValueError( 171 | 'Last dim of reference_points must be 2 or 4, but get {} instead.'.format(reference_points.shape[-1])) 172 | #print(value.dtype, input_spatial_shapes.type,input_level_start_index.shape,sampling_locations.type,attention_weights.type,type(self.im2col_step)) 173 | # output = MultiScaleDeformableAttnFunction_fp32.apply( 174 | # value, input_spatial_shapes, input_level_start_index, sampling_locations, attention_weights, self.im2col_step) 175 | 176 | output = ms_deform_attn_core_pytorch(value,input_spatial_shapes,sampling_locations,attention_weights) 177 | 178 | if not self.batch_first: 179 | output = output.permute(1, 0, 2) 180 | #print(output.shape) 181 | #output = self.output_proj(output) ##TODO: BEVFormer does not have this 182 | return output 183 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/train_f2bev_attn_st_seg.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue Dec 13 16:44:04 2022 5 | 6 | @author: Ekta 7 | """ 8 | 9 | import os 10 | os.environ["CUDA_VISIBLE_DEVICES"]="0" 11 | import torch,ntpath,random 12 | from torch import nn, optim 13 | import numpy as np 14 | from loader_single_task import UnityImageDataset 15 | 16 | 17 | import fnmatch 18 | from torch.utils.data import DataLoader 19 | from model_f2bev_attn_st_seg import FisheyeBEVFormer 20 | import torchvision.transforms as T 21 | from losses.focal import BinaryFocalLoss, CrossEntropyFocalLoss #FocalLoss 22 | 23 | def numpy_sigmoid(x): 24 | return 1/(1 + np.exp(-x)) 25 | 26 | 27 | 28 | def gamma_correction(image): 29 | gamma = random.choice([0.8,1.0,1.2,1.4]) 30 | return T.functional.adjust_gamma(image,gamma,gain = 1) 31 | 
32 | def plt_pred_image(pred): 33 | p = pred.detach().cpu().numpy() 34 | #zero = numpy_sigmoid(p[0,:,:]) 35 | one = numpy_sigmoid(p[1,:,:]) 36 | two = numpy_sigmoid(p[2,:,:]) 37 | 38 | show = np.zeros((p.shape[1],p.shape[2])) 39 | show[np.where(one > 0.5)] = 1 40 | show[np.where(two > 0.5)] = 2 41 | 42 | return show 43 | 44 | 45 | num_data_sequences = 20 46 | 47 | 48 | bev_dirs = ['./data/images'+str(i)+'/train/seg/bev' for i in range(num_data_sequences)] 49 | front_dirs = ['./data/images'+str(i)+'/train/rgb/front' for i in range(num_data_sequences)] 50 | left_dirs = ['./data/images'+str(i)+'/train/rgb/left' for i in range(num_data_sequences)] 51 | rear_dirs = ['./data/images'+str(i)+'/train/rgb/rear' for i in range(num_data_sequences)] 52 | right_dirs = ['./data/images'+str(i)+'/train/rgb/right' for i in range(num_data_sequences)] 53 | config_dirs = ['./data/images'+str(i)+'/train/cameraconfig' for i in range(num_data_sequences)] 54 | 55 | seq_len = 3 56 | 57 | image_lists = [] 58 | datalengths = [] 59 | 60 | for bev_dir in bev_dirs: 61 | names = fnmatch.filter(os.listdir(bev_dir), '*.png') 62 | 63 | files = [] 64 | for name in names: 65 | files.append(os.path.splitext(ntpath.basename(name))[0]) 66 | 67 | filelist = sorted(files,key=int) 68 | 69 | image_lists.append([f + '.png' for f in filelist]) 70 | datalengths.append(len(names)) 71 | 72 | 73 | vtransforms = T.Compose([T.Resize((540,640))]) #,GammaCorrectionTransform()) 74 | 75 | transforms = T.Compose([T.Resize((540,640)), T.ColorJitter(brightness=.3, hue=.3, contrast = 0.2, saturation = 0.3), T.Lambda(gamma_correction) ]) #,GammaCorrectionTransform()) 76 | target_transforms = T.Compose([T.Resize((50,50)),T.Grayscale(num_output_channels=1)]) 77 | 78 | 79 | training_data = UnityImageDataset(bev_dirs = bev_dirs,front_dirs=front_dirs,left_dirs=left_dirs,rear_dirs=rear_dirs,right_dirs=right_dirs,image_lists=image_lists,config_dirs=config_dirs,seq_len= seq_len,datalengths = datalengths,num_data_sequences = num_data_sequences, transform = transforms,target_transform=target_transforms) 80 | 81 | 82 | vbev_dirs = ['./data/images'+str(i)+'/val/seg/bev' for i in range(num_data_sequences)] 83 | vfront_dirs = ['./data/images'+str(i)+'/val/rgb/front' for i in range(num_data_sequences)] 84 | vleft_dirs = ['./data/images'+str(i)+'/val/rgb/left' for i in range(num_data_sequences)] 85 | vrear_dirs = ['./data/images'+str(i)+'/val/rgb/rear' for i in range(num_data_sequences)] 86 | vright_dirs = ['./data/images'+str(i)+'/val/rgb/right' for i in range(num_data_sequences)] 87 | vconfig_dirs = ['./data/images'+str(i)+'/val/cameraconfig' for i in range(num_data_sequences)] 88 | 89 | vimage_lists = [] 90 | vdatalengths = [] 91 | 92 | for vbev_dir in vbev_dirs: 93 | vnames = fnmatch.filter(os.listdir(vbev_dir), '*.png') 94 | 95 | vfiles = [] 96 | for vname in vnames: 97 | vfiles.append(os.path.splitext(ntpath.basename(vname))[0]) 98 | 99 | vfilelist = sorted(vfiles,key=int) 100 | 101 | vimage_lists.append([f + '.png' for f in vfilelist]) 102 | vdatalengths.append(len(vnames)) 103 | 104 | val_data = UnityImageDataset(bev_dirs = vbev_dirs,front_dirs=vfront_dirs,left_dirs=vleft_dirs,rear_dirs=vrear_dirs,right_dirs=vright_dirs,image_lists=vimage_lists,config_dirs=vconfig_dirs,seq_len= seq_len,datalengths = vdatalengths, num_data_sequences = num_data_sequences, transform = vtransforms,target_transform=target_transforms) 105 | 106 | 107 | train_dataloader = DataLoader(training_data, batch_size = 2, shuffle=True) 108 | val_dataloader = DataLoader(val_data, 
batch_size=2, shuffle=False) 109 | 110 | 111 | 112 | random_seed = 1 # or any of your favorite number 113 | torch.manual_seed(random_seed) 114 | torch.cuda.manual_seed(random_seed) 115 | torch.backends.cudnn.deterministic = True 116 | torch.backends.cudnn.benchmark = False 117 | 118 | 119 | 120 | 121 | def train(epoch,model,train_dataloader,seq_len, criterion,optimizer): 122 | 123 | model.train() 124 | 125 | train_loss = 0 126 | num_batches = len(train_dataloader) 127 | # with torch.autograd.detect_anomaly(): 128 | for batch_idx, (dataseq, targetseq) in enumerate(train_dataloader): 129 | 130 | inp_img_seq, can_buses_seq = dataseq 131 | #print(len(inp_img_seq)) 132 | bs = targetseq[0].shape[0] 133 | for ctr in range(seq_len): 134 | front = inp_img_seq[ctr][0] 135 | left = inp_img_seq[ctr][1] 136 | rear = inp_img_seq[ctr][2] 137 | right = inp_img_seq[ctr][3] 138 | 139 | target = targetseq[ctr] 140 | 141 | front = front.to(device) 142 | left = left.to(device) 143 | rear = rear.to(device) 144 | right = right.to(device) 145 | 146 | 147 | 148 | target = torch.squeeze(target,dim=1) 149 | idx0 = torch.where(target <= 0.02) 150 | target[idx0] = 10 151 | idx1 = torch.where(target <= 0.07) 152 | target[idx1] = 11 153 | idx2 = torch.where(target <= 0.22) 154 | target[idx2] = 12 155 | idx3 = torch.where(target <= 0.60) 156 | target[idx3] = 13 157 | idx4 = torch.where(target <= 1) 158 | target[idx4] = 14 159 | target = target - 10 160 | target = target.to(torch.int64).to(device) 161 | 162 | can_buses = can_buses_seq[ctr] 163 | 164 | if ctr == 0: 165 | prev_bev = None 166 | 167 | optimizer.zero_grad() 168 | output, inter_outputs, prev_bev_org = model(front,left,rear,right, list(can_buses),prev_bev) 169 | 170 | prev_bev = prev_bev_org.detach() 171 | 172 | loss = criterion(output, target) 173 | 174 | for inter_stage in inter_outputs: 175 | loss += criterion(inter_stage,target) 176 | 177 | loss.backward() 178 | optimizer.step() 179 | 180 | train_loss += loss.data 181 | print('Train Epoch: {} [{}/{} Srno: {} ({:.0f}%)]\tLoss: {:.6f}'.format( 182 | epoch, (batch_idx + 1) * 1, len(train_dataloader), ctr, 183 | 100. 
* (batch_idx + 1) / len(train_dataloader), loss.data)) 184 | 185 | train_loss/= num_batches*seq_len 186 | 187 | print(f"Train Error: Avg loss: {train_loss:>8f} \n") 188 | 189 | return train_loss 190 | 191 | 192 | def val(epoch,test_dataloader,seq_len,model,loss_fn): 193 | num_batches = len(test_dataloader) 194 | model.eval() 195 | test_loss = 0 196 | with torch.no_grad(): 197 | for batch_idx, (dataseq, targetseq) in enumerate(test_dataloader): 198 | inp_img_seq, can_buses_seq = dataseq 199 | bs = targetseq[0].shape[0] 200 | for ctr in range(seq_len): 201 | front = inp_img_seq[ctr][0] 202 | left = inp_img_seq[ctr][1] 203 | rear = inp_img_seq[ctr][2] 204 | right = inp_img_seq[ctr][3] 205 | 206 | target = targetseq[ctr] 207 | 208 | front = front.to(device) 209 | left = left.to(device) 210 | rear = rear.to(device) 211 | right = right.to(device) 212 | 213 | target = torch.squeeze(target,dim=1) 214 | idx0 = torch.where(target <= 0.02) 215 | target[idx0] = 10 216 | idx1 = torch.where(target <= 0.07) 217 | target[idx1] = 11 218 | idx2 = torch.where(target <= 0.22) 219 | target[idx2] = 12 220 | idx3 = torch.where(target <= 0.60) 221 | target[idx3] = 13 222 | idx4 = torch.where(target <= 1) 223 | target[idx4] = 14 224 | target = target - 10 225 | target = target.to(torch.int64).to(device) 226 | 227 | can_buses = can_buses_seq[ctr] 228 | if ctr == 0: 229 | prev_bev = None 230 | pred, inter_pred, prev_bev = model(front,left,rear,right, list(can_buses),prev_bev) 231 | 232 | test_loss += loss_fn(pred, target).item() 233 | 234 | test_loss/= num_batches*seq_len 235 | 236 | print(f"Test Error: Avg loss: {test_loss:>8f} \n") 237 | 238 | return test_loss 239 | 240 | 241 | device = "cuda" if torch.cuda.is_available() else "cpu" 242 | #device = "cpu" 243 | print(f"Using {device} device") 244 | 245 | model = FisheyeBEVFormer().to(device) 246 | 247 | optimizer = optim.AdamW(model.parameters(), lr=0.0002) 248 | criterionCE = nn.CrossEntropyLoss() 249 | criterionFocal = CrossEntropyFocalLoss() 250 | 251 | n_epochs = 1 252 | 253 | PATH = './f2bev_attn_st_seg.pt' 254 | min_val_loss = np.inf 255 | min_epoch = n_epochs 256 | for epoch in range(n_epochs): 257 | train_loss = train(epoch,model,train_dataloader,seq_len,criterionCE,optimizer) 258 | val_loss = val(epoch,val_dataloader,seq_len,model,criterionCE) 259 | 260 | if val_loss < min_val_loss: 261 | min_epoch = epoch 262 | min_val_loss = val_loss 263 | torch.save({'epoch' : epoch, 'model_state_dict': model.state_dict(), 'optimizer_state_dict' : optimizer.state_dict(), 'loss': val_loss}, PATH) 264 | else: 265 | if epoch > min_epoch: 266 | break 267 | -------------------------------------------------------------------------------- /F2BEV_code/f2bev_conda_env.yml: -------------------------------------------------------------------------------- 1 | name: f2bev 2 | channels: 3 | - pytorch 4 | - defaults 5 | dependencies: 6 | - _libgcc_mutex=0.1=main 7 | - _openmp_mutex=5.1=1_gnu 8 | - alabaster=0.7.12=py37_0 9 | - arrow=1.2.3=py37h06a4308_0 10 | - astroid=2.11.7=py37h06a4308_0 11 | - atomicwrites=1.4.0=py_0 12 | - attrs=22.1.0=py37h06a4308_0 13 | - autopep8=1.6.0=pyhd3eb1b0_1 14 | - babel=2.9.1=pyhd3eb1b0_0 15 | - backcall=0.2.0=pyhd3eb1b0_0 16 | - beautifulsoup4=4.11.1=py37h06a4308_0 17 | - binaryornot=0.4.4=pyhd3eb1b0_1 18 | - black=22.6.0=py37h06a4308_0 19 | - blas=1.0=mkl 20 | - bleach=4.1.0=pyhd3eb1b0_0 21 | - bottleneck=1.3.5=py37h7deecbd_0 22 | - brotli=1.0.9=h5eee18b_7 23 | - brotli-bin=1.0.9=h5eee18b_7 24 | - brotlipy=0.7.0=py37h27cfd23_1003 25 | - 
bzip2=1.0.8=h7b6447c_0 26 | - ca-certificates=2023.01.10=h06a4308_0 27 | - certifi=2022.12.7=py37h06a4308_0 28 | - cffi=1.15.1=py37h74dc2b5_0 29 | - chardet=4.0.0=py37h06a4308_1003 30 | - charset-normalizer=2.0.4=pyhd3eb1b0_0 31 | - click=8.0.4=py37h06a4308_0 32 | - cloudpickle=2.0.0=pyhd3eb1b0_0 33 | - colorama=0.4.5=py37h06a4308_0 34 | - cookiecutter=1.7.3=pyhd3eb1b0_0 35 | - cryptography=37.0.1=py37h9ce1e76_0 36 | - cudatoolkit=10.2.89=hfd86e86_1 37 | - cycler=0.11.0=pyhd3eb1b0_0 38 | - dbus=1.13.18=hb2f20db_0 39 | - debugpy=1.5.1=py37h295c915_0 40 | - decorator=5.1.1=pyhd3eb1b0_0 41 | - defusedxml=0.7.1=pyhd3eb1b0_0 42 | - diff-match-patch=20200713=pyhd3eb1b0_0 43 | - dill=0.3.6=py37h06a4308_0 44 | - docutils=0.17.1=py37h06a4308_1 45 | - entrypoints=0.4=py37h06a4308_0 46 | - expat=2.4.4=h295c915_0 47 | - ffmpeg=4.3=hf484d3e_0 48 | - flake8=4.0.1=pyhd3eb1b0_1 49 | - fontconfig=2.13.1=h6c09931_0 50 | - fonttools=4.25.0=pyhd3eb1b0_0 51 | - freetype=2.11.0=h70c0345_0 52 | - giflib=5.2.1=h7b6447c_0 53 | - glib=2.69.1=h4ff587b_1 54 | - gmp=6.2.1=h295c915_3 55 | - gnutls=3.6.15=he1e5248_0 56 | - gst-plugins-base=1.14.0=h8213a91_2 57 | - gstreamer=1.14.0=h28cd5cc_2 58 | - icu=58.2=he6710b0_3 59 | - idna=3.3=pyhd3eb1b0_0 60 | - imagesize=1.4.1=py37h06a4308_0 61 | - importlib_metadata=3.10.0=hd3eb1b0_0 62 | - importlib_resources=5.2.0=pyhd3eb1b0_1 63 | - inflection=0.5.1=py37h06a4308_0 64 | - intel-openmp=2021.4.0=h06a4308_3561 65 | - intervaltree=3.1.0=pyhd3eb1b0_0 66 | - ipykernel=6.15.2=py37h06a4308_0 67 | - ipython=7.31.1=py37h06a4308_1 68 | - ipython_genutils=0.2.0=pyhd3eb1b0_1 69 | - isort=5.9.3=pyhd3eb1b0_0 70 | - jedi=0.18.1=py37h06a4308_1 71 | - jeepney=0.7.1=pyhd3eb1b0_0 72 | - jellyfish=0.9.0=py37h7f8727e_0 73 | - jinja2=3.1.2=py37h06a4308_0 74 | - jinja2-time=0.2.0=pyhd3eb1b0_3 75 | - jpeg=9e=h7f8727e_0 76 | - jsonschema=4.16.0=py37h06a4308_0 77 | - jupyter_client=7.3.4=py37h06a4308_0 78 | - jupyter_core=4.11.2=py37h06a4308_0 79 | - jupyterlab_pygments=0.1.2=py_0 80 | - keyring=23.4.0=py37h06a4308_0 81 | - kiwisolver=1.4.2=py37h295c915_0 82 | - krb5=1.19.2=hac12032_0 83 | - lame=3.100=h7b6447c_0 84 | - lazy-object-proxy=1.6.0=py37h27cfd23_0 85 | - lcms2=2.12=h3be6417_0 86 | - ld_impl_linux-64=2.38=h1181459_1 87 | - lerc=3.0=h295c915_0 88 | - libbrotlicommon=1.0.9=h5eee18b_7 89 | - libbrotlidec=1.0.9=h5eee18b_7 90 | - libbrotlienc=1.0.9=h5eee18b_7 91 | - libclang=10.0.1=default_hb85057a_2 92 | - libdeflate=1.8=h7f8727e_5 93 | - libedit=3.1.20210910=h7f8727e_0 94 | - libevent=2.1.12=h8f2d780_0 95 | - libffi=3.3=he6710b0_2 96 | - libgcc-ng=11.2.0=h1234567_1 97 | - libgomp=11.2.0=h1234567_1 98 | - libiconv=1.16=h7f8727e_2 99 | - libidn2=2.3.2=h7f8727e_0 100 | - libllvm10=10.0.1=hbcb73fb_5 101 | - libpng=1.6.37=hbc83047_0 102 | - libpq=12.9=h16c4e8d_3 103 | - libsodium=1.0.18=h7b6447c_0 104 | - libspatialindex=1.9.3=h2531618_0 105 | - libstdcxx-ng=11.2.0=h1234567_1 106 | - libtasn1=4.16.0=h27cfd23_0 107 | - libtiff=4.4.0=hecacb30_0 108 | - libunistring=0.9.10=h27cfd23_0 109 | - libuuid=1.0.3=h7f8727e_2 110 | - libwebp=1.2.2=h55f646e_0 111 | - libwebp-base=1.2.2=h7f8727e_0 112 | - libxcb=1.15=h7f8727e_0 113 | - libxkbcommon=1.0.1=hfa300c1_0 114 | - libxml2=2.9.14=h74e7548_0 115 | - libxslt=1.1.35=h4e12654_0 116 | - lxml=4.9.1=py37h1edc446_0 117 | - lz4-c=1.9.3=h295c915_1 118 | - markupsafe=2.1.1=py37h7f8727e_0 119 | - matplotlib=3.5.2=py37h06a4308_0 120 | - matplotlib-base=3.5.2=py37hf590b9c_0 121 | - matplotlib-inline=0.1.6=py37h06a4308_0 122 | - mccabe=0.7.0=pyhd3eb1b0_0 123 | - 
mistune=0.8.4=py37h14c3975_1001 124 | - mkl=2021.4.0=h06a4308_640 125 | - mkl-service=2.4.0=py37h7f8727e_0 126 | - mkl_fft=1.3.1=py37hd3c417c_0 127 | - mkl_random=1.2.2=py37h51133e4_0 128 | - munkres=1.1.4=py_0 129 | - mypy_extensions=0.4.3=py37h06a4308_1 130 | - nbclient=0.5.13=py37h06a4308_0 131 | - nbconvert=6.5.4=py37h06a4308_0 132 | - nbformat=5.7.0=py37h06a4308_0 133 | - ncurses=6.3=h5eee18b_3 134 | - nest-asyncio=1.5.5=py37h06a4308_0 135 | - nettle=3.7.3=hbbd107a_1 136 | - nspr=4.33=h295c915_0 137 | - nss=3.74=h0370c37_0 138 | - numexpr=2.8.4=py37he184ba9_0 139 | - numpy=1.21.5=py37h6c91a56_3 140 | - numpy-base=1.21.5=py37ha15fc14_3 141 | - numpydoc=1.5.0=py37h06a4308_0 142 | - openh264=2.1.1=h4ff587b_0 143 | - openssl=1.1.1s=h7f8727e_0 144 | - packaging=21.3=pyhd3eb1b0_0 145 | - pandas=1.3.5=py37h8c16a72_0 146 | - pandocfilters=1.5.0=pyhd3eb1b0_0 147 | - parso=0.8.3=pyhd3eb1b0_0 148 | - pathspec=0.9.0=py37h06a4308_0 149 | - pcre=8.45=h295c915_0 150 | - pexpect=4.8.0=pyhd3eb1b0_3 151 | - pickleshare=0.7.5=pyhd3eb1b0_1003 152 | - pillow=9.2.0=py37hace64e9_1 153 | - pip=22.1.2=py37h06a4308_0 154 | - pkgutil-resolve-name=1.3.10=py37h06a4308_0 155 | - platformdirs=2.5.2=py37h06a4308_0 156 | - pluggy=1.0.0=py37h06a4308_1 157 | - ply=3.11=py37_0 158 | - poyo=0.5.0=pyhd3eb1b0_0 159 | - prompt-toolkit=3.0.20=pyhd3eb1b0_0 160 | - psutil=5.9.0=py37h5eee18b_0 161 | - ptyprocess=0.7.0=pyhd3eb1b0_2 162 | - pycodestyle=2.8.0=pyhd3eb1b0_0 163 | - pycparser=2.21=pyhd3eb1b0_0 164 | - pydocstyle=6.1.1=pyhd3eb1b0_0 165 | - pyflakes=2.4.0=pyhd3eb1b0_0 166 | - pygments=2.11.2=pyhd3eb1b0_0 167 | - pylint=2.14.5=py37h06a4308_0 168 | - pyls-spyder=0.4.0=pyhd3eb1b0_0 169 | - pyopenssl=22.0.0=pyhd3eb1b0_0 170 | - pyparsing=3.0.9=py37h06a4308_0 171 | - pyqt=5.15.7=py37h6a678d5_1 172 | - pyqt5-sip=12.11.0=py37h6a678d5_1 173 | - pyqtwebengine=5.15.7=py37h6a678d5_1 174 | - pyrsistent=0.18.0=py37heee7806_0 175 | - pysocks=1.7.1=py37_1 176 | - python=3.7.13=h12debd9_0 177 | - python-dateutil=2.8.2=pyhd3eb1b0_0 178 | - python-fastjsonschema=2.16.2=py37h06a4308_0 179 | - python-lsp-black=1.2.1=py37h06a4308_0 180 | - python-lsp-jsonrpc=1.0.0=pyhd3eb1b0_0 181 | - python-lsp-server=1.5.0=py37h06a4308_0 182 | - python-slugify=5.0.2=pyhd3eb1b0_0 183 | - pytorch=1.12.1=py3.7_cuda10.2_cudnn7.6.5_0 184 | - pytorch-mutex=1.0=cuda 185 | - pytz=2022.1=py37h06a4308_0 186 | - pyxdg=0.27=pyhd3eb1b0_0 187 | - pyyaml=6.0=py37h5eee18b_1 188 | - pyzmq=23.2.0=py37h6a678d5_0 189 | - qdarkstyle=3.0.2=pyhd3eb1b0_0 190 | - qstylizer=0.1.10=pyhd3eb1b0_0 191 | - qt-main=5.15.2=h327a75a_7 192 | - qt-webengine=5.15.9=hd2b0992_4 193 | - qtawesome=1.0.3=pyhd3eb1b0_0 194 | - qtconsole=5.3.2=py37h06a4308_0 195 | - qtpy=2.2.0=py37h06a4308_0 196 | - qtwebkit=5.212=h4eab89a_4 197 | - readline=8.1.2=h7f8727e_1 198 | - requests=2.28.1=py37h06a4308_0 199 | - rope=0.22.0=pyhd3eb1b0_0 200 | - rtree=0.9.7=py37h06a4308_1 201 | - secretstorage=3.3.1=py37h06a4308_0 202 | - setuptools=63.4.1=py37h06a4308_0 203 | - sip=6.6.2=py37h6a678d5_0 204 | - six=1.16.0=pyhd3eb1b0_1 205 | - snowballstemmer=2.2.0=pyhd3eb1b0_0 206 | - sortedcontainers=2.4.0=pyhd3eb1b0_0 207 | - soupsieve=2.3.2.post1=py37h06a4308_0 208 | - sphinx=4.2.0=pyhd3eb1b0_1 209 | - sphinxcontrib-applehelp=1.0.2=pyhd3eb1b0_0 210 | - sphinxcontrib-devhelp=1.0.2=pyhd3eb1b0_0 211 | - sphinxcontrib-htmlhelp=2.0.0=pyhd3eb1b0_0 212 | - sphinxcontrib-jsmath=1.0.1=pyhd3eb1b0_0 213 | - sphinxcontrib-qthelp=1.0.3=pyhd3eb1b0_0 214 | - sphinxcontrib-serializinghtml=1.1.5=pyhd3eb1b0_0 215 | - 
spyder=5.3.3=py37h06a4308_0 216 | - spyder-kernels=2.3.3=py37h06a4308_0 217 | - sqlite=3.39.2=h5082296_0 218 | - text-unidecode=1.3=pyhd3eb1b0_0 219 | - textdistance=4.2.1=pyhd3eb1b0_0 220 | - three-merge=0.1.1=pyhd3eb1b0_0 221 | - tinycss=0.4=pyhd3eb1b0_1002 222 | - tinycss2=1.2.1=py37h06a4308_0 223 | - tk=8.6.12=h1ccaba5_0 224 | - toml=0.10.2=pyhd3eb1b0_0 225 | - tomli=2.0.1=py37h06a4308_0 226 | - tomlkit=0.11.1=py37h06a4308_0 227 | - torchaudio=0.12.1=py37_cu102 228 | - torchvision=0.13.1=py37_cu102 229 | - tornado=6.1=py37h27cfd23_0 230 | - traitlets=5.7.1=py37h06a4308_0 231 | - typed-ast=1.4.3=py37h7f8727e_1 232 | - typing-extensions=4.3.0=py37h06a4308_0 233 | - typing_extensions=4.3.0=py37h06a4308_0 234 | - ujson=5.4.0=py37h6a678d5_0 235 | - unidecode=1.2.0=pyhd3eb1b0_0 236 | - urllib3=1.26.11=py37h06a4308_0 237 | - watchdog=2.1.6=py37h06a4308_0 238 | - wcwidth=0.2.5=pyhd3eb1b0_0 239 | - webencodings=0.5.1=py37_1 240 | - whatthepatch=1.0.2=py37h06a4308_0 241 | - wheel=0.37.1=pyhd3eb1b0_0 242 | - wrapt=1.14.1=py37h5eee18b_0 243 | - wurlitzer=3.0.2=py37h06a4308_0 244 | - xz=5.2.5=h7f8727e_1 245 | - yaml=0.2.5=h7b6447c_0 246 | - yapf=0.31.0=pyhd3eb1b0_0 247 | - zeromq=4.3.4=h2531618_0 248 | - zlib=1.2.12=h7f8727e_2 249 | - zstd=1.5.2=ha4553b6_0 250 | - pip: 251 | - absl-py==1.2.0 252 | - aiohttp==3.8.1 253 | - aiosignal==1.2.0 254 | - antlr4-python3-runtime==4.9.3 255 | - async-timeout==4.0.2 256 | - asynctest==0.13.0 257 | - cachetools==5.2.0 258 | - cython==0.29.32 259 | - filelock==3.9.0 260 | - frozenlist==1.3.1 261 | - fsspec==2022.8.2 262 | - google-auth==2.11.1 263 | - google-auth-oauthlib==0.4.6 264 | - grpcio==1.48.1 265 | - huggingface-hub==0.11.1 266 | - importlib-metadata==4.12.0 267 | - markdown==3.4.1 268 | - multidict==6.0.2 269 | - oauthlib==3.2.1 270 | - omegaconf==2.3.0 271 | - opencv-contrib-python==4.5.4.60 272 | - opencv-python==3.4.2.17 273 | - protobuf==3.19.5 274 | - pyasn1==0.4.8 275 | - pyasn1-modules==0.2.8 276 | - pycocotools==2.0.6 277 | - pydeprecate==0.3.2 278 | - pytorch-lightning==1.7.6 279 | - requests-oauthlib==1.3.1 280 | - rsa==4.9 281 | - scipy==1.7.3 282 | - tensorboard==2.10.0 283 | - tensorboard-data-server==0.6.1 284 | - tensorboard-plugin-wit==1.8.1 285 | - timm==0.6.12 286 | - torchmetrics==0.9.3 287 | - torchsummary==1.5.1 288 | - tqdm==4.64.0 289 | - werkzeug==2.2.2 290 | - yarl==1.8.1 291 | - zipp==3.8.1 292 | prefix: /home/smartslab/anaconda3/envs/pytorch 293 | --------------------------------------------------------------------------------
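A minimal sketch (not part of the repository) of how a checkpoint written by the training scripts above could be reloaded for evaluation. It only assumes the dictionary keys used in their torch.save calls ('epoch', 'model_state_dict', 'optimizer_state_dict', 'loss'), the checkpoint path './f2bev_attn_st_seg.pt', and the FisheyeBEVFormer class those scripts import; the provided test_f2bev_*.py scripts are the authoritative evaluation entry points.

import torch
from model_f2bev_attn_st_seg import FisheyeBEVFormer

device = "cuda" if torch.cuda.is_available() else "cpu"
model = FisheyeBEVFormer().to(device)

# Load the dictionary saved by train_f2bev_attn_st_seg.py and restore the weights.
checkpoint = torch.load('./f2bev_attn_st_seg.pt', map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()
print(f"Loaded checkpoint from epoch {checkpoint['epoch']} with val loss {checkpoint['loss']:.6f}")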