├── F2BEV_code
├── .DS_Store
├── F2BEV
│   ├── .DS_Store
│   ├── pre_computation
│   │   ├── .DS_Store
│   │   ├── unity_data
│   │   │   ├── bev_mask.npy
│   │   │   └── reference_points_cam.npy
│   │   ├── forward_looking_camera_model
│   │   │   ├── data
│   │   │   │   ├── bev
│   │   │   │   │   └── 0.png
│   │   │   │   ├── front
│   │   │   │   │   └── 0.png
│   │   │   │   ├── left
│   │   │   │   │   └── 0.png
│   │   │   │   ├── rear
│   │   │   │   │   └── 0.png
│   │   │   │   ├── right
│   │   │   │   │   └── 0.png
│   │   │   │   └── seg
│   │   │   │   │   ├── BEV_0_seg.png
│   │   │   │   │   ├── left_0_seg.png
│   │   │   │   │   ├── rear_0_seg.png
│   │   │   │   │   ├── front_0_seg.png
│   │   │   │   │   └── right_0_seg.png
│   │   │   ├── masks
│   │   │   │   ├── front.npy
│   │   │   │   ├── left.npy
│   │   │   │   ├── rear.npy
│   │   │   │   └── right.npy
│   │   │   └── flcw_unity.yml
│   │   └── computeNormalizedReferencePoints.py
│   ├── bblocks
│   │   ├── __pycache__
│   │   │   ├── ffn.cpython-37.pyc
│   │   │   ├── bifpn.cpython-37.pyc
│   │   │   ├── backbone.cpython-37.pyc
│   │   │   ├── cnndecoder.cpython-37.pyc
│   │   │   ├── bifpn_configs.cpython-37.pyc
│   │   │   ├── backbone_bifpn.cpython-37.pyc
│   │   │   ├── bevformer_block.cpython-37.pyc
│   │   │   ├── encoder_height.cpython-37.pyc
│   │   │   ├── deformable_attention.cpython-37.pyc
│   │   │   ├── mask_head_decoder_seg.cpython-37.pyc
│   │   │   ├── positional_encoding.cpython-37.pyc
│   │   │   ├── mask_head_decoder_htseg.cpython-37.pyc
│   │   │   ├── mask_head_pansegformer.cpython-37.pyc
│   │   │   ├── spatial_cross_attention.cpython-37.pyc
│   │   │   ├── temporal_self_attention.cpython-37.pyc
│   │   │   ├── mask_head_decoder_height.cpython-37.pyc
│   │   │   └── deformable_attention_function.cpython-37.pyc
│   │   ├── ffn.py
│   │   ├── backbone_bifpn.py
│   │   ├── mask_head_decoder_height.py
│   │   ├── mask_head_decoder_seg.py
│   │   ├── backbone.py
│   │   ├── bevformer_block.py
│   │   ├── mask_head_decoder_htseg.py
│   │   ├── positional_encoding.py
│   │   ├── deformable_attention_function.py
│   │   ├── spatial_cross_attention.py
│   │   ├── cnndecoder.py
│   │   ├── bifpn_configs.py
│   │   └── deformable_attention.py
│   ├── losses
│   │   ├── __pycache__
│   │   │   ├── focal.cpython-37.pyc
│   │   │   ├── functional.cpython-37.pyc
│   │   │   └── smoothness.cpython-37.pyc
│   │   ├── smoothness.py
│   │   └── focal.py
│   ├── model_f2bev_conv_st_seg.py
│   ├── model_f2bev_conv_st_height.py
│   ├── model_f2bev_attn_st_seg.py
│   ├── model_f2bev_attn_st_height.py
│   ├── model_f2bev_conv_mt.py
│   ├── model_f2bev_attn_mt.py
│   ├── test_f2bev_conv_st_height.py
│   ├── test_f2bev_attn_st_height.py
│   ├── test_f2bev_conv_st_seg.py
│   ├── test_f2bev_attn_st_seg.py
│   ├── loader_multi_task.py
│   ├── test_loader_multi_task.py
│   ├── test_f2bev_attn_mt.py
│   ├── test_f2bev_conv_mt.py
│   ├── loader_single_task.py
│   ├── test_loader_single_task.py
│   ├── train_f2bev_conv_st_height.py
│   ├── train_f2bev_attn_st_height.py
│   ├── train_f2bev_conv_st_seg.py
│   └── train_f2bev_attn_st_seg.py
├── README.md
└── f2bev_conda_env.yml
├── README.md
└── FB-SSEM_dataset
    └── README.md
/F2BEV_code/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/.DS_Store -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/.DS_Store -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/pre_computation/.DS_Store: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/pre_computation/.DS_Store -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/__pycache__/ffn.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/bblocks/__pycache__/ffn.cpython-37.pyc -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/__pycache__/bifpn.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/bblocks/__pycache__/bifpn.cpython-37.pyc -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/losses/__pycache__/focal.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/losses/__pycache__/focal.cpython-37.pyc -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/pre_computation/unity_data/bev_mask.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/pre_computation/unity_data/bev_mask.npy -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/__pycache__/backbone.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/bblocks/__pycache__/backbone.cpython-37.pyc -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/__pycache__/cnndecoder.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/bblocks/__pycache__/cnndecoder.cpython-37.pyc -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/losses/__pycache__/functional.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/losses/__pycache__/functional.cpython-37.pyc -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/losses/__pycache__/smoothness.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/losses/__pycache__/smoothness.cpython-37.pyc -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/__pycache__/bifpn_configs.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/bblocks/__pycache__/bifpn_configs.cpython-37.pyc -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/__pycache__/backbone_bifpn.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/bblocks/__pycache__/backbone_bifpn.cpython-37.pyc -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/__pycache__/bevformer_block.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/bblocks/__pycache__/bevformer_block.cpython-37.pyc -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/__pycache__/encoder_height.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/bblocks/__pycache__/encoder_height.cpython-37.pyc -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/pre_computation/unity_data/reference_points_cam.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/pre_computation/unity_data/reference_points_cam.npy -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/__pycache__/deformable_attention.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/bblocks/__pycache__/deformable_attention.cpython-37.pyc -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/__pycache__/mask_head_decoder_seg.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/bblocks/__pycache__/mask_head_decoder_seg.cpython-37.pyc -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/__pycache__/positional_encoding.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/bblocks/__pycache__/positional_encoding.cpython-37.pyc -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/__pycache__/mask_head_decoder_htseg.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/bblocks/__pycache__/mask_head_decoder_htseg.cpython-37.pyc -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/__pycache__/mask_head_pansegformer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/bblocks/__pycache__/mask_head_pansegformer.cpython-37.pyc -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/__pycache__/spatial_cross_attention.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/bblocks/__pycache__/spatial_cross_attention.cpython-37.pyc 
-------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/__pycache__/temporal_self_attention.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/bblocks/__pycache__/temporal_self_attention.cpython-37.pyc -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/__pycache__/mask_head_decoder_height.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/bblocks/__pycache__/mask_head_decoder_height.cpython-37.pyc -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/data/bev/0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/data/bev/0.png -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/data/front/0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/data/front/0.png -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/data/left/0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/data/left/0.png -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/data/rear/0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/data/rear/0.png -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/data/right/0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/data/right/0.png -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/masks/front.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/masks/front.npy -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/masks/left.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/masks/left.npy -------------------------------------------------------------------------------- 
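The precomputed files listed here (```unity_data/bev_mask.npy```, ```unity_data/reference_points_cam.npy```, and the per-camera ```masks/*.npy``` files) are plain NumPy arrays associated with the offline reference-point pre-computation for the distortion-aware spatial cross attention (see the ```F2BEV_code``` README further below). The following is a minimal inspection sketch, not part of the original repository: the relative paths are taken from the listing above, and because the array shapes and dtypes are not documented here, the snippet only loads each file and reports what it finds.

```python
# Illustrative sketch (not from the repository): inspect the precomputed .npy
# artifacts referenced by the F2BEV pre-computation step.
import numpy as np

paths = [
    "pre_computation/unity_data/bev_mask.npy",
    "pre_computation/unity_data/reference_points_cam.npy",
    "pre_computation/forward_looking_camera_model/masks/front.npy",
    "pre_computation/forward_looking_camera_model/masks/left.npy",
    "pre_computation/forward_looking_camera_model/masks/rear.npy",
    "pre_computation/forward_looking_camera_model/masks/right.npy",
]

for p in paths:
    arr = np.load(p)
    print(f"{p}: shape={arr.shape}, dtype={arr.dtype}")
```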
/F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/masks/rear.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/masks/rear.npy -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/masks/right.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/masks/right.npy -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/__pycache__/deformable_attention_function.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/bblocks/__pycache__/deformable_attention_function.cpython-37.pyc -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/data/seg/BEV_0_seg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/data/seg/BEV_0_seg.png -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/data/seg/left_0_seg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/data/seg/left_0_seg.png -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/data/seg/rear_0_seg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/data/seg/rear_0_seg.png -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/data/seg/front_0_seg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/data/seg/front_0_seg.png -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/data/seg/right_0_seg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volvo-cars/FB-SSEM-dataset/HEAD/F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/data/seg/right_0_seg.png -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/pre_computation/forward_looking_camera_model/flcw_unity.yml: -------------------------------------------------------------------------------- 1 | %YAML:1.0 2 | --- 3 | K: !!opencv-matrix 4 | rows: 3 5 | cols: 3 6 | dt: d 7 | data: [ 659.9565405462982, -2.8848508379788056, 634.6329612029243, 0.0, 625.1032520893773, 544.7433055928482, 0., 0., 1. 
] 8 | D: !!opencv-matrix 9 | rows: 1 10 | cols: 4 11 | dt: d 12 | data: [ -0.2900269437421997, 0.11089496468175668, -0.0003222479159157141, 0.0029110573007121382] 13 | xi: !!opencv-matrix 14 | rows: 1 15 | cols: 1 16 | dt: d 17 | data: [ 1.0866311153248236 ] 18 | board_width: 9 19 | board_height: 6 20 | square_size: 2.4229999631643295e-02 21 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/ffn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue Aug 30 17:10:39 2022 5 | 6 | @author: Ekta 7 | 8 | """ 9 | 10 | import torch 11 | import torch.nn as nn 12 | 13 | class FFN(nn.Module): 14 | def __init__(self): 15 | super(FFN, self).__init__() 16 | self.layers = nn.Sequential( 17 | nn.Linear(in_features = 256, out_features = 512, bias=True), 18 | nn.ReLU(inplace=True), 19 | nn.Dropout(p=0.2,inplace=False), 20 | nn.Linear(in_features = 512, out_features = 256, bias=True), 21 | nn.Dropout(p=0.2,inplace=False)) 22 | 23 | def forward(self,x): 24 | x = self.layers(x) 25 | return x 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # F2BEV 2 | 3 | F2BEV is a network for Bird's Eye View (BEV) generation from surround-view fisheye camera images for automated driving. 4 | 5 | Please navigate to the ```F2BEV_code``` folder in this repository for details. 6 | 7 | 8 | # FB-SSEM-dataset 9 | 10 | The FB-SSEM dataset is a synthetic dataset consisting of surround-view fisheye camera images and BEV maps from simulated sequences of ego car motion. Please navigate to the ```FB-SSEM_dataset``` folder in this repository for details. 11 | 12 | ## Citation 13 | If you find our dataset or code beneficial, please cite the [F2BEV paper](https://arxiv.org/abs/2303.03651). 14 | 15 | ```bash 16 | @article{samani2023f2bev, 17 | title={F2BEV: Bird's Eye View Generation from Surround-View Fisheye Camera Images for Automated Driving}, 18 | author={Samani, Ekta U and Tao, Feng and Dasari, Harshavardhan R and Ding, Sihao and Banerjee, Ashis G}, 19 | journal={arXiv preprint arXiv:2303.03651}, 20 | year={2023}} 21 | ``` 22 | 23 | 24 | ## Contact 25 | Harshavardhan R.
Dasari 26 | mail : harshavardhan.reddy.dasari@volvocars.com 27 | Ekta Samani 28 | mail : eusamani@gmail.com -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/backbone_bifpn.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Wed Dec 28 10:51:01 2022 3 | 4 | @author: Ekta 5 | """ 6 | 7 | 8 | import torch 9 | import torch.nn as nn 10 | from collections import OrderedDict 11 | import timm 12 | from typing import Callable 13 | from .bifpn import BiFpn 14 | #from functools import partial 15 | #from torchvision.ops.feature_pyramid_network import FeaturePyramidNetwork 16 | 17 | def get_feature_info(backbone): 18 | if isinstance(backbone.feature_info, Callable): 19 | # old accessor for timm versions <= 0.1.30, efficientnet and mobilenetv3 and related nets only 20 | feature_info = [dict(num_chs=f['num_chs'], reduction=f['reduction']) 21 | for i, f in enumerate(backbone.feature_info())] 22 | else: 23 | # new feature info accessor, timm >= 0.2, all models supported 24 | feature_info = backbone.feature_info.get_dicts(keys=['num_chs', 'reduction']) 25 | return feature_info 26 | 27 | 28 | 29 | class ResNet34BiFPN(nn.Module): 30 | def __init__(self): 31 | super(ResNet34BiFPN,self).__init__() 32 | self.backbone = timm.create_model( 33 | 'resnet34', features_only=True, 34 | out_indices= (1, 2, 3, 4), 35 | pretrained=True) 36 | feature_info = get_feature_info(self.backbone) 37 | self.fpn = BiFpn(feature_info) 38 | 39 | 40 | def forward(self, x): 41 | x = self.backbone(x) 42 | x = self.fpn(x) 43 | return x 44 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/mask_head_decoder_height.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue Jan 17 12:06:48 2023 5 | 6 | @author: Ekta 7 | """ 8 | 9 | import torch 10 | import torch.nn as nn 11 | from bblocks.mask_head_pansegformer import MaskHead 12 | 13 | class MaskHeadDecoder(nn.Module): 14 | def __init__(self): 15 | super().__init__() 16 | self.bev_h = 50 17 | self.bev_w = 50 18 | self.num_stuff_classes = 3 19 | self.embed_dims = 256 20 | self.stuff_query = nn.Embedding(self.num_stuff_classes, 21 | self.embed_dims * 2) 22 | self.stuff_mask_head = MaskHead(num_decoder_layers=3,self_attn=True) 23 | 24 | 25 | # self._reset_parameters() 26 | 27 | # def _reset_parameters(self): 28 | 29 | 30 | def forward(self,bev_embed): 31 | stuff_query, stuff_query_pos = torch.split(self.stuff_query.weight,self.embed_dims,dim=1) 32 | bs = bev_embed.shape[0] 33 | stuff_query_pos = stuff_query_pos.unsqueeze(0).expand(bs, -1, -1) 34 | stuff_query = stuff_query.unsqueeze(0).expand(bs, -1, -1) 35 | hw_lvl = torch.tensor([[self.bev_h, self.bev_w]], device=stuff_query.device) 36 | 37 | attn, masks, inter_query = self.stuff_mask_head(bev_embed,None,None,stuff_query, None, stuff_query_pos, 38 | hw_lvl) 39 | 40 | mask_stuff = attn.squeeze(-1) 41 | 42 | mask_stuff = mask_stuff.reshape(bs, self.num_stuff_classes, hw_lvl[0][0],hw_lvl[0][1]) 43 | 44 | inter_masks = [m.squeeze(-1).reshape(bs, self.num_stuff_classes, hw_lvl[0][0],hw_lvl[0][1]) for m in masks] 45 | 46 | return mask_stuff, inter_masks 47 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/mask_head_decoder_seg.py: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue Jan 17 12:06:48 2023 5 | 6 | @author: Ekta 7 | """ 8 | 9 | import torch 10 | import torch.nn as nn 11 | from bblocks.mask_head_pansegformer import MaskHead 12 | 13 | class MaskHeadDecoder(nn.Module): 14 | def __init__(self): 15 | super().__init__() 16 | self.bev_h = 50 17 | self.bev_w = 50 18 | self.num_stuff_classes = 5 19 | self.embed_dims = 256 20 | self.stuff_query = nn.Embedding(self.num_stuff_classes, 21 | self.embed_dims * 2) 22 | self.stuff_mask_head = MaskHead(num_decoder_layers=3,self_attn=True) 23 | 24 | 25 | # self._reset_parameters() 26 | 27 | # def _reset_parameters(self): 28 | 29 | 30 | def forward(self,bev_embed): 31 | stuff_query, stuff_query_pos = torch.split(self.stuff_query.weight,self.embed_dims,dim=1) 32 | bs = bev_embed.shape[0] 33 | stuff_query_pos = stuff_query_pos.unsqueeze(0).expand(bs, -1, -1) 34 | stuff_query = stuff_query.unsqueeze(0).expand(bs, -1, -1) 35 | hw_lvl = torch.tensor([[self.bev_h, self.bev_w]], device=stuff_query.device) 36 | 37 | attn, masks, inter_query = self.stuff_mask_head(bev_embed,None,None,stuff_query, None, stuff_query_pos, 38 | hw_lvl) 39 | 40 | mask_stuff = attn.squeeze(-1) 41 | 42 | mask_stuff = mask_stuff.reshape(bs, self.num_stuff_classes, hw_lvl[0][0],hw_lvl[0][1]) 43 | 44 | inter_masks = [m.squeeze(-1).reshape(bs, self.num_stuff_classes, hw_lvl[0][0],hw_lvl[0][1]) for m in masks] 45 | 46 | return mask_stuff, inter_masks 47 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/backbone.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Mon Aug 29 15:19:36 2022 5 | 6 | @author: Ekta 7 | """ 8 | ## From: https://pytorch.org/vision/stable/feature_extraction.html 9 | import torch 10 | from torchvision.models import resnet50,resnet34 ,ResNet50_Weights, resnet34, ResNet34_Weights 11 | from torchvision.models.feature_extraction import create_feature_extractor 12 | from torchvision.models.detection.backbone_utils import LastLevelMaxPool 13 | from torchvision.ops.feature_pyramid_network import FeaturePyramidNetwork 14 | 15 | 16 | 17 | class Resnet34WithFPN(torch.nn.Module): 18 | def __init__(self): 19 | super(Resnet34WithFPN, self).__init__() 20 | # Get a resnet50 backbone 21 | #m = resnet50() 22 | #m = resnet50(weights=ResNet50_Weights.DEFAULT) 23 | m = resnet34(weights=ResNet34_Weights.DEFAULT) 24 | #m = resnet34() 25 | # Extract 4 main layers (note: MaskRCNN needs this particular name 26 | # mapping for return nodes) 27 | # print(resnet34) 28 | self.body = create_feature_extractor( 29 | m, return_nodes={f'layer{k}': str(v) 30 | for v, k in enumerate([1, 2, 3, 4])}) 31 | inp = torch.randn(1, 3, 540, 640) 32 | with torch.no_grad(): 33 | out = self.body(inp) 34 | in_channels_list = [o.shape[1] for o in out.values()] 35 | #print(in_channels_list) 36 | # # Build FPN 37 | self.out_channels = 256 38 | # self.fpn = FeaturePyramidNetwork( 39 | # in_channels_list, out_channels=self.out_channels, 40 | # extra_blocks=LastLevelMaxPool()) 41 | self.fpn = FeaturePyramidNetwork( 42 | in_channels_list, out_channels=self.out_channels) 43 | def forward(self, x): 44 | x = self.body(x) 45 | #print(x.keys(),x['0'].shape) 46 | x = self.fpn(x) 47 | return x 48 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/losses/smoothness.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Wed Nov 23 16:45:22 2022 5 | 6 | @author: Ekta 7 | """ 8 | 9 | 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | 14 | class MonodepthLoss(nn.modules.Module): 15 | def __init__(self): 16 | super(MonodepthLoss, self).__init__() 17 | 18 | def gradient_x(self,img): 19 | # Pad input to keep output size consistent 20 | img = F.pad(img, (0, 1, 0, 0), mode="replicate") 21 | gx = img[:, :, :, :-1] - img[:, :, :, 1:] # NCHW 22 | #print(gx) 23 | return gx 24 | 25 | def gradient_y(self,img): 26 | # Pad input to keep output size consistent 27 | img = F.pad(img, (0, 0, 0, 1), mode="replicate") 28 | gy = img[:, :, :-1, :] - img[:, :, 1:, :] # NCHW 29 | return gy 30 | 31 | def disp_smoothness_fn(self,disp, img): 32 | disp_gradients_x = self.gradient_x(disp) 33 | disp_gradients_y = self.gradient_y(disp) 34 | 35 | #print(torch.unique(torch.isnan(disp_gradients_x))) 36 | #print(torch.unique(torch.isnan(disp_gradients_y))) 37 | 38 | image_gradients_x = self.gradient_x(img) 39 | image_gradients_y = self.gradient_y(img) 40 | 41 | weight_x = torch.exp(-torch.mean(torch.abs(image_gradients_x), 1, keepdim=True)) 42 | weight_y = torch.exp(-torch.mean(torch.abs(image_gradients_y), 1, keepdim=True)) 43 | 44 | smoothness_x = disp_gradients_x * weight_x 45 | smoothness_y = disp_gradients_y * weight_y 46 | 47 | 48 | return torch.abs(smoothness_x) + torch.abs(smoothness_y) 49 | 50 | 51 | def forward(self, height, seg): 52 | disp = 1/height 53 | #print(torch.unique(torch.isnan(disp))) 54 | disp_smoothness = self.disp_smoothness_fn(disp, seg) 55 | 56 | loss = torch.mean(torch.abs(disp_smoothness)) 57 | 58 | return loss 59 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/model_f2bev_conv_st_seg.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Jan 5 14:35:41 2023 5 | 6 | @author: Ekta 7 | """ 8 | 9 | 10 | import torch 11 | import torch.nn as nn 12 | 13 | import numpy as np 14 | 15 | from bblocks.backbone_bifpn import ResNet34BiFPN 16 | from bblocks.encoder_height import EncoderFLCW 17 | 18 | from bblocks.cnndecoder import DecoderCup, SegmentationHead, HeightHead, HeightMulticlassHead 19 | 20 | class FisheyeBEVFormer(nn.Module): 21 | def __init__(self): 22 | super(FisheyeBEVFormer, self).__init__() 23 | self.backbone = ResNet34BiFPN() 24 | 25 | self.encoder = EncoderFLCW() 26 | self.multiscale = True#False 27 | self.decoder = DecoderCup() 28 | self.segmentation_head = SegmentationHead() 29 | 30 | def forward(self,front,left,rear,right,can_buses,prev_bev=None): 31 | 32 | f_f = self.backbone(front) 33 | f_l = self.backbone(left) 34 | f_re = self.backbone(rear) 35 | f_r = self.backbone(right) 36 | 37 | if self.multiscale: 38 | level0 = torch.cat((f_f[0].unsqueeze(0),f_l[0].unsqueeze(0),f_re[0].unsqueeze(0),f_r[0].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 39 | level1 = torch.cat((f_f[1].unsqueeze(0),f_l[1].unsqueeze(0),f_re[1].unsqueeze(0),f_r[1].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 40 | level2 = torch.cat((f_f[2].unsqueeze(0),f_l[2].unsqueeze(0),f_re[2].unsqueeze(0),f_r[2].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 41 | level3 = torch.cat((f_f[3].unsqueeze(0),f_l[3].unsqueeze(0),f_re[3].unsqueeze(0),f_r[3].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 42 | mlvl_feats = 
[level0,level1,level2,level3] 43 | else: 44 | 45 | level0 = torch.cat((f_f[0].unsqueeze(0),f_l[0].unsqueeze(0),f_re[0].unsqueeze(0),f_r[0].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 46 | mlvl_feats = level0.unsqueeze(0) 47 | bevfeatures = self.encoder(mlvl_feats,can_buses,prev_bev) 48 | 49 | decoded = self.decoder(bevfeatures) 50 | 51 | output = self.segmentation_head(decoded) 52 | 53 | return output,bevfeatures 54 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/model_f2bev_conv_st_height.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Jan 5 14:35:41 2023 5 | 6 | @author: Ekta 7 | """ 8 | 9 | 10 | import torch 11 | import torch.nn as nn 12 | 13 | import numpy as np 14 | 15 | from bblocks.backbone_bifpn import ResNet34BiFPN 16 | from bblocks.encoder_height import EncoderFLCW 17 | 18 | from bblocks.cnndecoder import DecoderCup, SegmentationHead, HeightHead, HeightMulticlassHead 19 | 20 | class FisheyeBEVFormer(nn.Module): 21 | def __init__(self): 22 | super(FisheyeBEVFormer, self).__init__() 23 | self.backbone = ResNet34BiFPN() 24 | self.encoder = EncoderFLCW() 25 | self.multiscale = True#False 26 | self.decoder = DecoderCup() 27 | self.height_multiclass_head = HeightMulticlassHead() 28 | 29 | def forward(self,front,left,rear,right,can_buses,prev_bev=None): 30 | 31 | 32 | f_f = self.backbone(front) 33 | f_l = self.backbone(left) 34 | f_re = self.backbone(rear) 35 | f_r = self.backbone(right) 36 | 37 | if self.multiscale: 38 | level0 = torch.cat((f_f[0].unsqueeze(0),f_l[0].unsqueeze(0),f_re[0].unsqueeze(0),f_r[0].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 39 | level1 = torch.cat((f_f[1].unsqueeze(0),f_l[1].unsqueeze(0),f_re[1].unsqueeze(0),f_r[1].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 40 | level2 = torch.cat((f_f[2].unsqueeze(0),f_l[2].unsqueeze(0),f_re[2].unsqueeze(0),f_r[2].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 41 | level3 = torch.cat((f_f[3].unsqueeze(0),f_l[3].unsqueeze(0),f_re[3].unsqueeze(0),f_r[3].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 42 | mlvl_feats = [level0,level1,level2,level3] 43 | else: 44 | 45 | level0 = torch.cat((f_f[0].unsqueeze(0),f_l[0].unsqueeze(0),f_re[0].unsqueeze(0),f_r[0].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 46 | mlvl_feats = level0.unsqueeze(0) 47 | bevfeatures = self.encoder(mlvl_feats,can_buses,prev_bev) 48 | 49 | decoded = self.decoder(bevfeatures) 50 | output = self.height_multiclass_head(decoded) 51 | 52 | return output,bevfeatures 53 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/model_f2bev_attn_st_seg.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Mon Jan 9 17:07:00 2023 5 | 6 | @author: Ekta 7 | """ 8 | 9 | import torch 10 | import torch.nn as nn 11 | 12 | from bblocks.backbone import Resnet34WithFPN 13 | 14 | from bblocks.encoder_height import EncoderFLCW 15 | from bblocks.mask_head_decoder_seg import MaskHeadDecoder 16 | 17 | class FisheyeBEVFormer(nn.Module): 18 | def __init__(self): 19 | super(FisheyeBEVFormer, self).__init__() 20 | self.backbone = Resnet34WithFPN() 21 | 22 | self.encoder = EncoderFLCW() 23 | self.multiscale = True #False 24 | self.decoder = MaskHeadDecoder() 25 | self.deep_supervision = True 26 | 27 | def forward(self,front,left,rear,right,can_buses,prev_bev=None): 28 | 29 | 30 | f_f = 
self.backbone(front) 31 | f_l = self.backbone(left) 32 | f_re = self.backbone(rear) 33 | f_r = self.backbone(right) 34 | 35 | if self.multiscale: 36 | level0 = torch.cat((f_f['0'].unsqueeze(0),f_l['0'].unsqueeze(0),f_re['0'].unsqueeze(0),f_r['0'].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 37 | level1 = torch.cat((f_f['1'].unsqueeze(0),f_l['1'].unsqueeze(0),f_re['1'].unsqueeze(0),f_r['1'].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 38 | level2 = torch.cat((f_f['2'].unsqueeze(0),f_l['2'].unsqueeze(0),f_re['2'].unsqueeze(0),f_r['2'].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 39 | level3 = torch.cat((f_f['3'].unsqueeze(0),f_l['3'].unsqueeze(0),f_re['3'].unsqueeze(0),f_r['3'].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 40 | mlvl_feats = [level0,level1,level2,level3] 41 | 42 | else: 43 | 44 | level0 = torch.cat((f_f[0].unsqueeze(0),f_l[0].unsqueeze(0),f_re[0].unsqueeze(0),f_r[0].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 45 | mlvl_feats = level0.unsqueeze(0) 46 | 47 | bevfeatures = self.encoder(mlvl_feats,can_buses,prev_bev) 48 | output, inter_masks = self.decoder(bevfeatures) 49 | 50 | if self.deep_supervision: 51 | return output, inter_masks, bevfeatures 52 | else: 53 | return output,bevfeatures 54 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/model_f2bev_attn_st_height.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Mon Jan 9 17:07:00 2023 5 | 6 | @author: Ekta 7 | """ 8 | 9 | import torch 10 | import torch.nn as nn 11 | 12 | from bblocks.backbone import Resnet34WithFPN 13 | 14 | from bblocks.encoder_height import EncoderFLCW 15 | from bblocks.mask_head_decoder_height import MaskHeadDecoder 16 | 17 | class FisheyeBEVFormer(nn.Module): 18 | def __init__(self): 19 | super(FisheyeBEVFormer, self).__init__() 20 | self.backbone = Resnet34WithFPN() 21 | 22 | self.encoder = EncoderFLCW() 23 | self.multiscale = True #False 24 | self.decoder = MaskHeadDecoder() 25 | self.deep_supervision = True 26 | 27 | def forward(self,front,left,rear,right,can_buses,prev_bev=None): 28 | 29 | 30 | f_f = self.backbone(front) 31 | f_l = self.backbone(left) 32 | f_re = self.backbone(rear) 33 | f_r = self.backbone(right) 34 | 35 | if self.multiscale: 36 | level0 = torch.cat((f_f['0'].unsqueeze(0),f_l['0'].unsqueeze(0),f_re['0'].unsqueeze(0),f_r['0'].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 37 | level1 = torch.cat((f_f['1'].unsqueeze(0),f_l['1'].unsqueeze(0),f_re['1'].unsqueeze(0),f_r['1'].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 38 | level2 = torch.cat((f_f['2'].unsqueeze(0),f_l['2'].unsqueeze(0),f_re['2'].unsqueeze(0),f_r['2'].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 39 | level3 = torch.cat((f_f['3'].unsqueeze(0),f_l['3'].unsqueeze(0),f_re['3'].unsqueeze(0),f_r['3'].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 40 | mlvl_feats = [level0,level1,level2,level3] 41 | 42 | else: 43 | 44 | level0 = torch.cat((f_f[0].unsqueeze(0),f_l[0].unsqueeze(0),f_re[0].unsqueeze(0),f_r[0].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 45 | mlvl_feats = level0.unsqueeze(0) 46 | 47 | bevfeatures = self.encoder(mlvl_feats,can_buses,prev_bev) 48 | output, inter_masks = self.decoder(bevfeatures) 49 | 50 | if self.deep_supervision: 51 | return output, inter_masks, bevfeatures 52 | else: 53 | return output,bevfeatures 54 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/model_f2bev_conv_mt.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Fri Jan 6 10:45:19 2023 5 | 6 | @author: Ekta 7 | """ 8 | 9 | import torch 10 | import torch.nn as nn 11 | 12 | import numpy as np 13 | 14 | from bblocks.backbone_bifpn import ResNet34BiFPN 15 | 16 | from bblocks.encoder_height import EncoderFLCW 17 | 18 | 19 | from bblocks.cnndecoder import DecoderCup, SegmentationHead, HeightHead, HeightMulticlassHead 20 | 21 | class FisheyeBEVFormer(nn.Module): 22 | def __init__(self): 23 | super(FisheyeBEVFormer, self).__init__() 24 | self.backbone = ResNet34BiFPN() 25 | 26 | self.encoder = EncoderFLCW() 27 | self.multiscale = True #False 28 | self.decoder = DecoderCup() 29 | self.segmentation_head = SegmentationHead(out_channels=5) 30 | self.height_multiclass_head = HeightMulticlassHead(out_channels=3) 31 | 32 | def forward(self,front,left,rear,right,can_buses,prev_bev=None): 33 | 34 | f_f = self.backbone(front) 35 | f_l = self.backbone(left) 36 | f_re = self.backbone(rear) 37 | f_r = self.backbone(right) 38 | 39 | if self.multiscale: 40 | level0 = torch.cat((f_f[0].unsqueeze(0),f_l[0].unsqueeze(0),f_re[0].unsqueeze(0),f_r[0].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 41 | level1 = torch.cat((f_f[1].unsqueeze(0),f_l[1].unsqueeze(0),f_re[1].unsqueeze(0),f_r[1].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 42 | level2 = torch.cat((f_f[2].unsqueeze(0),f_l[2].unsqueeze(0),f_re[2].unsqueeze(0),f_r[2].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 43 | level3 = torch.cat((f_f[3].unsqueeze(0),f_l[3].unsqueeze(0),f_re[3].unsqueeze(0),f_r[3].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 44 | mlvl_feats = [level0,level1,level2,level3] 45 | else: 46 | 47 | 48 | level0 = torch.cat((f_f[0].unsqueeze(0),f_l[0].unsqueeze(0),f_re[0].unsqueeze(0),f_r[0].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 49 | mlvl_feats = level0.unsqueeze(0) 50 | 51 | bevfeatures = self.encoder(mlvl_feats,can_buses,prev_bev) 52 | 53 | decoded = self.decoder(bevfeatures) 54 | 55 | soutput = self.segmentation_head(decoded) 56 | houtput = self.height_multiclass_head(decoded) 57 | 58 | return soutput,houtput,bevfeatures 59 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/model_f2bev_attn_mt.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Feb 23 12:03:40 2023 5 | 6 | @author: Ekta 7 | """ 8 | 9 | import torch 10 | import torch.nn as nn 11 | 12 | from bblocks.backbone import Resnet34WithFPN 13 | 14 | from bblocks.encoder_height import EncoderFLCW 15 | from bblocks.mask_head_decoder_htseg import MaskHeadDecoderHt, MaskHeadDecoderSeg 16 | 17 | 18 | class FisheyeBEVFormer(nn.Module): 19 | def __init__(self): 20 | super(FisheyeBEVFormer, self).__init__() 21 | self.backbone = Resnet34WithFPN() 22 | 23 | self.encoder = EncoderFLCW() 24 | self.multiscale = True #False 25 | self.segdecoder = MaskHeadDecoderSeg() 26 | self.htdecoder = MaskHeadDecoderHt() 27 | self.deep_supervision = True 28 | 29 | def forward(self,front,left,rear,right,can_buses,prev_bev=None): 30 | 31 | 32 | f_f = self.backbone(front) 33 | f_l = self.backbone(left) 34 | f_re = self.backbone(rear) 35 | f_r = self.backbone(right) 36 | 37 | if self.multiscale: 38 | level0 = torch.cat((f_f['0'].unsqueeze(0),f_l['0'].unsqueeze(0),f_re['0'].unsqueeze(0),f_r['0'].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 39 | level1 = 
torch.cat((f_f['1'].unsqueeze(0),f_l['1'].unsqueeze(0),f_re['1'].unsqueeze(0),f_r['1'].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 40 | level2 = torch.cat((f_f['2'].unsqueeze(0),f_l['2'].unsqueeze(0),f_re['2'].unsqueeze(0),f_r['2'].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 41 | level3 = torch.cat((f_f['3'].unsqueeze(0),f_l['3'].unsqueeze(0),f_re['3'].unsqueeze(0),f_r['3'].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 42 | mlvl_feats = [level0,level1,level2,level3] 43 | 44 | else: 45 | 46 | level0 = torch.cat((f_f[0].unsqueeze(0),f_l[0].unsqueeze(0),f_re[0].unsqueeze(0),f_r[0].unsqueeze(0)),axis=0).permute(1,0,2,3,4) 47 | mlvl_feats = level0.unsqueeze(0) 48 | 49 | bevfeatures = self.encoder(mlvl_feats,can_buses,prev_bev) 50 | seg_output, seg_inter_masks = self.segdecoder(bevfeatures) 51 | ht_output, ht_inter_masks = self.htdecoder(bevfeatures) 52 | 53 | 54 | if self.deep_supervision: 55 | return seg_output, seg_inter_masks, ht_output, ht_inter_masks, bevfeatures 56 | else: 57 | return seg_output, ht_output, bevfeatures 58 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/bevformer_block.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue Aug 30 16:47:35 2022 5 | 6 | @author: Ekta 7 | """ 8 | 9 | import torch 10 | import torch.nn as nn 11 | from bblocks.spatial_cross_attention import SpatialCrossAttention 12 | from bblocks.temporal_self_attention import TemporalSelfAttention 13 | from bblocks.ffn import FFN 14 | 15 | class BEVFormerBlock(nn.Module): 16 | def __init__(self): 17 | super(BEVFormerBlock, self).__init__() 18 | self.norm = nn.LayerNorm((256,), eps=1e-05, elementwise_affine=True) 19 | self.bev_h = 50 20 | self.bev_w = 50 21 | self.ffn = FFN() 22 | self.sca = SpatialCrossAttention() 23 | self.tsa = TemporalSelfAttention() 24 | 25 | def forward(self,bev_query,key,value,bev_pos,spatial_shapes,level_start_index,reference_points_cam,bev_mask,ref_2d,prev_bev=None): 26 | bs = bev_query.size(0) 27 | #print(spatial_shapes) 28 | #print(reference_points_cam.shape) 29 | 30 | # x = self.sca(query = bev_query, key = key, value = value, query_pos = bev_pos, 31 | # spatial_shapes = spatial_shapes, level_start_index = level_start_index, 32 | # reference_points_cam = reference_points_cam,bev_mask = bev_mask ) 33 | 34 | batch_reference_points_cam = reference_points_cam.repeat(1,bs,1,1,1) 35 | batch_bev_mask = bev_mask.repeat(1,bs,1,1) 36 | 37 | ##add temporal here 38 | x = self.tsa(query = bev_query, key = prev_bev, value = prev_bev, query_pos = bev_pos, 39 | reference_points = ref_2d, spatial_shapes=torch.tensor([[self.bev_h, self.bev_w]], device=bev_query.device), 40 | level_start_index=torch.tensor([0], device=bev_query.device)) 41 | x = self.norm(x) 42 | x = self.sca(query = x, key = key, value = value, query_pos = bev_pos, 43 | spatial_shapes = spatial_shapes, level_start_index = level_start_index, 44 | reference_points_cam = batch_reference_points_cam,bev_mask = batch_bev_mask ) 45 | #print(x.shape) 46 | x = self.norm(x) 47 | x = self.ffn(x) 48 | x = self.norm(x) 49 | 50 | 51 | return x 52 | 53 | 54 | # x = self.sca(query = bev_query, key = key, value = value, residual = None, query_pos = bev_pos, 55 | # key_padding_mask = None, reference_points = None, spatial_shapes = spatial_shapes, level_start_index = level_start_index, 56 | # reference_points_cam = reference_points_cam,bev_mask =bev_mask ) 57 | 58 | 59 | 
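```BEVFormerBlock.forward``` above takes, among other inputs, the 2D BEV reference points ```ref_2d``` that its temporal self-attention consumes over the 50x50 BEV query grid. How that grid is built is not shown in this file, so the following is a minimal, self-contained sketch of the conventional BEVFormer-style construction (normalized cell-center coordinates); it is a sketch under that assumption, not code from this repository.

```python
# Illustrative sketch (not from the repository): build normalized 2D reference
# points for a bev_h x bev_w query grid, shaped (bs, bev_h*bev_w, 1, 2) with
# (x, y) in [0, 1], as typically fed to BEVFormer-style temporal self-attention.
import torch


def get_bev_reference_points_2d(bev_h, bev_w, bs=1, device="cpu", dtype=torch.float32):
    ref_y, ref_x = torch.meshgrid(
        torch.linspace(0.5, bev_h - 0.5, bev_h, dtype=dtype, device=device),
        torch.linspace(0.5, bev_w - 0.5, bev_w, dtype=dtype, device=device),
        indexing="ij",
    )
    ref_y = ref_y.reshape(-1) / bev_h  # normalize row coordinates to [0, 1]
    ref_x = ref_x.reshape(-1) / bev_w  # normalize column coordinates to [0, 1]
    ref_2d = torch.stack((ref_x, ref_y), dim=-1)       # (bev_h*bev_w, 2)
    return ref_2d[None].repeat(bs, 1, 1).unsqueeze(2)  # (bs, bev_h*bev_w, 1, 2)


ref_2d = get_bev_reference_points_2d(50, 50, bs=2)  # torch.Size([2, 2500, 1, 2])
```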
-------------------------------------------------------------------------------- /FB-SSEM_dataset/README.md: -------------------------------------------------------------------------------- 1 | # FB-SSEM-dataset 2 | 3 | The FB-SSEM dataset is a synthetic dataset consisting of surround-view fisheye camera images and BEV maps from simulated sequences of ego car motion. 4 | 5 | ## About 6 | We use the Unity game engine to simulate a parking lot environment for our dataset. The parking lot consists of parked cars/trucks, buses, electric vehicle (EV) charging stations of varying dimensions, and large containers of varying heights (on the boundaries). All the vehicles in the parking lot, except the ego car, are static. For the ego car, we use a forward-looking wide camera to simulate its four surround-view fisheye cameras. Our dataset consists of 20 sequences of ego car motion through the parking lot environment. Each sequence represents a different parking lot setup, i.e., different placement of all the vehicles in the lot and ground textures. Each sequence consists of 1000 samples; each sample consists of RGB images from the four car-mounted fisheye cameras (i.e., front, left, rear, and right cameras) and the BEV camera. Corresponding semantic segmentation maps for all five views and normalized height maps for the BEV are also generated. In addition, ego-motion information (3D rotation and translation) corresponding to every sample is obtained. We consider five semantic classes for the BEV segmentation map: car (ego car and parked cars/trucks), bus, EV charger, ground, and a non-driveable area. 7 | 8 | [F2BEV: Bird's Eye View Generation from Surround-View Fisheye Camera Images for Automated Driving](https://arxiv.org/abs/2303.03651) 9 | 10 | ## Dataset 11 | Links to download the FB-SSEM dataset are below. There are 12000 files per image sequence, as described [here](https://fb-ssem.s3.us-west-2.amazonaws.com/README.pdf). 12 | 13 | * [Link to download data](https://fb-ssem.s3.us-west-2.amazonaws.com/index.html) 14 | 15 | ## Camera calibration parameters 16 | * [Camera intrinsics](https://fb-ssem.s3.us-west-2.amazonaws.com/CameraCalibrationParameters/camera_intrinsics.yml) 17 | * [Camera positions for extrinsics](https://fb-ssem.s3.us-west-2.amazonaws.com/CameraCalibrationParameters/camera_positions_for_extrinsics.txt) 18 | ## Legal notice 19 | * Volvo Cars Technology USA LLC is the sole and exclusive owner of this dataset. 20 | * The dataset is licensed under [CC BY-SA 4.0 21 | ](https://creativecommons.org/licenses/by-sa/4.0/legalcode.en) 22 | * Any public use, distribution, or display of this dataset must contain this notice in its entirety. 23 | 24 | ## Privacy 25 | Volvo Cars takes reasonable care to remove or hide personal data. 26 | 27 | ## Public Distribution 28 | When using the FB-SSEM dataset for public distribution, we would be glad if you cite our [paper](https://arxiv.org/abs/2303.03651). Please cite the following: 29 | 30 | ``` 31 | @article{samani2023f2bev, 32 | title={F2BEV: Bird's Eye View Generation from Surround-View Fisheye Camera Images for Automated Driving}, 33 | author={Samani, Ekta U and Tao, Feng and Dasari, Harshavardhan R and Ding, Sihao and Banerjee, Ashis G}, 34 | journal={arXiv preprint arXiv:2303.03651}, 35 | year={2023}} 36 | ``` 37 | 38 | ## Contact 39 | Harshavardhan R.
Dasari 40 | mail : harshavardhan.reddy.dasari@volvocars.com 41 | Ekta Samani 42 | mail : eusamani@gmail.com 43 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/mask_head_decoder_htseg.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Feb 23 12:19:10 2023 5 | 6 | @author: Ekta 7 | """ 8 | 9 | 10 | import torch 11 | import torch.nn as nn 12 | from bblocks.mask_head_pansegformer import MaskHead 13 | 14 | class MaskHeadDecoderSeg(nn.Module): 15 | def __init__(self): 16 | super().__init__() 17 | self.bev_h = 50 18 | self.bev_w = 50 19 | self.num_stuff_classes = 5 20 | self.embed_dims = 256 21 | self.stuff_query = nn.Embedding(self.num_stuff_classes, 22 | self.embed_dims * 2) 23 | self.stuff_mask_head = MaskHead(num_decoder_layers=3,self_attn=True) 24 | 25 | 26 | # self._reset_parameters() 27 | 28 | # def _reset_parameters(self): 29 | 30 | 31 | def forward(self,bev_embed): 32 | stuff_query, stuff_query_pos = torch.split(self.stuff_query.weight,self.embed_dims,dim=1) 33 | bs = bev_embed.shape[0] 34 | stuff_query_pos = stuff_query_pos.unsqueeze(0).expand(bs, -1, -1) 35 | stuff_query = stuff_query.unsqueeze(0).expand(bs, -1, -1) 36 | hw_lvl = torch.tensor([[self.bev_h, self.bev_w]], device=stuff_query.device) 37 | 38 | attn, masks, inter_query = self.stuff_mask_head(bev_embed,None,None,stuff_query, None, stuff_query_pos, 39 | hw_lvl) 40 | 41 | mask_stuff = attn.squeeze(-1) 42 | 43 | mask_stuff = mask_stuff.reshape(bs, self.num_stuff_classes, hw_lvl[0][0],hw_lvl[0][1]) 44 | 45 | inter_masks = [m.squeeze(-1).reshape(bs, self.num_stuff_classes, hw_lvl[0][0],hw_lvl[0][1]) for m in masks] 46 | 47 | return mask_stuff, inter_masks 48 | 49 | 50 | class MaskHeadDecoderHt(nn.Module): 51 | def __init__(self): 52 | super().__init__() 53 | self.bev_h = 50 54 | self.bev_w = 50 55 | self.num_stuff_classes = 3 56 | self.embed_dims = 256 57 | self.stuff_query = nn.Embedding(self.num_stuff_classes, 58 | self.embed_dims * 2) 59 | self.stuff_mask_head = MaskHead(num_decoder_layers=3,self_attn=True) 60 | 61 | 62 | # self._reset_parameters() 63 | 64 | # def _reset_parameters(self): 65 | 66 | 67 | def forward(self,bev_embed): 68 | stuff_query, stuff_query_pos = torch.split(self.stuff_query.weight,self.embed_dims,dim=1) 69 | bs = bev_embed.shape[0] 70 | stuff_query_pos = stuff_query_pos.unsqueeze(0).expand(bs, -1, -1) 71 | stuff_query = stuff_query.unsqueeze(0).expand(bs, -1, -1) 72 | hw_lvl = torch.tensor([[self.bev_h, self.bev_w]], device=stuff_query.device) 73 | 74 | attn, masks, inter_query = self.stuff_mask_head(bev_embed,None,None,stuff_query, None, stuff_query_pos, 75 | hw_lvl) 76 | 77 | mask_stuff = attn.squeeze(-1) 78 | 79 | mask_stuff = mask_stuff.reshape(bs, self.num_stuff_classes, hw_lvl[0][0],hw_lvl[0][1]) 80 | 81 | inter_masks = [m.squeeze(-1).reshape(bs, self.num_stuff_classes, hw_lvl[0][0],hw_lvl[0][1]) for m in masks] 82 | 83 | return mask_stuff, inter_masks 84 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/losses/focal.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | from typing import Optional 3 | import torch 4 | from torch import Tensor, nn 5 | from torch.nn.modules.loss import _Loss 6 | 7 | from .functional import focal_loss_with_logits, softmax_focal_loss_with_logits 8 | 9 | 10 | #__all__ = 
["CrossEntropyFocalLoss", "BinaryFocalLoss"] 11 | 12 | 13 | class BinaryFocalLoss(nn.Module): 14 | def __init__( 15 | self, 16 | alpha: Optional[float] = None, 17 | gamma: float = 2.0, 18 | ignore_index: Optional[int] = None, 19 | reduction: str = "mean", 20 | normalized: bool = False, 21 | reduced_threshold: Optional[float] = None, 22 | activation: str = "sigmoid", 23 | softmax_dim: Optional[int] = None, 24 | ): 25 | """ 26 | :param alpha: Prior probability of having positive value in target. 27 | :param gamma: Power factor for dampening weight (focal strength). 28 | :param ignore_index: If not None, targets may contain values to be ignored. 29 | Target values equal to ignore_index will be ignored from loss computation. 30 | :param reduced: Switch to reduced focal loss. Note, when using this mode you should use `reduction="sum"`. 31 | :param activation: Either `sigmoid` or `softmax`. If `softmax` is used, `softmax_dim` must be also specified. 32 | """ 33 | super().__init__() 34 | self.focal_loss_fn = partial( 35 | focal_loss_with_logits, 36 | alpha=alpha, 37 | gamma=gamma, 38 | reduced_threshold=reduced_threshold, 39 | reduction=reduction, 40 | normalized=normalized, 41 | ignore_index=ignore_index, 42 | activation=activation, 43 | softmax_dim=softmax_dim, 44 | ) 45 | 46 | def forward(self, inputs: Tensor, targets: Tensor) -> Tensor: 47 | """Compute focal loss for binary classification problem.""" 48 | loss = self.focal_loss_fn(inputs, targets) 49 | return loss 50 | 51 | 52 | class CrossEntropyFocalLoss(nn.Module): 53 | """ 54 | Focal loss for multi-class problem. It uses softmax to compute the focal term instead of sigmoid as in 55 | the original paper. This loss expects target labels to have one dimension less (like in nn.CrossEntropyLoss). 56 | """ 57 | 58 | def __init__( 59 | self, 60 | gamma: float = 2.0, 61 | reduction: str = "mean", 62 | normalized: bool = False, 63 | reduced_threshold: Optional[float] = None, 64 | ignore_index: int = -100, 65 | ): 66 | """ 67 | :param alpha: 68 | :param gamma: 69 | :param ignore_index: If not None, targets with given index are ignored 70 | :param reduced_threshold: A threshold factor for computing reduced focal loss 71 | """ 72 | super().__init__() 73 | self.gamma = gamma 74 | self.reduction = reduction 75 | self.reduced_threshold = reduced_threshold 76 | self.normalized = normalized 77 | self.ignore_index = ignore_index 78 | 79 | def forward(self, inputs: Tensor, targets: Tensor) -> Tensor: 80 | return softmax_focal_loss_with_logits( 81 | inputs, 82 | targets, 83 | gamma=self.gamma, 84 | reduction=self.reduction, 85 | normalized=self.normalized, 86 | reduced_threshold=self.reduced_threshold, 87 | ignore_index=self.ignore_index, 88 | ) 89 | 90 | 91 | -------------------------------------------------------------------------------- /F2BEV_code/README.md: -------------------------------------------------------------------------------- 1 | # F2BEV: Bird's Eye View Generation from Surround-View Fisheye Camera Images for Automated Driving 2 | 3 | 4 | ## Requirements 5 | 6 | Package requirements are included in the ```f2bev_conda_env.yml``` file. A conda virtual environment can be created using this file as follows: 7 | 8 | ```bash 9 | conda env create --file f2bev_conda_env.yml 10 | ``` 11 | 12 | 13 | ## Data 14 | 15 | Download the FB-SSEM dataset from [here](https://github.com/volvo-cars/FB-SSEM-dataset). In particular, download ```.zip``` files corresponding to all twenty sequences, unzip them, and place them in a single folder named ```data```.
This folder must be placed inside the ```F2BEV``` folder generated from cloning this repository. 16 | 17 | ## Compute Reference Points 18 | 19 | A part of the reference point computation for the distortion-aware spatial cross attention in the network can be done offline to save on training time. 20 | 21 | Run the following commands from within the ```F2BEV``` folder. 22 | 23 | ```bash 24 | cd pre_computation 25 | python3 computeNormalizedReferencePoints.py 26 | cd ../ 27 | ``` 28 | The outputs of this code are already placed in the ```unity_data``` folder inside the ```pre_computation``` folder for convenience. 29 | 30 | ## Training and Testing F2BEV 31 | 32 | Use the ```train_*.py``` scripts to train an F2BEV network and the ```test_*.py``` scripts to test a trained F2BEV network. 33 | 34 | For example, to train an F2BEV network to generate (only) a discretized BEV height map using an attention-based task-specific head, where the height of every pixel is classified into one of three classes (below car bumper, above car height, or car height), run the following. The training log will be saved in ```traininglog_f2bev_attn_st_height.out```. 35 | 36 | ```bash 37 | nohup python3 -u train_f2bev_attn_st_height.py > traininglog_f2bev_attn_st_height.out & 38 | ``` 39 | 40 | To test a trained F2BEV network, run the corresponding ```test_*.py``` script. 41 | For example, to test the model trained above, run the following. 42 | 43 | ```bash 44 | python3 test_f2bev_attn_st_height.py 45 | ``` 46 | 47 | Training and test scripts for all the model types discussed in the [F2BEV paper](https://arxiv.org/abs/2303.03651) are included in this repository. They are as follows: 48 | 49 | 50 | | Model | Description | 51 | | ------ | ------ | 52 | | f2bev_attn_st_height | To generate discretized BEV height maps (alone) using an attention-based task-specific head, where the height of every pixel is classified into one of three classes | 53 | | f2bev_attn_st_seg | To generate BEV semantic segmentation maps (alone) using an attention-based task-specific head | 54 | | f2bev_attn_mt | To generate discretized BEV height maps and BEV semantic segmentation maps simultaneously using attention-based task-specific heads | 55 | | f2bev_conv_st_height | To generate discretized BEV height maps (alone) using a convolution-based task-specific head, where the height of every pixel is classified into one of three classes | 56 | | f2bev_conv_st_seg | To generate BEV semantic segmentation maps (alone) using a convolution-based task-specific head | 57 | | f2bev_conv_mt | To generate discretized BEV height maps and BEV semantic segmentation maps simultaneously using convolution-based task-specific heads | 58 | 59 | ## Citation 60 | If you find our code beneficial, please cite the [F2BEV paper](https://arxiv.org/abs/2303.03651). 61 | 62 | ```bash 63 | @article{samani2023f2bev, 64 | title={F2BEV: Bird's Eye View Generation from Surround-View Fisheye Camera Images for Automated Driving}, 65 | author={Samani, Ekta U and Tao, Feng and Dasari, Harshavardhan R and Ding, Sihao and Banerjee, Ashis G}, 66 | journal={arXiv preprint arXiv:2303.03651}, 67 | year={2023}} 68 | ``` 69 | 70 | 71 | ## Contact 72 | Harshavardhan R.
Dasari 73 | mail : harshavardhan.reddy.dasari@volvocars.com 74 | Ekta Samani 75 | mail : eusamani@gmail.com 76 | 77 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/positional_encoding.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue Aug 30 15:32:38 2022 5 | 6 | @author: Ekta 7 | """ 8 | 9 | 10 | import torch 11 | import torch.nn as nn 12 | 13 | 14 | ### from: https://mmdetection.readthedocs.io/en/latest/_modules/mmdet/models/utils/positional_encoding.html 15 | 16 | class LearnedPositionalEncoding(nn.Module): 17 | """Position embedding with learnable embedding weights. 18 | 19 | Args: 20 | num_feats (int): The feature dimension for each position 21 | along x-axis or y-axis. The final returned dimension for 22 | each position is 2 times of this value. 23 | row_num_embed (int, optional): The dictionary size of row embeddings. 24 | Default 50. 25 | col_num_embed (int, optional): The dictionary size of col embeddings. 26 | Default 50. 27 | init_cfg (dict or list[dict], optional): Initialization config dict. 28 | """ 29 | 30 | def __init__(self, 31 | num_feats, 32 | row_num_embed=50, 33 | col_num_embed=50): 34 | super(LearnedPositionalEncoding, self).__init__() 35 | self.row_embed = nn.Embedding(row_num_embed, num_feats) 36 | self.col_embed = nn.Embedding(col_num_embed, num_feats) 37 | self.num_feats = num_feats 38 | self.row_num_embed = row_num_embed 39 | self.col_num_embed = col_num_embed 40 | 41 | def forward(self, mask): 42 | """Forward function for `LearnedPositionalEncoding`. 43 | 44 | Args: 45 | mask (Tensor): ByteTensor mask. Non-zero values representing 46 | ignored positions, while zero values means valid positions 47 | for this image. Shape [bs, h, w]. 48 | 49 | Returns: 50 | pos (Tensor): Returned position embedding with shape 51 | [bs, num_feats*2, h, w]. 52 | """ 53 | h, w = mask.shape[-2:] 54 | x = torch.arange(w, device=mask.device) 55 | y = torch.arange(h, device=mask.device) 56 | x_embed = self.col_embed(x) 57 | y_embed = self.row_embed(y) 58 | pos = torch.cat((x_embed.unsqueeze(0).repeat(h, 1, 1), y_embed.unsqueeze(1).repeat(1, w, 1)), 59 | dim=-1).permute(2, 0, 1).unsqueeze(0).repeat(mask.shape[0], 1, 1, 1) 60 | return pos 61 | 62 | 63 | # import torch 64 | # import torch.nn as nn 65 | # from typing import Optional 66 | # from torch import Tensor 67 | 68 | # ### https://github.com/fundamentalvision/Deformable-DETR/blob/11169a60c33333af00a4849f1808023eba96a931/models/position_encoding.py 69 | 70 | # class NestedTensor(object): 71 | # def __init__(self, tensors, mask: Optional[Tensor]): 72 | # self.tensors = tensors 73 | # self.mask = mask 74 | 75 | # def to(self, device, non_blocking=False): 76 | # # type: (Device) -> NestedTensor # noqa 77 | # cast_tensor = self.tensors.to(device, non_blocking=non_blocking) 78 | # mask = self.mask 79 | # if mask is not None: 80 | # assert mask is not None 81 | # cast_mask = mask.to(device, non_blocking=non_blocking) 82 | # else: 83 | # cast_mask = None 84 | # return NestedTensor(cast_tensor, cast_mask) 85 | 86 | # class PositionEmbeddingLearned(nn.Module): 87 | # """ 88 | # Absolute pos embedding, learned. 
89 | # """ 90 | # def __init__(self, num_pos_feats=256): 91 | # super().__init__() 92 | # self.row_embed = nn.Embedding(50, num_pos_feats) 93 | # self.col_embed = nn.Embedding(50, num_pos_feats) 94 | # self.reset_parameters() 95 | 96 | # def reset_parameters(self): 97 | # nn.init.uniform_(self.row_embed.weight) 98 | # nn.init.uniform_(self.col_embed.weight) 99 | 100 | # def forward(self, tensor_list: NestedTensor): 101 | # x = tensor_list.tensors 102 | # h, w = x.shape[-2:] 103 | # i = torch.arange(w, device=x.device) 104 | # j = torch.arange(h, device=x.device) 105 | # x_emb = self.col_embed(i) 106 | # y_emb = self.row_embed(j) 107 | # pos = torch.cat([ 108 | # x_emb.unsqueeze(0).repeat(h, 1, 1), 109 | # y_emb.unsqueeze(1).repeat(1, w, 1), 110 | # ], dim=-1).permute(2, 0, 1).unsqueeze(0).repeat(x.shape[0], 1, 1, 1) 111 | # return pos -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/deformable_attention_function.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Aug 25 14:12:46 2022 5 | 6 | @author: Ekta 7 | """ 8 | from __future__ import absolute_import 9 | from __future__ import print_function 10 | from __future__ import division 11 | 12 | import torch 13 | import torch.nn.functional as F 14 | from torch.autograd import Function 15 | from torch.autograd.function import once_differentiable 16 | from torch.cuda.amp import custom_bwd, custom_fwd 17 | 18 | #import MultiScaleDeformableAttention as MSDA ##TODO: Installation for this comes from DETR repo -- need to build sth to get this 19 | 20 | # class MSDeformAttnFunction(Function): 21 | # @staticmethod 22 | # @custom_fwd(cast_inputs=torch.float16) 23 | # def forward(ctx, value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, im2col_step): 24 | # """GPU version of multi-scale deformable attention. 25 | # Args: 26 | # value (Tensor): The value has shape 27 | # (bs, num_keys, mum_heads, embed_dims//num_heads) 28 | # value_spatial_shapes (Tensor): Spatial shape of 29 | # each feature map, has shape (num_levels, 2), 30 | # last dimension 2 represent (h, w) 31 | # sampling_locations (Tensor): The location of sampling points, 32 | # has shape 33 | # (bs ,num_queries, num_heads, num_levels, num_points, 2), 34 | # the last dimension 2 represent (x, y). 35 | # attention_weights (Tensor): The weight of sampling points used 36 | # when calculate the attention, has shape 37 | # (bs ,num_queries, num_heads, num_levels, num_points), 38 | # im2col_step (Tensor): The step used in image to column. 39 | # Returns: 40 | # Tensor: has shape (N, Len_q, d_model) 41 | # """ 42 | 43 | # ctx.im2col_step = im2col_step 44 | # #print(type(value),type(value_spatial_shapes),type(value_level_start_index),type(sampling_locations),type(attention_weights),type(ctx.im2col_step)) 45 | 46 | # output = MSDA.ms_deform_attn_forward( 47 | # value = value, value_spatial_shapes=value_spatial_shapes, value_level_start_index=value_level_start_index, sampling_locations=sampling_locations, attention_weights=attention_weights, im2col_step = ctx.im2col_step) 48 | # ctx.save_for_backward(value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights) 49 | # return output 50 | 51 | # @staticmethod 52 | # @once_differentiable 53 | # @custom_bwd 54 | # def backward(ctx, grad_output): 55 | # """GPU version of backward function. 
56 | # Args: 57 | # grad_output (Tensor): Gradient 58 | # of output tensor of forward. 59 | # Returns: 60 | # Tuple[Tensor]: Gradient 61 | # of input tensors in forward. 62 | # """ 63 | # value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights = ctx.saved_tensors 64 | # grad_value = torch.zeros_like(value) 65 | # grad_sampling_loc = torch.zeros_like(sampling_locations) 66 | # grad_attn_weight = torch.zeros_like(attention_weights) 67 | 68 | # MSDA.ms_deform_attn_backward(value,value_spatial_shapes,value_level_start_index,sampling_locations,attention_weights,grad_output.contiguous(),grad_value,grad_sampling_loc,grad_attn_weight,im2col_step=ctx.im2col_step) 69 | 70 | # return grad_value, None, None, \ 71 | # grad_sampling_loc, grad_attn_weight, None 72 | 73 | 74 | def ms_deform_attn_core_pytorch(value, value_spatial_shapes, sampling_locations, attention_weights): 75 | # for debug and test only, 76 | # need to use cuda version instead 77 | N_, S_, M_, D_ = value.shape 78 | #print(value.shape) 79 | _, Lq_, M_, L_, P_, _ = sampling_locations.shape 80 | value_list = value.split([H_ * W_ for H_, W_ in value_spatial_shapes], dim=1) 81 | sampling_grids = 2 * sampling_locations - 1 82 | sampling_value_list = [] 83 | for lid_, (H_, W_) in enumerate(value_spatial_shapes): 84 | # N_, H_*W_, M_, D_ -> N_, H_*W_, M_*D_ -> N_, M_*D_, H_*W_ -> N_*M_, D_, H_, W_ 85 | value_l_ = value_list[lid_].flatten(2).transpose(1, 2).reshape(N_*M_, D_, H_, W_) 86 | # N_, Lq_, M_, P_, 2 -> N_, M_, Lq_, P_, 2 -> N_*M_, Lq_, P_, 2 87 | sampling_grid_l_ = sampling_grids[:, :, :, lid_].transpose(1, 2).flatten(0, 1) 88 | # N_*M_, D_, Lq_, P_ 89 | sampling_value_l_ = F.grid_sample(value_l_, sampling_grid_l_, 90 | mode='bilinear', padding_mode='zeros', align_corners=False) 91 | sampling_value_list.append(sampling_value_l_) 92 | # (N_, Lq_, M_, L_, P_) -> (N_, M_, Lq_, L_, P_) -> (N_, M_, 1, Lq_, L_*P_) 93 | attention_weights = attention_weights.transpose(1, 2).reshape(N_*M_, 1, Lq_, L_*P_) 94 | output = (torch.stack(sampling_value_list, dim=-2).flatten(-2) * attention_weights).sum(-1).view(N_, M_*D_, Lq_) 95 | return output.transpose(1, 2).contiguous() 96 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/test_f2bev_conv_st_height.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Dec 22 11:47:19 2022 5 | 6 | @author: Ekta 7 | """ 8 | import torch,time,ntpath #cv2 9 | from torch import nn, optim 10 | import numpy as np 11 | from test_loader_single_task import UnityImageDataset 12 | import os,fnmatch 13 | from torch.utils.data import DataLoader 14 | from model_f2bev_conv_st_height import FisheyeBEVFormer 15 | import torchvision.transforms as T 16 | from torchmetrics.functional import jaccard_index 17 | 18 | 19 | def numpy_sigmoid(x): 20 | return 1/(1 + np.exp(-x)) 21 | 22 | if not os.path.exists('./predictions/'): 23 | os.makedirs('./predictions/') 24 | 25 | if not os.path.exists('./predictions/f2bev_conv_st_height/'): 26 | os.makedirs('./predictions/f2bev_conv_st_height/') 27 | if not os.path.exists('./predictions/f2bev_conv_st_height/bevfeatures'): 28 | os.makedirs('./predictions/f2bev_conv_st_height/features') 29 | if not os.path.exists('./predictions/f2bev_conv_st_height/predfull/'): 30 | os.makedirs('./predictions/f2bev_conv_st_height/predfull/') 31 | if not os.path.exists('./predictions/f2bev_conv_st_height/predfull/ce/'): 32 
| os.makedirs('./predictions/f2bev_conv_st_height/predfull/ce/') 33 | 34 | num_data_sequences = 20 35 | 36 | 37 | bev_dirs = ['./data/images'+str(i)+'/test/depth' for i in range(num_data_sequences)] 38 | front_dirs = ['./data/images'+str(i)+'/test/rgb/front' for i in range(num_data_sequences)] 39 | left_dirs = ['./data/images'+str(i)+'/test/rgb/left' for i in range(num_data_sequences)] 40 | rear_dirs = ['./data/images'+str(i)+'/test/rgb/rear' for i in range(num_data_sequences)] 41 | right_dirs = ['./data/images'+str(i)+'/test/rgb/right' for i in range(num_data_sequences)] 42 | config_dirs = ['./data/images'+str(i)+'/test/cameraconfig' for i in range(num_data_sequences)] 43 | 44 | 45 | seq_len = 1 46 | 47 | image_lists = [] 48 | 49 | datalengths = [] 50 | 51 | for bev_dir in bev_dirs: 52 | names = fnmatch.filter(os.listdir(bev_dir), '*.png') 53 | 54 | files = [] 55 | for name in names: 56 | files.append(os.path.splitext(ntpath.basename(name))[0]) 57 | 58 | filelist = sorted(files,key=int) 59 | 60 | image_lists.append([f + '.png' for f in filelist]) 61 | datalengths.append(len(names)) 62 | 63 | 64 | 65 | transforms = torch.nn.Sequential(T.Resize((540,640)),) 66 | target_transforms = torch.nn.Sequential(T.Grayscale(num_output_channels=1)) 67 | 68 | test_data = UnityImageDataset(bev_dirs = bev_dirs,front_dirs=front_dirs,left_dirs=left_dirs,rear_dirs=rear_dirs,right_dirs=right_dirs,image_lists=image_lists,config_dirs=config_dirs,seq_len= seq_len,datalengths = datalengths,num_data_sequences = num_data_sequences, 69 | transform = transforms, target_transform= target_transforms) 70 | 71 | 72 | test_dataloader = DataLoader(test_data, batch_size=1, shuffle=False) 73 | 74 | 75 | device = "cuda" if torch.cuda.is_available() else "cpu" 76 | #device = "cpu" 77 | print(f"Using {device} device") 78 | 79 | model = FisheyeBEVFormer().to(device) 80 | 81 | checkpoint = torch.load('./f2bev_conv_st_height.pt') 82 | model.load_state_dict(checkpoint['model_state_dict']) 83 | 84 | 85 | 86 | def test_temporal(test_dataloader,seq_len,model,loss_fn,image_list): 87 | num_batches = len(test_dataloader) 88 | model.eval() 89 | test_loss = 0 90 | test_iou = 0 91 | with torch.no_grad(): 92 | for batch_idx, (dataseq, targetseq) in enumerate(test_dataloader): 93 | inp_img_seq, can_buses_seq = dataseq 94 | bs = targetseq[0].shape[0] 95 | for ctr in range(seq_len): 96 | front = inp_img_seq[ctr][0] 97 | left = inp_img_seq[ctr][1] 98 | rear = inp_img_seq[ctr][2] 99 | right = inp_img_seq[ctr][3] 100 | 101 | 102 | 103 | target = targetseq[ctr] 104 | front = front.to(device) 105 | left = left.to(device) 106 | rear = rear.to(device) 107 | right = right.to(device) 108 | 109 | target = torch.squeeze(target,dim=1) 110 | idx2 = torch.where(target <= 0.35) 111 | idx0 = torch.where(target >= 0.69) 112 | target[target >= 0] = 1 113 | target[idx2] = 2 114 | target[idx0] = 0 115 | 116 | target = target.to(torch.int64).to(device) 117 | can_buses = can_buses_seq[ctr] 118 | 119 | 120 | if batch_idx == 0: 121 | prev_bev = None 122 | 123 | else: 124 | prev_bev = torch.Tensor([np.load('./predictions/f2bev_conv_st_height/bevfeatures/'+image_list[batch_idx-1].split('.')[0]+'.npy')]).to(device) 125 | pred, for_prev_bev = model(front,left,rear,right, list(can_buses),prev_bev) 126 | 127 | for i,p in enumerate(for_prev_bev): 128 | np.save('./predictions/f2bev_conv_st_height/bevfeatures/'+image_list[batch_idx].split('.')[0]+'.npy',p.detach().cpu().numpy()) 129 | for i,p in enumerate(pred): 130 | 
np.save('./predictions/f2bev_conv_st_height/predfull/focal/'+image_list[batch_idx].split('.')[0]+'.npy',p.detach().cpu().numpy()) 131 | test_loss += loss_fn(pred, target).item() 132 | test_iou += jaccard_index(pred,target.to(pred.device),num_classes=3,average='none') 133 | 134 | test_loss/= num_batches*seq_len 135 | test_iou /= num_batches 136 | print(f"Test Error: Avg loss: {test_loss:>8f} \n") 137 | print(test_iou) 138 | return test_loss 139 | 140 | all_images = [] 141 | for test_list in image_lists: 142 | all_images = all_images + test_list 143 | 144 | loss = nn.CrossEntropyLoss() #nn.MSELoss() #BinaryFocalLoss(alpha=0.25,gamma=2) 145 | test_temporal(test_dataloader,seq_len,model,loss,all_images) 146 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/test_f2bev_attn_st_height.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Dec 22 11:47:19 2022 5 | 6 | @author: Ekta 7 | """ 8 | import os 9 | os.environ["CUDA_VISIBLE_DEVICES"]="1" 10 | import torch,time,ntpath #cv2 11 | from torch import nn, optim 12 | import numpy as np 13 | from test_loader_single_task import UnityImageDataset 14 | import fnmatch 15 | from torch.utils.data import DataLoader 16 | from model_f2bev_attn_st_height import FisheyeBEVFormer 17 | import torchvision.transforms as T 18 | from torchmetrics.functional import jaccard_index 19 | 20 | 21 | def numpy_sigmoid(x): 22 | return 1/(1 + np.exp(-x)) 23 | 24 | if not os.path.exists('./predictions/'): 25 | os.makedirs('./predictions/') 26 | if not os.path.exists('./predictions/f2bev_attn_st_height/'): 27 | os.makedirs('./predictions/f2bev_attn_st_height/') 28 | if not os.path.exists('./predictions/f2bev_attn_st_height/bevfeatures'): 29 | os.makedirs('./predictions/f2bev_attn_st_height/features') 30 | if not os.path.exists('./predictions/f2bev_attn_st_height/predfull/'): 31 | os.makedirs('./predictions/f2bev_attn_st_height/predfull/') 32 | if not os.path.exists('./predictions/f2bev_attn_st_height/predfull/ce/'): 33 | os.makedirs('./predictions/f2bev_attn_st_height/predfull/ce/') 34 | 35 | 36 | num_data_sequences = 20 37 | 38 | bev_dirs = ['./data/images'+str(i)+'/test/depth' for i in range(num_data_sequences)] 39 | front_dirs = ['./data/images'+str(i)+'/test/rgb/front' for i in range(num_data_sequences)] 40 | left_dirs = ['./data/images'+str(i)+'/test/rgb/left' for i in range(num_data_sequences)] 41 | rear_dirs = ['./data/images'+str(i)+'/test/rgb/rear' for i in range(num_data_sequences)] 42 | right_dirs = ['./data/images'+str(i)+'/test/rgb/right' for i in range(num_data_sequences)] 43 | config_dirs = ['./data/images'+str(i)+'/test/cameraconfig' for i in range(num_data_sequences)] 44 | 45 | 46 | seq_len = 1 47 | 48 | image_lists = [] 49 | 50 | datalengths = [] 51 | 52 | for bev_dir in bev_dirs: 53 | names = fnmatch.filter(os.listdir(bev_dir), '*.png') 54 | 55 | files = [] 56 | for name in names: 57 | files.append(os.path.splitext(ntpath.basename(name))[0]) 58 | 59 | filelist = sorted(files,key=int) 60 | 61 | image_lists.append([f + '.png' for f in filelist]) 62 | datalengths.append(len(names)) 63 | 64 | 65 | 66 | transforms = torch.nn.Sequential(T.Resize((540,640)),) 67 | target_transforms = torch.nn.Sequential(T.Resize((50,50)),T.Grayscale(num_output_channels=1)) 68 | 69 | test_data = UnityImageDataset(bev_dirs = 
bev_dirs,front_dirs=front_dirs,left_dirs=left_dirs,rear_dirs=rear_dirs,right_dirs=right_dirs,image_lists=image_lists,config_dirs=config_dirs,seq_len= seq_len,datalengths = datalengths,num_data_sequences = num_data_sequences, 70 | transform = transforms, target_transform= target_transforms) 71 | 72 | 73 | test_dataloader = DataLoader(test_data, batch_size=1, shuffle=False) 74 | 75 | 76 | device = "cuda" if torch.cuda.is_available() else "cpu" 77 | #device = "cpu" 78 | print(f"Using {device} device") 79 | 80 | model = FisheyeBEVFormer().to(device) 81 | 82 | checkpoint = torch.load('./f2bev_attn_st_height.pt') 83 | model.load_state_dict(checkpoint['model_state_dict']) 84 | 85 | 86 | 87 | def test_temporal(test_dataloader,seq_len,model,loss_fn,image_list): 88 | num_batches = len(test_dataloader) 89 | model.eval() 90 | test_loss = 0 91 | test_iou = 0 92 | with torch.no_grad(): 93 | for batch_idx, (dataseq, targetseq) in enumerate(test_dataloader): 94 | inp_img_seq, can_buses_seq = dataseq 95 | bs = targetseq[0].shape[0] 96 | for ctr in range(seq_len): 97 | front = inp_img_seq[ctr][0] 98 | left = inp_img_seq[ctr][1] 99 | rear = inp_img_seq[ctr][2] 100 | right = inp_img_seq[ctr][3] 101 | 102 | 103 | 104 | target = targetseq[ctr] 105 | front = front.to(device) 106 | left = left.to(device) 107 | rear = rear.to(device) 108 | right = right.to(device) 109 | 110 | target = torch.squeeze(target,dim=1) 111 | idx2 = torch.where(target <= 0.35) 112 | idx0 = torch.where(target >= 0.69) 113 | target[target >= 0] = 1 114 | target[idx2] = 2 115 | target[idx0] = 0 116 | 117 | target = target.to(torch.int64).to(device) 118 | can_buses = can_buses_seq[ctr] 119 | 120 | 121 | if batch_idx == 0: 122 | prev_bev = None 123 | 124 | else: 125 | prev_bev = torch.Tensor([np.load('./predictions/f2bev_attn_st_height/bevfeatures/'+image_list[batch_idx-1].split('.')[0]+'.npy')]).to(device) 126 | pred, _, for_prev_bev = model(front,left,rear,right, list(can_buses),prev_bev) 127 | 128 | for i,p in enumerate(for_prev_bev): 129 | np.save('./predictions/f2bev_attn_st_height/bevfeatures/'+image_list[batch_idx].split('.')[0]+'.npy',p.detach().cpu().numpy()) 130 | for i,p in enumerate(pred): 131 | np.save('./predictions/f2bev_attn_st_height/predfull/ce/'+image_list[batch_idx].split('.')[0]+'.npy',p.detach().cpu().numpy()) 132 | test_loss += loss_fn(pred, target).item() 133 | test_iou += jaccard_index(pred,target.to(pred.device),num_classes=3,average='none') 134 | 135 | test_loss/= num_batches*seq_len 136 | test_iou /= num_batches 137 | print(f"Test Error: Avg loss: {test_loss:>8f} \n") 138 | print(test_iou) 139 | return test_loss 140 | 141 | all_images = [] 142 | for test_list in image_lists: 143 | all_images = all_images + test_list 144 | 145 | loss = nn.CrossEntropyLoss() #nn.MSELoss() #BinaryFocalLoss(alpha=0.25,gamma=2) 146 | test_temporal(test_dataloader,seq_len,model,loss,all_images) 147 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/test_f2bev_conv_st_seg.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Dec 22 11:47:19 2022 5 | 6 | @author: Ekta 7 | """ 8 | import torch,time,ntpath #cv2 9 | from torch import nn, optim 10 | import numpy as np 11 | from test_loader_single_task import UnityImageDataset 12 | import os,fnmatch 13 | from torch.utils.data import DataLoader 14 | from model_f2bev_conv_st_seg import FisheyeBEVFormer 15 | import torchvision.transforms 
as T 16 | from torchmetrics.functional import jaccard_index 17 | 18 | 19 | def numpy_sigmoid(x): 20 | return 1/(1 + np.exp(-x)) 21 | 22 | if not os.path.exists('./predictions/'): 23 | os.makedirs('./predictions/') 24 | if not os.path.exists('./predictions/f2bev_conv_st_seg/'): 25 | os.makedirs('./predictions/f2bev_conv_st_seg/') 26 | if not os.path.exists('./predictions/f2bev_conv_st_seg/bevfeatures'): 27 | os.makedirs('./predictions/f2bev_conv_st_seg/features') 28 | if not os.path.exists('./predictions/f2bev_conv_st_seg/predfull/'): 29 | os.makedirs('./predictions/f2bev_conv_st_seg/predfull/') 30 | if not os.path.exists('./predictions/f2bev_conv_st_seg/predfull/ce/'): 31 | os.makedirs('./predictions/f2bev_conv_st_seg/predfull/ce/') 32 | 33 | num_data_sequences = 20 34 | bev_dirs = ['./data/images'+str(i)+'/test/seg/bev' for i in range(num_data_sequences)] 35 | front_dirs = ['./data/images'+str(i)+'/test/rgb/front' for i in range(num_data_sequences)] 36 | left_dirs = ['./data/images'+str(i)+'/test/rgb/left' for i in range(num_data_sequences)] 37 | rear_dirs = ['./data/images'+str(i)+'/test/rgb/rear' for i in range(num_data_sequences)] 38 | right_dirs = ['./data/images'+str(i)+'/test/rgb/right' for i in range(num_data_sequences)] 39 | config_dirs = ['./data/images'+str(i)+'/test/cameraconfig' for i in range(num_data_sequences)] 40 | 41 | 42 | 43 | seq_len = 1 44 | 45 | image_lists = [] 46 | 47 | datalengths = [] 48 | 49 | for bev_dir in bev_dirs: 50 | names = fnmatch.filter(os.listdir(bev_dir), '*.png') 51 | 52 | files = [] 53 | for name in names: 54 | files.append(os.path.splitext(ntpath.basename(name))[0]) 55 | 56 | filelist = sorted(files,key=int) 57 | 58 | image_lists.append([f + '.png' for f in filelist]) 59 | datalengths.append(len(names)) 60 | 61 | 62 | 63 | transforms = torch.nn.Sequential(T.Resize((540,640)),) 64 | target_transforms = torch.nn.Sequential(T.Grayscale(num_output_channels=1)) 65 | 66 | test_data = UnityImageDataset(bev_dirs = bev_dirs,front_dirs=front_dirs,left_dirs=left_dirs,rear_dirs=rear_dirs,right_dirs=right_dirs,image_lists=image_lists,config_dirs=config_dirs,seq_len= seq_len,datalengths = datalengths,num_data_sequences = num_data_sequences, 67 | transform = transforms, target_transform= target_transforms) 68 | 69 | 70 | test_dataloader = DataLoader(test_data, batch_size=1, shuffle=False) 71 | 72 | 73 | device = "cuda" if torch.cuda.is_available() else "cpu" 74 | #device = "cpu" 75 | print(f"Using {device} device") 76 | 77 | model = FisheyeBEVFormer().to(device) 78 | 79 | checkpoint = torch.load('./f2bev_conv_st_seg.pt') 80 | 81 | model.load_state_dict(checkpoint['model_state_dict']) 82 | 83 | 84 | 85 | def test_temporal(test_dataloader,seq_len,model,loss_fn,image_list): 86 | num_batches = len(test_dataloader) 87 | model.eval() 88 | test_loss = 0 89 | test_iou = 0 90 | with torch.no_grad(): 91 | for batch_idx, (dataseq, targetseq) in enumerate(test_dataloader): 92 | inp_img_seq, can_buses_seq = dataseq 93 | bs = targetseq[0].shape[0] 94 | for ctr in range(seq_len): 95 | front = inp_img_seq[ctr][0] 96 | left = inp_img_seq[ctr][1] 97 | rear = inp_img_seq[ctr][2] 98 | right = inp_img_seq[ctr][3] 99 | 100 | 101 | 102 | target = targetseq[ctr] 103 | front = front.to(device) 104 | left = left.to(device) 105 | rear = rear.to(device) 106 | right = right.to(device) 107 | 108 | target = torch.squeeze(target,dim=1) 109 | idx0 = torch.where(target <= 0.02) 110 | target[idx0] = 10 111 | idx1 = torch.where(target <= 0.07) 112 | target[idx1] = 11 113 | idx2 = torch.where(target 
<= 0.22) 114 | target[idx2] = 12 115 | idx3 = torch.where(target <= 0.60) 116 | target[idx3] = 13 117 | idx4 = torch.where(target <= 1) 118 | target[idx4] = 14 119 | target = target - 10 120 | target = target.to(torch.int64).to(device) 121 | 122 | can_buses = can_buses_seq[ctr] 123 | 124 | 125 | if batch_idx == 0: 126 | prev_bev = None 127 | 128 | else: 129 | prev_bev = torch.Tensor([np.load('./predictions/f2bev_conv_st_seg/bevfeatures/'+image_list[batch_idx-1].split('.')[0]+'.npy')]).to(device) 130 | pred, for_prev_bev = model(front,left,rear,right, list(can_buses),prev_bev) 131 | 132 | 133 | for i,p in enumerate(for_prev_bev): 134 | np.save('./predictions/f2bev_conv_st_seg/bevfeatures/'+image_list[batch_idx].split('.')[0]+'.npy',p.detach().cpu().numpy()) 135 | for i,p in enumerate(pred): 136 | np.save('./predictions/f2bev_conv_st_seg/predfull/focal/'+image_list[batch_idx].split('.')[0]+'.npy',p.detach().cpu().numpy()) 137 | test_loss += loss_fn(pred, target).item() 138 | test_iou += jaccard_index(pred,target.to(pred.device),num_classes=5,average='none') 139 | 140 | test_loss/= num_batches*seq_len 141 | test_iou /= num_batches 142 | print(f"Test Error: Avg loss: {test_loss:>8f} \n") 143 | print(test_iou) 144 | return test_loss 145 | 146 | all_images = [] 147 | for test_list in image_lists: 148 | all_images = all_images + test_list 149 | 150 | loss = nn.CrossEntropyLoss() #nn.MSELoss() #BinaryFocalLoss(alpha=0.25,gamma=2) 151 | test_temporal(test_dataloader,seq_len,model,loss,all_images) 152 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/test_f2bev_attn_st_seg.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Dec 22 11:47:19 2022 5 | 6 | @author: Ekta 7 | """ 8 | 9 | import os 10 | os.environ["CUDA_VISIBLE_DEVICES"]="0" 11 | import torch,time,ntpath #cv2 12 | from torch import nn, optim 13 | import numpy as np 14 | from test_loader_single_task import UnityImageDataset 15 | import fnmatch 16 | from torch.utils.data import DataLoader 17 | from model_f2bev_attn_st_seg import FisheyeBEVFormer 18 | 19 | import torchvision.transforms as T 20 | from torchmetrics.functional import jaccard_index 21 | 22 | 23 | 24 | def numpy_sigmoid(x): 25 | return 1/(1 + np.exp(-x)) 26 | 27 | if not os.path.exists('./predictions/'): 28 | os.makedirs('./predictions/') 29 | 30 | if not os.path.exists('./predictions/f2bev_attn_st_seg/'): 31 | os.makedirs('./predictions/f2bev_attn_st_seg/') 32 | if not os.path.exists('./predictions/f2bev_attn_st_seg/bevfeatures'): 33 | os.makedirs('./predictions/f2bev_attn_st_seg/features') 34 | if not os.path.exists('./predictions/f2bev_attn_st_seg/predfull/'): 35 | os.makedirs('./predictions/f2bev_attn_st_seg/predfull/') 36 | if not os.path.exists('./predictions/f2bev_attn_st_seg/predfull/ce/'): 37 | os.makedirs('./predictions/f2bev_attn_st_seg/predfull/ce/') 38 | 39 | num_data_sequences = 20 40 | 41 | bev_dirs = ['./data/images'+str(i)+'/test/seg/bev' for i in range(num_data_sequences)] 42 | front_dirs = ['./data/images'+str(i)+'/test/rgb/front' for i in range(num_data_sequences)] 43 | left_dirs = ['./data/images'+str(i)+'/test/rgb/left' for i in range(num_data_sequences)] 44 | rear_dirs = ['./data/images'+str(i)+'/test/rgb/rear' for i in range(num_data_sequences)] 45 | right_dirs = ['./data/images'+str(i)+'/test/rgb/right' for i in range(num_data_sequences)] 46 | config_dirs = 
['./data/images'+str(i)+'/test/cameraconfig' for i in range(num_data_sequences)] 47 | 48 | 49 | seq_len = 1 50 | 51 | image_lists = [] 52 | 53 | datalengths = [] 54 | 55 | for bev_dir in bev_dirs: 56 | names = fnmatch.filter(os.listdir(bev_dir), '*.png') 57 | 58 | files = [] 59 | for name in names: 60 | files.append(os.path.splitext(ntpath.basename(name))[0]) 61 | 62 | filelist = sorted(files,key=int) 63 | 64 | image_lists.append([f + '.png' for f in filelist]) 65 | datalengths.append(len(names)) 66 | 67 | 68 | 69 | transforms = torch.nn.Sequential(T.Resize((540,640)),) 70 | target_transforms = torch.nn.Sequential(T.Resize((50,50)),T.Grayscale(num_output_channels=1)) 71 | 72 | test_data = UnityImageDataset(bev_dirs = bev_dirs,front_dirs=front_dirs,left_dirs=left_dirs,rear_dirs=rear_dirs,right_dirs=right_dirs,image_lists=image_lists,config_dirs=config_dirs,seq_len= seq_len,datalengths = datalengths,num_data_sequences = num_data_sequences, 73 | transform = transforms, target_transform= target_transforms) 74 | 75 | 76 | test_dataloader = DataLoader(test_data, batch_size=1, shuffle=False) 77 | 78 | 79 | device = "cuda" if torch.cuda.is_available() else "cpu" 80 | #device = "cpu" 81 | print(f"Using {device} device") 82 | 83 | model = FisheyeBEVFormer().to(device) 84 | 85 | checkpoint = torch.load('./f2bev_attn_st_seg.pt') 86 | model.load_state_dict(checkpoint['model_state_dict']) 87 | 88 | 89 | 90 | def test_temporal(test_dataloader,seq_len,model,loss_fn,image_list): 91 | num_batches = len(test_dataloader) 92 | model.eval() 93 | test_loss = 0 94 | test_iou = 0 95 | with torch.no_grad(): 96 | for batch_idx, (dataseq, targetseq) in enumerate(test_dataloader): 97 | inp_img_seq, can_buses_seq = dataseq 98 | bs = targetseq[0].shape[0] 99 | for ctr in range(seq_len): 100 | front = inp_img_seq[ctr][0] 101 | left = inp_img_seq[ctr][1] 102 | rear = inp_img_seq[ctr][2] 103 | right = inp_img_seq[ctr][3] 104 | 105 | 106 | 107 | target = targetseq[ctr] 108 | front = front.to(device) 109 | left = left.to(device) 110 | rear = rear.to(device) 111 | right = right.to(device) 112 | 113 | target = torch.squeeze(target,dim=1) 114 | #print(torch.unique(starget)) 115 | idx0 = torch.where(target <= 0.02) 116 | target[idx0] = 10 117 | idx1 = torch.where(target <= 0.07) 118 | target[idx1] = 11 119 | idx2 = torch.where(target <= 0.22) 120 | target[idx2] = 12 121 | idx3 = torch.where(target <= 0.60) 122 | target[idx3] = 13 123 | idx4 = torch.where(target <= 1) 124 | target[idx4] = 14 125 | target = target - 10 126 | target = target.to(torch.int64).to(device) 127 | 128 | can_buses = can_buses_seq[ctr] 129 | 130 | 131 | if batch_idx == 0: 132 | prev_bev = None 133 | 134 | else: 135 | prev_bev = torch.Tensor([np.load('./predictions/f2bev_attn_st_seg/bevfeatures/'+image_list[batch_idx-1].split('.')[0]+'.npy')]).to(device) 136 | pred, _, for_prev_bev = model(front,left,rear,right, list(can_buses),prev_bev) 137 | 138 | 139 | for i,p in enumerate(for_prev_bev): 140 | np.save('./predictions/f2bev_attn_st_seg/bevfeatures/'+image_list[batch_idx].split('.')[0]+'.npy',p.detach().cpu().numpy()) 141 | for i,p in enumerate(pred): 142 | np.save('./predictions/f2bev_attn_st_seg/predfull/ce/'+image_list[batch_idx].split('.')[0]+'.npy',p.detach().cpu().numpy()) 143 | test_loss += loss_fn(pred, target).item() 144 | test_iou += jaccard_index(pred,target.to(pred.device),num_classes=5,average='none') 145 | 146 | test_loss/= num_batches*seq_len 147 | test_iou /= num_batches 148 | print(f"Test Error: Avg loss: {test_loss:>8f} \n") 149 | 
print(test_iou) 150 | return test_loss 151 | 152 | all_images = [] 153 | for test_list in image_lists: 154 | all_images = all_images + test_list 155 | 156 | loss = nn.CrossEntropyLoss() #nn.MSELoss() #BinaryFocalLoss(alpha=0.25,gamma=2) 157 | test_temporal(test_dataloader,seq_len,model,loss,all_images) 158 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/loader_multi_task.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Fri Jan 6 10:09:43 2023 5 | 6 | @author: Ekta 7 | """ 8 | 9 | import torch 10 | from torch.utils.data import Dataset 11 | #from torchvision import datasets 12 | #from torchvision.transforms import ToTensor 13 | #import matplotlib.pyplot as plt 14 | #from torch.utils.data import DataLoader 15 | import numpy as np 16 | import os#,fnmatch 17 | from torchvision.io import read_image 18 | import random 19 | class UnityImageDataset(Dataset): 20 | def __init__(self, bev_dirs, bev_depth_dirs, front_dirs, left_dirs, rear_dirs, right_dirs, image_lists, config_dirs, seq_len, datalengths, num_data_sequences, transform=None, target_transform=None): 21 | self.bev_dirs = bev_dirs 22 | self.bev_depth_dirs = bev_depth_dirs 23 | self.front_dirs = front_dirs 24 | self.left_dirs = left_dirs 25 | self.rear_dirs = rear_dirs 26 | self.right_dirs = right_dirs 27 | self.image_lists = image_lists 28 | self.config_dirs = config_dirs 29 | self.transform = transform 30 | self.target_transform = target_transform 31 | self.seq_len = seq_len 32 | self.datalengths = datalengths 33 | self.num_data_sequences = num_data_sequences 34 | 35 | def __len__(self): 36 | total = 0 37 | for count in self.datalengths: 38 | total = total + count 39 | return total 40 | 41 | def find_which_sequence(self,idx): 42 | 43 | eff_data_lens = [x for x in self.datalengths] 44 | 45 | 46 | currptr = 0 47 | nextptr = eff_data_lens[0] 48 | 49 | for i in range(self.num_data_sequences): 50 | if i == 0: 51 | currptr = 0 52 | nextptr = eff_data_lens[0] 53 | 54 | if idx > currptr -1 and idx < nextptr: 55 | seq_idx = 0 56 | else: 57 | currptr = sum(eff_data_lens[:i]) 58 | nextptr = sum(eff_data_lens[:i+1]) 59 | if idx > currptr -1 and idx < nextptr: 60 | seq_idx = i 61 | 62 | 63 | 64 | return seq_idx 65 | 66 | def get_id_in_seq(self,seq_idx,idx): 67 | eff_data_lens = [x for x in self.datalengths] 68 | 69 | 70 | if seq_idx == 0: 71 | subtract = 0 72 | else: 73 | subtract = sum(eff_data_lens[:seq_idx]) 74 | return idx - subtract 75 | 76 | def read_config_for_bevposrot(self,configdir,filename): 77 | with open(os.path.join(configdir, filename)) as f: 78 | lines = f.readlines() 79 | 80 | bpos = [float(lines[5].split(',')[1]),float(lines[5].split(',')[2]),float(lines[5].split(',')[3])] 81 | brot = [float(lines[5].split(',')[4]),float(lines[5].split(',')[5]),float(lines[5].split(',')[6])] 82 | return [bpos,brot] 83 | 84 | 85 | def __getitem__(self, idx): 86 | 87 | seq_idx = self.find_which_sequence(idx) 88 | 89 | bev_dir = self.bev_dirs[seq_idx] 90 | bev_depth_dir = self.bev_depth_dirs[seq_idx] 91 | image_list = self.image_lists[seq_idx] 92 | front_dir = self.front_dirs[seq_idx] 93 | left_dir = self.left_dirs[seq_idx] 94 | rear_dir = self.rear_dirs[seq_idx] 95 | right_dir = self.right_dirs[seq_idx] 96 | config_dir = self.config_dirs[seq_idx] 97 | 98 | idinseq = self.get_id_in_seq(seq_idx,idx) 99 | 100 | return_images_tensor = [] 101 | return_starget = [] 102 | return_htarget = [] 103 | 
return_can_bus = [] 104 | ##first image 105 | 106 | index_list = list(range(idinseq-self.seq_len, idinseq)) 107 | random.shuffle(index_list) 108 | index_list = sorted(index_list[1:]) 109 | index_list.append(idinseq) 110 | 111 | for idxctr,cidx in enumerate(index_list): 112 | cidx = max(0, cidx) 113 | 114 | star_path = os.path.join(bev_dir, image_list[cidx]) 115 | star = read_image(star_path)[:,100:~99,100:~99] 116 | star = torch.mul(star.float(),1/255) 117 | 118 | htar_path = os.path.join(bev_depth_dir, image_list[cidx]) 119 | htar = read_image(htar_path)[:,100:~99,100:~99] 120 | htar = torch.mul(htar.float(),1/255) 121 | if self.target_transform: 122 | star = self.target_transform(star) 123 | htar = self.target_transform(htar) 124 | inp = [] 125 | for cam_views in [front_dir, left_dir, rear_dir, right_dir]: 126 | img_path = os.path.join(cam_views, image_list[cidx]) 127 | image = read_image(img_path) 128 | image = torch.mul(image.float(),1/255) 129 | # if self.transform: 130 | # image = self.transform(image) 131 | inp.append(image) 132 | 133 | [bpos,brot] = self.read_config_for_bevposrot(config_dir,image_list[cidx].split('.')[0]+'.txt') 134 | 135 | 136 | can_bus = np.zeros((5,)) 137 | if idxctr == 0: 138 | #pos 139 | can_bus[0] = 0 140 | can_bus[1] = 0 141 | can_bus[2] = 0 142 | #angle 143 | can_bus[3] = brot[1] #(90 - bevrot[num,1])/180*np.pi ##ego_angle is kept unchanged .. i.e. no delta ##before that 270 - 144 | can_bus[4] = 0 145 | 146 | else: 147 | can_bus[0] = bpos[0] - return_can_bus[idxctr-1][0] 148 | can_bus[1] = bpos[2] - return_can_bus[idxctr-1][2] 149 | can_bus[2] = bpos[1] - return_can_bus[idxctr-1][1] 150 | can_bus[3] = brot[1] 151 | 152 | can_bus[4] = brot[1] - return_can_bus[idxctr-1][3] 153 | 154 | 155 | return_images_tensor.append(torch.stack(inp)) 156 | return_starget.append(star) 157 | return_htarget.append(htar) 158 | return_can_bus.append(can_bus) 159 | 160 | 161 | if self.transform: 162 | return_images_tensor = self.transform(torch.cat(return_images_tensor, dim=0)) 163 | 164 | #return_images = [list(torch.split(x, 4)) for x in list(torch.split(return_images, self.seq_len))] #because 4 camera views 165 | return_images = [] 166 | for frameidx in range(self.seq_len): 167 | inp = [] 168 | for camnum in range(4): #4 cam views 169 | inp.append(return_images_tensor[(4*frameidx) + camnum, :,:,:]) 170 | return_images.append(inp) 171 | 172 | #return torch.cat((inp[0],inp[1],inp[2],inp[3]),axis=0), tar 173 | return [return_images,return_can_bus], [return_starget,return_htarget] 174 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/test_loader_multi_task.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Fri Jan 6 10:09:43 2023 5 | 6 | @author: Ekta 7 | """ 8 | 9 | import torch 10 | from torch.utils.data import Dataset 11 | #from torchvision import datasets 12 | #from torchvision.transforms import ToTensor 13 | #import matplotlib.pyplot as plt 14 | #from torch.utils.data import DataLoader 15 | import numpy as np 16 | import os#,fnmatch 17 | from torchvision.io import read_image 18 | import random 19 | class UnityImageDataset(Dataset): 20 | def __init__(self, bev_dirs, bev_depth_dirs, front_dirs, left_dirs, rear_dirs, right_dirs, image_lists, config_dirs, seq_len, datalengths, num_data_sequences, transform=None, target_transform=None): 21 | self.bev_dirs = bev_dirs 22 | self.bev_depth_dirs = bev_depth_dirs 23 | 
self.front_dirs = front_dirs 24 | self.left_dirs = left_dirs 25 | self.rear_dirs = rear_dirs 26 | self.right_dirs = right_dirs 27 | self.image_lists = image_lists 28 | self.config_dirs = config_dirs 29 | self.transform = transform 30 | self.target_transform = target_transform 31 | self.seq_len = seq_len 32 | self.datalengths = datalengths 33 | self.num_data_sequences = num_data_sequences 34 | 35 | def __len__(self): 36 | total = 0 37 | for count in self.datalengths: 38 | total = total + count 39 | return total 40 | 41 | def find_which_sequence(self,idx): 42 | 43 | eff_data_lens = [x for x in self.datalengths] 44 | 45 | 46 | currptr = 0 47 | nextptr = eff_data_lens[0] 48 | 49 | for i in range(self.num_data_sequences): 50 | if i == 0: 51 | currptr = 0 52 | nextptr = eff_data_lens[0] 53 | 54 | if idx > currptr -1 and idx < nextptr: 55 | seq_idx = 0 56 | else: 57 | currptr = sum(eff_data_lens[:i]) 58 | nextptr = sum(eff_data_lens[:i+1]) 59 | if idx > currptr -1 and idx < nextptr: 60 | seq_idx = i 61 | 62 | 63 | 64 | return seq_idx 65 | 66 | def get_id_in_seq(self,seq_idx,idx): 67 | eff_data_lens = [x for x in self.datalengths] 68 | 69 | 70 | if seq_idx == 0: 71 | subtract = 0 72 | else: 73 | subtract = sum(eff_data_lens[:seq_idx]) 74 | return idx - subtract 75 | 76 | def read_config_for_bevposrot(self,configdir,filename): 77 | with open(os.path.join(configdir, filename)) as f: 78 | lines = f.readlines() 79 | 80 | bpos = [float(lines[5].split(',')[1]),float(lines[5].split(',')[2]),float(lines[5].split(',')[3])] 81 | brot = [float(lines[5].split(',')[4]),float(lines[5].split(',')[5]),float(lines[5].split(',')[6])] 82 | return [bpos,brot] 83 | 84 | 85 | def __getitem__(self, idx): 86 | 87 | seq_idx = self.find_which_sequence(idx) 88 | 89 | bev_dir = self.bev_dirs[seq_idx] 90 | bev_depth_dir = self.bev_depth_dirs[seq_idx] 91 | image_list = self.image_lists[seq_idx] 92 | front_dir = self.front_dirs[seq_idx] 93 | left_dir = self.left_dirs[seq_idx] 94 | rear_dir = self.rear_dirs[seq_idx] 95 | right_dir = self.right_dirs[seq_idx] 96 | config_dir = self.config_dirs[seq_idx] 97 | 98 | idinseq = self.get_id_in_seq(seq_idx,idx) 99 | 100 | return_images_tensor = [] 101 | return_starget = [] 102 | return_htarget = [] 103 | return_can_bus = [] 104 | ##first image 105 | 106 | index_list = list(range(idinseq-self.seq_len, idinseq)) 107 | random.shuffle(index_list) 108 | index_list = sorted(index_list[1:]) 109 | index_list.append(idinseq) 110 | 111 | for idxctr,cidx in enumerate(index_list): 112 | cidx = max(0, cidx) 113 | 114 | star_path = os.path.join(bev_dir, image_list[cidx]) 115 | star = read_image(star_path)[:,100:~99,100:~99] 116 | star = torch.mul(star.float(),1/255) 117 | 118 | htar_path = os.path.join(bev_depth_dir, image_list[cidx]) 119 | htar = read_image(htar_path)[:,100:~99,100:~99] 120 | htar = torch.mul(htar.float(),1/255) 121 | if self.target_transform: 122 | star = self.target_transform(star) 123 | htar = self.target_transform(htar) 124 | inp = [] 125 | for cam_views in [front_dir, left_dir, rear_dir, right_dir]: 126 | img_path = os.path.join(cam_views, image_list[cidx]) 127 | image = read_image(img_path) 128 | image = torch.mul(image.float(),1/255) 129 | # if self.transform: 130 | # image = self.transform(image) 131 | inp.append(image) 132 | 133 | [bpos,brot] = self.read_config_for_bevposrot(config_dir,image_list[cidx].split('.')[0]+'.txt') 134 | 135 | 136 | can_bus = np.zeros((5,)) 137 | if cidx == 0: 138 | #pos 139 | can_bus[0] = 0 140 | can_bus[1] = 0 141 | can_bus[2] = 0 142 | #angle 143 
| can_bus[3] = brot[1] #(90 - bevrot[num,1])/180*np.pi ##ego_angle is kept unchanged .. i.e. no delta ##before that 270 - 144 | can_bus[4] = 0 145 | 146 | else: 147 | [prev_bpos,prev_brot] = self.read_config_for_bevposrot(config_dir,image_list[cidx-1].split('.')[0]+'.txt') 148 | 149 | can_bus[0] = bpos[0] - prev_bpos[0] 150 | can_bus[1] = bpos[2] - prev_bpos[2] 151 | can_bus[2] = bpos[1] - prev_bpos[1] 152 | can_bus[3] = brot[1] 153 | 154 | can_bus[4] = brot[1] - prev_brot[1] 155 | 156 | 157 | return_images_tensor.append(torch.stack(inp)) 158 | return_starget.append(star) 159 | return_htarget.append(htar) 160 | return_can_bus.append(can_bus) 161 | 162 | 163 | if self.transform: 164 | return_images_tensor = self.transform(torch.cat(return_images_tensor, dim=0)) 165 | 166 | #return_images = [list(torch.split(x, 4)) for x in list(torch.split(return_images, self.seq_len))] #because 4 camera views 167 | return_images = [] 168 | for frameidx in range(self.seq_len): 169 | inp = [] 170 | for camnum in range(4): #4 cam views 171 | inp.append(return_images_tensor[(4*frameidx) + camnum, :,:,:]) 172 | return_images.append(inp) 173 | 174 | #return torch.cat((inp[0],inp[1],inp[2],inp[3]),axis=0), tar 175 | return [return_images,return_can_bus], [return_starget,return_htarget] 176 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/spatial_cross_attention.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue Aug 30 16:51:07 2022 5 | 6 | @author: Ekta 7 | """ 8 | 9 | import torch 10 | import torch.nn as nn 11 | 12 | 13 | from bblocks.deformable_attention import MSDeformAttn3D 14 | 15 | from torch.nn.init import xavier_uniform_, constant_ 16 | 17 | class SpatialCrossAttention(nn.Module): 18 | """An attention module used in BEVFormer. 19 | Args: 20 | embed_dims (int): The embedding dimension of Attention. 21 | Default: 256. 22 | num_cams (int): The number of cameras 23 | dropout (float): A Dropout layer on `inp_residual`. 24 | Default: 0.. 25 | init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization. 26 | Default: None. 27 | deformable_attention: (dict): The config for the deformable attention used in SCA. 28 | """ 29 | 30 | def __init__(self,embed_dims=256,num_cams=4,dropout=0.2): 31 | super(SpatialCrossAttention, self).__init__() 32 | 33 | self.dropout = nn.Dropout(dropout) 34 | self.deformable_attention = MSDeformAttn3D() 35 | self.embed_dims = embed_dims 36 | self.num_cams = num_cams 37 | self.output_proj = nn.Linear(embed_dims, embed_dims) 38 | self.init_weight() 39 | 40 | def init_weight(self): 41 | """Default initialization for Parameters of Module.""" 42 | xavier_uniform_(self.output_proj.weight.data) 43 | constant_(self.output_proj.bias.data, 0.) ##done 44 | 45 | 46 | 47 | #@force_fp32(apply_to=('query', 'key', 'value', 'query_pos', 'reference_points_cam')) ##TODO: is this mandatory figure it out 48 | def forward(self, 49 | query, 50 | key, 51 | value, 52 | residual=None, 53 | query_pos=None, 54 | key_padding_mask=None, 55 | reference_points=None, 56 | spatial_shapes=None, 57 | reference_points_cam=None, 58 | bev_mask=None, 59 | level_start_index=None, 60 | **kwargs): 61 | """Forward Function of Detr3DCrossAtten. 62 | Args: 63 | query (Tensor): Query of Transformer with shape 64 | (num_query, bs, embed_dims). 65 | key (Tensor): The key tensor with shape 66 | `(num_key, bs, embed_dims)`. 
67 | value (Tensor): The value tensor with shape 68 | `(num_key, bs, embed_dims)`. (B, N, C, H, W) 69 | residual (Tensor): The tensor used for addition, with the 70 | same shape as `x`. Default None. If None, `x` will be used. 71 | query_pos (Tensor): The positional encoding for `query`. 72 | Default: None. 73 | key_pos (Tensor): The positional encoding for `key`. Default 74 | None. 75 | reference_points (Tensor): The normalized reference 76 | points with shape (bs, num_query, 4), 77 | all elements is range in [0, 1], top-left (0,0), 78 | bottom-right (1, 1), including padding area. 79 | or (N, Length_{query}, num_levels, 4), add 80 | additional two dimensions is (w, h) to 81 | form reference boxes. 82 | key_padding_mask (Tensor): ByteTensor for `query`, with 83 | shape [bs, num_key]. 84 | spatial_shapes (Tensor): Spatial shape of features in 85 | different level. With shape (num_levels, 2), 86 | last dimension represent (h, w). 87 | level_start_index (Tensor): The start index of each level. 88 | A tensor has shape (num_levels) and can be represented 89 | as [0, h_0*w_0, h_0*w_0+h_1*w_1, ...]. 90 | Returns: 91 | Tensor: forwarded results with shape [num_query, bs, embed_dims]. 92 | """ 93 | 94 | if key is None: 95 | key = query 96 | if value is None: 97 | value = key 98 | 99 | if residual is None: 100 | inp_residual = query 101 | slots = torch.zeros_like(query) 102 | if query_pos is not None: 103 | query = query + query_pos 104 | #query shape is 2,2500,256 105 | bs, num_query, _ = query.size() 106 | #reference_points_cam size is 6 2 2500 4 2 107 | #bev mask size is 6 2 2500 4 108 | D = reference_points_cam.size(3) 109 | #print(D) 110 | indexes = [] 111 | for i, mask_per_img in enumerate(bev_mask): 112 | index_query_per_img = mask_per_img[0].sum(-1).nonzero().squeeze(-1) 113 | indexes.append(index_query_per_img) 114 | max_len = max([len(each) for each in indexes]) 115 | #print([len(each) for each in indexes]) 116 | # each camera only interacts with its corresponding BEV queries. This step can greatly save GPU memory. 
117 | queries_rebatch = query.new_zeros( 118 | [bs, self.num_cams, max_len, self.embed_dims]) 119 | reference_points_rebatch = reference_points_cam.new_zeros( 120 | [bs, self.num_cams, max_len, D, 2]) 121 | 122 | #print(queries_rebatch.shape) 123 | #print(reference_points_rebatch.shape) 124 | #queries rebatch 2,6,sth,256 125 | #reference points rebatch 2,6,sth,4,2 126 | for j in range(bs): 127 | for i, reference_points_per_img in enumerate(reference_points_cam): 128 | index_query_per_img = indexes[i] 129 | queries_rebatch[j, i, :len(index_query_per_img)] = query[j, index_query_per_img] 130 | reference_points_rebatch[j, i, :len(index_query_per_img)] = reference_points_per_img[j, index_query_per_img] 131 | #print(torch.unique(reference_points_per_img[j, index_query_per_img])) 132 | 133 | num_cams, l, bs, embed_dims = key.shape 134 | 135 | key = key.permute(2, 0, 1, 3).reshape( 136 | bs * self.num_cams, l, self.embed_dims) 137 | value = value.permute(2, 0, 1, 3).reshape( 138 | bs * self.num_cams, l, self.embed_dims) 139 | 140 | queries = self.deformable_attention(query=queries_rebatch.view(bs*self.num_cams, max_len, self.embed_dims), query_pos = query_pos, 141 | reference_points=reference_points_rebatch.view(bs*self.num_cams, max_len, D, 2), input_flatten = value, 142 | input_spatial_shapes=spatial_shapes, input_level_start_index=level_start_index).view(bs, self.num_cams, max_len, self.embed_dims) 143 | for j in range(bs): 144 | for i, index_query_per_img in enumerate(indexes): 145 | slots[j, index_query_per_img] += queries[j, i, :len(index_query_per_img)] 146 | 147 | count = bev_mask.sum(-1) > 0 148 | count = count.permute(1, 2, 0).sum(-1) 149 | count = torch.clamp(count, min=1.0) 150 | slots = slots / count[..., None] 151 | slots = self.output_proj(slots) 152 | 153 | return self.dropout(slots) + inp_residual 154 | 155 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/cnndecoder.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Wed Aug 31 11:12:33 2022 5 | 6 | @author: Ekta 7 | """ 8 | 9 | import torch 10 | import torch.nn as nn 11 | import numpy as np 12 | 13 | class HeightMulticlassHead(nn.Sequential): 14 | 15 | def __init__(self, in_channels=16, out_channels=3, kernel_size=3, upsampling=1): 16 | conv2d = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, padding=kernel_size // 2) 17 | 18 | upsampling = nn.UpsamplingBilinear2d(scale_factor=upsampling) if upsampling > 1 else nn.Identity() 19 | #softmax = nn.Softmax() 20 | super().__init__(conv2d,upsampling) 21 | 22 | 23 | class HeightHead(nn.Sequential): 24 | 25 | def __init__(self, in_channels=16, out_channels=1, kernel_size=3, upsampling=1): 26 | conv2d = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, padding=kernel_size // 2) 27 | 28 | upsampling = nn.UpsamplingBilinear2d(scale_factor=upsampling) if upsampling > 1 else nn.Identity() 29 | relu = nn.ReLU() 30 | super().__init__(conv2d,relu,upsampling) 31 | 32 | class SegmentationHead(nn.Sequential): 33 | 34 | def __init__(self, in_channels=16, out_channels=5, kernel_size=3, upsampling=1): 35 | conv2d = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, padding=kernel_size // 2) 36 | sigmoid = nn.Sigmoid() 37 | upsampling = nn.UpsamplingBilinear2d(scale_factor=upsampling) if upsampling > 1 else nn.Identity() 38 | super().__init__(conv2d,upsampling) 39 | #super().__init__(conv2d, 
sigmoid,upsampling) 40 | 41 | 42 | class Conv2dReLU(nn.Sequential): 43 | def __init__( 44 | self, 45 | in_channels, 46 | out_channels, 47 | kernel_size, 48 | padding=0, 49 | stride=1, 50 | use_batchnorm=True, 51 | ): 52 | conv = nn.Conv2d( 53 | in_channels, 54 | out_channels, 55 | kernel_size, 56 | stride=stride, 57 | padding=padding, 58 | bias=not (use_batchnorm), 59 | ) 60 | relu = nn.ReLU(inplace=True) 61 | 62 | bn = nn.BatchNorm2d(out_channels) 63 | 64 | super(Conv2dReLU, self).__init__(conv, bn, relu) 65 | 66 | 67 | class DecoderBlock(nn.Module): 68 | def __init__( 69 | self, 70 | in_channels, 71 | out_channels, 72 | skip_channels=0, 73 | use_batchnorm=True, 74 | ): 75 | super().__init__() 76 | self.conv1 = Conv2dReLU( 77 | in_channels + skip_channels, 78 | out_channels, 79 | kernel_size=3, 80 | padding=1, 81 | use_batchnorm=use_batchnorm, 82 | ) 83 | self.conv2 = Conv2dReLU( 84 | out_channels, 85 | out_channels, 86 | kernel_size=3, 87 | padding=1, 88 | use_batchnorm=use_batchnorm, 89 | ) 90 | self.up = nn.UpsamplingBilinear2d(scale_factor=2) 91 | self.dropout = nn.Dropout(p=0.2) #new addition by me 92 | 93 | def forward(self, x, skip=None): 94 | x = self.up(x) 95 | if skip is not None: 96 | x = torch.cat([x, skip], dim=1) 97 | x = self.conv1(x) 98 | x = self.dropout(x) 99 | x = self.conv2(x) 100 | return x 101 | 102 | 103 | class DecoderCup(nn.Module): 104 | def __init__(self): 105 | super().__init__() 106 | self.hidden_size = 256 107 | self.decoder_channels = (128,64,16) 108 | self.head_channels = 512 109 | self.n_skip = 0 110 | self.skip_channels = [256,64,16] ##dummy 111 | self.conv_more = Conv2dReLU( 112 | self.hidden_size, 113 | self.head_channels, 114 | kernel_size=3, 115 | padding=1, 116 | use_batchnorm=True, 117 | ) 118 | decoder_channels = self.decoder_channels 119 | head_channels = self.head_channels 120 | in_channels = [head_channels] + list(decoder_channels[:-1]) 121 | out_channels = decoder_channels 122 | 123 | if self.n_skip != 0: 124 | skip_channels = self.skip_channels 125 | for i in range(4-self.n_skip): # re-select the skip channels according to n_skip 126 | skip_channels[3-i]=0 127 | 128 | else: 129 | skip_channels=[0,0,0] 130 | 131 | blocks = [ 132 | DecoderBlock(in_ch, out_ch, sk_ch) for in_ch, out_ch, sk_ch in zip(in_channels, out_channels, skip_channels) 133 | ] 134 | self.blocks = nn.ModuleList(blocks) 135 | 136 | def forward(self, hidden_states, features=None): 137 | B, n_patch, hidden = hidden_states.size() # reshape from (B, n_patch, hidden) to (B, h, w, hidden) 138 | #print(B, n_patch,hidden) 139 | h, w = int(np.sqrt(n_patch)), int(np.sqrt(n_patch)) 140 | x = hidden_states.permute(0, 2, 1) 141 | x = x.contiguous().view(B, hidden, h, w) 142 | x = self.conv_more(x) 143 | #print(x.shape) 144 | for i, decoder_block in enumerate(self.blocks): 145 | if features is not None: 146 | skip = features[i] if (i < self.n_skip) else None 147 | else: 148 | skip = None 149 | x = decoder_block(x, skip=skip) 150 | #print(x.shape) 151 | return x 152 | 153 | class UpSampleBlock(nn.Module): 154 | def __init__( 155 | self, 156 | in_channels, 157 | out_channels, 158 | use_batchnorm=True, 159 | ): 160 | super().__init__() 161 | self.conv1 = Conv2dReLU( 162 | in_channels, 163 | out_channels, 164 | kernel_size=3, 165 | padding=1, 166 | use_batchnorm=use_batchnorm, 167 | ) 168 | self.conv2 = Conv2dReLU( 169 | out_channels, 170 | out_channels, 171 | kernel_size=1, 172 | padding=0, 173 | use_batchnorm=use_batchnorm, 174 | ) 175 | self.up = nn.UpsamplingBilinear2d(scale_factor=2) 176 | 
177 | 178 | def forward(self, x, skip=None): 179 | x = self.conv1(x) 180 | x = self.conv2(x) 181 | x = self.up(x) 182 | return x 183 | 184 | class BEVUpSample(nn.Module): 185 | def __init__(self): 186 | super().__init__() 187 | in_channels = [256,128,64] 188 | out_channels = [128,64,16] 189 | blocks = [ 190 | UpSampleBlock(in_ch, out_ch) for in_ch, out_ch in zip(in_channels, out_channels) 191 | ] 192 | 193 | self.blocks = nn.ModuleList(blocks) 194 | self.dropout = nn.Dropout(p=0.2) 195 | self.final_head = HeightMulticlassHead() 196 | 197 | def forward(self,hidden_states,features=None): 198 | B, n_patch, hidden = hidden_states.size() # reshape from (B, n_patch, hidden) to (B, h, w, hidden) 199 | #print(B, n_patch,hidden) 200 | h, w = int(np.sqrt(n_patch)), int(np.sqrt(n_patch)) 201 | x = hidden_states.permute(0, 2, 1) 202 | x = x.contiguous().view(B, hidden, h, w) 203 | for i, up_block in enumerate(self.blocks): 204 | x = up_block(x) 205 | 206 | x = self.dropout(x) 207 | x = self.final_head(x) 208 | 209 | return x 210 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/test_f2bev_attn_mt.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Feb 23 14:05:34 2023 5 | 6 | @author: Ekta 7 | """ 8 | 9 | 10 | import torch,time,ntpath #cv2 11 | from torch import nn, optim 12 | import numpy as np 13 | from test_loader_multi_task import UnityImageDataset 14 | import os,fnmatch 15 | from torch.utils.data import DataLoader 16 | from model_f2bev_attn_mt import FisheyeBEVFormer 17 | import torchvision.transforms as T 18 | from torchmetrics.functional import jaccard_index 19 | from losses.focal import BinaryFocalLoss 20 | 21 | 22 | def numpy_sigmoid(x): 23 | return 1/(1 + np.exp(-x)) 24 | 25 | if not os.path.exists('./predictions/'): 26 | os.makedirs('./predictions/') 27 | 28 | if not os.path.exists('./predictions/f2bev_attn_mt/'): 29 | os.makedirs('./predictions/f2bev_attn_mt/') 30 | if not os.path.exists('./predictions/f2bev_attn_mt/bevfeatures'): 31 | os.makedirs('./predictions/f2bev_attn_mt/features') 32 | if not os.path.exists('./predictions/f2bev_attn_mt/predfull/'): 33 | os.makedirs('./predictions/f2bev_attn_mt/predfull/') 34 | if not os.path.exists('./predictions/f2bev_attn_mt/predfull/ce/'): 35 | os.makedirs('./predictions/f2bev_attn_mt/predfull/ce/') 36 | 37 | 38 | num_data_sequences = 1 39 | 40 | 41 | bev_dirs = ['./data/images'+str(i)+'/test/seg/bev' for i in range(num_data_sequences)] 42 | bev_depth_dirs = ['./data/images'+str(i)+'/test/depth' for i in range(num_data_sequences)] 43 | front_dirs = ['./data/images'+str(i)+'/test/rgb/front' for i in range(num_data_sequences)] 44 | left_dirs = ['./data/images'+str(i)+'/test/rgb/left' for i in range(num_data_sequences)] 45 | rear_dirs = ['./data/images'+str(i)+'/test/rgb/rear' for i in range(num_data_sequences)] 46 | right_dirs = ['./data/images'+str(i)+'/test/rgb/right' for i in range(num_data_sequences)] 47 | config_dirs = ['./data/images'+str(i)+'/test/cameraconfig' for i in range(num_data_sequences)] 48 | 49 | 50 | seq_len = 1 51 | 52 | image_lists = [] 53 | 54 | datalengths = [] 55 | 56 | for bev_dir in bev_dirs: 57 | names = fnmatch.filter(os.listdir(bev_dir), '*.png') 58 | 59 | files = [] 60 | for name in names: 61 | files.append(os.path.splitext(ntpath.basename(name))[0]) 62 | 63 | filelist = sorted(files,key=int) 64 | 65 | image_lists.append([f + '.png' for f in filelist]) 66 | 
datalengths.append(len(names)) 67 | 68 | 69 | 70 | transforms = torch.nn.Sequential(T.Resize((540,640)),) 71 | target_transforms = torch.nn.Sequential(T.Grayscale(num_output_channels=1)) 72 | 73 | test_data = UnityImageDataset(bev_dirs = bev_dirs, bev_depth_dirs = bev_depth_dirs, front_dirs=front_dirs,left_dirs=left_dirs,rear_dirs=rear_dirs,right_dirs=right_dirs,image_lists=image_lists,config_dirs=config_dirs,seq_len= seq_len,datalengths = datalengths,num_data_sequences = num_data_sequences, 74 | transform = transforms, target_transform= target_transforms) 75 | 76 | 77 | test_dataloader = DataLoader(test_data, batch_size=1, shuffle=False) 78 | 79 | 80 | device = "cuda" if torch.cuda.is_available() else "cpu" 81 | print(f"Using {device} device") 82 | 83 | model = FisheyeBEVFormer().to(device) 84 | 85 | checkpoint = torch.load('./f2bev_attn_mt.pt') 86 | 87 | model.load_state_dict(checkpoint['model_state_dict']) 88 | 89 | 90 | 91 | def test_temporal(test_dataloader,seq_len,model,loss_fn,image_list): 92 | num_batches = len(test_dataloader) 93 | model.eval() 94 | stest_loss = 0 95 | stest_iou = 0 96 | htest_loss = 0 97 | htest_iou = 0 98 | with torch.no_grad(): 99 | for batch_idx, (dataseq, targetseq) in enumerate(test_dataloader): 100 | inp_img_seq, can_buses_seq = dataseq 101 | stargetseq,htargetseq = targetseq 102 | bs = stargetseq[0].shape[0] 103 | for ctr in range(seq_len): 104 | front = inp_img_seq[ctr][0] 105 | left = inp_img_seq[ctr][1] 106 | rear = inp_img_seq[ctr][2] 107 | right = inp_img_seq[ctr][3] 108 | 109 | 110 | 111 | starget = stargetseq[ctr] 112 | htarget = htargetseq[ctr] 113 | 114 | front = front.to(device) 115 | left = left.to(device) 116 | rear = rear.to(device) 117 | right = right.to(device) 118 | 119 | starget = torch.squeeze(starget,dim=1) 120 | idx0 = torch.where(starget <= 0.02) 121 | starget[idx0] = 10 122 | idx1 = torch.where(starget <= 0.07) 123 | starget[idx1] = 11 124 | idx2 = torch.where(starget <= 0.22) 125 | starget[idx2] = 12 126 | idx3 = torch.where(starget <= 0.60) 127 | starget[idx3] = 13 128 | idx4 = torch.where(starget <=1) 129 | starget[idx4] = 14 130 | starget = starget - 10 131 | starget = starget.to(torch.int64).to(device) 132 | 133 | 134 | 135 | htarget = torch.squeeze(htarget,dim=1) 136 | idx2 = torch.where(htarget <= 0.35) 137 | idx0 = torch.where(htarget >= 0.69) 138 | htarget[htarget >= 0] = 1 139 | htarget[idx2] = 2 140 | htarget[idx0] = 0 141 | htarget = htarget.to(torch.int64).to(device) 142 | 143 | 144 | can_buses = can_buses_seq[ctr] 145 | 146 | 147 | if batch_idx == 0: 148 | prev_bev = None 149 | 150 | else: 151 | prev_bev = torch.Tensor([np.load('./predictions/f2bev_attn_mt/bevfeatures/'+image_list[batch_idx-1].split('.')[0]+'.npy')]).to(device) 152 | spred,_, hpred, _ ,for_prev_bev = model(front,left,rear,right, list(can_buses),prev_bev) 153 | 154 | for i,p in enumerate(for_prev_bev): 155 | np.save('./predictions/f2bev_attn_mt/bevfeatures/'+image_list[batch_idx].split('.')[0]+'.npy',p.detach().cpu().numpy()) 156 | for i,p in enumerate(hpred): 157 | np.save('./predictions/f2bev_attn_mt/predfull/ce/'+image_list[batch_idx].split('.')[0]+'h.npy',p.detach().cpu().numpy()) 158 | for i,p in enumerate(spred): 159 | np.save('./predictions/f2bev_attn_mt/predfull/ce/'+image_list[batch_idx].split('.')[0]+'s.npy',p.detach().cpu().numpy()) 160 | 161 | stest_loss += loss_fn(spred, starget).item() 162 | stest_iou += jaccard_index(spred,starget.to(spred.device),num_classes=5,average='none') 163 | htest_loss += loss_fn(hpred, htarget).item() 164 | 
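                    # per-class IoU (average='none') is accumulated per frame here and divided by the number of batches after the loop, so the printed segmentation/height values are per-class means over the test set.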
htest_iou += jaccard_index(hpred,htarget.to(hpred.device),num_classes=3,average='none') 165 | 166 | 167 | htest_loss/= num_batches*seq_len 168 | htest_iou /= num_batches 169 | stest_loss/= num_batches*seq_len 170 | stest_iou /= num_batches 171 | 172 | print(f"Test Error: Avg seg loss: {stest_loss:>8f} \n") 173 | print(f"Test Error: Avg ht loss: {htest_loss:>8f} \n") 174 | 175 | print("Segmentation: ", stest_iou) 176 | print("Height: ", htest_iou) 177 | return stest_loss,htest_loss 178 | 179 | all_images = [] 180 | for test_list in image_lists: 181 | all_images = all_images + test_list 182 | 183 | loss = nn.CrossEntropyLoss() #nn.MSELoss() #BinaryFocalLoss(alpha=0.25,gamma=2) 184 | test_temporal(test_dataloader,seq_len,model,loss,all_images) 185 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/test_f2bev_conv_mt.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Dec 22 11:47:19 2022 5 | 6 | @author: Ekta 7 | """ 8 | import torch,time,ntpath #cv2 9 | from torch import nn, optim 10 | import numpy as np 11 | from test_loader_multi_task import UnityImageDataset 12 | import os,fnmatch 13 | from torch.utils.data import DataLoader 14 | from model_f2bev_conv_mt import FisheyeBEVFormer 15 | import torchvision.transforms as T 16 | from torchmetrics.functional import jaccard_index 17 | from losses.focal import BinaryFocalLoss 18 | 19 | 20 | def numpy_sigmoid(x): 21 | return 1/(1 + np.exp(-x)) 22 | 23 | if not os.path.exists('./predictions/'): 24 | os.makedirs('./predictions/') 25 | 26 | if not os.path.exists('./predictions/f2bev_conv_mt/'): 27 | os.makedirs('./predictions/f2bev_conv_mt/') 28 | if not os.path.exists('./predictions/f2bev_conv_mt/bevfeatures'): 29 | os.makedirs('./predictions/f2bev_conv_mt/bevfeatures') 30 | if not os.path.exists('./predictions/f2bev_conv_mt/predfull/'): 31 | os.makedirs('./predictions/f2bev_conv_mt/predfull/') 32 | if not os.path.exists('./predictions/f2bev_conv_mt/predfull/ce/'): 33 | os.makedirs('./predictions/f2bev_conv_mt/predfull/ce/') 34 | 35 | 36 | num_data_sequences = 20 37 | 38 | bev_dirs = ['./data/images'+str(i)+'/test/seg/bev' for i in range(num_data_sequences)] 39 | bev_depth_dirs = ['./data/images'+str(i)+'/test/depth' for i in range(num_data_sequences)] 40 | front_dirs = ['./data/images'+str(i)+'/test/rgb/front' for i in range(num_data_sequences)] 41 | left_dirs = ['./data/images'+str(i)+'/test/rgb/left' for i in range(num_data_sequences)] 42 | rear_dirs = ['./data/images'+str(i)+'/test/rgb/rear' for i in range(num_data_sequences)] 43 | right_dirs = ['./data/images'+str(i)+'/test/rgb/right' for i in range(num_data_sequences)] 44 | config_dirs = ['./data/images'+str(i)+'/test/cameraconfig' for i in range(num_data_sequences)] 45 | 46 | 47 | seq_len = 1 48 | 49 | image_lists = [] 50 | 51 | datalengths = [] 52 | 53 | for bev_dir in bev_dirs: 54 | names = fnmatch.filter(os.listdir(bev_dir), '*.png') 55 | 56 | files = [] 57 | for name in names: 58 | files.append(os.path.splitext(ntpath.basename(name))[0]) 59 | 60 | filelist = sorted(files,key=int) 61 | 62 | image_lists.append([f + '.png' for f in filelist]) 63 | datalengths.append(len(names)) 64 | 65 | 66 | 67 | transforms = torch.nn.Sequential(T.Resize((540,640)),) 68 | target_transforms = torch.nn.Sequential(T.Grayscale(num_output_channels=1)) 69 | 70 | test_data = UnityImageDataset(bev_dirs = bev_dirs, bev_depth_dirs = bev_depth_dirs,
front_dirs=front_dirs,left_dirs=left_dirs,rear_dirs=rear_dirs,right_dirs=right_dirs,image_lists=image_lists,config_dirs=config_dirs,seq_len= seq_len,datalengths = datalengths,num_data_sequences = num_data_sequences, 71 | transform = transforms, target_transform= target_transforms) 72 | 73 | 74 | test_dataloader = DataLoader(test_data, batch_size=1, shuffle=False) 75 | 76 | 77 | device = "cuda" if torch.cuda.is_available() else "cpu" 78 | #device = "cpu" 79 | print(f"Using {device} device") 80 | 81 | model = FisheyeBEVFormer().to(device) 82 | 83 | checkpoint = torch.load('./f2bev_conv_mt.pt') 84 | 85 | model.load_state_dict(checkpoint['model_state_dict']) 86 | 87 | 88 | 89 | def test_temporal(test_dataloader,seq_len,model,loss_fn,image_list): 90 | num_batches = len(test_dataloader) 91 | model.eval() 92 | stest_loss = 0 93 | stest_iou = 0 94 | htest_loss = 0 95 | htest_iou = 0 96 | with torch.no_grad(): 97 | for batch_idx, (dataseq, targetseq) in enumerate(test_dataloader): 98 | inp_img_seq, can_buses_seq = dataseq 99 | stargetseq,htargetseq = targetseq 100 | bs = stargetseq[0].shape[0] 101 | for ctr in range(seq_len): 102 | front = inp_img_seq[ctr][0] 103 | left = inp_img_seq[ctr][1] 104 | rear = inp_img_seq[ctr][2] 105 | right = inp_img_seq[ctr][3] 106 | 107 | 108 | 109 | starget = stargetseq[ctr] 110 | htarget = htargetseq[ctr] 111 | 112 | front = front.to(device) 113 | left = left.to(device) 114 | rear = rear.to(device) 115 | right = right.to(device) 116 | 117 | starget = torch.squeeze(starget,dim=1) 118 | idx0 = torch.where(starget <= 0.02) 119 | starget[idx0] = 10 120 | idx1 = torch.where(starget <= 0.07) 121 | starget[idx1] = 11 122 | idx2 = torch.where(starget <= 0.22) 123 | starget[idx2] = 12 124 | idx3 = torch.where(starget <= 0.60) 125 | starget[idx3] = 13 126 | idx4 = torch.where(starget <=1) 127 | starget[idx4] = 14 128 | starget = starget - 10 129 | starget = starget.to(torch.int64).to(device) 130 | 131 | 132 | 133 | htarget = torch.squeeze(htarget,dim=1) 134 | idx2 = torch.where(htarget <= 0.35) 135 | idx0 = torch.where(htarget >= 0.69) 136 | htarget[htarget >= 0] = 1 137 | htarget[idx2] = 2 138 | htarget[idx0] = 0 139 | htarget = htarget.to(torch.int64).to(device) 140 | 141 | 142 | can_buses = can_buses_seq[ctr] 143 | 144 | 145 | if batch_idx == 0: 146 | prev_bev = None 147 | 148 | else: 149 | prev_bev = torch.Tensor([np.load('./predictions/f2bev_conv_mt/bevfeatures/'+image_list[batch_idx-1].split('.')[0]+'.npy')]).to(device) 150 | spred, hpred, for_prev_bev = model(front,left,rear,right, list(can_buses),prev_bev) 151 | 152 | 153 | for i,p in enumerate(for_prev_bev): 154 | np.save('./predictions/f2bev_conv_mt/bevfeatures/'+image_list[batch_idx].split('.')[0]+'.npy',p.detach().cpu().numpy()) 155 | for i,p in enumerate(hpred): 156 | np.save('./predictions/f2bev_conv_mt/predfull/ce/'+image_list[batch_idx].split('.')[0]+'h.npy',p.detach().cpu().numpy()) 157 | for i,p in enumerate(spred): 158 | np.save('./predictions/f2bev_conv_mt/predfull/ce/'+image_list[batch_idx].split('.')[0]+'s.npy',p.detach().cpu().numpy()) 159 | 160 | stest_loss += loss_fn(spred, starget).item() 161 | stest_iou += jaccard_index(spred,starget.to(spred.device),num_classes=5,average='none') 162 | htest_loss += loss_fn(hpred, htarget).item() 163 | htest_iou += jaccard_index(hpred,htarget.to(hpred.device),num_classes=3,average='none') 164 | 165 | 166 | htest_loss/= num_batches*seq_len 167 | htest_iou /= num_batches 168 | stest_loss/= num_batches*seq_len 169 | stest_iou /= num_batches 170 | 171 | print(f"Test 
Error: Avg seg loss: {stest_loss:>8f} \n") 172 | print(f"Test Error: Avg ht loss: {htest_loss:>8f} \n") 173 | 174 | print("Segmentation: ", stest_iou) 175 | print("Height: ", htest_iou) 176 | return stest_loss,htest_loss 177 | 178 | all_images = [] 179 | for test_list in image_lists: 180 | all_images = all_images + test_list 181 | 182 | loss = nn.CrossEntropyLoss() #nn.MSELoss() #BinaryFocalLoss(alpha=0.25,gamma=2) 183 | test_temporal(test_dataloader,seq_len,model,loss,all_images) 184 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/bifpn_configs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | Created on Wed Dec 28 11:35:28 2022 6 | 7 | @author: Ekta 8 | """ 9 | 10 | import itertools 11 | 12 | from omegaconf import OmegaConf 13 | 14 | 15 | def bifpn_config(min_level, max_level, weight_method=None): 16 | """BiFPN config. 17 | Adapted from https://github.com/google/automl/blob/56815c9986ffd4b508fe1d68508e268d129715c1/efficientdet/keras/fpn_configs.py 18 | """ 19 | p = OmegaConf.create() 20 | weight_method = weight_method or 'fastattn' 21 | 22 | num_levels = max_level - min_level + 1 23 | node_ids = {min_level + i: [i] for i in range(num_levels)} 24 | 25 | level_last_id = lambda level: node_ids[level][-1] 26 | level_all_ids = lambda level: node_ids[level] 27 | id_cnt = itertools.count(num_levels) 28 | 29 | p.nodes = [] 30 | for i in range(max_level - 1, min_level - 1, -1): 31 | # top-down path. 32 | p.nodes.append({ 33 | 'feat_level': i, 34 | 'inputs_offsets': [level_last_id(i), level_last_id(i + 1)], 35 | 'weight_method': weight_method, 36 | }) 37 | node_ids[i].append(next(id_cnt)) 38 | 39 | for i in range(min_level + 1, max_level + 1): 40 | # bottom-up path. 41 | p.nodes.append({ 42 | 'feat_level': i, 43 | 'inputs_offsets': level_all_ids(i) + [level_last_id(i - 1)], 44 | 'weight_method': weight_method, 45 | }) 46 | node_ids[i].append(next(id_cnt)) 47 | return p 48 | 49 | 50 | def panfpn_config(min_level, max_level, weight_method=None): 51 | """PAN FPN config. 52 | 53 | This defines FPN layout from Path Aggregation Networks as an alternate to 54 | BiFPN, it does not implement the full PAN spec. 55 | 56 | Paper: https://arxiv.org/abs/1803.01534 57 | """ 58 | p = OmegaConf.create() 59 | weight_method = weight_method or 'fastattn' 60 | 61 | num_levels = max_level - min_level + 1 62 | node_ids = {min_level + i: [i] for i in range(num_levels)} 63 | level_last_id = lambda level: node_ids[level][-1] 64 | id_cnt = itertools.count(num_levels) 65 | 66 | p.nodes = [] 67 | for i in range(max_level, min_level - 1, -1): 68 | # top-down path. 69 | offsets = [level_last_id(i), level_last_id(i + 1)] if i != max_level else [level_last_id(i)] 70 | p.nodes.append({ 71 | 'feat_level': i, 72 | 'inputs_offsets': offsets, 73 | 'weight_method': weight_method, 74 | }) 75 | node_ids[i].append(next(id_cnt)) 76 | 77 | for i in range(min_level, max_level + 1): 78 | # bottom-up path. 79 | offsets = [level_last_id(i), level_last_id(i - 1)] if i != min_level else [level_last_id(i)] 80 | p.nodes.append({ 81 | 'feat_level': i, 82 | 'inputs_offsets': offsets, 83 | 'weight_method': weight_method, 84 | }) 85 | node_ids[i].append(next(id_cnt)) 86 | 87 | return p 88 | 89 | 90 | def qufpn_config(min_level, max_level, weight_method=None): 91 | """A dynamic quad fpn config that can adapt to different min/max levels. 
92 | 93 | It extends the idea of BiFPN, and has four paths: 94 | (up_down -> bottom_up) + (bottom_up -> up_down). 95 | 96 | Paper: https://ieeexplore.ieee.org/document/9225379 97 | Ref code: From contribution to TF EfficientDet 98 | https://github.com/google/automl/blob/eb74c6739382e9444817d2ad97c4582dbe9a9020/efficientdet/keras/fpn_configs.py 99 | """ 100 | p = OmegaConf.create() 101 | weight_method = weight_method or 'fastattn' 102 | quad_method = 'fastattn' 103 | num_levels = max_level - min_level + 1 104 | node_ids = {min_level + i: [i] for i in range(num_levels)} 105 | level_last_id = lambda level: node_ids[level][-1] 106 | level_all_ids = lambda level: node_ids[level] 107 | level_first_id = lambda level: node_ids[level][0] 108 | id_cnt = itertools.count(num_levels) 109 | 110 | p.nodes = [] 111 | for i in range(max_level - 1, min_level - 1, -1): 112 | # top-down path 1. 113 | p.nodes.append({ 114 | 'feat_level': i, 115 | 'inputs_offsets': [level_last_id(i), level_last_id(i + 1)], 116 | 'weight_method': weight_method 117 | }) 118 | node_ids[i].append(next(id_cnt)) 119 | node_ids[max_level].append(node_ids[max_level][-1]) 120 | 121 | for i in range(min_level + 1, max_level): 122 | # bottom-up path 2. 123 | p.nodes.append({ 124 | 'feat_level': i, 125 | 'inputs_offsets': level_all_ids(i) + [level_last_id(i - 1)], 126 | 'weight_method': weight_method 127 | }) 128 | node_ids[i].append(next(id_cnt)) 129 | 130 | i = max_level 131 | p.nodes.append({ 132 | 'feat_level': i, 133 | 'inputs_offsets': [level_first_id(i)] + [level_last_id(i - 1)], 134 | 'weight_method': weight_method 135 | }) 136 | node_ids[i].append(next(id_cnt)) 137 | node_ids[min_level].append(node_ids[min_level][-1]) 138 | 139 | for i in range(min_level + 1, max_level + 1, 1): 140 | # bottom-up path 3. 141 | p.nodes.append({ 142 | 'feat_level': i, 143 | 'inputs_offsets': [ 144 | level_first_id(i), level_last_id(i - 1) if i != min_level + 1 else level_first_id(i - 1)], 145 | 'weight_method': weight_method 146 | }) 147 | node_ids[i].append(next(id_cnt)) 148 | node_ids[min_level].append(node_ids[min_level][-1]) 149 | 150 | for i in range(max_level - 1, min_level, -1): 151 | # top-down path 4. 152 | p.nodes.append({ 153 | 'feat_level': i, 154 | 'inputs_offsets': [node_ids[i][0]] + [node_ids[i][-1]] + [level_last_id(i + 1)], 155 | 'weight_method': weight_method 156 | }) 157 | node_ids[i].append(next(id_cnt)) 158 | i = min_level 159 | p.nodes.append({ 160 | 'feat_level': i, 161 | 'inputs_offsets': [node_ids[i][0]] + [level_last_id(i + 1)], 162 | 'weight_method': weight_method 163 | }) 164 | node_ids[i].append(next(id_cnt)) 165 | node_ids[max_level].append(node_ids[max_level][-1]) 166 | 167 | # NOTE: the order of the quad path is reversed from the original, my code expects the output of 168 | # each FPN repeat to be same as input from backbone, in order of increasing reductions 169 | for i in range(min_level, max_level + 1): 170 | # quad-add path. 
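        # node_ids[i][2] and node_ids[i][4] hold each level's most recent feature id after the second and fourth traversal passes (boundary levels carry duplicated ids so every level ends up with five entries); this final node fuses those two maps, one ending bottom-up and one ending top-down.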
171 | p.nodes.append({ 172 | 'feat_level': i, 173 | 'inputs_offsets': [node_ids[i][2], node_ids[i][4]], 174 | 'weight_method': quad_method 175 | }) 176 | node_ids[i].append(next(id_cnt)) 177 | 178 | return p 179 | 180 | 181 | def get_fpn_config(fpn_name, min_level=3, max_level=7): 182 | if not fpn_name: 183 | fpn_name = 'bifpn_fa' 184 | name_to_config = { 185 | 'bifpn_sum': bifpn_config(min_level=min_level, max_level=max_level, weight_method='sum'), 186 | 'bifpn_attn': bifpn_config(min_level=min_level, max_level=max_level, weight_method='attn'), 187 | 'bifpn_fa': bifpn_config(min_level=min_level, max_level=max_level, weight_method='fastattn'), 188 | 'pan_sum': panfpn_config(min_level=min_level, max_level=max_level, weight_method='sum'), 189 | 'pan_fa': panfpn_config(min_level=min_level, max_level=max_level, weight_method='fastattn'), 190 | 'qufpn_sum': qufpn_config(min_level=min_level, max_level=max_level, weight_method='sum'), 191 | 'qufpn_fa': qufpn_config(min_level=min_level, max_level=max_level, weight_method='fastattn'), 192 | } 193 | return name_to_config[fpn_name] 194 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/loader_single_task.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Wed Dec 21 12:15:36 2022 5 | 6 | @author: Ekta 7 | """ 8 | 9 | import torch 10 | from torch.utils.data import Dataset 11 | #from torchvision import datasets 12 | #from torchvision.transforms import ToTensor 13 | #import matplotlib.pyplot as plt 14 | #from torch.utils.data import DataLoader 15 | import numpy as np 16 | import os#,fnmatch 17 | from torchvision.io import read_image 18 | import random 19 | class UnityImageDataset(Dataset): 20 | def __init__(self, bev_dirs, front_dirs, left_dirs, rear_dirs, right_dirs, image_lists, config_dirs, seq_len, datalengths, num_data_sequences, transform=None, target_transform=None): 21 | self.bev_dirs = bev_dirs 22 | self.front_dirs = front_dirs 23 | self.left_dirs = left_dirs 24 | self.rear_dirs = rear_dirs 25 | self.right_dirs = right_dirs 26 | self.image_lists = image_lists 27 | self.config_dirs = config_dirs 28 | self.transform = transform 29 | self.target_transform = target_transform 30 | self.seq_len = seq_len 31 | self.datalengths = datalengths 32 | self.num_data_sequences = num_data_sequences 33 | 34 | def __len__(self): 35 | total = 0 36 | for count in self.datalengths: 37 | total = total + count 38 | return total 39 | 40 | def find_which_sequence(self,idx): 41 | 42 | eff_data_lens = [x for x in self.datalengths] 43 | 44 | # seq_idx = 0 45 | # if idx > -1 and idx < eff_data_lens[0]: 46 | # seq_idx = 0 47 | # elif idx > eff_data_lens[0] -1 and idx < (eff_data_lens[0] + eff_data_lens[1]) : 48 | # seq_idx = 1 49 | # elif idx > (eff_data_lens[0] + eff_data_lens[1]) - 1 and idx < (eff_data_lens[0] + eff_data_lens[1] + eff_data_lens[2]) : 50 | # seq_idx = 2 51 | # elif idx > (eff_data_lens[0] + eff_data_lens[1] + eff_data_lens[2]) - 1 and idx < (eff_data_lens[0] + eff_data_lens[1] + eff_data_lens[2] + eff_data_lens[3]): 52 | # seq_idx = 3 53 | # else: 54 | # raise NotImplementedError 55 | 56 | currptr = 0 57 | nextptr = eff_data_lens[0] 58 | 59 | for i in range(self.num_data_sequences): 60 | if i == 0: 61 | currptr = 0 62 | nextptr = eff_data_lens[0] 63 | 64 | if idx > currptr -1 and idx < nextptr: 65 | seq_idx = 0 66 | else: 67 | currptr = sum(eff_data_lens[:i]) 68 | nextptr = 
sum(eff_data_lens[:i+1]) 69 | if idx > currptr -1 and idx < nextptr: 70 | seq_idx = i 71 | 72 | 73 | # if idx > -1 and idx < eff_data_lens[0]: 74 | # seq_idx = 0 75 | # elif idx > eff_data_lens[0] -1 and idx < (eff_data_lens[0] + eff_data_lens[1]) : 76 | # seq_idx = 1 77 | # elif idx > (eff_data_lens[0] + eff_data_lens[1]) - 1 and idx < (eff_data_lens[0] + eff_data_lens[1] + eff_data_lens[2]) : 78 | # seq_idx = 2 79 | # elif idx > (eff_data_lens[0] + eff_data_lens[1] + eff_data_lens[2]) - 1 and idx < (eff_data_lens[0] + eff_data_lens[1] + eff_data_lens[2] + eff_data_lens[3]): 80 | # seq_idx = 3 81 | # else: 82 | # raise NotImplementedError 83 | 84 | 85 | #print(idx, seq_idx) 86 | return seq_idx 87 | 88 | def get_id_in_seq(self,seq_idx,idx): 89 | eff_data_lens = [x for x in self.datalengths] 90 | 91 | # if seq_idx == 0: 92 | # subtract = 0 93 | # elif seq_idx == 1: 94 | # subtract = eff_data_lens[0] 95 | # elif seq_idx == 2: 96 | # subtract = eff_data_lens[0] + eff_data_lens[1] 97 | # elif seq_idx == 3: 98 | # subtract = eff_data_lens[0] + eff_data_lens[1] + eff_data_lens[2] 99 | 100 | if seq_idx == 0: 101 | subtract = 0 102 | else: 103 | subtract = sum(eff_data_lens[:seq_idx]) 104 | return idx - subtract 105 | 106 | def read_config_for_bevposrot(self,configdir,filename): 107 | with open(os.path.join(configdir, filename)) as f: 108 | lines = f.readlines() 109 | 110 | bpos = [float(lines[5].split(',')[1]),float(lines[5].split(',')[2]),float(lines[5].split(',')[3])] 111 | brot = [float(lines[5].split(',')[4]),float(lines[5].split(',')[5]),float(lines[5].split(',')[6])] 112 | return [bpos,brot] 113 | 114 | 115 | def __getitem__(self, idx): 116 | 117 | seq_idx = self.find_which_sequence(idx) 118 | 119 | bev_dir = self.bev_dirs[seq_idx] 120 | image_list = self.image_lists[seq_idx] 121 | front_dir = self.front_dirs[seq_idx] 122 | left_dir = self.left_dirs[seq_idx] 123 | rear_dir = self.rear_dirs[seq_idx] 124 | right_dir = self.right_dirs[seq_idx] 125 | config_dir = self.config_dirs[seq_idx] 126 | 127 | idinseq = self.get_id_in_seq(seq_idx,idx) 128 | 129 | return_images_tensor = [] 130 | return_target = [] 131 | return_can_bus = [] 132 | ##first image 133 | 134 | index_list = list(range(idinseq-self.seq_len, idinseq)) 135 | random.shuffle(index_list) 136 | index_list = sorted(index_list[1:]) 137 | index_list.append(idinseq) 138 | 139 | for idxctr,cidx in enumerate(index_list): 140 | cidx = max(0, cidx) 141 | tar_path = os.path.join(bev_dir, image_list[cidx]) 142 | tar = read_image(tar_path)[:,100:~99,100:~99] 143 | tar = torch.mul(tar.float(),1/255) 144 | if self.target_transform: 145 | tar = self.target_transform(tar) 146 | inp = [] 147 | for cam_views in [front_dir, left_dir, rear_dir, right_dir]: 148 | img_path = os.path.join(cam_views, image_list[cidx]) 149 | image = read_image(img_path) 150 | if image.shape[0] == 4: 151 | image = image[0:3,:,:] 152 | image = torch.mul(image.float(),1/255) 153 | # if self.transform: 154 | # image = self.transform(image) 155 | inp.append(image) 156 | 157 | [bpos,brot] = self.read_config_for_bevposrot(config_dir,image_list[cidx].split('.')[0]+'.txt') 158 | 159 | 160 | can_bus = np.zeros((5,)) 161 | if idxctr == 0: 162 | #pos 163 | can_bus[0] = 0 164 | can_bus[1] = 0 165 | can_bus[2] = 0 166 | #angle 167 | can_bus[3] = brot[1] #(90 - bevrot[num,1])/180*np.pi ##ego_angle is kept unchanged .. i.e. 
no delta ##before that 270 - 168 | can_bus[4] = 0 169 | 170 | else: 171 | can_bus[0] = bpos[0] - return_can_bus[idxctr-1][0] 172 | can_bus[1] = bpos[2] - return_can_bus[idxctr-1][2] 173 | can_bus[2] = bpos[1] - return_can_bus[idxctr-1][1] 174 | can_bus[3] = brot[1] 175 | 176 | can_bus[4] = brot[1] - return_can_bus[idxctr-1][3] 177 | 178 | 179 | return_images_tensor.append(torch.stack(inp)) 180 | return_target.append(tar) 181 | return_can_bus.append(can_bus) 182 | 183 | 184 | if self.transform: 185 | return_images_tensor = self.transform(torch.cat(return_images_tensor, dim=0)) 186 | 187 | #return_images = [list(torch.split(x, 4)) for x in list(torch.split(return_images, self.seq_len))] #because 4 camera views 188 | return_images = [] 189 | for frameidx in range(self.seq_len): 190 | inp = [] 191 | for camnum in range(4): #4 cam views 192 | inp.append(return_images_tensor[(4*frameidx) + camnum, :,:,:]) 193 | return_images.append(inp) 194 | 195 | #return torch.cat((inp[0],inp[1],inp[2],inp[3]),axis=0), tar 196 | return [return_images,return_can_bus], return_target 197 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/test_loader_single_task.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Dec 22 11:53:01 2022 5 | 6 | @author: Ekta 7 | """ 8 | 9 | 10 | import torch 11 | from torch.utils.data import Dataset 12 | #from torchvision import datasets 13 | #from torchvision.transforms import ToTensor 14 | #import matplotlib.pyplot as plt 15 | #from torch.utils.data import DataLoader 16 | import numpy as np 17 | import os#,fnmatch 18 | from torchvision.io import read_image 19 | import random 20 | 21 | ##this will have seq len = 1 22 | 23 | class UnityImageDataset(Dataset): 24 | def __init__(self, bev_dirs, front_dirs, left_dirs, rear_dirs, right_dirs, image_lists, config_dirs, seq_len, datalengths, num_data_sequences, transform=None, target_transform=None): 25 | self.bev_dirs = bev_dirs 26 | self.front_dirs = front_dirs 27 | self.left_dirs = left_dirs 28 | self.rear_dirs = rear_dirs 29 | self.right_dirs = right_dirs 30 | self.image_lists = image_lists 31 | self.config_dirs = config_dirs 32 | self.transform = transform 33 | self.target_transform = target_transform 34 | self.seq_len = seq_len 35 | self.datalengths = datalengths 36 | self.num_data_sequences = num_data_sequences 37 | 38 | def __len__(self): 39 | total = 0 40 | for count in self.datalengths: 41 | total = total + count 42 | return total 43 | 44 | def find_which_sequence(self,idx): 45 | 46 | eff_data_lens = [x for x in self.datalengths] 47 | 48 | # seq_idx = 0 49 | # if idx > -1 and idx < eff_data_lens[0]: 50 | # seq_idx = 0 51 | # elif idx > eff_data_lens[0] -1 and idx < (eff_data_lens[0] + eff_data_lens[1]) : 52 | # seq_idx = 1 53 | # elif idx > (eff_data_lens[0] + eff_data_lens[1]) - 1 and idx < (eff_data_lens[0] + eff_data_lens[1] + eff_data_lens[2]) : 54 | # seq_idx = 2 55 | # elif idx > (eff_data_lens[0] + eff_data_lens[1] + eff_data_lens[2]) - 1 and idx < (eff_data_lens[0] + eff_data_lens[1] + eff_data_lens[2] + eff_data_lens[3]): 56 | # seq_idx = 3 57 | # else: 58 | # raise NotImplementedError 59 | 60 | currptr = 0 61 | nextptr = eff_data_lens[0] 62 | 63 | for i in range(self.num_data_sequences): 64 | if i == 0: 65 | currptr = 0 66 | nextptr = eff_data_lens[0] 67 | 68 | if idx > currptr -1 and idx < nextptr: 69 | seq_idx = 0 70 | else: 71 | currptr = 
sum(eff_data_lens[:i]) 72 | nextptr = sum(eff_data_lens[:i+1]) 73 | if idx > currptr -1 and idx < nextptr: 74 | seq_idx = i 75 | 76 | 77 | # if idx > -1 and idx < eff_data_lens[0]: 78 | # seq_idx = 0 79 | # elif idx > eff_data_lens[0] -1 and idx < (eff_data_lens[0] + eff_data_lens[1]) : 80 | # seq_idx = 1 81 | # elif idx > (eff_data_lens[0] + eff_data_lens[1]) - 1 and idx < (eff_data_lens[0] + eff_data_lens[1] + eff_data_lens[2]) : 82 | # seq_idx = 2 83 | # elif idx > (eff_data_lens[0] + eff_data_lens[1] + eff_data_lens[2]) - 1 and idx < (eff_data_lens[0] + eff_data_lens[1] + eff_data_lens[2] + eff_data_lens[3]): 84 | # seq_idx = 3 85 | # else: 86 | # raise NotImplementedError 87 | 88 | 89 | #print(idx, seq_idx) 90 | return seq_idx 91 | 92 | def get_id_in_seq(self,seq_idx,idx): 93 | eff_data_lens = [x for x in self.datalengths] 94 | 95 | # if seq_idx == 0: 96 | # subtract = 0 97 | # elif seq_idx == 1: 98 | # subtract = eff_data_lens[0] 99 | # elif seq_idx == 2: 100 | # subtract = eff_data_lens[0] + eff_data_lens[1] 101 | # elif seq_idx == 3: 102 | # subtract = eff_data_lens[0] + eff_data_lens[1] + eff_data_lens[2] 103 | 104 | if seq_idx == 0: 105 | subtract = 0 106 | else: 107 | subtract = sum(eff_data_lens[:seq_idx]) 108 | return idx - subtract 109 | 110 | def read_config_for_bevposrot(self,configdir,filename): 111 | with open(os.path.join(configdir, filename)) as f: 112 | lines = f.readlines() 113 | 114 | bpos = [float(lines[5].split(',')[1]),float(lines[5].split(',')[2]),float(lines[5].split(',')[3])] 115 | brot = [float(lines[5].split(',')[4]),float(lines[5].split(',')[5]),float(lines[5].split(',')[6])] 116 | return [bpos,brot] 117 | 118 | 119 | def __getitem__(self, idx): 120 | 121 | seq_idx = self.find_which_sequence(idx) 122 | 123 | bev_dir = self.bev_dirs[seq_idx] 124 | image_list = self.image_lists[seq_idx] 125 | front_dir = self.front_dirs[seq_idx] 126 | left_dir = self.left_dirs[seq_idx] 127 | rear_dir = self.rear_dirs[seq_idx] 128 | right_dir = self.right_dirs[seq_idx] 129 | config_dir = self.config_dirs[seq_idx] 130 | 131 | idinseq = self.get_id_in_seq(seq_idx,idx) 132 | 133 | return_images_tensor = [] 134 | return_target = [] 135 | return_can_bus = [] 136 | ##first image 137 | 138 | index_list = list(range(idinseq-self.seq_len, idinseq)) 139 | random.shuffle(index_list) 140 | index_list = sorted(index_list[1:]) 141 | index_list.append(idinseq) 142 | 143 | for idxctr,cidx in enumerate(index_list): 144 | cidx = max(0, cidx) 145 | tar_path = os.path.join(bev_dir, image_list[cidx]) 146 | tar = read_image(tar_path)[:,100:~99,100:~99] 147 | tar = torch.mul(tar.float(),1/255) 148 | if self.target_transform: 149 | tar = self.target_transform(tar) 150 | inp = [] 151 | for cam_views in [front_dir, left_dir, rear_dir, right_dir]: 152 | img_path = os.path.join(cam_views, image_list[cidx]) 153 | image = read_image(img_path) 154 | image = torch.mul(image.float(),1/255) 155 | # if self.transform: 156 | # image = self.transform(image) 157 | inp.append(image) 158 | 159 | [bpos,brot] = self.read_config_for_bevposrot(config_dir,image_list[cidx].split('.')[0]+'.txt') 160 | 161 | can_bus = np.zeros((5,)) 162 | if cidx == 0: 163 | #pos 164 | can_bus[0] = 0 165 | can_bus[1] = 0 166 | can_bus[2] = 0 167 | #angle 168 | can_bus[3] = brot[1] #(90 - bevrot[num,1])/180*np.pi ##ego_angle is kept unchanged .. i.e. 
no delta ##before that 270 - 169 | can_bus[4] = 0 170 | 171 | else: 172 | [prev_bpos,prev_brot] = self.read_config_for_bevposrot(config_dir,image_list[cidx-1].split('.')[0]+'.txt') 173 | 174 | can_bus[0] = bpos[0] - prev_bpos[0] 175 | can_bus[1] = bpos[2] - prev_bpos[2] 176 | can_bus[2] = bpos[1] - prev_bpos[1] 177 | can_bus[3] = brot[1] 178 | 179 | can_bus[4] = brot[1] - prev_brot[1] 180 | 181 | return_images_tensor.append(torch.stack(inp)) 182 | return_target.append(tar) 183 | return_can_bus.append(can_bus) 184 | 185 | 186 | if self.transform: 187 | return_images_tensor = self.transform(torch.cat(return_images_tensor, dim=0)) 188 | 189 | #return_images = [list(torch.split(x, 4)) for x in list(torch.split(return_images, self.seq_len))] #because 4 camera views 190 | return_images = [] 191 | for frameidx in range(self.seq_len): 192 | inp = [] 193 | for camnum in range(4): #4 cam views 194 | inp.append(return_images_tensor[(4*frameidx) + camnum, :,:,:]) 195 | return_images.append(inp) 196 | 197 | #return torch.cat((inp[0],inp[1],inp[2],inp[3]),axis=0), tar 198 | return [return_images,return_can_bus], return_target 199 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/pre_computation/computeNormalizedReferencePoints.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Feb 2 10:27:02 2023 5 | 6 | @author: smartslab 7 | """ 8 | 9 | import cv2,fnmatch,os 10 | import numpy as np 11 | import matplotlib.pyplot as plt 12 | from scipy.spatial.transform import Rotation as R 13 | 14 | 15 | 16 | files = fnmatch.filter(os.listdir('./forward_looking_camera_model/data/bev'),'*.png') 17 | 18 | 19 | 20 | def load_yaml(filename): 21 | content = cv2.FileStorage(filename, cv2.FILE_STORAGE_READ) 22 | K = content.getNode("K").mat() 23 | D = content.getNode("D").mat() 24 | xi = content.getNode("xi").mat() 25 | return K,D,xi 26 | 27 | 28 | def convertPoseFromUnityToOpenCV(pose): 29 | opencvpose = {} 30 | for key,value in pose.items(): 31 | # ======================= UnityToOpenCV: Flip y axis ======================== 32 | opencvpose[key] = [[-1,1,-1]*value[0] , [1,-1,1]*value[1]] 33 | # =========================================================================== 34 | return opencvpose 35 | 36 | 37 | def convertOpenCVPoseToRvecTvec(opencvpose): 38 | extrinsics = {} 39 | for key,value in opencvpose.items(): 40 | rot = value[0] 41 | # ====================== Use "ZYX" for extrinsic rotation =================== 42 | intermediater = R.from_euler('ZYX',[rot[2], rot[1], rot[0]]) 43 | # =========================================================================== 44 | rotmat = intermediater.as_matrix() 45 | # ========================= cam2world -> world2cam ========================== 46 | ''' 47 | cam2world: R, t 48 | world2cam: R', t' 49 | R' = R.T 50 | t' = - R.T @ t 51 | ''' 52 | rvec,_ = cv2.Rodrigues(rotmat.T) 53 | tvec = -rotmat.T @ value[1].reshape(3,1) 54 | # =========================================================================== 55 | extrinsics[key] = [rvec,tvec] 56 | return extrinsics 57 | 58 | 59 | def computeRealWorldLocationOfBEVPixels(h,w, resolution,height_anchors,scale,offset): 60 | z = np.arange(0, h, 1) 61 | x = np.arange(0, w, 1) 62 | zprime = ((offset/scale)+h/2-z)*resolution[1] # offset for unity...unity origin at mid of the car rear and on the ground 63 | # ============================================================================== 64 | 
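    # BEV grid indices are converted to metric world coordinates here: z (above) runs forward from the Unity origin at the middle of the rear wheel axis on the ground, x (below) runs laterally from the grid centre, and both are scaled by the metres-per-pixel resolution.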
xprime = (x-w/2)*resolution[0] 65 | # ============================================================================== 66 | 67 | worldpoints = [] 68 | bevpointsforworldpoints = [] 69 | for j,tz in enumerate(zprime): 70 | for i,tx in enumerate(xprime): 71 | for ty in height_anchors: 72 | worldpoints.append([tx,-ty,tz]) # because opencv right handed coordinate system has x,z same as unity but y is downward 73 | bevpointsforworldpoints.append([x[i],z[j]]) 74 | 75 | return np.asarray(worldpoints),np.asarray(bevpointsforworldpoints) 76 | 77 | 78 | 79 | def getValidProjectPoints(imgpoints,validfisheyemask): 80 | validpointidxes = [] 81 | validimgpoints = [] 82 | inpfisheyeshape = validfisheyemask.shape 83 | imgpoints = imgpoints.astype(int) 84 | for i in range(len(imgpoints)): 85 | loc = imgpoints[i,:] 86 | if loc[0] > 0 and loc[0] < inpfisheyeshape[1]: 87 | if loc[1] > 0 and loc[1] < inpfisheyeshape[0]: 88 | if validfisheyemask[loc[1],loc[0]] == 255: 89 | validimgpoints.append(loc) 90 | validpointidxes.append(i) 91 | return np.asarray(validimgpoints).astype(int),validpointidxes 92 | 93 | 94 | 95 | 96 | unitypose = {} 97 | unitypose['front'] = [(np.pi/180) *np.asarray([26,0,0]),np.asarray([0,0.406,3.873])] ##xyz as per unity 98 | unitypose['left'] = [(np.pi/180) *np.asarray([0,-90,0]),np.asarray([-1.024,0.8,2.053])] 99 | unitypose['rear'] = [(np.pi/180) *np.asarray([3,180,0]),np.asarray([0.132,0.744,-1.001])] 100 | unitypose['right'] = [(np.pi/180) *np.asarray([0,90,0]),np.asarray([1.015,0.801,2.04])] 101 | 102 | opencvpose = convertPoseFromUnityToOpenCV(unitypose) 103 | extrinsics = convertOpenCVPoseToRvecTvec(opencvpose) 104 | 105 | for file in files[0:1]: 106 | 107 | 108 | unity_offset_for_orgin = 33 #pixels 56 is ffset for unity...unity origin at mid of the car rear wheel axis and on the ground 109 | #this 56 is considering 600*600 BEV. If I resize it this will change. 
If I crop it, it will not change 110 | 111 | batch_size = 1 112 | bev_h = 400 #height of the Unity generated bev i consider (if i crop then consider cropped size) 113 | bev_w = 400 #width of the Unity generated bev i consider (if i crop then consider cropped size) 114 | 115 | bh = 50 116 | bw = 50 117 | bev_scale = int(bev_h/bh) ##600/50 118 | bevformer_bev_size = (bh,bw) 119 | 120 | 121 | K,D,xi = load_yaml('./forward_looking_camera_model/flcw_unity.yml') 122 | #resolution = bev_scale*np.asarray([0.036,0.042]) 123 | resolution = bev_scale*np.asarray([2/48,2/48]) ## resolution: 48 pixels is 2m 124 | bev_mask = [] 125 | 126 | height_anchors = [0-0.377, 0.25 -0.377,1.8 - 0.377] #[0, 0.25,1.8] #[0, 1.5, 3, 4.5] # in meters 127 | 128 | bev_mask = [] 129 | reference_points_cam = [] 130 | for camtype in ['front','left','rear','right']: 131 | 132 | img = cv2.imread('./forward_looking_camera_model/data/'+camtype+'/'+file) 133 | imgrgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 134 | h,w,_ = img.shape 135 | validfisheyemask = np.load('./forward_looking_camera_model/masks/'+camtype+'.npy') 136 | 137 | if camtype == 'front' or camtype=='rear': 138 | validfisheyemask = np.flip(validfisheyemask) 139 | 140 | view_bev_mask = [] 141 | view_reference_points_cam = [] 142 | 143 | for height in height_anchors: 144 | 145 | worldpoints,bevptsforworldpoints = computeRealWorldLocationOfBEVPixels(bh,bw, resolution,[height],bev_scale,unity_offset_for_orgin) 146 | worldpoints = np.expand_dims(worldpoints, 0) 147 | 148 | imgpoints,_ = cv2.omnidir.projectPoints(worldpoints, extrinsics[camtype][0], extrinsics[camtype][1], K, xi[0,0], D) 149 | validimgpoints,valididxes = getValidProjectPoints(imgpoints[0,:,:], validfisheyemask) 150 | 151 | ## new addition 152 | filtered_valididxes = [] 153 | for idx in valididxes: 154 | 155 | bloc = bevptsforworldpoints[idx,:] 156 | #print(bloc) 157 | if (camtype == 'front' and bloc[1] < 25) or (camtype == 'left' and bloc[0] < 25) or (camtype == 'rear' and bloc[1] > 25) or (camtype == 'right' and bloc[0] > 25): 158 | filtered_valididxes.append(idx) 159 | 160 | 161 | curr_bev_mask = np.zeros((bh*bw,)) 162 | 163 | for fidx in filtered_valididxes: 164 | curr_bev_mask[fidx] = 1 165 | 166 | imgpoints[0,:,0] = imgpoints[0,:,0].astype(float)/w ##normalize 167 | imgpoints[0,:,1] = imgpoints[0,:,1].astype(float)/h ##normalize 168 | 169 | plt.figure() 170 | plt.imshow(np.reshape(curr_bev_mask,(50,50))) 171 | 172 | view_bev_mask.append(curr_bev_mask) 173 | view_reference_points_cam.append(imgpoints[0,:,:]) 174 | 175 | view_bev_mask = np.expand_dims(np.transpose(np.asarray(view_bev_mask)), axis = 0) 176 | view_reference_points_cam = np.transpose(np.asarray(view_reference_points_cam)) 177 | view_reference_points_cam = np.transpose(view_reference_points_cam,[1,2,0]) 178 | view_reference_points_cam = np.expand_dims(view_reference_points_cam, axis = 0) 179 | 180 | bev_mask.append(view_bev_mask) 181 | reference_points_cam.append(view_reference_points_cam) 182 | 183 | bev_mask = np.tile(np.asarray(bev_mask),(1,batch_size,1,1)) 184 | reference_points_cam = np.tile(np.asarray(reference_points_cam),(1,batch_size,1,1,1)) 185 | 186 | np.save('./unity_data/bev_mask.npy',np.array(bev_mask,dtype=bool)) 187 | np.save('./unity_data/reference_points_cam.npy',reference_points_cam) 188 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/train_f2bev_conv_st_height.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 
2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue Dec 13 16:44:04 2022 5 | 6 | @author: Ekta 7 | """ 8 | 9 | 10 | import torch,ntpath,random 11 | from torch import nn, optim 12 | import numpy as np 13 | from loader_single_task import UnityImageDataset 14 | 15 | import os,fnmatch 16 | from torch.utils.data import DataLoader 17 | from model_f2bev_conv_st_height import FisheyeBEVFormer 18 | import torchvision.transforms as T 19 | from losses.focal import BinaryFocalLoss, CrossEntropyFocalLoss #FocalLoss 20 | 21 | def numpy_sigmoid(x): 22 | return 1/(1 + np.exp(-x)) 23 | 24 | 25 | 26 | def gamma_correction(image): 27 | gamma = random.choice([0.8,1.0,1.2,1.4]) 28 | return T.functional.adjust_gamma(image,gamma,gain = 1) 29 | 30 | def plt_pred_image(pred): 31 | p = pred.detach().cpu().numpy() 32 | #zero = numpy_sigmoid(p[0,:,:]) 33 | one = numpy_sigmoid(p[1,:,:]) 34 | two = numpy_sigmoid(p[2,:,:]) 35 | 36 | show = np.zeros((p.shape[1],p.shape[2])) 37 | show[np.where(one > 0.5)] = 1 38 | show[np.where(two > 0.5)] = 2 39 | 40 | return show 41 | 42 | 43 | num_data_sequences = 20 44 | 45 | 46 | bev_dirs = ['./data/images'+str(i)+'/train/depth' for i in range(num_data_sequences)] 47 | front_dirs = ['./data/images'+str(i)+'/train/rgb/front' for i in range(num_data_sequences)] 48 | left_dirs = ['./data/images'+str(i)+'/train/rgb/left' for i in range(num_data_sequences)] 49 | rear_dirs = ['./data/images'+str(i)+'/train/rgb/rear' for i in range(num_data_sequences)] 50 | right_dirs = ['./data/images'+str(i)+'/train/rgb/right' for i in range(num_data_sequences)] 51 | config_dirs = ['./data/images'+str(i)+'/train/cameraconfig' for i in range(num_data_sequences)] 52 | 53 | seq_len = 3 54 | 55 | image_lists = [] 56 | datalengths = [] 57 | 58 | for bev_dir in bev_dirs: 59 | names = fnmatch.filter(os.listdir(bev_dir), '*.png') 60 | 61 | files = [] 62 | for name in names: 63 | files.append(os.path.splitext(ntpath.basename(name))[0]) 64 | 65 | filelist = sorted(files,key=int) 66 | 67 | image_lists.append([f + '.png' for f in filelist]) 68 | datalengths.append(len(names)) 69 | 70 | vtransforms = T.Compose([T.Resize((540,640))]) #,GammaCorrectionTransform()) 71 | 72 | transforms = T.Compose([T.Resize((540,640)), T.ColorJitter(brightness=.3, hue=.3, contrast = 0.2, saturation = 0.3), T.Lambda(gamma_correction) ]) #,GammaCorrectionTransform()) 73 | 74 | target_transforms = T.Compose([T.Grayscale(num_output_channels=1)]) 75 | 76 | training_data = UnityImageDataset(bev_dirs = bev_dirs,front_dirs=front_dirs,left_dirs=left_dirs,rear_dirs=rear_dirs,right_dirs=right_dirs,image_lists=image_lists,config_dirs=config_dirs,seq_len= seq_len,datalengths = datalengths,num_data_sequences = num_data_sequences, transform = transforms,target_transform=target_transforms) 77 | 78 | 79 | vbev_dirs = ['./data/images'+str(i)+'/val/depth' for i in range(num_data_sequences)] 80 | vfront_dirs = ['./data/images'+str(i)+'/val/rgb/front' for i in range(num_data_sequences)] 81 | vleft_dirs = ['./data/images'+str(i)+'/val/rgb/left' for i in range(num_data_sequences)] 82 | vrear_dirs = ['./data/images'+str(i)+'/val/rgb/rear' for i in range(num_data_sequences)] 83 | vright_dirs = ['./data/images'+str(i)+'/val/rgb/right' for i in range(num_data_sequences)] 84 | vconfig_dirs = ['./data/images'+str(i)+'/val/cameraconfig' for i in range(num_data_sequences)] 85 | 86 | vimage_lists = [] 87 | vdatalengths = [] 88 | 89 | for vbev_dir in vbev_dirs: 90 | vnames = fnmatch.filter(os.listdir(vbev_dir), '*.png') 91 | 92 | vfiles = [] 93 | for vname in vnames: 
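        # keep only the filename stem so the validation frames can be sorted numerically below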
94 | vfiles.append(os.path.splitext(ntpath.basename(vname))[0]) 95 | 96 | vfilelist = sorted(vfiles,key=int) 97 | 98 | vimage_lists.append([f + '.png' for f in vfilelist]) 99 | vdatalengths.append(len(vnames)) 100 | 101 | val_data = UnityImageDataset(bev_dirs = vbev_dirs,front_dirs=vfront_dirs,left_dirs=vleft_dirs,rear_dirs=vrear_dirs,right_dirs=vright_dirs,image_lists=vimage_lists,config_dirs=vconfig_dirs,seq_len= seq_len,datalengths = vdatalengths, num_data_sequences = num_data_sequences, transform = vtransforms,target_transform=target_transforms) 102 | 103 | 104 | train_dataloader = DataLoader(training_data, batch_size = 1, shuffle=True) 105 | val_dataloader = DataLoader(val_data, batch_size=1, shuffle=False) 106 | 107 | 108 | 109 | random_seed = 1 # or any of your favorite number 110 | torch.manual_seed(random_seed) 111 | torch.cuda.manual_seed(random_seed) 112 | torch.backends.cudnn.deterministic = True 113 | torch.backends.cudnn.benchmark = False 114 | #model = FisheyeBEVFormer().to(device) 115 | 116 | 117 | 118 | 119 | def train(epoch,model,train_dataloader,seq_len, criterion,optimizer): 120 | 121 | model.train() 122 | 123 | train_loss = 0 124 | num_batches = len(train_dataloader) 125 | # with torch.autograd.detect_anomaly(): 126 | for batch_idx, (dataseq, targetseq) in enumerate(train_dataloader): 127 | 128 | inp_img_seq, can_buses_seq = dataseq 129 | #print(len(inp_img_seq)) 130 | bs = targetseq[0].shape[0] 131 | for ctr in range(seq_len): 132 | front = inp_img_seq[ctr][0] 133 | left = inp_img_seq[ctr][1] 134 | rear = inp_img_seq[ctr][2] 135 | right = inp_img_seq[ctr][3] 136 | 137 | target = targetseq[ctr] 138 | 139 | front = front.to(device) 140 | left = left.to(device) 141 | rear = rear.to(device) 142 | right = right.to(device) 143 | 144 | 145 | target = torch.squeeze(target,dim=1) 146 | idx2 = torch.where(target <= 0.35) 147 | idx0 = torch.where(target >= 0.69) 148 | target[target >= 0] = 1 149 | target[idx2] = 2 150 | target[idx0] = 0 151 | 152 | 153 | target = target.to(torch.int64).to(device) 154 | can_buses = can_buses_seq[ctr] 155 | 156 | if ctr == 0: 157 | prev_bev = None 158 | 159 | optimizer.zero_grad() 160 | output, prev_bev_org = model(front,left,rear,right, list(can_buses),prev_bev) 161 | 162 | prev_bev = prev_bev_org.detach() 163 | 164 | loss = criterion(output, target) 165 | 166 | loss.backward() 167 | optimizer.step() 168 | 169 | train_loss += loss.data 170 | print('Train Epoch: {} [{}/{} Srno: {} ({:.0f}%)]\tLoss: {:.6f}'.format( 171 | epoch, (batch_idx + 1) * 1, len(train_dataloader), ctr, 172 | 100. 
* (batch_idx + 1) / len(train_dataloader), loss.data)) 173 | 174 | train_loss/= num_batches*seq_len 175 | 176 | print(f"Train Error: Avg loss: {train_loss:>8f} \n") 177 | 178 | return train_loss 179 | 180 | 181 | def val(epoch,test_dataloader,seq_len,model,loss_fn): 182 | num_batches = len(test_dataloader) 183 | model.eval() 184 | test_loss = 0 185 | with torch.no_grad(): 186 | for batch_idx, (dataseq, targetseq) in enumerate(test_dataloader): 187 | inp_img_seq, can_buses_seq = dataseq 188 | bs = targetseq[0].shape[0] 189 | for ctr in range(seq_len): 190 | front = inp_img_seq[ctr][0] 191 | left = inp_img_seq[ctr][1] 192 | rear = inp_img_seq[ctr][2] 193 | right = inp_img_seq[ctr][3] 194 | 195 | target = targetseq[ctr] 196 | 197 | front = front.to(device) 198 | left = left.to(device) 199 | rear = rear.to(device) 200 | right = right.to(device) 201 | 202 | target = torch.squeeze(target,dim=1) 203 | #print(torch.sort(target)) 204 | #target = (target > 0.5).float().to(device) 205 | idx2 = torch.where(target <= 0.35) 206 | idx0 = torch.where(target >= 0.69) 207 | target[target >= 0] = 1 208 | target[idx2] = 2 209 | target[idx0] = 0 210 | 211 | target = target.to(torch.int64).to(device) 212 | can_buses = can_buses_seq[ctr] 213 | 214 | if ctr == 0: 215 | prev_bev = None 216 | pred, prev_bev = model(front,left,rear,right, list(can_buses),prev_bev) 217 | test_loss += loss_fn(pred, target).item() 218 | 219 | test_loss/= num_batches*seq_len 220 | 221 | print(f"Test Error: Avg loss: {test_loss:>8f} \n") 222 | 223 | return test_loss 224 | 225 | 226 | device = "cuda" if torch.cuda.is_available() else "cpu" 227 | #device = "cpu" 228 | print(f"Using {device} device") 229 | 230 | model = FisheyeBEVFormer().to(device) 231 | 232 | optimizer = optim.AdamW(model.parameters(), lr=0.0002) 233 | criterionCE = nn.CrossEntropyLoss() 234 | criterionFocal = CrossEntropyFocalLoss() 235 | 236 | n_epochs = 5 237 | 238 | PATH = './f2bev_conv_st_height.pt' 239 | min_val_loss = np.inf 240 | 241 | for epoch in range(n_epochs): 242 | train_loss = train(epoch,model,train_dataloader,seq_len,criterionFocal,optimizer) 243 | val_loss = val(epoch,val_dataloader,seq_len,model,criterionFocal) 244 | 245 | 246 | if val_loss < min_val_loss: 247 | min_val_loss = val_loss 248 | torch.save({'epoch' : epoch, 'model_state_dict': model.state_dict(), 'optimizer_state_dict' : optimizer.state_dict(), 'loss': val_loss}, PATH) 249 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/train_f2bev_attn_st_height.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue Dec 13 16:44:04 2022 5 | 6 | @author: Ekta 7 | """ 8 | 9 | import os 10 | os.environ["CUDA_VISIBLE_DEVICES"]="1" 11 | import torch,ntpath,random 12 | from torch import nn, optim 13 | import numpy as np 14 | from loader_single_task import UnityImageDataset 15 | 16 | import fnmatch 17 | from torch.utils.data import DataLoader 18 | from model_f2bev_attn_st_height import FisheyeBEVFormer 19 | import torchvision.transforms as T 20 | from losses.focal import BinaryFocalLoss, CrossEntropyFocalLoss #FocalLoss 21 | 22 | def numpy_sigmoid(x): 23 | return 1/(1 + np.exp(-x)) 24 | 25 | 26 | 27 | def gamma_correction(image): 28 | gamma = random.choice([0.8,1.0,1.2,1.4]) 29 | return T.functional.adjust_gamma(image,gamma,gain = 1) 30 | 31 | def plt_pred_image(pred): 32 | p = pred.detach().cpu().numpy() 33 | one = numpy_sigmoid(p[1,:,:]) 34 | two = 
numpy_sigmoid(p[2,:,:]) 35 | 36 | show = np.zeros((p.shape[1],p.shape[2])) 37 | show[np.where(one > 0.5)] = 1 38 | show[np.where(two > 0.5)] = 2 39 | 40 | return show 41 | 42 | 43 | num_data_sequences = 20 44 | 45 | 46 | 47 | bev_dirs = ['./data/images'+str(i)+'/train/depth' for i in range(num_data_sequences)] 48 | front_dirs = ['./data/images'+str(i)+'/train/rgb/front' for i in range(num_data_sequences)] 49 | left_dirs = ['./data/images'+str(i)+'/train/rgb/left' for i in range(num_data_sequences)] 50 | rear_dirs = ['./data/images'+str(i)+'/train/rgb/rear' for i in range(num_data_sequences)] 51 | right_dirs = ['./data/images'+str(i)+'/train/rgb/right' for i in range(num_data_sequences)] 52 | config_dirs = ['./data/images'+str(i)+'/train/cameraconfig' for i in range(num_data_sequences)] 53 | 54 | seq_len = 3 55 | 56 | image_lists = [] 57 | datalengths = [] 58 | 59 | for bev_dir in bev_dirs: 60 | names = fnmatch.filter(os.listdir(bev_dir), '*.png') 61 | 62 | files = [] 63 | for name in names: 64 | files.append(os.path.splitext(ntpath.basename(name))[0]) 65 | 66 | filelist = sorted(files,key=int) 67 | 68 | image_lists.append([f + '.png' for f in filelist]) 69 | datalengths.append(len(names)) 70 | 71 | vtransforms = T.Compose([T.Resize((540,640))]) #,GammaCorrectionTransform()) 72 | 73 | transforms = T.Compose([T.Resize((540,640)), T.ColorJitter(brightness=.3, hue=.3, contrast = 0.2, saturation = 0.3), T.Lambda(gamma_correction) ]) #,GammaCorrectionTransform()) 74 | target_transforms = T.Compose([T.Resize((50,50)),T.Grayscale(num_output_channels=1)]) 75 | 76 | 77 | training_data = UnityImageDataset(bev_dirs = bev_dirs,front_dirs=front_dirs,left_dirs=left_dirs,rear_dirs=rear_dirs,right_dirs=right_dirs,image_lists=image_lists,config_dirs=config_dirs,seq_len= seq_len,datalengths = datalengths,num_data_sequences = num_data_sequences, transform = transforms,target_transform=target_transforms) 78 | 79 | 80 | 81 | vbev_dirs = ['./data/images'+str(i)+'/val/depth' for i in range(num_data_sequences)] 82 | vfront_dirs = ['./data/images'+str(i)+'/val/rgb/front' for i in range(num_data_sequences)] 83 | vleft_dirs = ['./data/images'+str(i)+'/val/rgb/left' for i in range(num_data_sequences)] 84 | vrear_dirs = ['./data/images'+str(i)+'/val/rgb/rear' for i in range(num_data_sequences)] 85 | vright_dirs = ['./data/images'+str(i)+'/val/rgb/right' for i in range(num_data_sequences)] 86 | vconfig_dirs = ['./data/images'+str(i)+'/val/cameraconfig' for i in range(num_data_sequences)] 87 | 88 | vimage_lists = [] 89 | vdatalengths = [] 90 | 91 | for vbev_dir in vbev_dirs: 92 | vnames = fnmatch.filter(os.listdir(vbev_dir), '*.png') 93 | 94 | vfiles = [] 95 | for vname in vnames: 96 | vfiles.append(os.path.splitext(ntpath.basename(vname))[0]) 97 | 98 | vfilelist = sorted(vfiles,key=int) 99 | 100 | vimage_lists.append([f + '.png' for f in vfilelist]) 101 | vdatalengths.append(len(vnames)) 102 | 103 | val_data = UnityImageDataset(bev_dirs = vbev_dirs,front_dirs=vfront_dirs,left_dirs=vleft_dirs,rear_dirs=vrear_dirs,right_dirs=vright_dirs,image_lists=vimage_lists,config_dirs=vconfig_dirs,seq_len= seq_len,datalengths = vdatalengths, num_data_sequences = num_data_sequences, transform = vtransforms,target_transform=target_transforms) 104 | 105 | 106 | train_dataloader = DataLoader(training_data, batch_size = 2, shuffle=True) 107 | val_dataloader = DataLoader(val_data, batch_size=2, shuffle=False) 108 | 109 | 110 | 111 | random_seed = 1 # or any of your favorite number 112 | torch.manual_seed(random_seed) 113 | 
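# the CUDA seed and the cuDNN deterministic/benchmark flags below are pinned together with the CPU seed above so that repeated training runs are reproducible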
torch.cuda.manual_seed(random_seed) 114 | torch.backends.cudnn.deterministic = True 115 | torch.backends.cudnn.benchmark = False 116 | 117 | 118 | 119 | 120 | def train(epoch,model,train_dataloader,seq_len, criterion,optimizer): 121 | 122 | model.train() 123 | 124 | train_loss = 0 125 | num_batches = len(train_dataloader) 126 | # with torch.autograd.detect_anomaly(): 127 | for batch_idx, (dataseq, targetseq) in enumerate(train_dataloader): 128 | 129 | inp_img_seq, can_buses_seq = dataseq 130 | bs = targetseq[0].shape[0] 131 | for ctr in range(seq_len): 132 | front = inp_img_seq[ctr][0] 133 | left = inp_img_seq[ctr][1] 134 | rear = inp_img_seq[ctr][2] 135 | right = inp_img_seq[ctr][3] 136 | 137 | target = targetseq[ctr] 138 | 139 | front = front.to(device) 140 | left = left.to(device) 141 | rear = rear.to(device) 142 | right = right.to(device) 143 | 144 | 145 | 146 | target = torch.squeeze(target,dim=1) 147 | idx2 = torch.where(target <= 0.35) 148 | idx0 = torch.where(target >= 0.69) 149 | target[target >= 0] = 1 150 | target[idx2] = 2 151 | target[idx0] = 0 152 | 153 | target = target.to(torch.int64).to(device) 154 | can_buses = can_buses_seq[ctr] 155 | 156 | if ctr == 0: 157 | prev_bev = None 158 | 159 | optimizer.zero_grad() 160 | output, inter_outputs, prev_bev_org = model(front,left,rear,right, list(can_buses),prev_bev) 161 | 162 | prev_bev = prev_bev_org.detach() 163 | 164 | loss = criterion(output, target) 165 | 166 | for inter_stage in inter_outputs: 167 | loss += criterion(inter_stage,target) 168 | 169 | loss.backward() 170 | optimizer.step() 171 | 172 | train_loss += loss.data 173 | print('Train Epoch: {} [{}/{} Srno: {} ({:.0f}%)]\tLoss: {:.6f}'.format( 174 | epoch, (batch_idx + 1) * 1, len(train_dataloader), ctr, 175 | 100. * (batch_idx + 1) / len(train_dataloader), loss.data)) 176 | 177 | train_loss/= num_batches*seq_len 178 | 179 | print(f"Train Error: Avg loss: {train_loss:>8f} \n") 180 | 181 | return train_loss 182 | 183 | 184 | def val(epoch,test_dataloader,seq_len,model,loss_fn): 185 | num_batches = len(test_dataloader) 186 | model.eval() 187 | test_loss = 0 188 | with torch.no_grad(): 189 | for batch_idx, (dataseq, targetseq) in enumerate(test_dataloader): 190 | inp_img_seq, can_buses_seq = dataseq 191 | bs = targetseq[0].shape[0] 192 | for ctr in range(seq_len): 193 | front = inp_img_seq[ctr][0] 194 | left = inp_img_seq[ctr][1] 195 | rear = inp_img_seq[ctr][2] 196 | right = inp_img_seq[ctr][3] 197 | 198 | target = targetseq[ctr] 199 | 200 | front = front.to(device) 201 | left = left.to(device) 202 | rear = rear.to(device) 203 | right = right.to(device) 204 | 205 | target = torch.squeeze(target,dim=1) 206 | idx2 = torch.where(target <= 0.35) 207 | idx0 = torch.where(target >= 0.69) 208 | target[target >= 0] = 1 209 | target[idx2] = 2 210 | target[idx0] = 0 211 | 212 | target = target.to(torch.int64).to(device) 213 | can_buses = can_buses_seq[ctr] 214 | 215 | if ctr == 0: 216 | prev_bev = None 217 | pred, inter_pred, prev_bev = model(front,left,rear,right, list(can_buses),prev_bev) 218 | 219 | test_loss += loss_fn(pred, target).item() 220 | 221 | test_loss/= num_batches*seq_len 222 | 223 | print(f"Test Error: Avg loss: {test_loss:>8f} \n") 224 | 225 | return test_loss 226 | 227 | 228 | device = "cuda" if torch.cuda.is_available() else "cpu" 229 | print(f"Using {device} device") 230 | 231 | model = FisheyeBEVFormer().to(device) 232 | 233 | optimizer = optim.AdamW(model.parameters(), lr=0.0002) 234 | criterionCE = nn.CrossEntropyLoss() 235 | criterionFocal = 
CrossEntropyFocalLoss() 236 | 237 | n_epochs = 1 238 | 239 | PATH = './f2bev_attn_st_height.pt' 240 | min_val_loss = np.inf 241 | min_epoch = n_epochs 242 | for epoch in range(n_epochs): 243 | train_loss = train(epoch,model,train_dataloader,seq_len,criterionCE,optimizer) 244 | val_loss = val(epoch,val_dataloader,seq_len,model,criterionCE) 245 | 246 | if val_loss < min_val_loss: 247 | min_epoch = epoch 248 | min_val_loss = val_loss 249 | torch.save({'epoch' : epoch, 'model_state_dict': model.state_dict(), 'optimizer_state_dict' : optimizer.state_dict(), 'loss': val_loss}, PATH) 250 | else: 251 | if epoch > min_epoch: 252 | break 253 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/train_f2bev_conv_st_seg.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue Dec 13 16:44:04 2022 5 | 6 | @author: Ekta 7 | """ 8 | 9 | 10 | import torch,ntpath,random 11 | from torch import nn, optim 12 | import numpy as np 13 | from loader_single_task import UnityImageDataset 14 | 15 | import os,fnmatch 16 | from torch.utils.data import DataLoader 17 | from model_f2bev_conv_st_seg import FisheyeBEVFormer 18 | import torchvision.transforms as T 19 | from losses.focal import BinaryFocalLoss, CrossEntropyFocalLoss #FocalLoss 20 | 21 | def numpy_sigmoid(x): 22 | return 1/(1 + np.exp(-x)) 23 | 24 | 25 | 26 | def gamma_correction(image): 27 | gamma = random.choice([0.8,1.0,1.2,1.4]) 28 | return T.functional.adjust_gamma(image,gamma,gain = 1) 29 | 30 | def plt_pred_image(pred): 31 | p = pred.detach().cpu().numpy() 32 | #zero = numpy_sigmoid(p[0,:,:]) 33 | one = numpy_sigmoid(p[1,:,:]) 34 | two = numpy_sigmoid(p[2,:,:]) 35 | 36 | show = np.zeros((p.shape[1],p.shape[2])) 37 | show[np.where(one > 0.5)] = 1 38 | show[np.where(two > 0.5)] = 2 39 | 40 | return show 41 | 42 | 43 | num_data_sequences = 20 44 | 45 | 46 | 47 | bev_dirs = ['./data/images'+str(i)+'/train/seg/bev' for i in range(num_data_sequences)] 48 | front_dirs = ['./data/images'+str(i)+'/train/rgb/front' for i in range(num_data_sequences)] 49 | left_dirs = ['./data/images'+str(i)+'/train/rgb/left' for i in range(num_data_sequences)] 50 | rear_dirs = ['./data/images'+str(i)+'/train/rgb/rear' for i in range(num_data_sequences)] 51 | right_dirs = ['./data/images'+str(i)+'/train/rgb/right' for i in range(num_data_sequences)] 52 | config_dirs = ['./data/images'+str(i)+'/train/cameraconfig' for i in range(num_data_sequences)] 53 | 54 | seq_len = 3 55 | 56 | image_lists = [] 57 | datalengths = [] 58 | 59 | for bev_dir in bev_dirs: 60 | names = fnmatch.filter(os.listdir(bev_dir), '*.png') 61 | 62 | files = [] 63 | for name in names: 64 | files.append(os.path.splitext(ntpath.basename(name))[0]) 65 | 66 | filelist = sorted(files,key=int) 67 | 68 | image_lists.append([f + '.png' for f in filelist]) 69 | datalengths.append(len(names)) 70 | 71 | vtransforms = T.Compose([T.Resize((540,640))]) #,GammaCorrectionTransform()) 72 | 73 | transforms = T.Compose([T.Resize((540,640)), T.ColorJitter(brightness=.3, hue=.3, contrast = 0.2, saturation = 0.3), T.Lambda(gamma_correction) ]) #,GammaCorrectionTransform()) 74 | 75 | target_transforms = T.Compose([T.Grayscale(num_output_channels=1)]) 76 | 77 | training_data = UnityImageDataset(bev_dirs = bev_dirs,front_dirs=front_dirs,left_dirs=left_dirs,rear_dirs=rear_dirs,right_dirs=right_dirs,image_lists=image_lists,config_dirs=config_dirs,seq_len= seq_len,datalengths = 
datalengths,num_data_sequences = num_data_sequences, transform = transforms,target_transform=target_transforms) 78 | 79 | 80 | 81 | 82 | 83 | 84 | vbev_dirs = ['./data/images'+str(i)+'/val/seg/bev' for i in range(num_data_sequences)] 85 | vfront_dirs = ['./data/images'+str(i)+'/val/rgb/front' for i in range(num_data_sequences)] 86 | vleft_dirs = ['./data/images'+str(i)+'/val/rgb/left' for i in range(num_data_sequences)] 87 | vrear_dirs = ['./data/images'+str(i)+'/val/rgb/rear' for i in range(num_data_sequences)] 88 | vright_dirs = ['./data/images'+str(i)+'/val/rgb/right' for i in range(num_data_sequences)] 89 | vconfig_dirs = ['./data/images'+str(i)+'/val/cameraconfig' for i in range(num_data_sequences)] 90 | vimage_lists = [] 91 | vdatalengths = [] 92 | 93 | for vbev_dir in vbev_dirs: 94 | vnames = fnmatch.filter(os.listdir(vbev_dir), '*.png') 95 | 96 | vfiles = [] 97 | for vname in vnames: 98 | vfiles.append(os.path.splitext(ntpath.basename(vname))[0]) 99 | 100 | vfilelist = sorted(vfiles,key=int) 101 | 102 | vimage_lists.append([f + '.png' for f in vfilelist]) 103 | vdatalengths.append(len(vnames)) 104 | 105 | val_data = UnityImageDataset(bev_dirs = vbev_dirs,front_dirs=vfront_dirs,left_dirs=vleft_dirs,rear_dirs=vrear_dirs,right_dirs=vright_dirs,image_lists=vimage_lists,config_dirs=vconfig_dirs,seq_len= seq_len,datalengths = vdatalengths, num_data_sequences = num_data_sequences, transform = vtransforms,target_transform=target_transforms) 106 | 107 | 108 | train_dataloader = DataLoader(training_data, batch_size = 1, shuffle=True) 109 | val_dataloader = DataLoader(val_data, batch_size=1, shuffle=False) 110 | 111 | 112 | 113 | random_seed = 1 # or any of your favorite number 114 | torch.manual_seed(random_seed) 115 | torch.cuda.manual_seed(random_seed) 116 | torch.backends.cudnn.deterministic = True 117 | torch.backends.cudnn.benchmark = False 118 | 119 | 120 | 121 | 122 | def train(epoch,model,train_dataloader,seq_len, criterion,optimizer): 123 | 124 | model.train() 125 | 126 | train_loss = 0 127 | num_batches = len(train_dataloader) 128 | # with torch.autograd.detect_anomaly(): 129 | for batch_idx, (dataseq, targetseq) in enumerate(train_dataloader): 130 | 131 | inp_img_seq, can_buses_seq = dataseq 132 | #print(len(inp_img_seq)) 133 | bs = targetseq[0].shape[0] 134 | for ctr in range(seq_len): 135 | front = inp_img_seq[ctr][0] 136 | left = inp_img_seq[ctr][1] 137 | rear = inp_img_seq[ctr][2] 138 | right = inp_img_seq[ctr][3] 139 | 140 | target = targetseq[ctr] 141 | 142 | front = front.to(device) 143 | left = left.to(device) 144 | rear = rear.to(device) 145 | right = right.to(device) 146 | 147 | 148 | target = torch.squeeze(target,dim=1) 149 | idx0 = torch.where(target <= 0.02) 150 | target[idx0] = 10 151 | idx1 = torch.where(target <= 0.07) 152 | target[idx1] = 11 153 | idx2 = torch.where(target <= 0.22) 154 | target[idx2] = 12 155 | idx3 = torch.where(target <= 0.60) 156 | target[idx3] = 13 157 | idx4 = torch.where(target <= 1) 158 | target[idx4] = 14 159 | target = target - 10 160 | target = target.to(torch.int64).to(device) 161 | 162 | can_buses = can_buses_seq[ctr] 163 | 164 | if ctr == 0: 165 | prev_bev = None 166 | 167 | optimizer.zero_grad() 168 | output, prev_bev_org = model(front,left,rear,right, list(can_buses),prev_bev) 169 | 170 | prev_bev = prev_bev_org.detach() 171 | 172 | loss = criterion(output, target) 173 | 174 | loss.backward() 175 | optimizer.step() 176 | 177 | train_loss += loss.data 178 | print('Train Epoch: {} [{}/{} Srno: {} ({:.0f}%)]\tLoss: {:.6f}'.format( 179 
| epoch, (batch_idx + 1) * 1, len(train_dataloader), ctr, 180 | 100. * (batch_idx + 1) / len(train_dataloader), loss.data)) 181 | 182 | train_loss/= num_batches*seq_len 183 | 184 | print(f"Train Error: Avg loss: {train_loss:>8f} \n") 185 | 186 | return train_loss 187 | 188 | 189 | def val(epoch,test_dataloader,seq_len,model,loss_fn): 190 | num_batches = len(test_dataloader) 191 | model.eval() 192 | test_loss = 0 193 | with torch.no_grad(): 194 | for batch_idx, (dataseq, targetseq) in enumerate(test_dataloader): 195 | inp_img_seq, can_buses_seq = dataseq 196 | bs = targetseq[0].shape[0] 197 | for ctr in range(seq_len): 198 | front = inp_img_seq[ctr][0] 199 | left = inp_img_seq[ctr][1] 200 | rear = inp_img_seq[ctr][2] 201 | right = inp_img_seq[ctr][3] 202 | 203 | target = targetseq[ctr] 204 | 205 | front = front.to(device) 206 | left = left.to(device) 207 | rear = rear.to(device) 208 | right = right.to(device) 209 | 210 | target = torch.squeeze(target,dim=1) 211 | idx0 = torch.where(target <= 0.02) 212 | target[idx0] = 10 213 | idx1 = torch.where(target <= 0.07) 214 | target[idx1] = 11 215 | idx2 = torch.where(target <= 0.22) 216 | target[idx2] = 12 217 | idx3 = torch.where(target <= 0.60) 218 | target[idx3] = 13 219 | idx4 = torch.where(target <= 1) 220 | target[idx4] = 14 221 | target = target - 10 222 | target = target.to(torch.int64).to(device) 223 | 224 | 225 | 226 | can_buses = can_buses_seq[ctr] 227 | 228 | if ctr == 0: 229 | prev_bev = None 230 | pred, prev_bev = model(front,left,rear,right, list(can_buses),prev_bev) 231 | 232 | 233 | test_loss += loss_fn(pred, target).item() 234 | 235 | test_loss/= num_batches*seq_len 236 | 237 | print(f"Test Error: Avg loss: {test_loss:>8f} \n") 238 | 239 | return test_loss 240 | 241 | 242 | device = "cuda" if torch.cuda.is_available() else "cpu" 243 | #device = "cpu" 244 | print(f"Using {device} device") 245 | 246 | model = FisheyeBEVFormer().to(device) 247 | 248 | optimizer = optim.AdamW(model.parameters(), lr=0.0002) 249 | criterionCE = nn.CrossEntropyLoss() 250 | criterionFocal = CrossEntropyFocalLoss() 251 | 252 | n_epochs = 5 253 | 254 | PATH = './f2bev_conv_st_seg.pt' 255 | min_val_loss = np.inf 256 | 257 | for epoch in range(n_epochs): 258 | train_loss = train(epoch,model,train_dataloader,seq_len,criterionFocal,optimizer) 259 | val_loss = val(epoch,val_dataloader,seq_len,model,criterionFocal) 260 | 261 | 262 | if val_loss < min_val_loss: 263 | min_val_loss = val_loss 264 | torch.save({'epoch' : epoch, 'model_state_dict': model.state_dict(), 'optimizer_state_dict' : optimizer.state_dict(), 'loss': val_loss}, PATH) 265 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/bblocks/deformable_attention.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Aug 25 11:09:49 2022 5 | 6 | @author: Ekta 7 | """ 8 | 9 | # ------------------------------------------------------------------------------------------------# ------------------------------------------------------------------------------------------------ 10 | # Modified from https://raw.githubusercontent.com/fundamentalvision/Deformable-DETR/ 11 | # ------------------------------------------------------------------------------------------------ 12 | 13 | from __future__ import absolute_import 14 | from __future__ import print_function 15 | from __future__ import division 16 | 17 | import warnings 18 | import math 19 | 20 | import torch 
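# MSDeformAttn3D below adapts the multi-scale deformable attention module of Deformable-DETR;
# its forward() extends the reference-point handling so that each BEV query can carry several
# Z anchors (reference points projected at different heights), which is what the spatial
# cross-attention in this repository feeds it.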
21 | from torch import nn 22 | import torch.nn.functional as F 23 | from torch.nn.init import xavier_uniform_, constant_ 24 | from bblocks.deformable_attention_function import ms_deform_attn_core_pytorch #,MSDeformAttnFunction 25 | 26 | #from bblocks.multi_scale_deformable_attn_function import MultiScaleDeformableAttnFunction_fp32 27 | 28 | 29 | def _is_power_of_2(n): 30 | if (not isinstance(n, int)) or (n < 0): 31 | raise ValueError("invalid input for _is_power_of_2: {} (type: {})".format(n, type(n))) 32 | return (n & (n-1) == 0) and n != 0 #checked and same 33 | 34 | 35 | class MSDeformAttn3D(nn.Module): 36 | def __init__(self, d_model=256, n_levels=4, n_heads=8, n_points=6, batch_first= True): 37 | """ 38 | Multi-Scale Deformable Attention Module 39 | :param d_model hidden dimension 40 | :param n_levels number of feature levels 41 | :param n_heads number of attention heads 42 | :param n_points number of sampling points per attention head per feature level 43 | """ 44 | super().__init__() 45 | if d_model % n_heads != 0: 46 | raise ValueError('d_model must be divisible by n_heads, but got {} and {}'.format(d_model, n_heads)) 47 | _d_per_head = d_model // n_heads 48 | # you'd better set _d_per_head to a power of 2 which is more efficient in our CUDA implementation 49 | if not _is_power_of_2(_d_per_head): 50 | warnings.warn("You'd better set d_model in MSDeformAttn to make the dimension of each attention head a power of 2 " 51 | "which is more efficient in our CUDA implementation.") 52 | 53 | self.im2col_step = 64 54 | 55 | self.d_model = d_model #bevformer calls this embed_dims 56 | self.n_levels = n_levels 57 | self.n_heads = n_heads 58 | self.n_points = n_points 59 | 60 | self.sampling_offsets = nn.Linear(d_model, n_heads * n_levels * n_points * 2) 61 | self.attention_weights = nn.Linear(d_model, n_heads * n_levels * n_points) 62 | self.value_proj = nn.Linear(d_model, d_model) 63 | #self.output_proj = nn.Linear(d_model, d_model) ##TODO: this is new in my implementation 64 | self.batch_first = True 65 | self._reset_parameters() 66 | 67 | def _reset_parameters(self): 68 | constant_(self.sampling_offsets.weight.data, 0.) 69 | thetas = torch.arange(self.n_heads, dtype=torch.float32) * (2.0 * math.pi / self.n_heads) 70 | grid_init = torch.stack([thetas.cos(), thetas.sin()], -1) 71 | grid_init = (grid_init / grid_init.abs().max(-1, keepdim=True)[0]).view(self.n_heads, 1, 1, 2).repeat(1, self.n_levels, self.n_points, 1) 72 | for i in range(self.n_points): 73 | grid_init[:, :, i, :] *= i + 1 74 | with torch.no_grad(): 75 | self.sampling_offsets.bias = nn.Parameter(grid_init.view(-1)) 76 | constant_(self.attention_weights.weight.data, 0.) 77 | constant_(self.attention_weights.bias.data, 0.) 78 | xavier_uniform_(self.value_proj.weight.data) 79 | constant_(self.value_proj.bias.data, 0.) 80 | #xavier_uniform_(self.output_proj.weight.data) 81 | #constant_(self.output_proj.bias.data, 0.) 
##these are all mostly doing the same thing; calculated guess 82 | 83 | 84 | def forward(self, query, query_pos, reference_points, input_flatten, input_spatial_shapes, input_level_start_index, input_padding_mask=None): 85 | """ 86 | :param query (N, Length_{query}, C) 87 | :param reference_points (N, Length_{query}, n_levels, 2), range in [0, 1], top-left (0,0), bottom-right (1, 1), including padding area 88 | or (N, Length_{query}, n_levels, 4), add additional (w, h) to form reference boxes 89 | :param input_flatten (N, \sum_{l=0}^{L-1} H_l \cdot W_l, C) 90 | :param input_spatial_shapes (n_levels, 2), [(H_0, W_0), (H_1, W_1), ..., (H_{L-1}, W_{L-1})] 91 | :param input_level_start_index (n_levels, ), [0, H_0*W_0, H_0*W_0+H_1*W_1, H_0*W_0+H_1*W_1+H_2*W_2, ..., H_0*W_0+H_1*W_1+...+H_{L-1}*W_{L-1}] 92 | :param input_padding_mask (N, \sum_{l=0}^{L-1} H_l \cdot W_l), True for padding elements, False for non-padding elements 93 | 94 | :return output (N, Length_{query}, C) 95 | """ 96 | 97 | #######bev former has this 98 | 99 | if input_flatten is None: 100 | input_flatten = query 101 | # if query_pos is not None: 102 | # query = query + query_pos ##I think BEVformer had this but it's an error 103 | 104 | 105 | if not self.batch_first: 106 | # change to (bs, num_query ,embed_dims) 107 | query = query.permute(1, 0, 2) 108 | input_flatten = input_flatten.permute(1, 0, 2) 109 | 110 | ################## 111 | N, Len_q, _ = query.shape ##capital N is batch size, i.e. bs in BEVformer 112 | N, Len_in, _ = input_flatten.shape 113 | assert (input_spatial_shapes[:, 0] * input_spatial_shapes[:, 1]).sum() == Len_in 114 | 115 | value = self.value_proj(input_flatten) 116 | if input_padding_mask is not None: ##TODO: Figure out the deal with masks 117 | value = value.masked_fill(input_padding_mask[..., None], float(0)) 118 | value = value.view(N, Len_in, self.n_heads, self.d_model // self.n_heads) 119 | sampling_offsets = self.sampling_offsets(query).view(N, Len_q, self.n_heads, self.n_levels, self.n_points, 2) 120 | attention_weights = self.attention_weights(query).view(N, Len_q, self.n_heads, self.n_levels * self.n_points) 121 | attention_weights = F.softmax(attention_weights, -1).view(N, Len_q, self.n_heads, self.n_levels, self.n_points) 122 | 123 | #print(attention_weights.shape) 124 | 125 | if reference_points.shape[-1] == 2: 126 | 127 | """ 128 | For each BEV query, there are `num_Z_anchors` points in 3D space at different heights. 129 | After projecting, each BEV query has `num_Z_anchors` reference points in each 2D image. 130 | For each reference point, we sample `num_points` sampling points. 131 | For `num_Z_anchors` reference points, this gives `num_points * num_Z_anchors` sampling points overall. 
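Illustrative example (assumed values, not fixed by this module): with the default n_heads=8, n_levels=4, n_points=6 and num_Z_anchors=2, sampling_offsets of shape (N, Len_q, 8, 4, 6, 2) are regrouped below into (N, Len_q, 8, 4, 3, 2, 2), i.e. 3 offsets around each of the 2 projected Z-anchor reference points, added to the broadcast reference points, and then flattened back to (N, Len_q, 8, 4, 6, 2) before ms_deform_attn_core_pytorch is called.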
132 | """ 133 | 134 | offset_normalizer = torch.stack([input_spatial_shapes[..., 1], input_spatial_shapes[..., 0]], -1) 135 | #print(offset_normalizer) 136 | 137 | ##added by me : this is where sampling points are obtained in a manner that works with SCA 138 | N, Len_q, num_Z_anchors,xy = reference_points.shape 139 | #print(num_Z_anchors) 140 | reference_points = reference_points[:, :, None, None, None, :, :] 141 | #print(reference_points.shape) 142 | #print(sampling_offsets.shape) 143 | sampling_offsets = sampling_offsets / \ 144 | offset_normalizer[None, None, None, :, None, :] 145 | #print(sampling_offsets.shape) 146 | N, Len_q, n_heads,n_levels,num_all_points,xy = sampling_offsets.shape 147 | #print(num_all_points) 148 | #print(num_Z_anchors) 149 | sampling_offsets = sampling_offsets.view( 150 | N, Len_q, n_heads, n_levels, num_all_points // num_Z_anchors, num_Z_anchors, xy) 151 | 152 | 153 | sampling_locations = reference_points + sampling_offsets 154 | N, Len_q,n_heads,n_levels, n_points,num_Z_anchors,xy = sampling_locations.shape 155 | 156 | 157 | assert num_all_points == n_points*num_Z_anchors 158 | 159 | sampling_locations = sampling_locations.view(N,Len_q,n_heads,n_levels,num_all_points,xy) 160 | 161 | ## commented by me: this is original Deformable attention 162 | # sampling_locations = reference_points[:, :, None, :, None, :] \ 163 | # + sampling_offsets / offset_normalizer[None, None, None, :, None, :] 164 | elif reference_points.shape[-1] == 4: 165 | ## commented by me: this is original Deformable attention 166 | # sampling_locations = reference_points[:, :, None, :, None, :2] \ 167 | # + sampling_offsets / self.n_points * reference_points[:, :, None, :, None, 2:] * 0.5 168 | assert False 169 | else: 170 | raise ValueError( 171 | 'Last dim of reference_points must be 2 or 4, but get {} instead.'.format(reference_points.shape[-1])) 172 | #print(value.dtype, input_spatial_shapes.type,input_level_start_index.shape,sampling_locations.type,attention_weights.type,type(self.im2col_step)) 173 | # output = MultiScaleDeformableAttnFunction_fp32.apply( 174 | # value, input_spatial_shapes, input_level_start_index, sampling_locations, attention_weights, self.im2col_step) 175 | 176 | output = ms_deform_attn_core_pytorch(value,input_spatial_shapes,sampling_locations,attention_weights) 177 | 178 | if not self.batch_first: 179 | output = output.permute(1, 0, 2) 180 | #print(output.shape) 181 | #output = self.output_proj(output) ##TODO: BEVFormer does not have this 182 | return output 183 | -------------------------------------------------------------------------------- /F2BEV_code/F2BEV/train_f2bev_attn_st_seg.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Tue Dec 13 16:44:04 2022 5 | 6 | @author: Ekta 7 | """ 8 | 9 | import os 10 | os.environ["CUDA_VISIBLE_DEVICES"]="0" 11 | import torch,ntpath,random 12 | from torch import nn, optim 13 | import numpy as np 14 | from loader_single_task import UnityImageDataset 15 | 16 | 17 | import fnmatch 18 | from torch.utils.data import DataLoader 19 | from model_f2bev_attn_st_seg import FisheyeBEVFormer 20 | import torchvision.transforms as T 21 | from losses.focal import BinaryFocalLoss, CrossEntropyFocalLoss #FocalLoss 22 | 23 | def numpy_sigmoid(x): 24 | return 1/(1 + np.exp(-x)) 25 | 26 | 27 | 28 | def gamma_correction(image): 29 | gamma = random.choice([0.8,1.0,1.2,1.4]) 30 | return T.functional.adjust_gamma(image,gamma,gain = 1) 31 | 
32 | def plt_pred_image(pred): 33 | p = pred.detach().cpu().numpy() 34 | #zero = numpy_sigmoid(p[0,:,:]) 35 | one = numpy_sigmoid(p[1,:,:]) 36 | two = numpy_sigmoid(p[2,:,:]) 37 | 38 | show = np.zeros((p.shape[1],p.shape[2])) 39 | show[np.where(one > 0.5)] = 1 40 | show[np.where(two > 0.5)] = 2 41 | 42 | return show 43 | 44 | 45 | num_data_sequences = 20 46 | 47 | 48 | bev_dirs = ['./data/images'+str(i)+'/train/seg/bev' for i in range(num_data_sequences)] 49 | front_dirs = ['./data/images'+str(i)+'/train/rgb/front' for i in range(num_data_sequences)] 50 | left_dirs = ['./data/images'+str(i)+'/train/rgb/left' for i in range(num_data_sequences)] 51 | rear_dirs = ['./data/images'+str(i)+'/train/rgb/rear' for i in range(num_data_sequences)] 52 | right_dirs = ['./data/images'+str(i)+'/train/rgb/right' for i in range(num_data_sequences)] 53 | config_dirs = ['./data/images'+str(i)+'/train/cameraconfig' for i in range(num_data_sequences)] 54 | 55 | seq_len = 3 56 | 57 | image_lists = [] 58 | datalengths = [] 59 | 60 | for bev_dir in bev_dirs: 61 | names = fnmatch.filter(os.listdir(bev_dir), '*.png') 62 | 63 | files = [] 64 | for name in names: 65 | files.append(os.path.splitext(ntpath.basename(name))[0]) 66 | 67 | filelist = sorted(files,key=int) 68 | 69 | image_lists.append([f + '.png' for f in filelist]) 70 | datalengths.append(len(names)) 71 | 72 | 73 | vtransforms = T.Compose([T.Resize((540,640))]) #,GammaCorrectionTransform()) 74 | 75 | transforms = T.Compose([T.Resize((540,640)), T.ColorJitter(brightness=.3, hue=.3, contrast = 0.2, saturation = 0.3), T.Lambda(gamma_correction) ]) #,GammaCorrectionTransform()) 76 | target_transforms = T.Compose([T.Resize((50,50)),T.Grayscale(num_output_channels=1)]) 77 | 78 | 79 | training_data = UnityImageDataset(bev_dirs = bev_dirs,front_dirs=front_dirs,left_dirs=left_dirs,rear_dirs=rear_dirs,right_dirs=right_dirs,image_lists=image_lists,config_dirs=config_dirs,seq_len= seq_len,datalengths = datalengths,num_data_sequences = num_data_sequences, transform = transforms,target_transform=target_transforms) 80 | 81 | 82 | vbev_dirs = ['./data/images'+str(i)+'/val/seg/bev' for i in range(num_data_sequences)] 83 | vfront_dirs = ['./data/images'+str(i)+'/val/rgb/front' for i in range(num_data_sequences)] 84 | vleft_dirs = ['./data/images'+str(i)+'/val/rgb/left' for i in range(num_data_sequences)] 85 | vrear_dirs = ['./data/images'+str(i)+'/val/rgb/rear' for i in range(num_data_sequences)] 86 | vright_dirs = ['./data/images'+str(i)+'/val/rgb/right' for i in range(num_data_sequences)] 87 | vconfig_dirs = ['./data/images'+str(i)+'/val/cameraconfig' for i in range(num_data_sequences)] 88 | 89 | vimage_lists = [] 90 | vdatalengths = [] 91 | 92 | for vbev_dir in vbev_dirs: 93 | vnames = fnmatch.filter(os.listdir(vbev_dir), '*.png') 94 | 95 | vfiles = [] 96 | for vname in vnames: 97 | vfiles.append(os.path.splitext(ntpath.basename(vname))[0]) 98 | 99 | vfilelist = sorted(vfiles,key=int) 100 | 101 | vimage_lists.append([f + '.png' for f in vfilelist]) 102 | vdatalengths.append(len(vnames)) 103 | 104 | val_data = UnityImageDataset(bev_dirs = vbev_dirs,front_dirs=vfront_dirs,left_dirs=vleft_dirs,rear_dirs=vrear_dirs,right_dirs=vright_dirs,image_lists=vimage_lists,config_dirs=vconfig_dirs,seq_len= seq_len,datalengths = vdatalengths, num_data_sequences = num_data_sequences, transform = vtransforms,target_transform=target_transforms) 105 | 106 | 107 | train_dataloader = DataLoader(training_data, batch_size = 2, shuffle=True) 108 | val_dataloader = DataLoader(val_data, 
batch_size=2, shuffle=False) 109 | 110 | 111 | 112 | random_seed = 1 # or any of your favorite number 113 | torch.manual_seed(random_seed) 114 | torch.cuda.manual_seed(random_seed) 115 | torch.backends.cudnn.deterministic = True 116 | torch.backends.cudnn.benchmark = False 117 | 118 | 119 | 120 | 121 | def train(epoch,model,train_dataloader,seq_len, criterion,optimizer): 122 | 123 | model.train() 124 | 125 | train_loss = 0 126 | num_batches = len(train_dataloader) 127 | # with torch.autograd.detect_anomaly(): 128 | for batch_idx, (dataseq, targetseq) in enumerate(train_dataloader): 129 | 130 | inp_img_seq, can_buses_seq = dataseq 131 | #print(len(inp_img_seq)) 132 | bs = targetseq[0].shape[0] 133 | for ctr in range(seq_len): 134 | front = inp_img_seq[ctr][0] 135 | left = inp_img_seq[ctr][1] 136 | rear = inp_img_seq[ctr][2] 137 | right = inp_img_seq[ctr][3] 138 | 139 | target = targetseq[ctr] 140 | 141 | front = front.to(device) 142 | left = left.to(device) 143 | rear = rear.to(device) 144 | right = right.to(device) 145 | 146 | 147 | 148 | target = torch.squeeze(target,dim=1) 149 | idx0 = torch.where(target <= 0.02) 150 | target[idx0] = 10 151 | idx1 = torch.where(target <= 0.07) 152 | target[idx1] = 11 153 | idx2 = torch.where(target <= 0.22) 154 | target[idx2] = 12 155 | idx3 = torch.where(target <= 0.60) 156 | target[idx3] = 13 157 | idx4 = torch.where(target <= 1) 158 | target[idx4] = 14 159 | target = target - 10 160 | target = target.to(torch.int64).to(device) 161 | 162 | can_buses = can_buses_seq[ctr] 163 | 164 | if ctr == 0: 165 | prev_bev = None 166 | 167 | optimizer.zero_grad() 168 | output, inter_outputs, prev_bev_org = model(front,left,rear,right, list(can_buses),prev_bev) 169 | 170 | prev_bev = prev_bev_org.detach() 171 | 172 | loss = criterion(output, target) 173 | 174 | for inter_stage in inter_outputs: 175 | loss += criterion(inter_stage,target) 176 | 177 | loss.backward() 178 | optimizer.step() 179 | 180 | train_loss += loss.data 181 | print('Train Epoch: {} [{}/{} Srno: {} ({:.0f}%)]\tLoss: {:.6f}'.format( 182 | epoch, (batch_idx + 1) * 1, len(train_dataloader), ctr, 183 | 100. 
* (batch_idx + 1) / len(train_dataloader), loss.data)) 184 | 185 | train_loss/= num_batches*seq_len 186 | 187 | print(f"Train Error: Avg loss: {train_loss:>8f} \n") 188 | 189 | return train_loss 190 | 191 | 192 | def val(epoch,test_dataloader,seq_len,model,loss_fn): 193 | num_batches = len(test_dataloader) 194 | model.eval() 195 | test_loss = 0 196 | with torch.no_grad(): 197 | for batch_idx, (dataseq, targetseq) in enumerate(test_dataloader): 198 | inp_img_seq, can_buses_seq = dataseq 199 | bs = targetseq[0].shape[0] 200 | for ctr in range(seq_len): 201 | front = inp_img_seq[ctr][0] 202 | left = inp_img_seq[ctr][1] 203 | rear = inp_img_seq[ctr][2] 204 | right = inp_img_seq[ctr][3] 205 | 206 | target = targetseq[ctr] 207 | 208 | front = front.to(device) 209 | left = left.to(device) 210 | rear = rear.to(device) 211 | right = right.to(device) 212 | 213 | target = torch.squeeze(target,dim=1) 214 | idx0 = torch.where(target <= 0.02) 215 | target[idx0] = 10 216 | idx1 = torch.where(target <= 0.07) 217 | target[idx1] = 11 218 | idx2 = torch.where(target <= 0.22) 219 | target[idx2] = 12 220 | idx3 = torch.where(target <= 0.60) 221 | target[idx3] = 13 222 | idx4 = torch.where(target <= 1) 223 | target[idx4] = 14 224 | target = target - 10 225 | target = target.to(torch.int64).to(device) 226 | 227 | can_buses = can_buses_seq[ctr] 228 | if ctr == 0: 229 | prev_bev = None 230 | pred, inter_pred, prev_bev = model(front,left,rear,right, list(can_buses),prev_bev) 231 | 232 | test_loss += loss_fn(pred, target).item() 233 | 234 | test_loss/= num_batches*seq_len 235 | 236 | print(f"Test Error: Avg loss: {test_loss:>8f} \n") 237 | 238 | return test_loss 239 | 240 | 241 | device = "cuda" if torch.cuda.is_available() else "cpu" 242 | #device = "cpu" 243 | print(f"Using {device} device") 244 | 245 | model = FisheyeBEVFormer().to(device) 246 | 247 | optimizer = optim.AdamW(model.parameters(), lr=0.0002) 248 | criterionCE = nn.CrossEntropyLoss() 249 | criterionFocal = CrossEntropyFocalLoss() 250 | 251 | n_epochs = 1 252 | 253 | PATH = './f2bev_attn_st_seg.pt' 254 | min_val_loss = np.inf 255 | min_epoch = n_epochs 256 | for epoch in range(n_epochs): 257 | train_loss = train(epoch,model,train_dataloader,seq_len,criterionCE,optimizer) 258 | val_loss = val(epoch,val_dataloader,seq_len,model,criterionCE) 259 | 260 | if val_loss < min_val_loss: 261 | min_epoch = epoch 262 | min_val_loss = val_loss 263 | torch.save({'epoch' : epoch, 'model_state_dict': model.state_dict(), 'optimizer_state_dict' : optimizer.state_dict(), 'loss': val_loss}, PATH) 264 | else: 265 | if epoch > min_epoch: 266 | break 267 | -------------------------------------------------------------------------------- /F2BEV_code/f2bev_conda_env.yml: -------------------------------------------------------------------------------- 1 | name: f2bev 2 | channels: 3 | - pytorch 4 | - defaults 5 | dependencies: 6 | - _libgcc_mutex=0.1=main 7 | - _openmp_mutex=5.1=1_gnu 8 | - alabaster=0.7.12=py37_0 9 | - arrow=1.2.3=py37h06a4308_0 10 | - astroid=2.11.7=py37h06a4308_0 11 | - atomicwrites=1.4.0=py_0 12 | - attrs=22.1.0=py37h06a4308_0 13 | - autopep8=1.6.0=pyhd3eb1b0_1 14 | - babel=2.9.1=pyhd3eb1b0_0 15 | - backcall=0.2.0=pyhd3eb1b0_0 16 | - beautifulsoup4=4.11.1=py37h06a4308_0 17 | - binaryornot=0.4.4=pyhd3eb1b0_1 18 | - black=22.6.0=py37h06a4308_0 19 | - blas=1.0=mkl 20 | - bleach=4.1.0=pyhd3eb1b0_0 21 | - bottleneck=1.3.5=py37h7deecbd_0 22 | - brotli=1.0.9=h5eee18b_7 23 | - brotli-bin=1.0.9=h5eee18b_7 24 | - brotlipy=0.7.0=py37h27cfd23_1003 25 | - 
bzip2=1.0.8=h7b6447c_0 26 | - ca-certificates=2023.01.10=h06a4308_0 27 | - certifi=2022.12.7=py37h06a4308_0 28 | - cffi=1.15.1=py37h74dc2b5_0 29 | - chardet=4.0.0=py37h06a4308_1003 30 | - charset-normalizer=2.0.4=pyhd3eb1b0_0 31 | - click=8.0.4=py37h06a4308_0 32 | - cloudpickle=2.0.0=pyhd3eb1b0_0 33 | - colorama=0.4.5=py37h06a4308_0 34 | - cookiecutter=1.7.3=pyhd3eb1b0_0 35 | - cryptography=37.0.1=py37h9ce1e76_0 36 | - cudatoolkit=10.2.89=hfd86e86_1 37 | - cycler=0.11.0=pyhd3eb1b0_0 38 | - dbus=1.13.18=hb2f20db_0 39 | - debugpy=1.5.1=py37h295c915_0 40 | - decorator=5.1.1=pyhd3eb1b0_0 41 | - defusedxml=0.7.1=pyhd3eb1b0_0 42 | - diff-match-patch=20200713=pyhd3eb1b0_0 43 | - dill=0.3.6=py37h06a4308_0 44 | - docutils=0.17.1=py37h06a4308_1 45 | - entrypoints=0.4=py37h06a4308_0 46 | - expat=2.4.4=h295c915_0 47 | - ffmpeg=4.3=hf484d3e_0 48 | - flake8=4.0.1=pyhd3eb1b0_1 49 | - fontconfig=2.13.1=h6c09931_0 50 | - fonttools=4.25.0=pyhd3eb1b0_0 51 | - freetype=2.11.0=h70c0345_0 52 | - giflib=5.2.1=h7b6447c_0 53 | - glib=2.69.1=h4ff587b_1 54 | - gmp=6.2.1=h295c915_3 55 | - gnutls=3.6.15=he1e5248_0 56 | - gst-plugins-base=1.14.0=h8213a91_2 57 | - gstreamer=1.14.0=h28cd5cc_2 58 | - icu=58.2=he6710b0_3 59 | - idna=3.3=pyhd3eb1b0_0 60 | - imagesize=1.4.1=py37h06a4308_0 61 | - importlib_metadata=3.10.0=hd3eb1b0_0 62 | - importlib_resources=5.2.0=pyhd3eb1b0_1 63 | - inflection=0.5.1=py37h06a4308_0 64 | - intel-openmp=2021.4.0=h06a4308_3561 65 | - intervaltree=3.1.0=pyhd3eb1b0_0 66 | - ipykernel=6.15.2=py37h06a4308_0 67 | - ipython=7.31.1=py37h06a4308_1 68 | - ipython_genutils=0.2.0=pyhd3eb1b0_1 69 | - isort=5.9.3=pyhd3eb1b0_0 70 | - jedi=0.18.1=py37h06a4308_1 71 | - jeepney=0.7.1=pyhd3eb1b0_0 72 | - jellyfish=0.9.0=py37h7f8727e_0 73 | - jinja2=3.1.2=py37h06a4308_0 74 | - jinja2-time=0.2.0=pyhd3eb1b0_3 75 | - jpeg=9e=h7f8727e_0 76 | - jsonschema=4.16.0=py37h06a4308_0 77 | - jupyter_client=7.3.4=py37h06a4308_0 78 | - jupyter_core=4.11.2=py37h06a4308_0 79 | - jupyterlab_pygments=0.1.2=py_0 80 | - keyring=23.4.0=py37h06a4308_0 81 | - kiwisolver=1.4.2=py37h295c915_0 82 | - krb5=1.19.2=hac12032_0 83 | - lame=3.100=h7b6447c_0 84 | - lazy-object-proxy=1.6.0=py37h27cfd23_0 85 | - lcms2=2.12=h3be6417_0 86 | - ld_impl_linux-64=2.38=h1181459_1 87 | - lerc=3.0=h295c915_0 88 | - libbrotlicommon=1.0.9=h5eee18b_7 89 | - libbrotlidec=1.0.9=h5eee18b_7 90 | - libbrotlienc=1.0.9=h5eee18b_7 91 | - libclang=10.0.1=default_hb85057a_2 92 | - libdeflate=1.8=h7f8727e_5 93 | - libedit=3.1.20210910=h7f8727e_0 94 | - libevent=2.1.12=h8f2d780_0 95 | - libffi=3.3=he6710b0_2 96 | - libgcc-ng=11.2.0=h1234567_1 97 | - libgomp=11.2.0=h1234567_1 98 | - libiconv=1.16=h7f8727e_2 99 | - libidn2=2.3.2=h7f8727e_0 100 | - libllvm10=10.0.1=hbcb73fb_5 101 | - libpng=1.6.37=hbc83047_0 102 | - libpq=12.9=h16c4e8d_3 103 | - libsodium=1.0.18=h7b6447c_0 104 | - libspatialindex=1.9.3=h2531618_0 105 | - libstdcxx-ng=11.2.0=h1234567_1 106 | - libtasn1=4.16.0=h27cfd23_0 107 | - libtiff=4.4.0=hecacb30_0 108 | - libunistring=0.9.10=h27cfd23_0 109 | - libuuid=1.0.3=h7f8727e_2 110 | - libwebp=1.2.2=h55f646e_0 111 | - libwebp-base=1.2.2=h7f8727e_0 112 | - libxcb=1.15=h7f8727e_0 113 | - libxkbcommon=1.0.1=hfa300c1_0 114 | - libxml2=2.9.14=h74e7548_0 115 | - libxslt=1.1.35=h4e12654_0 116 | - lxml=4.9.1=py37h1edc446_0 117 | - lz4-c=1.9.3=h295c915_1 118 | - markupsafe=2.1.1=py37h7f8727e_0 119 | - matplotlib=3.5.2=py37h06a4308_0 120 | - matplotlib-base=3.5.2=py37hf590b9c_0 121 | - matplotlib-inline=0.1.6=py37h06a4308_0 122 | - mccabe=0.7.0=pyhd3eb1b0_0 123 | - 
mistune=0.8.4=py37h14c3975_1001 124 | - mkl=2021.4.0=h06a4308_640 125 | - mkl-service=2.4.0=py37h7f8727e_0 126 | - mkl_fft=1.3.1=py37hd3c417c_0 127 | - mkl_random=1.2.2=py37h51133e4_0 128 | - munkres=1.1.4=py_0 129 | - mypy_extensions=0.4.3=py37h06a4308_1 130 | - nbclient=0.5.13=py37h06a4308_0 131 | - nbconvert=6.5.4=py37h06a4308_0 132 | - nbformat=5.7.0=py37h06a4308_0 133 | - ncurses=6.3=h5eee18b_3 134 | - nest-asyncio=1.5.5=py37h06a4308_0 135 | - nettle=3.7.3=hbbd107a_1 136 | - nspr=4.33=h295c915_0 137 | - nss=3.74=h0370c37_0 138 | - numexpr=2.8.4=py37he184ba9_0 139 | - numpy=1.21.5=py37h6c91a56_3 140 | - numpy-base=1.21.5=py37ha15fc14_3 141 | - numpydoc=1.5.0=py37h06a4308_0 142 | - openh264=2.1.1=h4ff587b_0 143 | - openssl=1.1.1s=h7f8727e_0 144 | - packaging=21.3=pyhd3eb1b0_0 145 | - pandas=1.3.5=py37h8c16a72_0 146 | - pandocfilters=1.5.0=pyhd3eb1b0_0 147 | - parso=0.8.3=pyhd3eb1b0_0 148 | - pathspec=0.9.0=py37h06a4308_0 149 | - pcre=8.45=h295c915_0 150 | - pexpect=4.8.0=pyhd3eb1b0_3 151 | - pickleshare=0.7.5=pyhd3eb1b0_1003 152 | - pillow=9.2.0=py37hace64e9_1 153 | - pip=22.1.2=py37h06a4308_0 154 | - pkgutil-resolve-name=1.3.10=py37h06a4308_0 155 | - platformdirs=2.5.2=py37h06a4308_0 156 | - pluggy=1.0.0=py37h06a4308_1 157 | - ply=3.11=py37_0 158 | - poyo=0.5.0=pyhd3eb1b0_0 159 | - prompt-toolkit=3.0.20=pyhd3eb1b0_0 160 | - psutil=5.9.0=py37h5eee18b_0 161 | - ptyprocess=0.7.0=pyhd3eb1b0_2 162 | - pycodestyle=2.8.0=pyhd3eb1b0_0 163 | - pycparser=2.21=pyhd3eb1b0_0 164 | - pydocstyle=6.1.1=pyhd3eb1b0_0 165 | - pyflakes=2.4.0=pyhd3eb1b0_0 166 | - pygments=2.11.2=pyhd3eb1b0_0 167 | - pylint=2.14.5=py37h06a4308_0 168 | - pyls-spyder=0.4.0=pyhd3eb1b0_0 169 | - pyopenssl=22.0.0=pyhd3eb1b0_0 170 | - pyparsing=3.0.9=py37h06a4308_0 171 | - pyqt=5.15.7=py37h6a678d5_1 172 | - pyqt5-sip=12.11.0=py37h6a678d5_1 173 | - pyqtwebengine=5.15.7=py37h6a678d5_1 174 | - pyrsistent=0.18.0=py37heee7806_0 175 | - pysocks=1.7.1=py37_1 176 | - python=3.7.13=h12debd9_0 177 | - python-dateutil=2.8.2=pyhd3eb1b0_0 178 | - python-fastjsonschema=2.16.2=py37h06a4308_0 179 | - python-lsp-black=1.2.1=py37h06a4308_0 180 | - python-lsp-jsonrpc=1.0.0=pyhd3eb1b0_0 181 | - python-lsp-server=1.5.0=py37h06a4308_0 182 | - python-slugify=5.0.2=pyhd3eb1b0_0 183 | - pytorch=1.12.1=py3.7_cuda10.2_cudnn7.6.5_0 184 | - pytorch-mutex=1.0=cuda 185 | - pytz=2022.1=py37h06a4308_0 186 | - pyxdg=0.27=pyhd3eb1b0_0 187 | - pyyaml=6.0=py37h5eee18b_1 188 | - pyzmq=23.2.0=py37h6a678d5_0 189 | - qdarkstyle=3.0.2=pyhd3eb1b0_0 190 | - qstylizer=0.1.10=pyhd3eb1b0_0 191 | - qt-main=5.15.2=h327a75a_7 192 | - qt-webengine=5.15.9=hd2b0992_4 193 | - qtawesome=1.0.3=pyhd3eb1b0_0 194 | - qtconsole=5.3.2=py37h06a4308_0 195 | - qtpy=2.2.0=py37h06a4308_0 196 | - qtwebkit=5.212=h4eab89a_4 197 | - readline=8.1.2=h7f8727e_1 198 | - requests=2.28.1=py37h06a4308_0 199 | - rope=0.22.0=pyhd3eb1b0_0 200 | - rtree=0.9.7=py37h06a4308_1 201 | - secretstorage=3.3.1=py37h06a4308_0 202 | - setuptools=63.4.1=py37h06a4308_0 203 | - sip=6.6.2=py37h6a678d5_0 204 | - six=1.16.0=pyhd3eb1b0_1 205 | - snowballstemmer=2.2.0=pyhd3eb1b0_0 206 | - sortedcontainers=2.4.0=pyhd3eb1b0_0 207 | - soupsieve=2.3.2.post1=py37h06a4308_0 208 | - sphinx=4.2.0=pyhd3eb1b0_1 209 | - sphinxcontrib-applehelp=1.0.2=pyhd3eb1b0_0 210 | - sphinxcontrib-devhelp=1.0.2=pyhd3eb1b0_0 211 | - sphinxcontrib-htmlhelp=2.0.0=pyhd3eb1b0_0 212 | - sphinxcontrib-jsmath=1.0.1=pyhd3eb1b0_0 213 | - sphinxcontrib-qthelp=1.0.3=pyhd3eb1b0_0 214 | - sphinxcontrib-serializinghtml=1.1.5=pyhd3eb1b0_0 215 | - 
spyder=5.3.3=py37h06a4308_0 216 | - spyder-kernels=2.3.3=py37h06a4308_0 217 | - sqlite=3.39.2=h5082296_0 218 | - text-unidecode=1.3=pyhd3eb1b0_0 219 | - textdistance=4.2.1=pyhd3eb1b0_0 220 | - three-merge=0.1.1=pyhd3eb1b0_0 221 | - tinycss=0.4=pyhd3eb1b0_1002 222 | - tinycss2=1.2.1=py37h06a4308_0 223 | - tk=8.6.12=h1ccaba5_0 224 | - toml=0.10.2=pyhd3eb1b0_0 225 | - tomli=2.0.1=py37h06a4308_0 226 | - tomlkit=0.11.1=py37h06a4308_0 227 | - torchaudio=0.12.1=py37_cu102 228 | - torchvision=0.13.1=py37_cu102 229 | - tornado=6.1=py37h27cfd23_0 230 | - traitlets=5.7.1=py37h06a4308_0 231 | - typed-ast=1.4.3=py37h7f8727e_1 232 | - typing-extensions=4.3.0=py37h06a4308_0 233 | - typing_extensions=4.3.0=py37h06a4308_0 234 | - ujson=5.4.0=py37h6a678d5_0 235 | - unidecode=1.2.0=pyhd3eb1b0_0 236 | - urllib3=1.26.11=py37h06a4308_0 237 | - watchdog=2.1.6=py37h06a4308_0 238 | - wcwidth=0.2.5=pyhd3eb1b0_0 239 | - webencodings=0.5.1=py37_1 240 | - whatthepatch=1.0.2=py37h06a4308_0 241 | - wheel=0.37.1=pyhd3eb1b0_0 242 | - wrapt=1.14.1=py37h5eee18b_0 243 | - wurlitzer=3.0.2=py37h06a4308_0 244 | - xz=5.2.5=h7f8727e_1 245 | - yaml=0.2.5=h7b6447c_0 246 | - yapf=0.31.0=pyhd3eb1b0_0 247 | - zeromq=4.3.4=h2531618_0 248 | - zlib=1.2.12=h7f8727e_2 249 | - zstd=1.5.2=ha4553b6_0 250 | - pip: 251 | - absl-py==1.2.0 252 | - aiohttp==3.8.1 253 | - aiosignal==1.2.0 254 | - antlr4-python3-runtime==4.9.3 255 | - async-timeout==4.0.2 256 | - asynctest==0.13.0 257 | - cachetools==5.2.0 258 | - cython==0.29.32 259 | - filelock==3.9.0 260 | - frozenlist==1.3.1 261 | - fsspec==2022.8.2 262 | - google-auth==2.11.1 263 | - google-auth-oauthlib==0.4.6 264 | - grpcio==1.48.1 265 | - huggingface-hub==0.11.1 266 | - importlib-metadata==4.12.0 267 | - markdown==3.4.1 268 | - multidict==6.0.2 269 | - oauthlib==3.2.1 270 | - omegaconf==2.3.0 271 | - opencv-contrib-python==4.5.4.60 272 | - opencv-python==3.4.2.17 273 | - protobuf==3.19.5 274 | - pyasn1==0.4.8 275 | - pyasn1-modules==0.2.8 276 | - pycocotools==2.0.6 277 | - pydeprecate==0.3.2 278 | - pytorch-lightning==1.7.6 279 | - requests-oauthlib==1.3.1 280 | - rsa==4.9 281 | - scipy==1.7.3 282 | - tensorboard==2.10.0 283 | - tensorboard-data-server==0.6.1 284 | - tensorboard-plugin-wit==1.8.1 285 | - timm==0.6.12 286 | - torchmetrics==0.9.3 287 | - torchsummary==1.5.1 288 | - tqdm==4.64.0 289 | - werkzeug==2.2.2 290 | - yarl==1.8.1 291 | - zipp==3.8.1 292 | prefix: /home/smartslab/anaconda3/envs/pytorch 293 | --------------------------------------------------------------------------------
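A minimal sketch (not part of the repository) of how a checkpoint written by the training scripts above could be reloaded for evaluation. It only assumes the dictionary keys used in their torch.save calls ('epoch', 'model_state_dict', 'optimizer_state_dict', 'loss'), the checkpoint path './f2bev_attn_st_seg.pt', and the FisheyeBEVFormer class those scripts import; the provided test_f2bev_*.py scripts are the authoritative evaluation entry points.

import torch
from model_f2bev_attn_st_seg import FisheyeBEVFormer

device = "cuda" if torch.cuda.is_available() else "cpu"
model = FisheyeBEVFormer().to(device)

# Load the dictionary saved by train_f2bev_attn_st_seg.py and restore the weights.
checkpoint = torch.load('./f2bev_attn_st_seg.pt', map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()
print(f"Loaded checkpoint from epoch {checkpoint['epoch']} with val loss {checkpoint['loss']:.6f}")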