├── README.md
├── assets
│   └── overview.png
├── bsg_vln
│   ├── datasets
│   │   └── vln_data_path.txt
│   └── map_nav_src
│       ├── models
│       │   ├── graph_utils.py
│       │   ├── model_bev.py
│       │   ├── ops.py
│       │   ├── transformer.py
│       │   ├── vilmodel_bev.py
│       │   └── vlnbert_bev_init.py
│       ├── r2r
│       │   ├── agent_base.py
│       │   ├── agent_bev.py
│       │   ├── data_utils.py
│       │   ├── env_bev.py
│       │   ├── main_bevnew.py
│       │   └── parser.py
│       ├── scripts
│       │   └── r2r_bev.sh
│       └── utils
│           ├── data.py
│           ├── distributed.py
│           ├── logger.py
│           ├── misc.py
│           └── ops.py
└── mp3dbev
    ├── data
    │   ├── mp3d_test.pkl
    │   ├── mp3d_train.pkl
    │   ├── mp3d_valtest.pkl
    │   └── mp3d_valunseen.pkl
    ├── projects
    │   ├── __init__.py
    │   ├── configs
    │   │   ├── _base_
    │   │   │   ├── datasets
    │   │   │   │   ├── coco_instance.py
    │   │   │   │   ├── kitti-3d-3class.py
    │   │   │   │   ├── kitti-3d-car.py
    │   │   │   │   ├── lyft-3d.py
    │   │   │   │   ├── nuim_instance.py
    │   │   │   │   ├── nus-3d.py
    │   │   │   │   ├── nus-mono3d.py
    │   │   │   │   ├── range100_lyft-3d.py
    │   │   │   │   ├── s3dis-3d-5class.py
    │   │   │   │   ├── s3dis_seg-3d-13class.py
    │   │   │   │   ├── scannet-3d-18class.py
    │   │   │   │   ├── scannet_seg-3d-20class.py
    │   │   │   │   ├── sunrgbd-3d-10class.py
    │   │   │   │   ├── waymoD5-3d-3class.py
    │   │   │   │   └── waymoD5-3d-car.py
    │   │   │   ├── default_runtime.py
    │   │   │   ├── models
    │   │   │   │   ├── 3dssd.py
    │   │   │   │   ├── cascade_mask_rcnn_r50_fpn.py
    │   │   │   │   ├── centerpoint_01voxel_second_secfpn_nus.py
    │   │   │   │   ├── centerpoint_02pillar_second_secfpn_nus.py
    │   │   │   │   ├── fcos3d.py
    │   │   │   │   ├── groupfree3d.py
    │   │   │   │   ├── h3dnet.py
    │   │   │   │   ├── hv_pointpillars_fpn_lyft.py
    │   │   │   │   ├── hv_pointpillars_fpn_nus.py
    │   │   │   │   ├── hv_pointpillars_fpn_range100_lyft.py
    │   │   │   │   ├── hv_pointpillars_secfpn_kitti.py
    │   │   │   │   ├── hv_pointpillars_secfpn_waymo.py
    │   │   │   │   ├── hv_second_secfpn_kitti.py
    │   │   │   │   ├── hv_second_secfpn_waymo.py
    │   │   │   │   ├── imvotenet_image.py
    │   │   │   │   ├── mask_rcnn_r50_fpn.py
    │   │   │   │   ├── paconv_cuda_ssg.py
    │   │   │   │   ├── paconv_ssg.py
    │   │   │   │   ├── parta2.py
    │   │   │   │   ├── pointnet2_msg.py
    │   │   │   │   ├── pointnet2_ssg.py
    │   │   │   │   └── votenet.py
    │   │   │   └── schedules
    │   │   │       ├── cosine.py
    │   │   │       ├── cyclic_20e.py
    │   │   │       ├── cyclic_40e.py
    │   │   │       ├── mmdet_schedule_1x.py
    │   │   │       ├── schedule_2x.py
    │   │   │       ├── schedule_3x.py
    │   │   │       ├── seg_cosine_150e.py
    │   │   │       ├── seg_cosine_200e.py
    │   │   │       └── seg_cosine_50e.py
    │   │   └── bevformer
    │   │       ├── getbev.py
    │   │       └── mp3dbev.py
    │   └── mmdet3d_plugin
    │       ├── __init__.py
    │       ├── __pycache__
    │       │   └── __init__.cpython-38.pyc
    │       ├── bevformer
    │       │   ├── __init__.py
    │       │   ├── __pycache__
    │       │   │   └── __init__.cpython-38.pyc
    │       │   ├── apis
    │       │   │   ├── __init__.py
    │       │   │   ├── __pycache__
    │       │   │   │   ├── __init__.cpython-38.pyc
    │       │   │   │   ├── mmdet_train.cpython-38.pyc
    │       │   │   │   ├── test.cpython-38.pyc
    │       │   │   │   └── train.cpython-38.pyc
    │       │   │   ├── mmdet_train.py
    │       │   │   ├── test.py
    │       │   │   └── train.py
    │       │   ├── dense_heads
    │       │   │   ├── __init__.py
    │       │   │   ├── __pycache__
    │       │   │   │   ├── __init__.cpython-38.pyc
    │       │   │   │   ├── bevformer_head.cpython-38.pyc
    │       │   │   │   └── bevformer_headmp.cpython-38.pyc
    │       │   │   └── bevformer_headmp.py
    │       │   ├── detectors
    │       │   │   ├── __init__.py
    │       │   │   ├── __pycache__
    │       │   │   │   ├── __init__.cpython-38.pyc
    │       │   │   │   ├── bevformer.cpython-38.pyc
    │       │   │   │   ├── bevformer_fp16.cpython-38.pyc
    │       │   │   │   └── bevformermp.cpython-38.pyc
    │       │   │   └── bevformermp.py
    │       │   ├── hooks
    │       │   │   ├── __init__.py
    │       │   │   ├── __pycache__
    │       │   │   │   ├── __init__.cpython-38.pyc
    │       │   │   │   └── custom_hooks.cpython-38.pyc
    │       │   │   └── custom_hooks.py
    │       │   ├── modules
    │       │   │   ├── __init__.py
    │       │   │   ├── __pycache__
    │       │   │   │   ├── __init__.cpython-38.pyc
    │       │   │   │   ├── custom_base_transformer_layer.cpython-38.pyc
    │       │   │   │   ├── decoder.cpython-38.pyc
    │       │   │   │   ├── encoder.cpython-38.pyc
    │       │   │   │   ├── multi_scale_deformable_attn_function.cpython-38.pyc
    │       │   │   │   ├── spatial_cross_attention.cpython-38.pyc
    │       │   │   │   ├── temporal_self_attention.cpython-38.pyc
    │       │   │   │   └── transformer.cpython-38.pyc
    │       │   │   ├── custom_base_transformer_layer.py
    │       │   │   ├── decoder.py
    │       │   │   ├── encoder.py
    │       │   │   ├── multi_scale_deformable_attn_function.py
    │       │   │   ├── spatial_cross_attention.py
    │       │   │   ├── temporal_self_attention.py
    │       │   │   └── transformer.py
    │       │   └── runner
    │       │       ├── __init__.py
    │       │       ├── __pycache__
    │       │       │   ├── __init__.cpython-38.pyc
    │       │       │   └── epoch_based_runner.cpython-38.pyc
    │       │       └── epoch_based_runner.py
    │       ├── core
    │       │   ├── bbox
    │       │   │   ├── __pycache__
    │       │   │   │   └── util.cpython-38.pyc
    │       │   │   ├── assigners
    │       │   │   │   ├── __init__.py
    │       │   │   │   ├── __pycache__
    │       │   │   │   │   ├── __init__.cpython-38.pyc
    │       │   │   │   │   └── hungarian_assigner_3d.cpython-38.pyc
    │       │   │   │   └── hungarian_assigner_3d.py
    │       │   │   ├── coders
    │       │   │   │   ├── __init__.py
    │       │   │   │   ├── __pycache__
    │       │   │   │   │   ├── __init__.cpython-38.pyc
    │       │   │   │   │   └── nms_free_coder.cpython-38.pyc
    │       │   │   │   └── nms_free_coder.py
    │       │   │   ├── match_costs
    │       │   │   │   ├── __init__.py
    │       │   │   │   ├── __pycache__
    │       │   │   │   │   ├── __init__.cpython-38.pyc
    │       │   │   │   │   └── match_cost.cpython-38.pyc
    │       │   │   │   └── match_cost.py
    │       │   │   └── util.py
    │       │   └── evaluation
    │       │       ├── __init__.py
    │       │       ├── __pycache__
    │       │       │   ├── __init__.cpython-38.pyc
    │       │       │   └── eval_hooks.cpython-38.pyc
    │       │       ├── eval_hooks.py
    │       │       └── kitti2waymo.py
    │       ├── datasets
    │       │   ├── __init__.py
    │       │   ├── __pycache__
    │       │   │   ├── __init__.cpython-38.pyc
    │       │   │   ├── builder.cpython-38.pyc
    │       │   │   ├── indoor_eval.cpython-38.pyc
    │       │   │   ├── mp3d_dataset.cpython-38.pyc
    │       │   │   ├── nuscenes_dataset.cpython-38.pyc
    │       │   │   └── nuscnes_eval.cpython-38.pyc
    │       │   ├── builder.py
    │       │   ├── indoor_eval.py
    │       │   ├── mp3d_dataset.py
    │       │   ├── pipelines
    │       │   │   ├── __init__.py
    │       │   │   ├── __pycache__
    │       │   │   │   ├── __init__.cpython-38.pyc
    │       │   │   │   ├── compose.cpython-38.pyc
    │       │   │   │   ├── formating.cpython-38.pyc
    │       │   │   │   └── transform_3d.cpython-38.pyc
    │       │   │   ├── compose.py
    │       │   │   ├── formating.py
    │       │   │   ├── loading.py
    │       │   │   └── transform_3d.py
    │       │   └── samplers
    │       │       ├── __init__.py
    │       │       ├── __pycache__
    │       │       │   ├── __init__.cpython-38.pyc
    │       │       │   ├── distributed_sampler.cpython-38.pyc
    │       │       │   ├── group_sampler.cpython-38.pyc
    │       │       │   └── sampler.cpython-38.pyc
    │       │       ├── distributed_sampler.py
    │       │       ├── group_sampler.py
    │       │       └── sampler.py
    │       └── models
    │           ├── backbones
    │           │   ├── __init__.py
    │           │   ├── __pycache__
    │           │   │   ├── __init__.cpython-38.pyc
    │           │   │   └── vovnet.cpython-38.pyc
    │           │   └── vovnet.py
    │           ├── hooks
    │           │   ├── __init__.py
    │           │   └── hooks.py
    │           ├── opt
    │           │   ├── __init__.py
    │           │   ├── __pycache__
    │           │   │   ├── __init__.cpython-38.pyc
    │           │   │   └── adamw.cpython-38.pyc
    │           │   └── adamw.py
    │           └── utils
    │               ├── __init__.py
    │               ├── __pycache__
    │               │   ├── __init__.cpython-38.pyc
    │               │   ├── bricks.cpython-38.pyc
    │               │   ├── grid_mask.cpython-38.pyc
    │               │   ├── position_embedding.cpython-38.pyc
    │               │   └── visual.cpython-38.pyc
    │               ├── bricks.py
    │               ├── grid_mask.py
    │               ├── position_embedding.py
    │               └── visual.py
    └── tools
        ├── analysis_tools
        │   ├── __init__.py
        │   ├── analyze_logs.py
        │   ├── benchmark.py
        │   ├── get_params.py
        │   └── visual.py
        ├── create_data.py
        ├── data_converter
        │   ├── __init__.py
        │   ├── __pycache__
        │   │   ├── __init__.cpython-38.pyc
        │   │   ├── create_gt_database.cpython-38.pyc
        │   │   ├── indoor_converter.cpython-38.pyc
        │   │   ├── kitti_converter.cpython-38.pyc
        │   │   ├── kitti_data_utils.cpython-38.pyc
        │   │   ├── lyft_converter.cpython-38.pyc
        │   │   └── nuscenes_converter.cpython-38.pyc
        │   ├── create_gt_database.py
        │   ├── indoor_converter.py
        │   ├── kitti_converter.py
        │   ├── kitti_data_utils.py
        │   ├── lyft_converter.py
        │   ├── lyft_data_fixer.py
        │   ├── nuimage_converter.py
        │   ├── nuscenes_converter.py
        │   ├── s3dis_data_utils.py
        │   ├── scannet_data_utils.py
        │   ├── sunrgbd_data_utils.py
        │   └── waymo_converter.py
        ├── dist_test.sh
        ├── dist_train.sh
        ├── fp16
        │   ├── dist_train.sh
        │   └── train.py
        ├── misc
        │   ├── browse_dataset.py
        │   ├── fuse_conv_bn.py
        │   ├── print_config.py
        │   └── visualize_results.py
        ├── model_converters
        │   ├── convert_votenet_checkpoints.py
        │   ├── publish_model.py
        │   └── regnet2mmdet.py
        ├── test.py
        └── train.py
/README.md:
--------------------------------------------------------------------------------
1 | # Bird’s-Eye-View Scene Graph for Vision-Language Navigation
2 |
3 | 
4 |
5 | > This repository is an official PyTorch implementation of the paper:
6 | > [Bird’s-Eye-View Scene Graph for Vision-Language Navigation](https://arxiv.org/abs/2308.04758).
7 | > ICCV 2023. ([arXiv 2308.04758](https://arxiv.org/abs/2308.04758))
8 |
9 |
10 | ## Abstract
11 | Vision-language navigation (VLN), which requires an agent to navigate 3D environments following human instructions, has shown great advances. However, current agents are built upon panoramic observations, which hinders their ability to perceive 3D scene geometry and easily leads to ambiguous selection among panoramic views. To address these limitations, we present a BEV Scene Graph (BSG), which leverages multi-step BEV representations to encode scene layouts and geometric cues of indoor environments under the supervision of 3D detection. During navigation, BSG builds a local BEV representation at each step and maintains a BEV-based global scene map, which stores and organizes all the local BEV representations collected online according to their topological relations. Based on BSG, the agent predicts a local BEV grid-level decision score and a global graph-level decision score, combined with a sub-view selection score on panoramic views, for more accurate action prediction. Our approach significantly outperforms state-of-the-art methods on REVERIE, R2R, and R4R, showing the potential of BEV perception in VLN.
12 |
13 | ## Installation
14 | The implementation of BEV Detection is built on [MMDetection3D v0.17.1](https://github.com/open-mmlab/mmdetection3d). Please follow [BEVFormer](https://github.com/fundamentalvision/BEVFormer) for installation.
15 |
16 | The implementation of VLN is built on the latest version of the [Matterport3D simulator](https://github.com/peteanderson80/Matterport3DSimulator). After building the simulator, add its build directory to your `PYTHONPATH`:
17 | ```
18 | export PYTHONPATH=Matterport3DSimulator/build:$PYTHONPATH
19 | ```
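
If the simulator is built correctly, initializing it without rendering should succeed. A minimal sanity check, mirroring `new_simulator` in `bsg_vln/map_nav_src/utils/data.py` (the connectivity path below is an assumption; point it at your own checkout):

```python
import math
import MatterSim  # provided by Matterport3DSimulator/build on PYTHONPATH

sim = MatterSim.Simulator()
sim.setNavGraphPath('Matterport3DSimulator/connectivity')  # adjust to your checkout
sim.setRenderingEnabled(False)  # graph-only mode, no scan imagery required
sim.setCameraResolution(640, 480)
sim.setCameraVFOV(math.radians(60))
sim.setDiscretizedViewingAngles(True)
sim.setBatchSize(1)
sim.initialize()
print('MatterSim initialized OK')
```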
20 |
21 | Many thanks to the contributors for their great efforts.
22 |
23 | ## Dataset Preparation
24 | The dataset is based on indoor RGB images from [Matterport3D](https://niessner.github.io/Matterport/). Please fill in and sign the [Terms of Use](http://kaldir.vc.in.tum.de/matterport/MP_TOS.pdf) agreement and send it to matterport3d@googlegroups.com to request access to the dataset.
25 |
26 | Note that we use the undistorted_color_images for BEV Detection. Camera parameters (world-to-pixel matrices) come from undistorted_camera_parameters. The 3D box annotations are available in mp3dbev/data. For VLN, please follow [VLN-DUET](https://github.com/cshizhe/VLN-DUET) for more details, including the processed annotations, features and pretrained models of the REVERIE, R2R and R4R datasets.
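
For reference, a world-to-pixel matrix is composed from the per-image intrinsics and the inverse of the camera-to-world extrinsics found in each scan's undistorted_camera_parameters files. A minimal sketch of the linear algebra only (how `K` and `T` are parsed out of the `.conf` files is left to you, and camera-axis sign conventions may require care):

```python
import numpy as np

def world_to_pixel(K, camera_to_world):
    """Compose the 3x4 world-to-pixel matrix P = K @ inv(T)[:3, :].

    K: (3, 3) intrinsics; camera_to_world: (4, 4) extrinsics.
    """
    world_to_camera = np.linalg.inv(camera_to_world)  # world -> camera frame
    return K @ world_to_camera[:3, :]

# projecting a world point: x, y, w = P @ [X, Y, Z, 1]; pixel = (x / w, y / w)
```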
27 |
28 |
29 |
30 | ## Extracting Features
31 | Please follow the [scripts](https://github.com/cshizhe/VLN-HAMT/tree/main/preprocess) to extract visual features for both undistorted_color_images (for BEV Detection) and matterport_skybox_images (for VLN, optional). Note that all the ViT token features of undistorted_color_images should be kept (not only the [CLS] token; about 130 GB in total). Pay particular attention to the following line, since different versions of [timm](https://github.com/huggingface/pytorch-image-models) models return different outputs:
32 | ```
33 | b_fts = model.forward_features(images[k: k+args.batch_size])
34 | ```
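
For orientation, a minimal sketch of what the per-image storage can look like, assuming a recent timm where `forward_features` returns the full token sequence (the HDF5 key format mirrors `CamFeatures` in `bsg_vln/map_nav_src/utils/data.py`; the ids below are placeholders):

```python
import h5py
import timm
import torch

model = timm.create_model('vit_base_patch16_224', pretrained=False).eval()

images = torch.zeros(1, 3, 224, 224)      # stand-in for one undistorted color image
with torch.no_grad():
    fts = model.forward_features(images)  # (1, 197, 768): [CLS] + 196 patch tokens

key = '%s_%s_i%s_%s' % ('scan_id', 'viewpoint_id', '1', '0')  # placeholder ids
with h5py.File('camera_features.h5', 'w') as f:
    # keep ALL tokens; CamFeatures later drops [CLS] via f[key][:, 1:, :]
    f.create_dataset(key, data=fts.numpy().astype('float16'))
```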
35 |
36 | ## BEV Detection
37 | ```shell
38 | cd mp3dbev/
39 | # multi-gpu train
40 | CUDA_VISIBLE_DEVICES=0,1,2,3 PORT=${PORT:-29500} ./tools/dist_train.sh ./projects/configs/bevformer/mp3dbev.py 4
41 |
42 | # multi-gpu test
43 | CUDA_VISIBLE_DEVICES=0,1,2,3 PORT=${PORT:-29500} ./tools/dist_test.sh ./projects/configs/bevformer/mp3dbev.py ./path/to/ckpts.pth 4
44 |
45 | # inference for BEV features
46 | CUDA_VISIBLE_DEVICES=0,1,2,3 PORT=${PORT:-29500} ./tools/dist_test.sh ./projects/configs/bevformer/getbev.py ./path/to/ckpts.pth 4
47 | ```
48 | Please also see the training and inference instructions in the [MMDetection3D documentation](https://github.com/open-mmlab/mmdetection3d) for detailed usage.
49 |
50 | ## VLN Training
51 | ```shell
52 | cd bsg_vln
53 | # train & infer
54 | cd map_nav_src
55 | bash scripts/r2r_bev.sh
56 | ```
57 |
58 | ## Citation
59 |
60 | If you find BSG useful or inspiring, please consider citing our paper:
61 |
62 | ```bibtex
63 | @inproceedings{liu2023bird,
64 |   title={Bird's-Eye-View Scene Graph for Vision-Language Navigation},
65 |   author={Liu, Rui and Wang, Xiaohan and Wang, Wenguan and Yang, Yi},
66 |   booktitle={ICCV},
67 |   pages={10968--10980},
68 |   year={2023}
69 | }
70 | ```
71 |
72 | ## Acknowledgement
73 | We thank the developers of these excellent open source projects: [MMDetection3D](https://github.com/open-mmlab/mmdetection3d), [BEVFormer](https://github.com/fundamentalvision/BEVFormer/tree/master), [DUET](https://github.com/cshizhe/VLN-DUET), [HAMT](https://github.com/cshizhe/VLN-HAMT), [ETPNav](https://github.com/MarSaKi/ETPNav), [MP3D Simulator](https://github.com/peteanderson80/Matterport3DSimulator), [VLNBERT](https://github.com/YicongHong/Recurrent-VLN-BERT). Many thanks to the reviewers for their valuable comments.
74 |
75 | ## Contact
76 | This repository is currently maintained by [Rui Liu](mailto:rui.liu@zju.edu.cn).
77 |
--------------------------------------------------------------------------------
/assets/overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DefaultRui/BEV-Scene-Graph/b73acc223f9ded311d6bee8b8117fe36c212fa55/assets/overview.png
--------------------------------------------------------------------------------
/bsg_vln/datasets/vln_data_path.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DefaultRui/BEV-Scene-Graph/b73acc223f9ded311d6bee8b8117fe36c212fa55/bsg_vln/datasets/vln_data_path.txt
--------------------------------------------------------------------------------
/bsg_vln/map_nav_src/models/model_bev.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import collections
3 |
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 |
8 | from transformers import BertPreTrainedModel
9 |
10 | from .vlnbert_bev_init import get_vlnbert_models
11 |
12 | class VLNBert(nn.Module):
13 |     def __init__(self, args):
14 |         super().__init__()
15 |         print('\nInitializing the VLN-BERT model ...')
16 |         self.args = args
17 |
18 |         self.vln_bert = get_vlnbert_models(args, config=None)  # initialize the VLN-BERT
19 |         self.drop_env = nn.Dropout(p=args.feat_dropout)
20 |
21 |     def forward(self, mode, batch):
22 |         batch = collections.defaultdict(lambda: None, batch)
23 |
24 |         if mode == 'language':
25 |             txt_embeds = self.vln_bert(mode, batch)
26 |             return txt_embeds
27 |
28 |         elif mode == 'panorama':
29 |             batch['view_img_fts'] = self.drop_env(batch['view_img_fts'])
30 |             if 'obj_img_fts' in batch:
31 |                 batch['obj_img_fts'] = self.drop_env(batch['obj_img_fts'])
32 |             pano_embeds, pano_masks = self.vln_bert(mode, batch)
33 |             return pano_embeds, pano_masks
34 |
35 |         elif mode == 'cam_fts':
36 |             bev_embeds, candi_bev, candi_id, finebevembs, bevpoints, candi2bev_tmp = self.vln_bert(mode, batch)
37 |             return bev_embeds, candi_bev, candi_id, finebevembs, bevpoints, candi2bev_tmp
38 |
39 |         elif mode == 'bev_fts':
40 |             bevvp_embeds, bevvp_masks = self.vln_bert(mode, batch)
41 |             return bevvp_embeds, bevvp_masks
42 |
43 |         elif mode == 'navigation':
44 |             outs = self.vln_bert(mode, batch)
45 |             return outs
46 |
47 |         else:
48 |             raise NotImplementedError('wrong mode: %s' % mode)
49 |
50 |
51 | class Critic(nn.Module):
52 |     def __init__(self, args):
53 |         super(Critic, self).__init__()
54 |         self.state2value = nn.Sequential(
55 |             nn.Linear(768, 512),
56 |             nn.ReLU(),
57 |             nn.Dropout(args.dropout),
58 |             nn.Linear(512, 1),
59 |         )
60 |
61 |     def forward(self, state):
62 |         return self.state2value(state).squeeze()
63 |
--------------------------------------------------------------------------------
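
A quick shape check for the `Critic` above; `args` is reduced to the single field the module actually reads (illustrative only):

```python
import argparse
import torch

from models.model_bev import Critic  # assuming map_nav_src is on sys.path

args = argparse.Namespace(dropout=0.5)  # minimal stand-in for the training args
critic = Critic(args)

state = torch.randn(8, 768)  # a batch of 768-d agent states
values = critic(state)       # -> shape (8,): one value estimate per state
print(values.shape)
```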
/bsg_vln/map_nav_src/models/ops.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | from .transformer import TransformerEncoder, TransformerEncoderLayer
4 |
5 | try:
6 |     from apex.normalization.fused_layer_norm import FusedLayerNorm as BertLayerNorm
7 | except (ImportError, AttributeError):
8 |     # logger.info("Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex .")
9 |     BertLayerNorm = torch.nn.LayerNorm
10 |
11 | def create_transformer_encoder(config, num_layers, norm=False):
12 |     enc_layer = TransformerEncoderLayer(
13 |         config.hidden_size, config.num_attention_heads,
14 |         dim_feedforward=config.intermediate_size,
15 |         dropout=config.hidden_dropout_prob,
16 |         activation=config.hidden_act,
17 |         normalize_before=True
18 |     )
19 |     if norm:
20 |         norm_layer = BertLayerNorm(config.hidden_size, eps=1e-12)
21 |     else:
22 |         norm_layer = None
23 |     return TransformerEncoder(enc_layer, num_layers, norm=norm_layer, batch_first=True)
24 |
25 | def extend_neg_masks(masks, dtype=None):
26 |     """
27 |     Extend mask from (N, L) to (N, 1(H), 1(L), L) and turn it into an additive
28 |     attention mask (0 for valid positions, -10000 for padded ones).
29 |     """
30 |     if dtype is None:
31 |         dtype = torch.float
32 |     extended_masks = masks.unsqueeze(1).unsqueeze(2)
33 |     extended_masks = extended_masks.to(dtype=dtype)
34 |     extended_masks = (1.0 - extended_masks) * -10000.0
35 |     return extended_masks
36 |
37 | def gen_seq_masks(seq_lens, max_len=None):
38 |     if max_len is None:
39 |         max_len = max(seq_lens)
40 |     batch_size = len(seq_lens)
41 |     device = seq_lens.device
42 |
43 |     masks = torch.arange(max_len).unsqueeze(0).repeat(batch_size, 1).to(device)
44 |     masks = masks < seq_lens.unsqueeze(1)
45 |     return masks
46 |
47 | def pad_tensors_wgrad(tensors, lens=None):
48 |     """B x [T, ...] torch tensors"""
49 |     if lens is None:
50 |         lens = [t.size(0) for t in tensors]
51 |     max_len = max(lens)
52 |     batch_size = len(tensors)
53 |     hid = list(tensors[0].size()[1:])
54 |
55 |     device = tensors[0].device
56 |     dtype = tensors[0].dtype
57 |
58 |     output = []
59 |     for i in range(batch_size):
60 |         if lens[i] < max_len:
61 |             tmp = torch.cat(
62 |                 [tensors[i], torch.zeros([max_len - lens[i]] + hid, dtype=dtype).to(device)],
63 |                 dim=0
64 |             )
65 |         else:
66 |             tmp = tensors[i]
67 |         output.append(tmp)
68 |     output = torch.stack(output, 0)
69 |     return output
70 |
71 | def findindex(bev_grid, localgrid):
72 |     x_grid, y_grid = torch.meshgrid(
73 |         torch.arange(bev_grid).cuda(),
74 |         torch.arange(bev_grid).cuda(),
75 |         indexing='ij'
76 |     )
77 |     xy_grid = torch.stack((x_grid, y_grid), -1).view(-1, 2).cuda()
78 |     center_grid = torch.tensor([bev_grid // 2, bev_grid // 2]).cuda()
79 |     center_grid = center_grid[None].repeat(bev_grid ** 2, 1)
80 |     # flat indices of the localgrid**2 cells closest to the BEV center
81 |     index = torch.pow(xy_grid - center_grid, 2).sum(-1).sort(-1)[1][:localgrid ** 2].cuda()
82 |     # NOTE: return index.sort()[0] instead of index, so the selected cells
83 |     # keep the row-major order of the flattened grid
84 |     return index.sort()[0]
--------------------------------------------------------------------------------
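
To make the `findindex` contract concrete: for a `bev_grid` x `bev_grid` map it returns the sorted flat indices of the `localgrid`**2 cells closest to the center. The same computation with the `.cuda()` calls removed, so it runs anywhere:

```python
import torch

def findindex_cpu(bev_grid, localgrid):
    # same logic as findindex above, minus the .cuda() calls
    x_grid, y_grid = torch.meshgrid(
        torch.arange(bev_grid), torch.arange(bev_grid), indexing='ij')
    xy_grid = torch.stack((x_grid, y_grid), -1).view(-1, 2)
    center_grid = torch.tensor([bev_grid // 2, bev_grid // 2])
    center_grid = center_grid[None].repeat(bev_grid ** 2, 1)
    index = torch.pow(xy_grid - center_grid, 2).sum(-1).sort(-1)[1][:localgrid ** 2]
    return index.sort()[0]

print(findindex_cpu(5, 3))
# tensor([ 6,  7,  8, 11, 12, 13, 16, 17, 18]): the central 3x3 block of a 5x5 grid
```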
/bsg_vln/map_nav_src/models/vlnbert_bev_init.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | def get_tokenizer(args):
5 |     from transformers import AutoTokenizer
6 |     if args.tokenizer == 'xlm':
7 |         cfg_name = 'xlm-roberta-base'
8 |     else:
9 |         cfg_name = 'bert-base-uncased'
10 |     tokenizer = AutoTokenizer.from_pretrained(cfg_name)
11 |     return tokenizer
12 |
13 | def get_vlnbert_models(args, config=None):
14 |     from transformers import PretrainedConfig
15 |     from models.vilmodel_bev import GlocalTextPathNavCMT
16 |
17 |     model_name_or_path = args.bert_ckpt_file
18 |     new_ckpt_weights = {}
19 |     if model_name_or_path is not None:
20 |         ckpt_weights = torch.load(model_name_or_path)
21 |         for k, v in ckpt_weights.items():
22 |             if k.startswith('module'):
23 |                 k = k[7:]  # strip the 'module.' prefix added by (Distributed)DataParallel
24 |             if '_head' in k or 'sap_fuse' in k:
25 |                 new_ckpt_weights['bert.' + k] = v
26 |             else:
27 |                 new_ckpt_weights[k] = v
28 |
29 |     resume_path = args.resume_file
30 |     if resume_path is not None:
31 |         ckpt_weights = torch.load(resume_path)
32 |         for k, v in ckpt_weights['vln_bert']['state_dict'].items():
33 |             if k.startswith('vln_bert'):
34 |                 new_ckpt_weights[k[9:]] = v  # strip the 'vln_bert.' prefix
35 |             else:
36 |                 new_ckpt_weights[k] = v
37 |
38 |     if args.tokenizer == 'xlm':
39 |         cfg_name = 'xlm-roberta-base'
40 |     else:
41 |         cfg_name = 'bert-base-uncased'
42 |     vis_config = PretrainedConfig.from_pretrained(cfg_name)
43 |
44 |     if args.tokenizer == 'xlm':
45 |         vis_config.type_vocab_size = 2
46 |
47 |     vis_config.max_action_steps = 100
48 |     vis_config.image_feat_size = args.image_feat_size
49 |     vis_config.angle_feat_size = args.angle_feat_size
50 |     vis_config.obj_feat_size = args.obj_feat_size
51 |     vis_config.obj_loc_size = 3
52 |     vis_config.num_l_layers = args.num_l_layers
53 |     vis_config.num_pano_layers = args.num_pano_layers
54 |     vis_config.num_x_layers = args.num_x_layers
55 |     vis_config.graph_sprels = args.graph_sprels
56 |     vis_config.glocal_fuse = args.fusion == 'dynamic'
57 |
58 |     vis_config.fix_lang_embedding = args.fix_lang_embedding
59 |     vis_config.fix_pano_embedding = args.fix_pano_embedding
60 |     vis_config.fix_local_branch = args.fix_local_branch
61 |
62 |     vis_config.update_lang_bert = not args.fix_lang_embedding
63 |     vis_config.output_attentions = True
64 |     vis_config.pred_head_dropout_prob = 0.1
65 |     vis_config.use_lang2visn_attn = False
66 |
67 |     visual_model = GlocalTextPathNavCMT.from_pretrained(
68 |         pretrained_model_name_or_path=None,
69 |         config=vis_config,
70 |         state_dict=new_ckpt_weights)
71 |
72 |     return visual_model
73 |
--------------------------------------------------------------------------------
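
In miniature, the checkpoint-remapping loop in `get_vlnbert_models` normalizes key names before the state dict is consumed (hypothetical keys, for illustration):

```python
ckpt = {
    'module.bert.embeddings.word_embeddings.weight': 0,  # saved under (Distributed)DataParallel
    'global_sap_head.net.0.weight': 1,                   # a prediction head without 'bert.' prefix
}
new_ckpt = {}
for k, v in ckpt.items():
    if k.startswith('module'):
        k = k[7:]                  # drop the 'module.' wrapper prefix
    if '_head' in k or 'sap_fuse' in k:
        new_ckpt['bert.' + k] = v  # move heads under the 'bert.' namespace
    else:
        new_ckpt[k] = v
print(sorted(new_ckpt))
# ['bert.embeddings.word_embeddings.weight', 'bert.global_sap_head.net.0.weight']
```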
/bsg_vln/map_nav_src/r2r/data_utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 | import numpy as np
4 |
5 | def load_instr_datasets(anno_dir, dataset, splits, tokenizer, is_test=True):
6 |     data = []
7 |     for split in splits:
8 |         if "/" not in split:  # the official splits
9 |             if tokenizer == 'bert':
10 |                 filepath = os.path.join(anno_dir, '%s_%s_enc.json' % (dataset.upper(), split))
11 |             elif tokenizer == 'xlm':
12 |                 filepath = os.path.join(anno_dir, '%s_%s_enc_xlmr.json' % (dataset.upper(), split))
13 |             else:
14 |                 raise NotImplementedError('unsupported tokenizer %s' % tokenizer)
15 |
16 |             with open(filepath) as f:
17 |                 new_data = json.load(f)
18 |
19 |             if split == 'val_train_seen':
20 |                 new_data = new_data[:50]
21 |
22 |             if not is_test:
23 |                 if dataset == 'r4r' and split == 'val_unseen':
24 |                     ridxs = np.random.permutation(len(new_data))[:200]
25 |                     new_data = [new_data[ridx] for ridx in ridxs]
26 |         else:  # augmented data
27 |             print('\nLoading augmented data %s for pretraining...' % os.path.basename(split))
28 |             with open(split) as f:
29 |                 new_data = json.load(f)
30 |         # Join
31 |         data += new_data
32 |     return data
33 |
34 | def construct_instrs(anno_dir, dataset, splits, tokenizer, max_instr_len=512, is_test=True):
35 |     data = []
36 |     for i, item in enumerate(load_instr_datasets(anno_dir, dataset, splits, tokenizer, is_test=is_test)):
37 |         # Split multiple instructions into separate entries
38 |         for j, instr in enumerate(item['instructions']):
39 |             new_item = dict(item)
40 |             new_item['instr_id'] = '%s_%d' % (item['path_id'], j)
41 |             new_item['instruction'] = instr
42 |             new_item['instr_encoding'] = item['instr_encodings'][j][:max_instr_len]
43 |             del new_item['instructions']
44 |             del new_item['instr_encodings']
45 |             data.append(new_item)
46 |     return data
--------------------------------------------------------------------------------
/bsg_vln/map_nav_src/scripts/r2r_bev.sh:
--------------------------------------------------------------------------------
1 | DATA_ROOT=../datasets
2 | train_alg=dagger
3 | features=vitbase
4 | ft_dim=768
5 | obj_features=vitbase
6 | obj_ft_dim=768
7 | ngpus=1
8 | seed=0
9 | mode=train # train or test or try
10 |
11 | # setting 2
12 | bev_range=5.0
13 |
14 | seed=0
15 | bev_grid=11
16 | name=r2r_bev_${bev_range}_${bev_grid}_${mode}
17 | candi2bevdir=path_of_candi2bev.json
18 | outdir=${DATA_ROOT}/R2R/exprs_map/finetune/${name}
19 |
20 | flag="--root_dir ${DATA_ROOT}
21 | --dataset r2r
22 | --output_dir ${outdir}
23 | --world_size ${ngpus}
24 | --seed ${seed}
25 | --tokenizer bert
26 |
27 | --bev_weight 0.50
28 | --bev_range ${bev_range}
29 | --bev_height 3.0
30 |
31 |
32 | --bevfeaturepath path_to_bev_feature
33 | --bev_grid ${bev_grid}
34 |
35 | --bevglobal
36 | --candi2bev_dir ${candi2bevdir}
37 |
38 | --enc_full_graph
39 | --graph_sprels
40 | --fusion dynamic
41 |
42 | --expert_policy spl
43 | --train_alg ${train_alg}
44 |
45 |
46 | --num_l_layers 9
47 | --num_x_layers 4
48 | --num_pano_layers 2
49 |
50 | --max_action_len 15
51 | --max_instr_len 200
52 |
53 | --batch_size 8
54 | --lr 1e-5
55 | --iters 200000
56 | --log_every 1000
57 | --optim adamW
58 |
59 | --features ${features}
60 | --image_feat_size ${ft_dim}
61 | --angle_feat_size 4
62 |
63 | --ml_weight 0.2
64 |
65 | --feat_dropout 0.4
66 | --dropout 0.5
67 |
68 | --gamma 0."
69 |
70 | # train
71 | CUDA_VISIBLE_DEVICES='1' python r2r/main_bevnew.py $flag \
72 | --tokenizer bert \
73 | --bert_ckpt_file ../ckpts/model_step_97500.pt \
74 | --eval_first
75 |
76 | # test
77 | # CUDA_VISIBLE_DEVICES='0' python r2r/main_bevnew.py $flag \
78 | # --tokenizer bert \
79 | # --resume_file ../datasets/R2R/trained_models/ \
80 | # --test --submit
--------------------------------------------------------------------------------
/bsg_vln/map_nav_src/utils/data.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 | import jsonlines
4 | import h5py
5 | import networkx as nx
6 | import math
7 | import numpy as np
8 |
9 | class ImageFeaturesDB(object):
10 |     def __init__(self, img_ft_file, image_feat_size):
11 |         self.image_feat_size = image_feat_size
12 |         self.img_ft_file = img_ft_file
13 |         self._feature_store = {}
14 |
15 |     def get_image_feature(self, scan, viewpoint):
16 |         key = '%s_%s' % (scan, viewpoint)
17 |         if key in self._feature_store:
18 |             ft = self._feature_store[key]
19 |         else:
20 |             with h5py.File(self.img_ft_file, 'r') as f:
21 |                 ft = f[key][...][:, :self.image_feat_size].astype(np.float32)
22 |                 self._feature_store[key] = ft
23 |         return ft
24 |
25 | class CamFeatures(object):
26 |     def __init__(self, img_ft_file, image_feat_size=768):
27 |         self.image_feat_size = image_feat_size
28 |         self.img_ft_file = img_ft_file
29 |         self._feature_store = {}
30 |
31 |     def get_image_feature(self, scan, viewpoint, cam_id, deg):
32 |         key = '%s_%s_i%s_%s' % (scan, viewpoint, cam_id, deg)
33 |         if key in self._feature_store:
34 |             ft = self._feature_store[key]
35 |         else:
36 |             with h5py.File(self.img_ft_file, 'r') as f:
37 |                 ft = f[key][:, 1:, :self.image_feat_size].astype(np.float32)
38 |                 self._feature_store[key] = ft
39 |         return ft
40 |
41 |     def get_multi_images_feature(self, scan, viewpoint, cams=False):
42 |         camfeats = []
43 |         if cams:
44 |             for cam_id in range(3):
45 |                 for deg in range(6):
46 |                     camfeats.append(self.get_image_feature(scan, viewpoint, cam_id, deg))
47 |         else:
48 |             for deg in range(6):
49 |                 camfeats.append(self.get_image_feature(scan, viewpoint, '1', deg))
50 |         return np.array(camfeats)
51 |
52 |
53 | def load_nav_graphs(connectivity_dir, scans):
54 |     ''' Load connectivity graph for each scan '''
55 |
56 |     def distance(pose1, pose2):
57 |         ''' Euclidean distance between two graph poses '''
58 |         return ((pose1['pose'][3] - pose2['pose'][3]) ** 2
59 |                 + (pose1['pose'][7] - pose2['pose'][7]) ** 2
60 |                 + (pose1['pose'][11] - pose2['pose'][11]) ** 2) ** 0.5
61 |
62 |     graphs = {}
63 |     for scan in scans:
64 |         with open(os.path.join(connectivity_dir, '%s_connectivity.json' % scan)) as f:
65 |             G = nx.Graph()
66 |             positions = {}
67 |             data = json.load(f)
68 |             for i, item in enumerate(data):
69 |                 if item['included']:
70 |                     for j, conn in enumerate(item['unobstructed']):
71 |                         if conn and data[j]['included']:
72 |                             positions[item['image_id']] = np.array(
73 |                                 [item['pose'][3], item['pose'][7], item['pose'][11]])
74 |                             assert data[j]['unobstructed'][i], 'Graph should be undirected'
75 |                             G.add_edge(item['image_id'], data[j]['image_id'],
76 |                                        weight=distance(item, data[j]))
77 |             nx.set_node_attributes(G, values=positions, name='position')
78 |             graphs[scan] = G
79 |     return graphs
80 |
81 | def new_simulator(connectivity_dir, scan_data_dir=None):
82 |     import MatterSim
83 |
84 |     # Simulator image parameters
85 |     WIDTH = 640
86 |     HEIGHT = 480
87 |     VFOV = 60
88 |
89 |     sim = MatterSim.Simulator()
90 |     if scan_data_dir:
91 |         sim.setDatasetPath(scan_data_dir)
92 |     sim.setNavGraphPath(connectivity_dir)
93 |     sim.setRenderingEnabled(False)
94 |     sim.setCameraResolution(WIDTH, HEIGHT)
95 |     sim.setCameraVFOV(math.radians(VFOV))
96 |     sim.setDiscretizedViewingAngles(True)
97 |     sim.setBatchSize(1)
98 |     sim.initialize()
99 |
100 |     return sim
101 |
102 | def angle_feature(heading, elevation, angle_feat_size):
103 |     return np.array(
104 |         [math.sin(heading), math.cos(heading), math.sin(elevation), math.cos(elevation)] * (angle_feat_size // 4),
105 |         dtype=np.float32)
106 |
107 | def get_point_angle_feature(sim, angle_feat_size, baseViewId=0):
108 |     feature = np.empty((36, angle_feat_size), np.float32)
109 |     base_heading = (baseViewId % 12) * math.radians(30)
110 |     base_elevation = (baseViewId // 12 - 1) * math.radians(30)
111 |
112 |     for ix in range(36):
113 |         if ix == 0:
114 |             sim.newEpisode(['ZMojNkEp431'], ['2f4d90acd4024c269fb0efe49a8ac540'], [0], [math.radians(-30)])
115 |         elif ix % 12 == 0:
116 |             sim.makeAction([0], [1.0], [1.0])
117 |         else:
118 |             sim.makeAction([0], [1.0], [0])
119 |
120 |         state = sim.getState()[0]
121 |         assert state.viewIndex == ix
122 |
123 |         heading = state.heading - base_heading
124 |         elevation = state.elevation - base_elevation
125 |
126 |         feature[ix, :] = angle_feature(heading, elevation, angle_feat_size)
127 |     return feature
128 |
129 | def get_all_point_angle_feature(sim, angle_feat_size):
130 |     return [get_point_angle_feature(sim, angle_feat_size, baseViewId) for baseViewId in range(36)]
131 |
--------------------------------------------------------------------------------
/bsg_vln/map_nav_src/utils/logger.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import math
4 | import time
5 | from collections import OrderedDict
6 |
7 |
8 | def write_to_record_file(data, file_path, verbose=True):
9 |     if verbose:
10 |         print(data)
11 |     record_file = open(file_path, 'a')
12 |     record_file.write(data + '\n')
13 |     record_file.close()
14 |
15 |
16 | def asMinutes(s):
17 |     m = math.floor(s / 60)
18 |     s -= m * 60
19 |     return '%dm %ds' % (m, s)
20 |
21 | def timeSince(since, percent):
22 |     now = time.time()
23 |     s = now - since
24 |     es = s / percent
25 |     rs = es - s
26 |     return '%s (- %s)' % (asMinutes(s), asMinutes(rs))
27 |
28 | class Timer:
29 |     def __init__(self):
30 |         self.cul = OrderedDict()
31 |         self.start = {}
32 |         self.iter = 0
33 |
34 |     def reset(self):
35 |         self.cul = OrderedDict()
36 |         self.start = {}
37 |         self.iter = 0
38 |
39 |     def tic(self, key):
40 |         self.start[key] = time.time()
41 |
42 |     def toc(self, key):
43 |         delta = time.time() - self.start[key]
44 |         if key not in self.cul:
45 |             self.cul[key] = delta
46 |         else:
47 |             self.cul[key] += delta
48 |
49 |     def step(self):
50 |         self.iter += 1
51 |
52 |     def show(self):
53 |         total = sum(self.cul.values())
54 |         for key in self.cul:
55 |             print("%s, total time %0.2f, avg time %0.2f, part of %0.2f" %
56 |                   (key, self.cul[key], self.cul[key] * 1. / self.iter, self.cul[key] * 1. / total))
57 |         print(total / self.iter)
58 |
59 |
60 | def print_progress(iteration, total, prefix='', suffix='', decimals=1, bar_length=100):
61 |     """
62 |     Call in a loop to create a terminal progress bar
63 |     @params:
64 |         iteration   - Required  : current iteration (Int)
65 |         total       - Required  : total iterations (Int)
66 |         prefix      - Optional  : prefix string (Str)
67 |         suffix      - Optional  : suffix string (Str)
68 |         decimals    - Optional  : positive number of decimals in percent complete (Int)
69 |         bar_length  - Optional  : character length of bar (Int)
70 |     """
71 |     str_format = "{0:." + str(decimals) + "f}"
72 |     percents = str_format.format(100 * (iteration / float(total)))
73 |     filled_length = int(round(bar_length * iteration / float(total)))
74 |     bar = '█' * filled_length + '-' * (bar_length - filled_length)
75 |
76 |     sys.stdout.write('\r%s |%s| %s%s %s' % (prefix, bar, percents, '%', suffix))
77 |
78 |     if iteration == total:
79 |         sys.stdout.write('\n')
80 |     sys.stdout.flush()
81 |
--------------------------------------------------------------------------------
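
Typical use of the `Timer` above (illustrative):

```python
import time

from utils.logger import Timer  # assuming map_nav_src is on sys.path

timer = Timer()
for _ in range(3):
    timer.tic('step')   # start the 'step' stopwatch
    time.sleep(0.01)    # stand-in for real work
    timer.toc('step')   # accumulate elapsed time under 'step'
    timer.step()        # count one iteration
timer.show()            # per-key total, per-iteration average, share of total
```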
/bsg_vln/map_nav_src/utils/misc.py:
--------------------------------------------------------------------------------
1 | import random
2 | import numpy as np
3 | import torch
4 |
5 | def set_random_seed(seed):
6 |     torch.manual_seed(seed)
7 |     torch.cuda.manual_seed(seed)
8 |     torch.cuda.manual_seed_all(seed)
9 |     random.seed(seed)
10 |     np.random.seed(seed)
11 |
12 | def length2mask(length, size=None):
13 |     batch_size = len(length)
14 |     size = int(max(length)) if size is None else size
15 |     mask = (torch.arange(size, dtype=torch.int64).unsqueeze(0).repeat(batch_size, 1)
16 |             > (torch.LongTensor(length) - 1).unsqueeze(1)).cuda()
17 |     return mask
18 |
--------------------------------------------------------------------------------
/bsg_vln/map_nav_src/utils/ops.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 |
4 | def pad_tensors(tensors, lens=None, pad=0):
5 |     """B x [T, ...] (no grad: values are copied via .data)"""
6 |     if lens is None:
7 |         lens = [t.size(0) for t in tensors]
8 |     max_len = max(lens)
9 |     bs = len(tensors)
10 |     hid = list(tensors[0].size()[1:])
11 |     size = [bs, max_len] + hid
12 |
13 |     dtype = tensors[0].dtype
14 |     device = tensors[0].device
15 |     output = torch.zeros(*size, dtype=dtype).to(device)
16 |     if pad:
17 |         output.data.fill_(pad)
18 |     for i, (t, l) in enumerate(zip(tensors, lens)):
19 |         output.data[i, :l, ...] = t.data
20 |     return output
21 |
22 | def gen_seq_masks(seq_lens, max_len=None):
23 |     if max_len is None:
24 |         max_len = max(seq_lens)
25 |
26 |     if isinstance(seq_lens, torch.Tensor):
27 |         device = seq_lens.device
28 |         masks = torch.arange(max_len).to(device).repeat(len(seq_lens), 1) < seq_lens.unsqueeze(1)
29 |         return masks
30 |
31 |     if max_len == 0:
32 |         # np.bool was removed in NumPy 1.24; use the builtin bool instead
33 |         return np.zeros((len(seq_lens), 0), dtype=bool)
34 |
35 |     seq_lens = np.array(seq_lens)
36 |     batch_size = len(seq_lens)
37 |     masks = np.arange(max_len).reshape(-1, max_len).repeat(batch_size, 0)
38 |     masks = masks < seq_lens.reshape(-1, 1)
39 |     return masks
--------------------------------------------------------------------------------
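
A small usage sketch for the two helpers above, batching variable-length sequences and building the matching boolean mask (illustrative):

```python
import torch

from utils.ops import pad_tensors, gen_seq_masks  # assuming map_nav_src is on sys.path

seqs = [torch.randn(3, 768), torch.randn(5, 768)]  # two items of different lengths
lens = torch.tensor([t.size(0) for t in seqs])

batch = pad_tensors(seqs)                           # -> (2, 5, 768), zero-padded
masks = gen_seq_masks(lens, max_len=batch.size(1))  # -> (2, 5) bool, True on real steps
assert batch.shape == (2, 5, 768) and int(masks.sum()) == int(lens.sum())
```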
/mp3dbev/data/mp3d_test.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DefaultRui/BEV-Scene-Graph/b73acc223f9ded311d6bee8b8117fe36c212fa55/mp3dbev/data/mp3d_test.pkl
--------------------------------------------------------------------------------
/mp3dbev/data/mp3d_train.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DefaultRui/BEV-Scene-Graph/b73acc223f9ded311d6bee8b8117fe36c212fa55/mp3dbev/data/mp3d_train.pkl
--------------------------------------------------------------------------------
/mp3dbev/data/mp3d_valtest.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DefaultRui/BEV-Scene-Graph/b73acc223f9ded311d6bee8b8117fe36c212fa55/mp3dbev/data/mp3d_valtest.pkl
--------------------------------------------------------------------------------
/mp3dbev/data/mp3d_valunseen.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DefaultRui/BEV-Scene-Graph/b73acc223f9ded311d6bee8b8117fe36c212fa55/mp3dbev/data/mp3d_valunseen.pkl
--------------------------------------------------------------------------------
/mp3dbev/projects/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DefaultRui/BEV-Scene-Graph/b73acc223f9ded311d6bee8b8117fe36c212fa55/mp3dbev/projects/__init__.py
--------------------------------------------------------------------------------
/mp3dbev/projects/configs/_base_/datasets/coco_instance.py:
--------------------------------------------------------------------------------
1 | dataset_type = 'CocoDataset'
2 | data_root = 'data/coco/'
3 | img_norm_cfg = dict(
4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
5 | train_pipeline = [
6 | dict(type='LoadImageFromFile'),
7 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
8 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
9 | dict(type='RandomFlip', flip_ratio=0.5),
10 | dict(type='Normalize', **img_norm_cfg),
11 | dict(type='Pad', size_divisor=32),
12 | dict(type='DefaultFormatBundle'),
13 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
14 | ]
15 | test_pipeline = [
16 | dict(type='LoadImageFromFile'),
17 | dict(
18 | type='MultiScaleFlipAug',
19 | img_scale=(1333, 800),
20 | flip=False,
21 | transforms=[
22 | dict(type='Resize', keep_ratio=True),
23 | dict(type='RandomFlip'),
24 | dict(type='Normalize', **img_norm_cfg),
25 | dict(type='Pad', size_divisor=32),
26 | dict(type='ImageToTensor', keys=['img']),
27 | dict(type='Collect', keys=['img']),
28 | ])
29 | ]
30 | data = dict(
31 | samples_per_gpu=2,
32 | workers_per_gpu=2,
33 | train=dict(
34 | type=dataset_type,
35 | ann_file=data_root + 'annotations/instances_train2017.json',
36 | img_prefix=data_root + 'train2017/',
37 | pipeline=train_pipeline),
38 | val=dict(
39 | type=dataset_type,
40 | ann_file=data_root + 'annotations/instances_val2017.json',
41 | img_prefix=data_root + 'val2017/',
42 | pipeline=test_pipeline),
43 | test=dict(
44 | type=dataset_type,
45 | ann_file=data_root + 'annotations/instances_val2017.json',
46 | img_prefix=data_root + 'val2017/',
47 | pipeline=test_pipeline))
48 | evaluation = dict(metric=['bbox', 'segm'])
49 |
--------------------------------------------------------------------------------
/mp3dbev/projects/configs/_base_/datasets/kitti-3d-3class.py:
--------------------------------------------------------------------------------
1 | # dataset settings
2 | dataset_type = 'KittiDataset'
3 | data_root = 'data/kitti/'
4 | class_names = ['Pedestrian', 'Cyclist', 'Car']
5 | point_cloud_range = [0, -40, -3, 70.4, 40, 1]
6 | input_modality = dict(use_lidar=True, use_camera=False)
7 | db_sampler = dict(
8 | data_root=data_root,
9 | info_path=data_root + 'kitti_dbinfos_train.pkl',
10 | rate=1.0,
11 | prepare=dict(
12 | filter_by_difficulty=[-1],
13 | filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)),
14 | classes=class_names,
15 | sample_groups=dict(Car=12, Pedestrian=6, Cyclist=6))
16 |
17 | file_client_args = dict(backend='disk')
18 | # Uncomment the following if use ceph or other file clients.
19 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
20 | # for more details.
21 | # file_client_args = dict(
22 | # backend='petrel', path_mapping=dict(data='s3://kitti_data/'))
23 |
24 | train_pipeline = [
25 | dict(
26 | type='LoadPointsFromFile',
27 | coord_type='LIDAR',
28 | load_dim=4,
29 | use_dim=4,
30 | file_client_args=file_client_args),
31 | dict(
32 | type='LoadAnnotations3D',
33 | with_bbox_3d=True,
34 | with_label_3d=True,
35 | file_client_args=file_client_args),
36 | dict(type='ObjectSample', db_sampler=db_sampler),
37 | dict(
38 | type='ObjectNoise',
39 | num_try=100,
40 | translation_std=[1.0, 1.0, 0.5],
41 | global_rot_range=[0.0, 0.0],
42 | rot_range=[-0.78539816, 0.78539816]),
43 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
44 | dict(
45 | type='GlobalRotScaleTrans',
46 | rot_range=[-0.78539816, 0.78539816],
47 | scale_ratio_range=[0.95, 1.05]),
48 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
49 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
50 | dict(type='PointShuffle'),
51 | dict(type='DefaultFormatBundle3D', class_names=class_names),
52 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
53 | ]
54 | test_pipeline = [
55 | dict(
56 | type='LoadPointsFromFile',
57 | coord_type='LIDAR',
58 | load_dim=4,
59 | use_dim=4,
60 | file_client_args=file_client_args),
61 | dict(
62 | type='MultiScaleFlipAug3D',
63 | img_scale=(1333, 800),
64 | pts_scale_ratio=1,
65 | flip=False,
66 | transforms=[
67 | dict(
68 | type='GlobalRotScaleTrans',
69 | rot_range=[0, 0],
70 | scale_ratio_range=[1., 1.],
71 | translation_std=[0, 0, 0]),
72 | dict(type='RandomFlip3D'),
73 | dict(
74 | type='PointsRangeFilter', point_cloud_range=point_cloud_range),
75 | dict(
76 | type='DefaultFormatBundle3D',
77 | class_names=class_names,
78 | with_label=False),
79 | dict(type='Collect3D', keys=['points'])
80 | ])
81 | ]
82 | # construct a pipeline for data and gt loading in show function
83 | # please keep its loading function consistent with test_pipeline (e.g. client)
84 | eval_pipeline = [
85 | dict(
86 | type='LoadPointsFromFile',
87 | coord_type='LIDAR',
88 | load_dim=4,
89 | use_dim=4,
90 | file_client_args=file_client_args),
91 | dict(
92 | type='DefaultFormatBundle3D',
93 | class_names=class_names,
94 | with_label=False),
95 | dict(type='Collect3D', keys=['points'])
96 | ]
97 |
98 | data = dict(
99 | samples_per_gpu=6,
100 | workers_per_gpu=4,
101 | train=dict(
102 | type='RepeatDataset',
103 | times=2,
104 | dataset=dict(
105 | type=dataset_type,
106 | data_root=data_root,
107 | ann_file=data_root + 'kitti_infos_train.pkl',
108 | split='training',
109 | pts_prefix='velodyne_reduced',
110 | pipeline=train_pipeline,
111 | modality=input_modality,
112 | classes=class_names,
113 | test_mode=False,
114 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
115 | # and box_type_3d='Depth' in sunrgbd and scannet dataset.
116 | box_type_3d='LiDAR')),
117 | val=dict(
118 | type=dataset_type,
119 | data_root=data_root,
120 | ann_file=data_root + 'kitti_infos_val.pkl',
121 | split='training',
122 | pts_prefix='velodyne_reduced',
123 | pipeline=test_pipeline,
124 | modality=input_modality,
125 | classes=class_names,
126 | test_mode=True,
127 | box_type_3d='LiDAR'),
128 | test=dict(
129 | type=dataset_type,
130 | data_root=data_root,
131 | ann_file=data_root + 'kitti_infos_val.pkl',
132 | split='training',
133 | pts_prefix='velodyne_reduced',
134 | pipeline=test_pipeline,
135 | modality=input_modality,
136 | classes=class_names,
137 | test_mode=True,
138 | box_type_3d='LiDAR'))
139 |
140 | evaluation = dict(interval=1, pipeline=eval_pipeline)
141 |
--------------------------------------------------------------------------------
/mp3dbev/projects/configs/_base_/datasets/kitti-3d-car.py:
--------------------------------------------------------------------------------
1 | # dataset settings
2 | dataset_type = 'KittiDataset'
3 | data_root = 'data/kitti/'
4 | class_names = ['Car']
5 | point_cloud_range = [0, -40, -3, 70.4, 40, 1]
6 | input_modality = dict(use_lidar=True, use_camera=False)
7 | db_sampler = dict(
8 | data_root=data_root,
9 | info_path=data_root + 'kitti_dbinfos_train.pkl',
10 | rate=1.0,
11 | prepare=dict(filter_by_difficulty=[-1], filter_by_min_points=dict(Car=5)),
12 | classes=class_names,
13 | sample_groups=dict(Car=15))
14 |
15 | file_client_args = dict(backend='disk')
16 | # Uncomment the following if use ceph or other file clients.
17 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
18 | # for more details.
19 | # file_client_args = dict(
20 | # backend='petrel', path_mapping=dict(data='s3://kitti_data/'))
21 |
22 | train_pipeline = [
23 | dict(
24 | type='LoadPointsFromFile',
25 | coord_type='LIDAR',
26 | load_dim=4,
27 | use_dim=4,
28 | file_client_args=file_client_args),
29 | dict(
30 | type='LoadAnnotations3D',
31 | with_bbox_3d=True,
32 | with_label_3d=True,
33 | file_client_args=file_client_args),
34 | dict(type='ObjectSample', db_sampler=db_sampler),
35 | dict(
36 | type='ObjectNoise',
37 | num_try=100,
38 | translation_std=[1.0, 1.0, 0.5],
39 | global_rot_range=[0.0, 0.0],
40 | rot_range=[-0.78539816, 0.78539816]),
41 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
42 | dict(
43 | type='GlobalRotScaleTrans',
44 | rot_range=[-0.78539816, 0.78539816],
45 | scale_ratio_range=[0.95, 1.05]),
46 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
47 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
48 | dict(type='PointShuffle'),
49 | dict(type='DefaultFormatBundle3D', class_names=class_names),
50 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
51 | ]
52 | test_pipeline = [
53 | dict(
54 | type='LoadPointsFromFile',
55 | coord_type='LIDAR',
56 | load_dim=4,
57 | use_dim=4,
58 | file_client_args=file_client_args),
59 | dict(
60 | type='MultiScaleFlipAug3D',
61 | img_scale=(1333, 800),
62 | pts_scale_ratio=1,
63 | flip=False,
64 | transforms=[
65 | dict(
66 | type='GlobalRotScaleTrans',
67 | rot_range=[0, 0],
68 | scale_ratio_range=[1., 1.],
69 | translation_std=[0, 0, 0]),
70 | dict(type='RandomFlip3D'),
71 | dict(
72 | type='PointsRangeFilter', point_cloud_range=point_cloud_range),
73 | dict(
74 | type='DefaultFormatBundle3D',
75 | class_names=class_names,
76 | with_label=False),
77 | dict(type='Collect3D', keys=['points'])
78 | ])
79 | ]
80 | # construct a pipeline for data and gt loading in show function
81 | # please keep its loading function consistent with test_pipeline (e.g. client)
82 | eval_pipeline = [
83 | dict(
84 | type='LoadPointsFromFile',
85 | coord_type='LIDAR',
86 | load_dim=4,
87 | use_dim=4,
88 | file_client_args=file_client_args),
89 | dict(
90 | type='DefaultFormatBundle3D',
91 | class_names=class_names,
92 | with_label=False),
93 | dict(type='Collect3D', keys=['points'])
94 | ]
95 |
96 | data = dict(
97 | samples_per_gpu=6,
98 | workers_per_gpu=4,
99 | train=dict(
100 | type='RepeatDataset',
101 | times=2,
102 | dataset=dict(
103 | type=dataset_type,
104 | data_root=data_root,
105 | ann_file=data_root + 'kitti_infos_train.pkl',
106 | split='training',
107 | pts_prefix='velodyne_reduced',
108 | pipeline=train_pipeline,
109 | modality=input_modality,
110 | classes=class_names,
111 | test_mode=False,
112 | # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
113 | # and box_type_3d='Depth' in sunrgbd and scannet dataset.
114 | box_type_3d='LiDAR')),
115 | val=dict(
116 | type=dataset_type,
117 | data_root=data_root,
118 | ann_file=data_root + 'kitti_infos_val.pkl',
119 | split='training',
120 | pts_prefix='velodyne_reduced',
121 | pipeline=test_pipeline,
122 | modality=input_modality,
123 | classes=class_names,
124 | test_mode=True,
125 | box_type_3d='LiDAR'),
126 | test=dict(
127 | type=dataset_type,
128 | data_root=data_root,
129 | ann_file=data_root + 'kitti_infos_val.pkl',
130 | split='training',
131 | pts_prefix='velodyne_reduced',
132 | pipeline=test_pipeline,
133 | modality=input_modality,
134 | classes=class_names,
135 | test_mode=True,
136 | box_type_3d='LiDAR'))
137 |
138 | evaluation = dict(interval=1, pipeline=eval_pipeline)
139 |
--------------------------------------------------------------------------------
/mp3dbev/projects/configs/_base_/datasets/lyft-3d.py:
--------------------------------------------------------------------------------
1 | # If point cloud range is changed, the models should also change their point
2 | # cloud range accordingly
3 | point_cloud_range = [-80, -80, -5, 80, 80, 3]
4 | # For Lyft we usually do 9-class detection
5 | class_names = [
6 | 'car', 'truck', 'bus', 'emergency_vehicle', 'other_vehicle', 'motorcycle',
7 | 'bicycle', 'pedestrian', 'animal'
8 | ]
9 | dataset_type = 'LyftDataset'
10 | data_root = 'data/lyft/'
11 | # Input modality for Lyft dataset, this is consistent with the submission
12 | # format which requires the information in input_modality.
13 | input_modality = dict(
14 | use_lidar=True,
15 | use_camera=False,
16 | use_radar=False,
17 | use_map=False,
18 | use_external=False)
19 | file_client_args = dict(backend='disk')
20 | # Uncomment the following if use ceph or other file clients.
21 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
22 | # for more details.
23 | # file_client_args = dict(
24 | # backend='petrel',
25 | # path_mapping=dict({
26 | # './data/lyft/': 's3://lyft/lyft/',
27 | # 'data/lyft/': 's3://lyft/lyft/'
28 | # }))
29 | train_pipeline = [
30 | dict(
31 | type='LoadPointsFromFile',
32 | coord_type='LIDAR',
33 | load_dim=5,
34 | use_dim=5,
35 | file_client_args=file_client_args),
36 | dict(
37 | type='LoadPointsFromMultiSweeps',
38 | sweeps_num=10,
39 | file_client_args=file_client_args),
40 | dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
41 | dict(
42 | type='GlobalRotScaleTrans',
43 | rot_range=[-0.3925, 0.3925],
44 | scale_ratio_range=[0.95, 1.05],
45 | translation_std=[0, 0, 0]),
46 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
47 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
48 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
49 | dict(type='PointShuffle'),
50 | dict(type='DefaultFormatBundle3D', class_names=class_names),
51 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
52 | ]
53 | test_pipeline = [
54 | dict(
55 | type='LoadPointsFromFile',
56 | coord_type='LIDAR',
57 | load_dim=5,
58 | use_dim=5,
59 | file_client_args=file_client_args),
60 | dict(
61 | type='LoadPointsFromMultiSweeps',
62 | sweeps_num=10,
63 | file_client_args=file_client_args),
64 | dict(
65 | type='MultiScaleFlipAug3D',
66 | img_scale=(1333, 800),
67 | pts_scale_ratio=1,
68 | flip=False,
69 | transforms=[
70 | dict(
71 | type='GlobalRotScaleTrans',
72 | rot_range=[0, 0],
73 | scale_ratio_range=[1., 1.],
74 | translation_std=[0, 0, 0]),
75 | dict(type='RandomFlip3D'),
76 | dict(
77 | type='PointsRangeFilter', point_cloud_range=point_cloud_range),
78 | dict(
79 | type='DefaultFormatBundle3D',
80 | class_names=class_names,
81 | with_label=False),
82 | dict(type='Collect3D', keys=['points'])
83 | ])
84 | ]
85 | # construct a pipeline for data and gt loading in show function
86 | # please keep its loading function consistent with test_pipeline (e.g. client)
87 | eval_pipeline = [
88 | dict(
89 | type='LoadPointsFromFile',
90 | coord_type='LIDAR',
91 | load_dim=5,
92 | use_dim=5,
93 | file_client_args=file_client_args),
94 | dict(
95 | type='LoadPointsFromMultiSweeps',
96 | sweeps_num=10,
97 | file_client_args=file_client_args),
98 | dict(
99 | type='DefaultFormatBundle3D',
100 | class_names=class_names,
101 | with_label=False),
102 | dict(type='Collect3D', keys=['points'])
103 | ]
104 |
105 | data = dict(
106 | samples_per_gpu=2,
107 | workers_per_gpu=2,
108 | train=dict(
109 | type=dataset_type,
110 | data_root=data_root,
111 | ann_file=data_root + 'lyft_infos_train.pkl',
112 | pipeline=train_pipeline,
113 | classes=class_names,
114 | modality=input_modality,
115 | test_mode=False),
116 | val=dict(
117 | type=dataset_type,
118 | data_root=data_root,
119 | ann_file=data_root + 'lyft_infos_val.pkl',
120 | pipeline=test_pipeline,
121 | classes=class_names,
122 | modality=input_modality,
123 | test_mode=True),
124 | test=dict(
125 | type=dataset_type,
126 | data_root=data_root,
127 | ann_file=data_root + 'lyft_infos_test.pkl',
128 | pipeline=test_pipeline,
129 | classes=class_names,
130 | modality=input_modality,
131 | test_mode=True))
132 | # For Lyft dataset, we usually evaluate the model at the end of training.
133 | # Since the models are trained by 24 epochs by default, we set evaluation
134 | # interval to be 24. Please change the interval accordingly if you do not
135 | # use a default schedule.
136 | evaluation = dict(interval=24, pipeline=eval_pipeline)
137 |
--------------------------------------------------------------------------------
/mp3dbev/projects/configs/_base_/datasets/nuim_instance.py:
--------------------------------------------------------------------------------
1 | dataset_type = 'CocoDataset'
2 | data_root = 'data/nuimages/'
3 | class_names = [
4 | 'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
5 | 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
6 | ]
7 | img_norm_cfg = dict(
8 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
9 | train_pipeline = [
10 | dict(type='LoadImageFromFile'),
11 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
12 | dict(
13 | type='Resize',
14 | img_scale=[(1280, 720), (1920, 1080)],
15 | multiscale_mode='range',
16 | keep_ratio=True),
17 | dict(type='RandomFlip', flip_ratio=0.5),
18 | dict(type='Normalize', **img_norm_cfg),
19 | dict(type='Pad', size_divisor=32),
20 | dict(type='DefaultFormatBundle'),
21 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
22 | ]
23 | test_pipeline = [
24 | dict(type='LoadImageFromFile'),
25 | dict(
26 | type='MultiScaleFlipAug',
27 | img_scale=(1600, 900),
28 | flip=False,
29 | transforms=[
30 | dict(type='Resize', keep_ratio=True),
31 | dict(type='RandomFlip'),
32 | dict(type='Normalize', **img_norm_cfg),
33 | dict(type='Pad', size_divisor=32),
34 | dict(type='ImageToTensor', keys=['img']),
35 | dict(type='Collect', keys=['img']),
36 | ])
37 | ]
38 | data = dict(
39 | samples_per_gpu=2,
40 | workers_per_gpu=2,
41 | train=dict(
42 | type=dataset_type,
43 | ann_file=data_root + 'annotations/nuimages_v1.0-train.json',
44 | img_prefix=data_root,
45 | classes=class_names,
46 | pipeline=train_pipeline),
47 | val=dict(
48 | type=dataset_type,
49 | ann_file=data_root + 'annotations/nuimages_v1.0-val.json',
50 | img_prefix=data_root,
51 | classes=class_names,
52 | pipeline=test_pipeline),
53 | test=dict(
54 | type=dataset_type,
55 | ann_file=data_root + 'annotations/nuimages_v1.0-val.json',
56 | img_prefix=data_root,
57 | classes=class_names,
58 | pipeline=test_pipeline))
59 | evaluation = dict(metric=['bbox', 'segm'])
60 |
--------------------------------------------------------------------------------
/mp3dbev/projects/configs/_base_/datasets/nus-mono3d.py:
--------------------------------------------------------------------------------
1 | dataset_type = 'CustomNuScenesMonoDataset'
2 | data_root = 'data/nuscenes/'
3 | class_names = [
4 | 'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
5 | 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
6 | ]
7 | # Input modality for nuScenes dataset, this is consistent with the submission
8 | # format which requires the information in input_modality.
9 | input_modality = dict(
10 | use_lidar=False,
11 | use_camera=True,
12 | use_radar=False,
13 | use_map=False,
14 | use_external=False)
15 | img_norm_cfg = dict(
16 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
17 | train_pipeline = [
18 | dict(type='LoadImageFromFileMono3D'),
19 | dict(
20 | type='LoadAnnotations3D',
21 | with_bbox=True,
22 | with_label=True,
23 | with_attr_label=True,
24 | with_bbox_3d=True,
25 | with_label_3d=True,
26 | with_bbox_depth=True),
27 | dict(type='Resize', img_scale=(1600, 900), keep_ratio=True),
28 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
29 | dict(type='Normalize', **img_norm_cfg),
30 | dict(type='Pad', size_divisor=32),
31 | dict(type='DefaultFormatBundle3D', class_names=class_names),
32 | dict(
33 | type='Collect3D',
34 | keys=[
35 | 'img', 'gt_bboxes', 'gt_labels', 'attr_labels', 'gt_bboxes_3d',
36 | 'gt_labels_3d', 'centers2d', 'depths'
37 | ]),
38 | ]
39 | test_pipeline = [
40 | dict(type='LoadImageFromFileMono3D'),
41 | dict(
42 | type='MultiScaleFlipAug',
43 | scale_factor=1.0,
44 | flip=False,
45 | transforms=[
46 | dict(type='RandomFlip3D'),
47 | dict(type='Normalize', **img_norm_cfg),
48 | dict(type='Pad', size_divisor=32),
49 | dict(
50 | type='DefaultFormatBundle3D',
51 | class_names=class_names,
52 | with_label=False),
53 | dict(type='Collect3D', keys=['img']),
54 | ])
55 | ]
56 | # construct a pipeline for data and gt loading in show function
57 | # please keep its loading function consistent with test_pipeline (e.g. client)
58 | eval_pipeline = [
59 | dict(type='LoadImageFromFileMono3D'),
60 | dict(
61 | type='DefaultFormatBundle3D',
62 | class_names=class_names,
63 | with_label=False),
64 | dict(type='Collect3D', keys=['img'])
65 | ]
66 |
67 | data = dict(
68 | samples_per_gpu=2,
69 | workers_per_gpu=2,
70 | train=dict(
71 | type=dataset_type,
72 | data_root=data_root,
73 | ann_file=data_root + 'nuscenes_infos_train_mono3d.coco.json',
74 | img_prefix=data_root,
75 | classes=class_names,
76 | pipeline=train_pipeline,
77 | modality=input_modality,
78 | test_mode=False,
79 | box_type_3d='Camera'),
80 | val=dict(
81 | type=dataset_type,
82 | data_root=data_root,
83 | ann_file=data_root + 'nuscenes_infos_val_mono3d.coco.json',
84 | img_prefix=data_root,
85 | classes=class_names,
86 | pipeline=test_pipeline,
87 | modality=input_modality,
88 | test_mode=True,
89 | box_type_3d='Camera'),
90 | test=dict(
91 | type=dataset_type,
92 | data_root=data_root,
93 | ann_file=data_root + 'nuscenes_infos_val_mono3d.coco.json',
94 | img_prefix=data_root,
95 | classes=class_names,
96 | pipeline=test_pipeline,
97 | modality=input_modality,
98 | test_mode=True,
99 | box_type_3d='Camera'))
100 | evaluation = dict(interval=2)
101 |
--------------------------------------------------------------------------------
/mp3dbev/projects/configs/_base_/datasets/range100_lyft-3d.py:
--------------------------------------------------------------------------------
1 | # If point cloud range is changed, the models should also change their point
2 | # cloud range accordingly
3 | point_cloud_range = [-100, -100, -5, 100, 100, 3]
4 | # For Lyft we usually do 9-class detection
5 | class_names = [
6 | 'car', 'truck', 'bus', 'emergency_vehicle', 'other_vehicle', 'motorcycle',
7 | 'bicycle', 'pedestrian', 'animal'
8 | ]
9 | dataset_type = 'LyftDataset'
10 | data_root = 'data/lyft/'
11 | # Input modality for the Lyft dataset; this is consistent with the
12 | # submission format, which requires the information in input_modality.
13 | input_modality = dict(
14 | use_lidar=True,
15 | use_camera=False,
16 | use_radar=False,
17 | use_map=False,
18 | use_external=False)
19 | file_client_args = dict(backend='disk')
20 | # Uncomment the following if you use ceph or other file clients.
21 | # See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
22 | # for more details.
23 | # file_client_args = dict(
24 | # backend='petrel',
25 | # path_mapping=dict({
26 | # './data/lyft/': 's3://lyft/lyft/',
27 | # 'data/lyft/': 's3://lyft/lyft/'
28 | # }))
29 | train_pipeline = [
30 | dict(
31 | type='LoadPointsFromFile',
32 | coord_type='LIDAR',
33 | load_dim=5,
34 | use_dim=5,
35 | file_client_args=file_client_args),
36 | dict(
37 | type='LoadPointsFromMultiSweeps',
38 | sweeps_num=10,
39 | file_client_args=file_client_args),
40 | dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
41 | dict(
42 | type='GlobalRotScaleTrans',
43 | rot_range=[-0.3925, 0.3925],
44 | scale_ratio_range=[0.95, 1.05],
45 | translation_std=[0, 0, 0]),
46 | dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
47 | dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
48 | dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
49 | dict(type='PointShuffle'),
50 | dict(type='DefaultFormatBundle3D', class_names=class_names),
51 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
52 | ]
53 | test_pipeline = [
54 | dict(
55 | type='LoadPointsFromFile',
56 | coord_type='LIDAR',
57 | load_dim=5,
58 | use_dim=5,
59 | file_client_args=file_client_args),
60 | dict(
61 | type='LoadPointsFromMultiSweeps',
62 | sweeps_num=10,
63 | file_client_args=file_client_args),
64 | dict(
65 | type='MultiScaleFlipAug3D',
66 | img_scale=(1333, 800),
67 | pts_scale_ratio=1,
68 | flip=False,
69 | transforms=[
70 | dict(
71 | type='GlobalRotScaleTrans',
72 | rot_range=[0, 0],
73 | scale_ratio_range=[1., 1.],
74 | translation_std=[0, 0, 0]),
75 | dict(type='RandomFlip3D'),
76 | dict(
77 | type='PointsRangeFilter', point_cloud_range=point_cloud_range),
78 | dict(
79 | type='DefaultFormatBundle3D',
80 | class_names=class_names,
81 | with_label=False),
82 | dict(type='Collect3D', keys=['points'])
83 | ])
84 | ]
85 | # construct a pipeline for data and gt loading in the show function
86 | # please keep its loading steps consistent with test_pipeline (e.g. the file client)
87 | eval_pipeline = [
88 | dict(
89 | type='LoadPointsFromFile',
90 | coord_type='LIDAR',
91 | load_dim=5,
92 | use_dim=5,
93 | file_client_args=file_client_args),
94 | dict(
95 | type='LoadPointsFromMultiSweeps',
96 | sweeps_num=10,
97 | file_client_args=file_client_args),
98 | dict(
99 | type='DefaultFormatBundle3D',
100 | class_names=class_names,
101 | with_label=False),
102 | dict(type='Collect3D', keys=['points'])
103 | ]
104 |
105 | data = dict(
106 | samples_per_gpu=2,
107 | workers_per_gpu=2,
108 | train=dict(
109 | type=dataset_type,
110 | data_root=data_root,
111 | ann_file=data_root + 'lyft_infos_train.pkl',
112 | pipeline=train_pipeline,
113 | classes=class_names,
114 | modality=input_modality,
115 | test_mode=False),
116 | val=dict(
117 | type=dataset_type,
118 | data_root=data_root,
119 | ann_file=data_root + 'lyft_infos_val.pkl',
120 | pipeline=test_pipeline,
121 | classes=class_names,
122 | modality=input_modality,
123 | test_mode=True),
124 | test=dict(
125 | type=dataset_type,
126 | data_root=data_root,
127 | ann_file=data_root + 'lyft_infos_test.pkl',
128 | pipeline=test_pipeline,
129 | classes=class_names,
130 | modality=input_modality,
131 | test_mode=True))
132 | # For the Lyft dataset, we usually evaluate the model at the end of training.
133 | # Since the models are trained for 24 epochs by default, we set the
134 | # evaluation interval to 24. Please change the interval accordingly if you
135 | # do not use the default schedule.
136 | evaluation = dict(interval=24, pipeline=eval_pipeline)
137 |
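Note how `eval_pipeline` repeats the loading steps of `test_pipeline` but drops the `MultiScaleFlipAug3D` wrapper, so evaluation and the show function receive raw points without the test-time-aug plumbing. A hedged sketch of a check for that invariant (again assuming mmcv and repo-root paths):

```python
# Sketch: eval_pipeline should be test_pipeline minus the TTA wrapper.
from mmcv import Config

cfg = Config.fromfile(
    'mp3dbev/projects/configs/_base_/datasets/range100_lyft-3d.py')
test_types = [t['type'] for t in cfg.test_pipeline]
eval_types = [t['type'] for t in cfg.eval_pipeline]
assert 'MultiScaleFlipAug3D' in test_types
assert 'MultiScaleFlipAug3D' not in eval_types
assert eval_types[:2] == test_types[:2]  # same point-loading steps
```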
--------------------------------------------------------------------------------
/mp3dbev/projects/configs/_base_/datasets/s3dis-3d-5class.py:
--------------------------------------------------------------------------------
1 | # dataset settings
2 | dataset_type = 'S3DISDataset'
3 | data_root = './data/s3dis/'
4 | class_names = ('table', 'chair', 'sofa', 'bookcase', 'board')
5 | train_area = [1, 2, 3, 4, 6]
6 | test_area = 5
7 |
8 | train_pipeline = [
9 | dict(
10 | type='LoadPointsFromFile',
11 | coord_type='DEPTH',
12 | shift_height=True,
13 | load_dim=6,
14 | use_dim=[0, 1, 2, 3, 4, 5]),
15 | dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
16 | dict(type='PointSample', num_points=40000),
17 | dict(
18 | type='RandomFlip3D',
19 | sync_2d=False,
20 | flip_ratio_bev_horizontal=0.5,
21 | flip_ratio_bev_vertical=0.5),
22 | dict(
23 | type='GlobalRotScaleTrans',
24 | # following the ScanNet dataset, the rotation range is 5 degrees
25 | rot_range=[-0.087266, 0.087266],
26 | scale_ratio_range=[1.0, 1.0],
27 | shift_height=True),
28 | dict(type='DefaultFormatBundle3D', class_names=class_names),
29 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
30 | ]
31 | test_pipeline = [
32 | dict(
33 | type='LoadPointsFromFile',
34 | coord_type='DEPTH',
35 | shift_height=True,
36 | load_dim=6,
37 | use_dim=[0, 1, 2, 3, 4, 5]),
38 | dict(
39 | type='MultiScaleFlipAug3D',
40 | img_scale=(1333, 800),
41 | pts_scale_ratio=1,
42 | flip=False,
43 | transforms=[
44 | dict(
45 | type='GlobalRotScaleTrans',
46 | rot_range=[0, 0],
47 | scale_ratio_range=[1., 1.],
48 | translation_std=[0, 0, 0]),
49 | dict(
50 | type='RandomFlip3D',
51 | sync_2d=False,
52 | flip_ratio_bev_horizontal=0.5,
53 | flip_ratio_bev_vertical=0.5),
54 | dict(type='PointSample', num_points=40000),
55 | dict(
56 | type='DefaultFormatBundle3D',
57 | class_names=class_names,
58 | with_label=False),
59 | dict(type='Collect3D', keys=['points'])
60 | ])
61 | ]
62 | # construct a pipeline for data and gt loading in the show function
63 | # please keep its loading steps consistent with test_pipeline (e.g. the file client)
64 | eval_pipeline = [
65 | dict(
66 | type='LoadPointsFromFile',
67 | coord_type='DEPTH',
68 | shift_height=False,
69 | load_dim=6,
70 | use_dim=[0, 1, 2, 3, 4, 5]),
71 | dict(
72 | type='DefaultFormatBundle3D',
73 | class_names=class_names,
74 | with_label=False),
75 | dict(type='Collect3D', keys=['points'])
76 | ]
77 |
78 | data = dict(
79 | samples_per_gpu=8,
80 | workers_per_gpu=4,
81 | train=dict(
82 | type='RepeatDataset',
83 | times=5,
84 | dataset=dict(
85 | type='ConcatDataset',
86 | datasets=[
87 | dict(
88 | type=dataset_type,
89 | data_root=data_root,
90 | ann_file=data_root + f's3dis_infos_Area_{i}.pkl',
91 | pipeline=train_pipeline,
92 | filter_empty_gt=False,
93 | classes=class_names,
94 | box_type_3d='Depth') for i in train_area
95 | ],
96 | separate_eval=False)),
97 | val=dict(
98 | type=dataset_type,
99 | data_root=data_root,
100 | ann_file=data_root + f's3dis_infos_Area_{test_area}.pkl',
101 | pipeline=test_pipeline,
102 | classes=class_names,
103 | test_mode=True,
104 | box_type_3d='Depth'),
105 | test=dict(
106 | type=dataset_type,
107 | data_root=data_root,
108 | ann_file=data_root + f's3dis_infos_Area_{test_area}.pkl',
109 | pipeline=test_pipeline,
110 | classes=class_names,
111 | test_mode=True,
112 | box_type_3d='Depth'))
113 |
114 | evaluation = dict(pipeline=eval_pipeline)
115 |
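The train split above expands a list comprehension over `train_area` into one dataset per area, which `ConcatDataset` stitches together and `RepeatDataset` traverses five times per epoch. Expanded by hand (a standalone sketch, runnable without mmcv):

```python
# Sketch: what the ann_file comprehension above expands to.
data_root = './data/s3dis/'
train_area = [1, 2, 3, 4, 6]
ann_files = [data_root + f's3dis_infos_Area_{i}.pkl' for i in train_area]
print(ann_files[0])    # ./data/s3dis/s3dis_infos_Area_1.pkl
print(len(ann_files))  # 5 areas; Area_5 is held out for val/test
```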
--------------------------------------------------------------------------------
/mp3dbev/projects/configs/_base_/datasets/s3dis_seg-3d-13class.py:
--------------------------------------------------------------------------------
1 | # dataset settings
2 | dataset_type = 'S3DISSegDataset'
3 | data_root = './data/s3dis/'
4 | class_names = ('ceiling', 'floor', 'wall', 'beam', 'column', 'window', 'door',
5 | 'table', 'chair', 'sofa', 'bookcase', 'board', 'clutter')
6 | num_points = 4096
7 | train_area = [1, 2, 3, 4, 6]
8 | test_area = 5
9 | train_pipeline = [
10 | dict(
11 | type='LoadPointsFromFile',
12 | coord_type='DEPTH',
13 | shift_height=False,
14 | use_color=True,
15 | load_dim=6,
16 | use_dim=[0, 1, 2, 3, 4, 5]),
17 | dict(
18 | type='LoadAnnotations3D',
19 | with_bbox_3d=False,
20 | with_label_3d=False,
21 | with_mask_3d=False,
22 | with_seg_3d=True),
23 | dict(
24 | type='PointSegClassMapping',
25 | valid_cat_ids=tuple(range(len(class_names))),
26 | max_cat_id=13),
27 | dict(
28 | type='IndoorPatchPointSample',
29 | num_points=num_points,
30 | block_size=1.0,
31 | ignore_index=len(class_names),
32 | use_normalized_coord=True,
33 | enlarge_size=0.2,
34 | min_unique_num=None),
35 | dict(type='NormalizePointsColor', color_mean=None),
36 | dict(type='DefaultFormatBundle3D', class_names=class_names),
37 | dict(type='Collect3D', keys=['points', 'pts_semantic_mask'])
38 | ]
39 | test_pipeline = [
40 | dict(
41 | type='LoadPointsFromFile',
42 | coord_type='DEPTH',
43 | shift_height=False,
44 | use_color=True,
45 | load_dim=6,
46 | use_dim=[0, 1, 2, 3, 4, 5]),
47 | dict(type='NormalizePointsColor', color_mean=None),
48 | dict(
49 | # a wrapper in order to successfully call the test function
50 | # (we don't actually perform test-time augmentation)
51 | type='MultiScaleFlipAug3D',
52 | img_scale=(1333, 800),
53 | pts_scale_ratio=1,
54 | flip=False,
55 | transforms=[
56 | dict(
57 | type='GlobalRotScaleTrans',
58 | rot_range=[0, 0],
59 | scale_ratio_range=[1., 1.],
60 | translation_std=[0, 0, 0]),
61 | dict(
62 | type='RandomFlip3D',
63 | sync_2d=False,
64 | flip_ratio_bev_horizontal=0.0,
65 | flip_ratio_bev_vertical=0.0),
66 | dict(
67 | type='DefaultFormatBundle3D',
68 | class_names=class_names,
69 | with_label=False),
70 | dict(type='Collect3D', keys=['points'])
71 | ])
72 | ]
73 | # construct a pipeline for data and gt loading in the show function
74 | # please keep its loading steps consistent with test_pipeline (e.g. the file client)
75 | # we need to load gt seg_mask!
76 | eval_pipeline = [
77 | dict(
78 | type='LoadPointsFromFile',
79 | coord_type='DEPTH',
80 | shift_height=False,
81 | use_color=True,
82 | load_dim=6,
83 | use_dim=[0, 1, 2, 3, 4, 5]),
84 | dict(
85 | type='LoadAnnotations3D',
86 | with_bbox_3d=False,
87 | with_label_3d=False,
88 | with_mask_3d=False,
89 | with_seg_3d=True),
90 | dict(
91 | type='PointSegClassMapping',
92 | valid_cat_ids=tuple(range(len(class_names))),
93 | max_cat_id=13),
94 | dict(
95 | type='DefaultFormatBundle3D',
96 | with_label=False,
97 | class_names=class_names),
98 | dict(type='Collect3D', keys=['points', 'pts_semantic_mask'])
99 | ]
100 |
101 | data = dict(
102 | samples_per_gpu=8,
103 | workers_per_gpu=4,
104 | # train on area 1, 2, 3, 4, 6
105 | # test on area 5
106 | train=dict(
107 | type=dataset_type,
108 | data_root=data_root,
109 | ann_files=[
110 | data_root + f's3dis_infos_Area_{i}.pkl' for i in train_area
111 | ],
112 | pipeline=train_pipeline,
113 | classes=class_names,
114 | test_mode=False,
115 | ignore_index=len(class_names),
116 | scene_idxs=[
117 | data_root + f'seg_info/Area_{i}_resampled_scene_idxs.npy'
118 | for i in train_area
119 | ]),
120 | val=dict(
121 | type=dataset_type,
122 | data_root=data_root,
123 | ann_files=data_root + f's3dis_infos_Area_{test_area}.pkl',
124 | pipeline=test_pipeline,
125 | classes=class_names,
126 | test_mode=True,
127 | ignore_index=len(class_names),
128 | scene_idxs=data_root +
129 | f'seg_info/Area_{test_area}_resampled_scene_idxs.npy'),
130 | test=dict(
131 | type=dataset_type,
132 | data_root=data_root,
133 | ann_files=data_root + f's3dis_infos_Area_{test_area}.pkl',
134 | pipeline=test_pipeline,
135 | classes=class_names,
136 | test_mode=True,
137 | ignore_index=len(class_names)))
138 |
139 | evaluation = dict(pipeline=eval_pipeline)
140 |
--------------------------------------------------------------------------------
/mp3dbev/projects/configs/_base_/datasets/scannet-3d-18class.py:
--------------------------------------------------------------------------------
1 | # dataset settings
2 | dataset_type = 'ScanNetDataset'
3 | data_root = './data/scannet/'
4 | class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
5 | 'bookshelf', 'picture', 'counter', 'desk', 'curtain',
6 | 'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub',
7 | 'garbagebin')
8 | train_pipeline = [
9 | dict(
10 | type='LoadPointsFromFile',
11 | coord_type='DEPTH',
12 | shift_height=True,
13 | load_dim=6,
14 | use_dim=[0, 1, 2]),
15 | dict(
16 | type='LoadAnnotations3D',
17 | with_bbox_3d=True,
18 | with_label_3d=True,
19 | with_mask_3d=True,
20 | with_seg_3d=True),
21 | dict(type='GlobalAlignment', rotation_axis=2),
22 | dict(
23 | type='PointSegClassMapping',
24 | valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34,
25 | 36, 39),
26 | max_cat_id=40),
27 | dict(type='PointSample', num_points=40000),
28 | dict(
29 | type='RandomFlip3D',
30 | sync_2d=False,
31 | flip_ratio_bev_horizontal=0.5,
32 | flip_ratio_bev_vertical=0.5),
33 | dict(
34 | type='GlobalRotScaleTrans',
35 | rot_range=[-0.087266, 0.087266],
36 | scale_ratio_range=[1.0, 1.0],
37 | shift_height=True),
38 | dict(type='DefaultFormatBundle3D', class_names=class_names),
39 | dict(
40 | type='Collect3D',
41 | keys=[
42 | 'points', 'gt_bboxes_3d', 'gt_labels_3d', 'pts_semantic_mask',
43 | 'pts_instance_mask'
44 | ])
45 | ]
46 | test_pipeline = [
47 | dict(
48 | type='LoadPointsFromFile',
49 | coord_type='DEPTH',
50 | shift_height=True,
51 | load_dim=6,
52 | use_dim=[0, 1, 2]),
53 | dict(type='GlobalAlignment', rotation_axis=2),
54 | dict(
55 | type='MultiScaleFlipAug3D',
56 | img_scale=(1333, 800),
57 | pts_scale_ratio=1,
58 | flip=False,
59 | transforms=[
60 | dict(
61 | type='GlobalRotScaleTrans',
62 | rot_range=[0, 0],
63 | scale_ratio_range=[1., 1.],
64 | translation_std=[0, 0, 0]),
65 | dict(
66 | type='RandomFlip3D',
67 | sync_2d=False,
68 | flip_ratio_bev_horizontal=0.5,
69 | flip_ratio_bev_vertical=0.5),
70 | dict(type='PointSample', num_points=40000),
71 | dict(
72 | type='DefaultFormatBundle3D',
73 | class_names=class_names,
74 | with_label=False),
75 | dict(type='Collect3D', keys=['points'])
76 | ])
77 | ]
78 | # construct a pipeline for data and gt loading in the show function
79 | # please keep its loading steps consistent with test_pipeline (e.g. the file client)
80 | eval_pipeline = [
81 | dict(
82 | type='LoadPointsFromFile',
83 | coord_type='DEPTH',
84 | shift_height=False,
85 | load_dim=6,
86 | use_dim=[0, 1, 2]),
87 | dict(type='GlobalAlignment', rotation_axis=2),
88 | dict(
89 | type='DefaultFormatBundle3D',
90 | class_names=class_names,
91 | with_label=False),
92 | dict(type='Collect3D', keys=['points'])
93 | ]
94 |
95 | data = dict(
96 | samples_per_gpu=8,
97 | workers_per_gpu=4,
98 | train=dict(
99 | type='RepeatDataset',
100 | times=5,
101 | dataset=dict(
102 | type=dataset_type,
103 | data_root=data_root,
104 | ann_file=data_root + 'scannet_infos_train.pkl',
105 | pipeline=train_pipeline,
106 | filter_empty_gt=False,
107 | classes=class_names,
108 | # we use box_type_3d='LiDAR' in the kitti and nuscenes datasets
109 | # and box_type_3d='Depth' in the sunrgbd and scannet datasets.
110 | box_type_3d='Depth')),
111 | val=dict(
112 | type=dataset_type,
113 | data_root=data_root,
114 | ann_file=data_root + 'scannet_infos_val.pkl',
115 | pipeline=test_pipeline,
116 | classes=class_names,
117 | test_mode=True,
118 | box_type_3d='Depth'),
119 | test=dict(
120 | type=dataset_type,
121 | data_root=data_root,
122 | ann_file=data_root + 'scannet_infos_val.pkl',
123 | pipeline=test_pipeline,
124 | classes=class_names,
125 | test_mode=True,
126 | box_type_3d='Depth'))
127 |
128 | evaluation = dict(pipeline=eval_pipeline)
129 |
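`PointSegClassMapping` remaps the raw NYU40 ids listed in `valid_cat_ids` to contiguous train ids and sends every other id to an ignore bucket. A numpy sketch of that lookup (my reading of the transform, not the upstream implementation):

```python
import numpy as np

# The 18 ScanNet detection classes, as raw NYU40 category ids.
valid_cat_ids = (3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34,
                 36, 39)
max_cat_id = 40
# Any id not in valid_cat_ids collapses to the ignore bucket (18).
lut = np.full(max_cat_id + 1, len(valid_cat_ids), dtype=np.int64)
for train_id, raw_id in enumerate(valid_cat_ids):
    lut[raw_id] = train_id
raw_labels = np.array([3, 1, 39, 40])  # per-point NYU40 labels
print(lut[raw_labels])                 # [ 0 18 17 18]
```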
--------------------------------------------------------------------------------
/mp3dbev/projects/configs/_base_/datasets/scannet_seg-3d-20class.py:
--------------------------------------------------------------------------------
1 | # dataset settings
2 | dataset_type = 'ScanNetSegDataset'
3 | data_root = './data/scannet/'
4 | class_names = ('wall', 'floor', 'cabinet', 'bed', 'chair', 'sofa', 'table',
5 | 'door', 'window', 'bookshelf', 'picture', 'counter', 'desk',
6 | 'curtain', 'refrigerator', 'showercurtrain', 'toilet', 'sink',
7 | 'bathtub', 'otherfurniture')
8 | num_points = 8192
9 | train_pipeline = [
10 | dict(
11 | type='LoadPointsFromFile',
12 | coord_type='DEPTH',
13 | shift_height=False,
14 | use_color=True,
15 | load_dim=6,
16 | use_dim=[0, 1, 2, 3, 4, 5]),
17 | dict(
18 | type='LoadAnnotations3D',
19 | with_bbox_3d=False,
20 | with_label_3d=False,
21 | with_mask_3d=False,
22 | with_seg_3d=True),
23 | dict(
24 | type='PointSegClassMapping',
25 | valid_cat_ids=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28,
26 | 33, 34, 36, 39),
27 | max_cat_id=40),
28 | dict(
29 | type='IndoorPatchPointSample',
30 | num_points=num_points,
31 | block_size=1.5,
32 | ignore_index=len(class_names),
33 | use_normalized_coord=False,
34 | enlarge_size=0.2,
35 | min_unique_num=None),
36 | dict(type='NormalizePointsColor', color_mean=None),
37 | dict(type='DefaultFormatBundle3D', class_names=class_names),
38 | dict(type='Collect3D', keys=['points', 'pts_semantic_mask'])
39 | ]
40 | test_pipeline = [
41 | dict(
42 | type='LoadPointsFromFile',
43 | coord_type='DEPTH',
44 | shift_height=False,
45 | use_color=True,
46 | load_dim=6,
47 | use_dim=[0, 1, 2, 3, 4, 5]),
48 | dict(type='NormalizePointsColor', color_mean=None),
49 | dict(
50 | # a wrapper in order to successfully call the test function
51 | # (we don't actually perform test-time augmentation)
52 | type='MultiScaleFlipAug3D',
53 | img_scale=(1333, 800),
54 | pts_scale_ratio=1,
55 | flip=False,
56 | transforms=[
57 | dict(
58 | type='GlobalRotScaleTrans',
59 | rot_range=[0, 0],
60 | scale_ratio_range=[1., 1.],
61 | translation_std=[0, 0, 0]),
62 | dict(
63 | type='RandomFlip3D',
64 | sync_2d=False,
65 | flip_ratio_bev_horizontal=0.0,
66 | flip_ratio_bev_vertical=0.0),
67 | dict(
68 | type='DefaultFormatBundle3D',
69 | class_names=class_names,
70 | with_label=False),
71 | dict(type='Collect3D', keys=['points'])
72 | ])
73 | ]
74 | # construct a pipeline for data and gt loading in the show function
75 | # please keep its loading steps consistent with test_pipeline (e.g. the file client)
76 | # we need to load gt seg_mask!
77 | eval_pipeline = [
78 | dict(
79 | type='LoadPointsFromFile',
80 | coord_type='DEPTH',
81 | shift_height=False,
82 | use_color=True,
83 | load_dim=6,
84 | use_dim=[0, 1, 2, 3, 4, 5]),
85 | dict(
86 | type='LoadAnnotations3D',
87 | with_bbox_3d=False,
88 | with_label_3d=False,
89 | with_mask_3d=False,
90 | with_seg_3d=True),
91 | dict(
92 | type='PointSegClassMapping',
93 | valid_cat_ids=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28,
94 | 33, 34, 36, 39),
95 | max_cat_id=40),
96 | dict(
97 | type='DefaultFormatBundle3D',
98 | with_label=False,
99 | class_names=class_names),
100 | dict(type='Collect3D', keys=['points', 'pts_semantic_mask'])
101 | ]
102 |
103 | data = dict(
104 | samples_per_gpu=8,
105 | workers_per_gpu=4,
106 | train=dict(
107 | type=dataset_type,
108 | data_root=data_root,
109 | ann_file=data_root + 'scannet_infos_train.pkl',
110 | pipeline=train_pipeline,
111 | classes=class_names,
112 | test_mode=False,
113 | ignore_index=len(class_names),
114 | scene_idxs=data_root + 'seg_info/train_resampled_scene_idxs.npy'),
115 | val=dict(
116 | type=dataset_type,
117 | data_root=data_root,
118 | ann_file=data_root + 'scannet_infos_val.pkl',
119 | pipeline=test_pipeline,
120 | classes=class_names,
121 | test_mode=True,
122 | ignore_index=len(class_names)),
123 | test=dict(
124 | type=dataset_type,
125 | data_root=data_root,
126 | ann_file=data_root + 'scannet_infos_val.pkl',
127 | pipeline=test_pipeline,
128 | classes=class_names,
129 | test_mode=True,
130 | ignore_index=len(class_names)))
131 |
132 | evaluation = dict(pipeline=eval_pipeline)
133 |
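`ignore_index=len(class_names)` matches that mapping: with 20 classes, unlabeled points carry label 20, which a standard segmentation loss can skip. A minimal PyTorch sketch of the convention (illustrative only; the actual loss is configured on the segmentor, not here):

```python
import torch
import torch.nn.functional as F

num_classes = 20                           # len(class_names)
logits = torch.randn(5, num_classes)       # class scores for 5 points
labels = torch.tensor([0, 19, 20, 3, 20])  # 20 == ignore bucket
loss = F.cross_entropy(logits, labels, ignore_index=num_classes)
print(loss)  # averaged over the 3 labeled points only
```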
--------------------------------------------------------------------------------
/mp3dbev/projects/configs/_base_/datasets/sunrgbd-3d-10class.py:
--------------------------------------------------------------------------------
1 | dataset_type = 'SUNRGBDDataset'
2 | data_root = 'data/sunrgbd/'
3 | class_names = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser',
4 | 'night_stand', 'bookshelf', 'bathtub')
5 | train_pipeline = [
6 | dict(
7 | type='LoadPointsFromFile',
8 | coord_type='DEPTH',
9 | shift_height=True,
10 | load_dim=6,
11 | use_dim=[0, 1, 2]),
12 | dict(type='LoadAnnotations3D'),
13 | dict(
14 | type='RandomFlip3D',
15 | sync_2d=False,
16 | flip_ratio_bev_horizontal=0.5,
17 | ),
18 | dict(
19 | type='GlobalRotScaleTrans',
20 | rot_range=[-0.523599, 0.523599],
21 | scale_ratio_range=[0.85, 1.15],
22 | shift_height=True),
23 | dict(type='PointSample', num_points=20000),
24 | dict(type='DefaultFormatBundle3D', class_names=class_names),
25 | dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
26 | ]
27 | test_pipeline = [
28 | dict(
29 | type='LoadPointsFromFile',
30 | coord_type='DEPTH',
31 | shift_height=True,
32 | load_dim=6,
33 | use_dim=[0, 1, 2]),
34 | dict(
35 | type='MultiScaleFlipAug3D',
36 | img_scale=(1333, 800),
37 | pts_scale_ratio=1,
38 | flip=False,
39 | transforms=[
40 | dict(
41 | type='GlobalRotScaleTrans',
42 | rot_range=[0, 0],
43 | scale_ratio_range=[1., 1.],
44 | translation_std=[0, 0, 0]),
45 | dict(
46 | type='RandomFlip3D',
47 | sync_2d=False,
48 | flip_ratio_bev_horizontal=0.5,
49 | ),
50 | dict(type='PointSample', num_points=20000),
51 | dict(
52 | type='DefaultFormatBundle3D',
53 | class_names=class_names,
54 | with_label=False),
55 | dict(type='Collect3D', keys=['points'])
56 | ])
57 | ]
58 | # construct a pipeline for data and gt loading in the show function
59 | # please keep its loading steps consistent with test_pipeline (e.g. the file client)
60 | eval_pipeline = [
61 | dict(
62 | type='LoadPointsFromFile',
63 | coord_type='DEPTH',
64 | shift_height=False,
65 | load_dim=6,
66 | use_dim=[0, 1, 2]),
67 | dict(
68 | type='DefaultFormatBundle3D',
69 | class_names=class_names,
70 | with_label=False),
71 | dict(type='Collect3D', keys=['points'])
72 | ]
73 |
74 | data = dict(
75 | samples_per_gpu=16,
76 | workers_per_gpu=4,
77 | train=dict(
78 | type='RepeatDataset',
79 | times=5,
80 | dataset=dict(
81 | type=dataset_type,
82 | data_root=data_root,
83 | ann_file=data_root + 'sunrgbd_infos_train.pkl',
84 | pipeline=train_pipeline,
85 | classes=class_names,
86 | filter_empty_gt=False,
87 | # we use box_type_3d='LiDAR' in the kitti and nuscenes datasets
88 | # and box_type_3d='Depth' in the sunrgbd and scannet datasets.
89 | box_type_3d='Depth')),
90 | val=dict(
91 | type=dataset_type,
92 | data_root=data_root,
93 | ann_file=data_root + 'sunrgbd_infos_val.pkl',
94 | pipeline=test_pipeline,
95 | classes=class_names,
96 | test_mode=True,
97 | box_type_3d='Depth'),
98 | test=dict(
99 | type=dataset_type,
100 | data_root=data_root,
101 | ann_file=data_root + 'sunrgbd_infos_val.pkl',
102 | pipeline=test_pipeline,
103 | classes=class_names,
104 | test_mode=True,
105 | box_type_3d='Depth'))
106 |
107 | evaluation = dict(pipeline=eval_pipeline)
108 |
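`RepeatDataset(times=5)` stretches one epoch into five passes over the training set, so schedules and evaluation intervals count fewer, longer epochs. A toy stand-in (not mmdet's implementation) that makes the index arithmetic concrete:

```python
class RepeatDatasetSketch:
    """Toy stand-in for mmdet's RepeatDataset wrapper."""

    def __init__(self, dataset, times):
        self.dataset = dataset
        self.times = times

    def __len__(self):
        return self.times * len(self.dataset)

    def __getitem__(self, idx):
        return self.dataset[idx % len(self.dataset)]


wrapped = RepeatDatasetSketch(list(range(100)), times=5)
print(len(wrapped))  # 500 samples per "epoch"
print(wrapped[499])  # 99: wraps back into the underlying dataset
```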
--------------------------------------------------------------------------------
/mp3dbev/projects/configs/_base_/default_runtime.py:
--------------------------------------------------------------------------------
1 | checkpoint_config = dict(interval=1)
 2 | # yapf:disable
 3 | # By default we use the text logger hook and tensorboard
4 | # For more loggers see
5 | # https://mmcv.readthedocs.io/en/latest/api.html#mmcv.runner.LoggerHook
6 | log_config = dict(
7 | interval=50,
8 | hooks=[
9 | dict(type='TextLoggerHook'),
10 | dict(type='TensorboardLoggerHook')
11 | ])
12 | # yapf:enable
13 | dist_params = dict(backend='nccl')
14 | log_level = 'INFO'
15 | work_dir = None
16 | load_from = None
17 | resume_from = None
18 | workflow = [('train', 1)]
19 |
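Downstream configs pull this runtime in through `_base_` and override only what they need; dict-valued keys are merged field by field, while scalars are replaced outright. A hypothetical downstream snippet (the interval and checkpoint path are made up for illustration):

```python
# Hypothetical config built on this runtime base.
_base_ = ['../_base_/default_runtime.py']

checkpoint_config = dict(interval=4)  # merged: only interval changes
load_from = 'ckpts/pretrained.pth'    # replaced: hypothetical path
```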
--------------------------------------------------------------------------------
/mp3dbev/projects/configs/_base_/models/3dssd.py:
--------------------------------------------------------------------------------
1 | model = dict(
2 | type='SSD3DNet',
3 | backbone=dict(
4 | type='PointNet2SAMSG',
5 | in_channels=4,
6 | num_points=(4096, 512, (256, 256)),
7 | radii=((0.2, 0.4, 0.8), (0.4, 0.8, 1.6), (1.6, 3.2, 4.8)),
8 | num_samples=((32, 32, 64), (32, 32, 64), (32, 32, 32)),
9 | sa_channels=(((16, 16, 32), (16, 16, 32), (32, 32, 64)),
10 | ((64, 64, 128), (64, 64, 128), (64, 96, 128)),
11 | ((128, 128, 256), (128, 192, 256), (128, 256, 256))),
12 | aggregation_channels=(64, 128, 256),
13 | fps_mods=(('D-FPS'), ('FS'), ('F-FPS', 'D-FPS')),
14 | fps_sample_range_lists=((-1), (-1), (512, -1)),
15 | norm_cfg=dict(type='BN2d', eps=1e-3, momentum=0.1),
16 | sa_cfg=dict(
17 | type='PointSAModuleMSG',
18 | pool_mod='max',
19 | use_xyz=True,
20 | normalize_xyz=False)),
21 | bbox_head=dict(
22 | type='SSD3DHead',
23 | in_channels=256,
24 | vote_module_cfg=dict(
25 | in_channels=256,
26 | num_points=256,
27 | gt_per_seed=1,
28 | conv_channels=(128, ),
29 | conv_cfg=dict(type='Conv1d'),
30 | norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1),
31 | with_res_feat=False,
32 | vote_xyz_range=(3.0, 3.0, 2.0)),
33 | vote_aggregation_cfg=dict(
34 | type='PointSAModuleMSG',
35 | num_point=256,
36 | radii=(4.8, 6.4),
37 | sample_nums=(16, 32),
38 | mlp_channels=((256, 256, 256, 512), (256, 256, 512, 1024)),
39 | norm_cfg=dict(type='BN2d', eps=1e-3, momentum=0.1),
40 | use_xyz=True,
41 | normalize_xyz=False,
42 | bias=True),
43 | pred_layer_cfg=dict(
44 | in_channels=1536,
45 | shared_conv_channels=(512, 128),
46 | cls_conv_channels=(128, ),
47 | reg_conv_channels=(128, ),
48 | conv_cfg=dict(type='Conv1d'),
49 | norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1),
50 | bias=True),
51 | conv_cfg=dict(type='Conv1d'),
52 | norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1),
53 | objectness_loss=dict(
54 | type='CrossEntropyLoss',
55 | use_sigmoid=True,
56 | reduction='sum',
57 | loss_weight=1.0),
58 | center_loss=dict(
59 | type='SmoothL1Loss', reduction='sum', loss_weight=1.0),
60 | dir_class_loss=dict(
61 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
62 | dir_res_loss=dict(
63 | type='SmoothL1Loss', reduction='sum', loss_weight=1.0),
64 | size_res_loss=dict(
65 | type='SmoothL1Loss', reduction='sum', loss_weight=1.0),
66 | corner_loss=dict(
67 | type='SmoothL1Loss', reduction='sum', loss_weight=1.0),
68 | vote_loss=dict(type='SmoothL1Loss', reduction='sum', loss_weight=1.0)),
69 | # model training and testing settings
70 | train_cfg=dict(
71 | sample_mod='spec', pos_distance_thr=10.0, expand_dims_length=0.05),
72 | test_cfg=dict(
73 | nms_cfg=dict(type='nms', iou_thr=0.1),
74 | sample_mod='spec',
75 | score_thr=0.0,
76 | per_class_proposal=True,
77 | max_output_num=100))
78 |
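A small Python subtlety in the backbone settings above: `('D-FPS')` and `(-1)` are not 1-tuples, just a parenthesized string and int, since a 1-tuple needs a trailing comma. The sketch below only demonstrates the language behavior; that the backbone normalizes both forms is an assumption about the upstream code:

```python
# Parentheses without a trailing comma do not create a tuple.
assert ('D-FPS') == 'D-FPS'            # plain string
assert (-1) == -1                      # plain int
assert ('D-FPS',) == tuple(['D-FPS'])  # an actual 1-tuple
```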
--------------------------------------------------------------------------------
/mp3dbev/projects/configs/_base_/models/centerpoint_01voxel_second_secfpn_nus.py:
--------------------------------------------------------------------------------
1 | voxel_size = [0.1, 0.1, 0.2]
2 | model = dict(
3 | type='CenterPoint',
4 | pts_voxel_layer=dict(
5 | max_num_points=10, voxel_size=voxel_size, max_voxels=(90000, 120000)),
6 | pts_voxel_encoder=dict(type='HardSimpleVFE', num_features=5),
7 | pts_middle_encoder=dict(
8 | type='SparseEncoder',
9 | in_channels=5,
10 | sparse_shape=[41, 1024, 1024],
11 | output_channels=128,
12 | order=('conv', 'norm', 'act'),
13 | encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128,
14 | 128)),
15 | encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, [0, 1, 1]), (0, 0)),
16 | block_type='basicblock'),
17 | pts_backbone=dict(
18 | type='SECOND',
19 | in_channels=256,
20 | out_channels=[128, 256],
21 | layer_nums=[5, 5],
22 | layer_strides=[1, 2],
23 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),
24 | conv_cfg=dict(type='Conv2d', bias=False)),
25 | pts_neck=dict(
26 | type='SECONDFPN',
27 | in_channels=[128, 256],
28 | out_channels=[256, 256],
29 | upsample_strides=[1, 2],
30 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),
31 | upsample_cfg=dict(type='deconv', bias=False),
32 | use_conv_for_no_stride=True),
33 | pts_bbox_head=dict(
34 | type='CenterHead',
35 | in_channels=sum([256, 256]),
36 | tasks=[
37 | dict(num_class=1, class_names=['car']),
38 | dict(num_class=2, class_names=['truck', 'construction_vehicle']),
39 | dict(num_class=2, class_names=['bus', 'trailer']),
40 | dict(num_class=1, class_names=['barrier']),
41 | dict(num_class=2, class_names=['motorcycle', 'bicycle']),
42 | dict(num_class=2, class_names=['pedestrian', 'traffic_cone']),
43 | ],
44 | common_heads=dict(
45 | reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)),
46 | share_conv_channel=64,
47 | bbox_coder=dict(
48 | type='CenterPointBBoxCoder',
49 | post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
50 | max_num=500,
51 | score_threshold=0.1,
52 | out_size_factor=8,
53 | voxel_size=voxel_size[:2],
54 | code_size=9),
55 | separate_head=dict(
56 | type='SeparateHead', init_bias=-2.19, final_kernel=3),
57 | loss_cls=dict(type='GaussianFocalLoss', reduction='mean'),
58 | loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25),
59 | norm_bbox=True),
60 | # model training and testing settings
61 | train_cfg=dict(
62 | pts=dict(
63 | grid_size=[1024, 1024, 40],
64 | voxel_size=voxel_size,
65 | out_size_factor=8,
66 | dense_reg=1,
67 | gaussian_overlap=0.1,
68 | max_objs=500,
69 | min_radius=2,
70 | code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2])),
71 | test_cfg=dict(
72 | pts=dict(
73 | post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
74 | max_per_img=500,
75 | max_pool_nms=False,
76 | min_radius=[4, 12, 10, 1, 0.85, 0.175],
77 | score_threshold=0.1,
78 | out_size_factor=8,
79 | voxel_size=voxel_size[:2],
80 | nms_type='rotate',
81 | pre_max_size=1000,
82 | post_max_size=83,
83 | nms_thr=0.2)))
84 |
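`sparse_shape=[41, 1024, 1024]` and the `grid_size=[1024, 1024, 40]` in `train_cfg` follow from `voxel_size` and the point cloud range. Assuming the usual nuScenes range `[-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]` (set in the dataset config, not this file), the arithmetic checks out:

```python
# Sketch: derive the grids above from voxel_size and an assumed pc range.
voxel_size = [0.1, 0.1, 0.2]
pc_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]  # assumed nuScenes range
grid = [round((pc_range[i + 3] - pc_range[i]) / voxel_size[i])
        for i in range(3)]
print(grid)  # [1024, 1024, 40] == train_cfg grid_size
# sparse_shape lists (z, y, x) and pads z by one: [41, 1024, 1024]
```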
--------------------------------------------------------------------------------
/mp3dbev/projects/configs/_base_/models/centerpoint_02pillar_second_secfpn_nus.py:
--------------------------------------------------------------------------------
1 | voxel_size = [0.2, 0.2, 8]
2 | model = dict(
3 | type='CenterPoint',
4 | pts_voxel_layer=dict(
5 | max_num_points=20, voxel_size=voxel_size, max_voxels=(30000, 40000)),
6 | pts_voxel_encoder=dict(
7 | type='PillarFeatureNet',
8 | in_channels=5,
9 | feat_channels=[64],
10 | with_distance=False,
11 | voxel_size=(0.2, 0.2, 8),
12 | norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
13 | legacy=False),
14 | pts_middle_encoder=dict(
15 | type='PointPillarsScatter', in_channels=64, output_shape=(512, 512)),
16 | pts_backbone=dict(
17 | type='SECOND',
18 | in_channels=64,
19 | out_channels=[64, 128, 256],
20 | layer_nums=[3, 5, 5],
21 | layer_strides=[2, 2, 2],
22 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),
23 | conv_cfg=dict(type='Conv2d', bias=False)),
24 | pts_neck=dict(
25 | type='SECONDFPN',
26 | in_channels=[64, 128, 256],
27 | out_channels=[128, 128, 128],
28 | upsample_strides=[0.5, 1, 2],
29 | norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),
30 | upsample_cfg=dict(type='deconv', bias=False),
31 | use_conv_for_no_stride=True),
32 | pts_bbox_head=dict(
33 | type='CenterHead',
34 | in_channels=sum([128, 128, 128]),
35 | tasks=[
36 | dict(num_class=1, class_names=['car']),
37 | dict(num_class=2, class_names=['truck', 'construction_vehicle']),
38 | dict(num_class=2, class_names=['bus', 'trailer']),
39 | dict(num_class=1, class_names=['barrier']),
40 | dict(num_class=2, class_names=['motorcycle', 'bicycle']),
41 | dict(num_class=2, class_names=['pedestrian', 'traffic_cone']),
42 | ],
43 | common_heads=dict(
44 | reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)),
45 | share_conv_channel=64,
46 | bbox_coder=dict(
47 | type='CenterPointBBoxCoder',
48 | post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
49 | max_num=500,
50 | score_threshold=0.1,
51 | out_size_factor=4,
52 | voxel_size=voxel_size[:2],
53 | code_size=9),
54 | separate_head=dict(
55 | type='SeparateHead', init_bias=-2.19, final_kernel=3),
56 | loss_cls=dict(type='GaussianFocalLoss', reduction='mean'),
57 | loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25),
58 | norm_bbox=True),
59 | # model training and testing settings
60 | train_cfg=dict(
61 | pts=dict(
62 | grid_size=[512, 512, 1],
63 | voxel_size=voxel_size,
64 | out_size_factor=4,
65 | dense_reg=1,
66 | gaussian_overlap=0.1,
67 | max_objs=500,
68 | min_radius=2,
69 | code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2])),
70 | test_cfg=dict(
71 | pts=dict(
72 | post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
73 | max_per_img=500,
74 | max_pool_nms=False,
75 | min_radius=[4, 12, 10, 1, 0.85, 0.175],
76 | score_threshold=0.1,
77 | pc_range=[-51.2, -51.2],
78 | out_size_factor=4,
79 | voxel_size=voxel_size[:2],
80 | nms_type='rotate',
81 | pre_max_size=1000,
82 | post_max_size=83,
83 | nms_thr=0.2)))
84 |
--------------------------------------------------------------------------------
/mp3dbev/projects/configs/_base_/models/fcos3d.py:
--------------------------------------------------------------------------------
1 | model = dict(
2 | type='FCOSMono3D',
3 | pretrained='open-mmlab://detectron2/resnet101_caffe',
4 | backbone=dict(
5 | type='ResNet',
6 | depth=101,
7 | num_stages=4,
8 | out_indices=(0, 1, 2, 3),
9 | frozen_stages=1,
10 | norm_cfg=dict(type='BN', requires_grad=False),
11 | norm_eval=True,
12 | style='caffe'),
13 | neck=dict(
14 | type='FPN',
15 | in_channels=[256, 512, 1024, 2048],
16 | out_channels=256,
17 | start_level=1,
18 | add_extra_convs='on_output',
19 | num_outs=5,
20 | relu_before_extra_convs=True),
21 | bbox_head=dict(
22 | type='FCOSMono3DHead',
23 | num_classes=10,
24 | in_channels=256,
25 | stacked_convs=2,
26 | feat_channels=256,
27 | use_direction_classifier=True,
28 | diff_rad_by_sin=True,
29 | pred_attrs=True,
30 | pred_velo=True,
31 | dir_offset=0.7854, # pi/4
32 | strides=[8, 16, 32, 64, 128],
33 | group_reg_dims=(2, 1, 3, 1, 2), # offset, depth, size, rot, velo
34 | cls_branch=(256, ),
35 | reg_branch=(
36 | (256, ), # offset
37 | (256, ), # depth
38 | (256, ), # size
39 | (256, ), # rot
40 | () # velo
41 | ),
42 | dir_branch=(256, ),
43 | attr_branch=(256, ),
44 | loss_cls=dict(
45 | type='FocalLoss',
46 | use_sigmoid=True,
47 | gamma=2.0,
48 | alpha=0.25,
49 | loss_weight=1.0),
50 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
51 | loss_dir=dict(
52 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
53 | loss_attr=dict(
54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
55 | loss_centerness=dict(
56 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
57 | norm_on_bbox=True,
58 | centerness_on_reg=True,
59 | center_sampling=True,
60 | conv_bias=True,
61 | dcn_on_last_conv=True),
62 | train_cfg=dict(
63 | allowed_border=0,
64 | code_weight=[1.0, 1.0, 0.2, 1.0, 1.0, 1.0, 1.0, 0.05, 0.05],
65 | pos_weight=-1,
66 | debug=False),
67 | test_cfg=dict(
68 | use_rotate_nms=True,
69 | nms_across_levels=False,
70 | nms_pre=1000,
71 | nms_thr=0.8,
72 | score_thr=0.05,
73 | min_bbox_size=0,
74 | max_per_img=200))
75 |
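The head regresses grouped targets (offset, depth, size, rot, velo); the group dims sum to the nine entries of `train_cfg.code_weight`, one weight per regression target. A one-line check makes the bookkeeping explicit:

```python
# Sketch: group_reg_dims entries must sum to len(code_weight).
group_reg_dims = (2, 1, 3, 1, 2)  # offset, depth, size, rot, velo
code_weight = [1.0, 1.0, 0.2, 1.0, 1.0, 1.0, 1.0, 0.05, 0.05]
assert sum(group_reg_dims) == len(code_weight) == 9
```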
--------------------------------------------------------------------------------
/mp3dbev/projects/configs/_base_/models/groupfree3d.py:
--------------------------------------------------------------------------------
1 | model = dict(
2 | type='GroupFree3DNet',
3 | backbone=dict(
4 | type='PointNet2SASSG',
5 | in_channels=3,
6 | num_points=(2048, 1024, 512, 256),
7 | radius=(0.2, 0.4, 0.8, 1.2),
8 | num_samples=(64, 32, 16, 16),
9 | sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256),
10 | (128, 128, 256)),
11 | fp_channels=((256, 256), (256, 288)),
12 | norm_cfg=dict(type='BN2d'),
13 | sa_cfg=dict(
14 | type='PointSAModule',
15 | pool_mod='max',
16 | use_xyz=True,
17 | normalize_xyz=True)),
18 | bbox_head=dict(
19 | type='GroupFree3DHead',
20 | in_channels=288,
21 | num_decoder_layers=6,
22 | num_proposal=256,
23 | transformerlayers=dict(
24 | type='BaseTransformerLayer',
25 | attn_cfgs=dict(
26 | type='GroupFree3DMHA',
27 | embed_dims=288,
28 | num_heads=8,
29 | attn_drop=0.1,
30 | dropout_layer=dict(type='Dropout', drop_prob=0.1)),
31 | ffn_cfgs=dict(
32 | embed_dims=288,
33 | feedforward_channels=2048,
34 | ffn_drop=0.1,
35 | act_cfg=dict(type='ReLU', inplace=True)),
36 | operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 'ffn',
37 | 'norm')),
38 | pred_layer_cfg=dict(
39 | in_channels=288, shared_conv_channels=(288, 288), bias=True),
40 | sampling_objectness_loss=dict(
41 | type='FocalLoss',
42 | use_sigmoid=True,
43 | gamma=2.0,
44 | alpha=0.25,
45 | loss_weight=8.0),
46 | objectness_loss=dict(
47 | type='FocalLoss',
48 | use_sigmoid=True,
49 | gamma=2.0,
50 | alpha=0.25,
51 | loss_weight=1.0),
52 | center_loss=dict(
53 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
54 | dir_class_loss=dict(
55 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
56 | dir_res_loss=dict(
57 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
58 | size_class_loss=dict(
59 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
60 | size_res_loss=dict(
61 | type='SmoothL1Loss', beta=1.0, reduction='sum', loss_weight=10.0),
62 | semantic_loss=dict(
63 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
64 | # model training and testing settings
65 | train_cfg=dict(sample_mod='kps'),
66 | test_cfg=dict(
67 | sample_mod='kps',
68 | nms_thr=0.25,
69 | score_thr=0.0,
70 | per_class_proposal=True,
71 | prediction_stages='last'))
72 |
--------------------------------------------------------------------------------
/mp3dbev/projects/configs/_base_/models/hv_pointpillars_fpn_lyft.py:
--------------------------------------------------------------------------------
1 | _base_ = './hv_pointpillars_fpn_nus.py'
2 |
3 | # model settings (based on nuScenes model settings)
4 | # Voxel size for voxel encoder
5 | # Usually voxel size is changed consistently with the point cloud range
6 | # If point cloud range is modified, do remember to change all related
7 | # keys in the config.
8 | model = dict(
9 | pts_voxel_layer=dict(
10 | max_num_points=20,
11 | point_cloud_range=[-80, -80, -5, 80, 80, 3],
12 | max_voxels=(60000, 60000)),
13 | pts_voxel_encoder=dict(
14 | feat_channels=[64], point_cloud_range=[-80, -80, -5, 80, 80, 3]),
15 | pts_middle_encoder=dict(output_shape=[640, 640]),
16 | pts_bbox_head=dict(
17 | num_classes=9,
18 | anchor_generator=dict(
19 | ranges=[[-80, -80, -1.8, 80, 80, -1.8]], custom_values=[]),
20 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7)),
21 | # model training settings (based on nuScenes model settings)
22 | train_cfg=dict(pts=dict(code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])))
23 |
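This file is a delta on the nuScenes PointPillars base: mmcv deep-merges dicts, so `pts_voxel_layer` keeps `voxel_size` from the base while `point_cloud_range` and `max_voxels` come from here, and `output_shape=[640, 640]` is exactly the 160 m Lyft range divided by the 0.25 m pillars. A sketch of inspecting the merged result (assumes mmcv and repo-root paths):

```python
from mmcv import Config

cfg = Config.fromfile(
    'mp3dbev/projects/configs/_base_/models/hv_pointpillars_fpn_lyft.py')
print(cfg.model.pts_voxel_layer.voxel_size)       # [0.25, 0.25, 8], inherited
print(cfg.model.pts_middle_encoder.output_shape)  # [640, 640], overridden
assert (80 - (-80)) / 0.25 == 640                 # range extent / pillar size
```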
--------------------------------------------------------------------------------
/mp3dbev/projects/configs/_base_/models/hv_pointpillars_fpn_nus.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | # Voxel size for voxel encoder
3 | # Usually voxel size is changed consistently with the point cloud range
4 | # If point cloud range is modified, do remember to change all related
5 | # keys in the config.
6 | voxel_size = [0.25, 0.25, 8]
7 | model = dict(
8 | type='MVXFasterRCNN',
9 | pts_voxel_layer=dict(
10 | max_num_points=64,
11 | point_cloud_range=[-50, -50, -5, 50, 50, 3],
12 | voxel_size=voxel_size,
13 | max_voxels=(30000, 40000)),
14 | pts_voxel_encoder=dict(
15 | type='HardVFE',
16 | in_channels=4,
17 | feat_channels=[64, 64],
18 | with_distance=False,
19 | voxel_size=voxel_size,
20 | with_cluster_center=True,
21 | with_voxel_center=True,
22 | point_cloud_range=[-50, -50, -5, 50, 50, 3],
23 | norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)),
24 | pts_middle_encoder=dict(
25 | type='PointPillarsScatter', in_channels=64, output_shape=[400, 400]),
26 | pts_backbone=dict(
27 | type='SECOND',
28 | in_channels=64,
29 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
30 | layer_nums=[3, 5, 5],
31 | layer_strides=[2, 2, 2],
32 | out_channels=[64, 128, 256]),
33 | pts_neck=dict(
34 | type='FPN',
35 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
36 | act_cfg=dict(type='ReLU'),
37 | in_channels=[64, 128, 256],
38 | out_channels=256,
39 | start_level=0,
40 | num_outs=3),
41 | pts_bbox_head=dict(
42 | type='Anchor3DHead',
43 | num_classes=10,
44 | in_channels=256,
45 | feat_channels=256,
46 | use_direction_classifier=True,
47 | anchor_generator=dict(
48 | type='AlignedAnchor3DRangeGenerator',
49 | ranges=[[-50, -50, -1.8, 50, 50, -1.8]],
50 | scales=[1, 2, 4],
51 | sizes=[
52 | [0.8660, 2.5981, 1.], # 1.5/sqrt(3)
53 | [0.5774, 1.7321, 1.], # 1/sqrt(3)
54 | [1., 1., 1.],
55 | [0.4, 0.4, 1],
56 | ],
57 | custom_values=[0, 0],
58 | rotations=[0, 1.57],
59 | reshape_out=True),
60 | assigner_per_size=False,
61 | diff_rad_by_sin=True,
62 | dir_offset=0.7854, # pi/4
63 | dir_limit_offset=0,
64 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9),
65 | loss_cls=dict(
66 | type='FocalLoss',
67 | use_sigmoid=True,
68 | gamma=2.0,
69 | alpha=0.25,
70 | loss_weight=1.0),
71 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
72 | loss_dir=dict(
73 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
74 | # model training and testing settings
75 | train_cfg=dict(
76 | pts=dict(
77 | assigner=dict(
78 | type='MaxIoUAssigner',
79 | iou_calculator=dict(type='BboxOverlapsNearest3D'),
80 | pos_iou_thr=0.6,
81 | neg_iou_thr=0.3,
82 | min_pos_iou=0.3,
83 | ignore_iof_thr=-1),
84 | allowed_border=0,
85 | code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],
86 | pos_weight=-1,
87 | debug=False)),
88 | test_cfg=dict(
89 | pts=dict(
90 | use_rotate_nms=True,
91 | nms_across_levels=False,
92 | nms_pre=1000,
93 | nms_thr=0.2,
94 | score_thr=0.05,
95 | min_bbox_size=0,
96 | max_num=500)))
97 |
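`code_size=9` is the usual 7 box parameters plus 2 velocity components; `custom_values=[0, 0]` seeds the velocity dims of every anchor. A tiny sketch of that bookkeeping:

```python
# Sketch: why code_size=9 for nuScenes-style boxes.
geometry = ['x', 'y', 'z', 'w', 'l', 'h', 'yaw']  # 7 box params
custom_values = [0, 0]                            # anchor vx, vy seeds
assert len(geometry) + len(custom_values) == 9    # matches code_size
```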
--------------------------------------------------------------------------------
/mp3dbev/projects/configs/_base_/models/hv_pointpillars_fpn_range100_lyft.py:
--------------------------------------------------------------------------------
1 | _base_ = './hv_pointpillars_fpn_nus.py'
2 |
3 | # model settings (based on nuScenes model settings)
4 | # Voxel size for voxel encoder
5 | # Usually voxel size is changed consistently with the point cloud range
6 | # If point cloud range is modified, do remember to change all related
7 | # keys in the config.
8 | model = dict(
9 | pts_voxel_layer=dict(
10 | max_num_points=20,
11 | point_cloud_range=[-100, -100, -5, 100, 100, 3],
12 | max_voxels=(60000, 60000)),
13 | pts_voxel_encoder=dict(
14 | feat_channels=[64], point_cloud_range=[-100, -100, -5, 100, 100, 3]),
15 | pts_middle_encoder=dict(output_shape=[800, 800]),
16 | pts_bbox_head=dict(
17 | num_classes=9,
18 | anchor_generator=dict(
19 | ranges=[[-100, -100, -1.8, 100, 100, -1.8]], custom_values=[]),
20 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7)),
21 | # model training settings (based on nuScenes model settings)
22 | train_cfg=dict(pts=dict(code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])))
23 |
--------------------------------------------------------------------------------
/mp3dbev/projects/configs/_base_/models/hv_pointpillars_secfpn_kitti.py:
--------------------------------------------------------------------------------
1 | voxel_size = [0.16, 0.16, 4]
2 |
3 | model = dict(
4 | type='VoxelNet',
5 | voxel_layer=dict(
6 | max_num_points=32, # max_points_per_voxel
7 | point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1],
8 | voxel_size=voxel_size,
9 | max_voxels=(16000, 40000) # (training, testing) max_voxels
10 | ),
11 | voxel_encoder=dict(
12 | type='PillarFeatureNet',
13 | in_channels=4,
14 | feat_channels=[64],
15 | with_distance=False,
16 | voxel_size=voxel_size,
17 | point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1]),
18 | middle_encoder=dict(
19 | type='PointPillarsScatter', in_channels=64, output_shape=[496, 432]),
20 | backbone=dict(
21 | type='SECOND',
22 | in_channels=64,
23 | layer_nums=[3, 5, 5],
24 | layer_strides=[2, 2, 2],
25 | out_channels=[64, 128, 256]),
26 | neck=dict(
27 | type='SECONDFPN',
28 | in_channels=[64, 128, 256],
29 | upsample_strides=[1, 2, 4],
30 | out_channels=[128, 128, 128]),
31 | bbox_head=dict(
32 | type='Anchor3DHead',
33 | num_classes=3,
34 | in_channels=384,
35 | feat_channels=384,
36 | use_direction_classifier=True,
37 | anchor_generator=dict(
38 | type='Anchor3DRangeGenerator',
39 | ranges=[
40 | [0, -39.68, -0.6, 70.4, 39.68, -0.6],
41 | [0, -39.68, -0.6, 70.4, 39.68, -0.6],
42 | [0, -39.68, -1.78, 70.4, 39.68, -1.78],
43 | ],
44 | sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],
45 | rotations=[0, 1.57],
46 | reshape_out=False),
47 | diff_rad_by_sin=True,
48 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
49 | loss_cls=dict(
50 | type='FocalLoss',
51 | use_sigmoid=True,
52 | gamma=2.0,
53 | alpha=0.25,
54 | loss_weight=1.0),
55 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
56 | loss_dir=dict(
57 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
58 | # model training and testing settings
59 | train_cfg=dict(
60 | assigner=[
61 | dict( # for Pedestrian
62 | type='MaxIoUAssigner',
63 | iou_calculator=dict(type='BboxOverlapsNearest3D'),
64 | pos_iou_thr=0.5,
65 | neg_iou_thr=0.35,
66 | min_pos_iou=0.35,
67 | ignore_iof_thr=-1),
68 | dict( # for Cyclist
69 | type='MaxIoUAssigner',
70 | iou_calculator=dict(type='BboxOverlapsNearest3D'),
71 | pos_iou_thr=0.5,
72 | neg_iou_thr=0.35,
73 | min_pos_iou=0.35,
74 | ignore_iof_thr=-1),
75 | dict( # for Car
76 | type='MaxIoUAssigner',
77 | iou_calculator=dict(type='BboxOverlapsNearest3D'),
78 | pos_iou_thr=0.6,
79 | neg_iou_thr=0.45,
80 | min_pos_iou=0.45,
81 | ignore_iof_thr=-1),
82 | ],
83 | allowed_border=0,
84 | pos_weight=-1,
85 | debug=False),
86 | test_cfg=dict(
87 | use_rotate_nms=True,
88 | nms_across_levels=False,
89 | nms_thr=0.01,
90 | score_thr=0.1,
91 | min_bbox_size=0,
92 | nms_pre=100,
93 | max_num=50))
94 |
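Two bits of arithmetic sit behind the KITTI pillar settings: the scatter canvas `[496, 432]` is the y/x extent of the point cloud range divided by the pillar size, and the head places one anchor per (class size, rotation) pair at every BEV location. A sketch derived from the values above (not upstream code):

```python
# Sketch: derive the BEV canvas and anchors-per-location from the config.
point_cloud_range = [0, -39.68, -3, 69.12, 39.68, 1]
voxel_size = [0.16, 0.16, 4]
ny = round((point_cloud_range[4] - point_cloud_range[1]) / voxel_size[1])
nx = round((point_cloud_range[3] - point_cloud_range[0]) / voxel_size[0])
print([ny, nx])  # [496, 432] == output_shape

sizes = [[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]]
rotations = [0, 1.57]
print(len(sizes) * len(rotations))  # 6 anchors per BEV location
```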
--------------------------------------------------------------------------------
/mp3dbev/projects/configs/_base_/models/hv_pointpillars_secfpn_waymo.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | # Voxel size for voxel encoder
3 | # Usually voxel size is changed consistently with the point cloud range
4 | # If point cloud range is modified, do remember to change all related
5 | # keys in the config.
6 | voxel_size = [0.32, 0.32, 6]
7 | model = dict(
8 | type='MVXFasterRCNN',
9 | pts_voxel_layer=dict(
10 | max_num_points=20,
11 | point_cloud_range=[-74.88, -74.88, -2, 74.88, 74.88, 4],
12 | voxel_size=voxel_size,
13 | max_voxels=(32000, 32000)),
14 | pts_voxel_encoder=dict(
15 | type='HardVFE',
16 | in_channels=5,
17 | feat_channels=[64],
18 | with_distance=False,
19 | voxel_size=voxel_size,
20 | with_cluster_center=True,
21 | with_voxel_center=True,
22 | point_cloud_range=[-74.88, -74.88, -2, 74.88, 74.88, 4],
23 | norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)),
24 | pts_middle_encoder=dict(
25 | type='PointPillarsScatter', in_channels=64, output_shape=[468, 468]),
26 | pts_backbone=dict(
27 | type='SECOND',
28 | in_channels=64,
29 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
30 | layer_nums=[3, 5, 5],
31 | layer_strides=[1, 2, 2],
32 | out_channels=[64, 128, 256]),
33 | pts_neck=dict(
34 | type='SECONDFPN',
35 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
36 | in_channels=[64, 128, 256],
37 | upsample_strides=[1, 2, 4],
38 | out_channels=[128, 128, 128]),
39 | pts_bbox_head=dict(
40 | type='Anchor3DHead',
41 | num_classes=3,
42 | in_channels=384,
43 | feat_channels=384,
44 | use_direction_classifier=True,
45 | anchor_generator=dict(
46 | type='AlignedAnchor3DRangeGenerator',
47 | ranges=[[-74.88, -74.88, -0.0345, 74.88, 74.88, -0.0345],
48 | [-74.88, -74.88, -0.1188, 74.88, 74.88, -0.1188],
49 | [-74.88, -74.88, 0, 74.88, 74.88, 0]],
50 | sizes=[
51 | [2.08, 4.73, 1.77], # car
52 | [0.84, 1.81, 1.77], # cyclist
53 | [0.84, 0.91, 1.74] # pedestrian
54 | ],
55 | rotations=[0, 1.57],
56 | reshape_out=False),
57 | diff_rad_by_sin=True,
58 | dir_offset=0.7854, # pi/4
59 | dir_limit_offset=0,
60 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7),
61 | loss_cls=dict(
62 | type='FocalLoss',
63 | use_sigmoid=True,
64 | gamma=2.0,
65 | alpha=0.25,
66 | loss_weight=1.0),
67 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
68 | loss_dir=dict(
69 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
70 | # model training and testing settings
71 | train_cfg=dict(
72 | pts=dict(
73 | assigner=[
74 | dict( # car
75 | type='MaxIoUAssigner',
76 | iou_calculator=dict(type='BboxOverlapsNearest3D'),
77 | pos_iou_thr=0.55,
78 | neg_iou_thr=0.4,
79 | min_pos_iou=0.4,
80 | ignore_iof_thr=-1),
81 | dict( # cyclist
82 | type='MaxIoUAssigner',
83 | iou_calculator=dict(type='BboxOverlapsNearest3D'),
84 | pos_iou_thr=0.5,
85 | neg_iou_thr=0.3,
86 | min_pos_iou=0.3,
87 | ignore_iof_thr=-1),
88 | dict( # pedestrian
89 | type='MaxIoUAssigner',
90 | iou_calculator=dict(type='BboxOverlapsNearest3D'),
91 | pos_iou_thr=0.5,
92 | neg_iou_thr=0.3,
93 | min_pos_iou=0.3,
94 | ignore_iof_thr=-1),
95 | ],
96 | allowed_border=0,
97 | code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
98 | pos_weight=-1,
99 | debug=False)),
100 | test_cfg=dict(
101 | pts=dict(
102 | use_rotate_nms=True,
103 | nms_across_levels=False,
104 | nms_pre=4096,
105 | nms_thr=0.25,
106 | score_thr=0.1,
107 | min_bbox_size=0,
108 | max_num=500)))
109 |
--------------------------------------------------------------------------------
/mp3dbev/projects/configs/_base_/models/hv_second_secfpn_kitti.py:
--------------------------------------------------------------------------------
1 | voxel_size = [0.05, 0.05, 0.1]
2 |
3 | model = dict(
4 | type='VoxelNet',
5 | voxel_layer=dict(
6 | max_num_points=5,
7 | point_cloud_range=[0, -40, -3, 70.4, 40, 1],
8 | voxel_size=voxel_size,
9 | max_voxels=(16000, 40000)),
10 | voxel_encoder=dict(type='HardSimpleVFE'),
11 | middle_encoder=dict(
12 | type='SparseEncoder',
13 | in_channels=4,
14 | sparse_shape=[41, 1600, 1408],
15 | order=('conv', 'norm', 'act')),
16 | backbone=dict(
17 | type='SECOND',
18 | in_channels=256,
19 | layer_nums=[5, 5],
20 | layer_strides=[1, 2],
21 | out_channels=[128, 256]),
22 | neck=dict(
23 | type='SECONDFPN',
24 | in_channels=[128, 256],
25 | upsample_strides=[1, 2],
26 | out_channels=[256, 256]),
27 | bbox_head=dict(
28 | type='Anchor3DHead',
29 | num_classes=3,
30 | in_channels=512,
31 | feat_channels=512,
32 | use_direction_classifier=True,
33 | anchor_generator=dict(
34 | type='Anchor3DRangeGenerator',
35 | ranges=[
36 | [0, -40.0, -0.6, 70.4, 40.0, -0.6],
37 | [0, -40.0, -0.6, 70.4, 40.0, -0.6],
38 | [0, -40.0, -1.78, 70.4, 40.0, -1.78],
39 | ],
40 | sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],
41 | rotations=[0, 1.57],
42 | reshape_out=False),
43 | diff_rad_by_sin=True,
44 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
45 | loss_cls=dict(
46 | type='FocalLoss',
47 | use_sigmoid=True,
48 | gamma=2.0,
49 | alpha=0.25,
50 | loss_weight=1.0),
51 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
52 | loss_dir=dict(
53 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
54 | # model training and testing settings
55 | train_cfg=dict(
56 | assigner=[
57 | dict( # for Pedestrian
58 | type='MaxIoUAssigner',
59 | iou_calculator=dict(type='BboxOverlapsNearest3D'),
60 | pos_iou_thr=0.35,
61 | neg_iou_thr=0.2,
62 | min_pos_iou=0.2,
63 | ignore_iof_thr=-1),
64 | dict( # for Cyclist
65 | type='MaxIoUAssigner',
66 | iou_calculator=dict(type='BboxOverlapsNearest3D'),
67 | pos_iou_thr=0.35,
68 | neg_iou_thr=0.2,
69 | min_pos_iou=0.2,
70 | ignore_iof_thr=-1),
71 | dict( # for Car
72 | type='MaxIoUAssigner',
73 | iou_calculator=dict(type='BboxOverlapsNearest3D'),
74 | pos_iou_thr=0.6,
75 | neg_iou_thr=0.45,
76 | min_pos_iou=0.45,
77 | ignore_iof_thr=-1),
78 | ],
79 | allowed_border=0,
80 | pos_weight=-1,
81 | debug=False),
82 | test_cfg=dict(
83 | use_rotate_nms=True,
84 | nms_across_levels=False,
85 | nms_thr=0.01,
86 | score_thr=0.1,
87 | min_bbox_size=0,
88 | nms_pre=100,
89 | max_num=50))
90 |
--------------------------------------------------------------------------------
/mp3dbev/projects/configs/_base_/models/hv_second_secfpn_waymo.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | # Voxel size for voxel encoder
3 | # Usually voxel size is changed consistently with the point cloud range
4 | # If point cloud range is modified, do remember to change all related
5 | # keys in the config.
6 | voxel_size = [0.08, 0.08, 0.1]
7 | model = dict(
8 | type='VoxelNet',
9 | voxel_layer=dict(
10 | max_num_points=10,
11 | point_cloud_range=[-76.8, -51.2, -2, 76.8, 51.2, 4],
12 | voxel_size=voxel_size,
13 | max_voxels=(80000, 90000)),
14 | voxel_encoder=dict(type='HardSimpleVFE', num_features=5),
15 | middle_encoder=dict(
16 | type='SparseEncoder',
17 | in_channels=5,
18 | sparse_shape=[61, 1280, 1920],
19 | order=('conv', 'norm', 'act')),
20 | backbone=dict(
21 | type='SECOND',
22 | in_channels=384,
23 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
24 | layer_nums=[5, 5],
25 | layer_strides=[1, 2],
26 | out_channels=[128, 256]),
27 | neck=dict(
28 | type='SECONDFPN',
29 | norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
30 | in_channels=[128, 256],
31 | upsample_strides=[1, 2],
32 | out_channels=[256, 256]),
33 | bbox_head=dict(
34 | type='Anchor3DHead',
35 | num_classes=3,
36 | in_channels=512,
37 | feat_channels=512,
38 | use_direction_classifier=True,
39 | anchor_generator=dict(
40 | type='AlignedAnchor3DRangeGenerator',
41 | ranges=[[-76.8, -51.2, -0.0345, 76.8, 51.2, -0.0345],
42 | [-76.8, -51.2, 0, 76.8, 51.2, 0],
43 | [-76.8, -51.2, -0.1188, 76.8, 51.2, -0.1188]],
44 | sizes=[
45 | [2.08, 4.73, 1.77], # car
46 | [0.84, 0.91, 1.74], # pedestrian
47 | [0.84, 1.81, 1.77] # cyclist
48 | ],
49 | rotations=[0, 1.57],
50 | reshape_out=False),
51 | diff_rad_by_sin=True,
52 | dir_offset=0.7854, # pi/4
53 | dir_limit_offset=0,
54 | bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7),
55 | loss_cls=dict(
56 | type='FocalLoss',
57 | use_sigmoid=True,
58 | gamma=2.0,
59 | alpha=0.25,
60 | loss_weight=1.0),
61 | loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
62 | loss_dir=dict(
63 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
64 | # model training and testing settings
65 | train_cfg=dict(
66 | assigner=[
67 | dict( # car
68 | type='MaxIoUAssigner',
69 | iou_calculator=dict(type='BboxOverlapsNearest3D'),
70 | pos_iou_thr=0.55,
71 | neg_iou_thr=0.4,
72 | min_pos_iou=0.4,
73 | ignore_iof_thr=-1),
74 | dict( # pedestrian
75 | type='MaxIoUAssigner',
76 | iou_calculator=dict(type='BboxOverlapsNearest3D'),
77 | pos_iou_thr=0.5,
78 | neg_iou_thr=0.3,
79 | min_pos_iou=0.3,
80 | ignore_iof_thr=-1),
81 | dict( # cyclist
82 | type='MaxIoUAssigner',
83 | iou_calculator=dict(type='BboxOverlapsNearest3D'),
84 | pos_iou_thr=0.5,
85 | neg_iou_thr=0.3,
86 | min_pos_iou=0.3,
87 | ignore_iof_thr=-1)
88 | ],
89 | allowed_border=0,
90 | code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
91 | pos_weight=-1,
92 | debug=False),
93 | test_cfg=dict(
94 | use_rotate_nms=True,
95 | nms_across_levels=False,
96 | nms_pre=4096,
97 | nms_thr=0.25,
98 | score_thr=0.1,
99 | min_bbox_size=0,
100 | max_num=500))
101 |
--------------------------------------------------------------------------------
/mp3dbev/projects/configs/_base_/models/imvotenet_image.py:
--------------------------------------------------------------------------------
1 | model = dict(
2 | type='ImVoteNet',
3 | img_backbone=dict(
4 | type='ResNet',
5 | depth=50,
6 | num_stages=4,
7 | out_indices=(0, 1, 2, 3),
8 | frozen_stages=1,
9 | norm_cfg=dict(type='BN', requires_grad=False),
10 | norm_eval=True,
11 | style='caffe'),
12 | img_neck=dict(
13 | type='FPN',
14 | in_channels=[256, 512, 1024, 2048],
15 | out_channels=256,
16 | num_outs=5),
17 | img_rpn_head=dict(
18 | type='RPNHead',
19 | in_channels=256,
20 | feat_channels=256,
21 | anchor_generator=dict(
22 | type='AnchorGenerator',
23 | scales=[8],
24 | ratios=[0.5, 1.0, 2.0],
25 | strides=[4, 8, 16, 32, 64]),
26 | bbox_coder=dict(
27 | type='DeltaXYWHBBoxCoder',
28 | target_means=[.0, .0, .0, .0],
29 | target_stds=[1.0, 1.0, 1.0, 1.0]),
30 | loss_cls=dict(
31 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
32 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
33 | img_roi_head=dict(
34 | type='StandardRoIHead',
35 | bbox_roi_extractor=dict(
36 | type='SingleRoIExtractor',
37 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
38 | out_channels=256,
39 | featmap_strides=[4, 8, 16, 32]),
40 | bbox_head=dict(
41 | type='Shared2FCBBoxHead',
42 | in_channels=256,
43 | fc_out_channels=1024,
44 | roi_feat_size=7,
45 | num_classes=10,
46 | bbox_coder=dict(
47 | type='DeltaXYWHBBoxCoder',
48 | target_means=[0., 0., 0., 0.],
49 | target_stds=[0.1, 0.1, 0.2, 0.2]),
50 | reg_class_agnostic=False,
51 | loss_cls=dict(
52 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
53 | loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
54 |
55 | # model training and testing settings
56 | train_cfg=dict(
57 | img_rpn=dict(
58 | assigner=dict(
59 | type='MaxIoUAssigner',
60 | pos_iou_thr=0.7,
61 | neg_iou_thr=0.3,
62 | min_pos_iou=0.3,
63 | match_low_quality=True,
64 | ignore_iof_thr=-1),
65 | sampler=dict(
66 | type='RandomSampler',
67 | num=256,
68 | pos_fraction=0.5,
69 | neg_pos_ub=-1,
70 | add_gt_as_proposals=False),
71 | allowed_border=-1,
72 | pos_weight=-1,
73 | debug=False),
74 | img_rpn_proposal=dict(
75 | nms_across_levels=False,
76 | nms_pre=2000,
77 | nms_post=1000,
78 | max_per_img=1000,
79 | nms=dict(type='nms', iou_threshold=0.7),
80 | min_bbox_size=0),
81 | img_rcnn=dict(
82 | assigner=dict(
83 | type='MaxIoUAssigner',
84 | pos_iou_thr=0.5,
85 | neg_iou_thr=0.5,
86 | min_pos_iou=0.5,
87 | match_low_quality=False,
88 | ignore_iof_thr=-1),
89 | sampler=dict(
90 | type='RandomSampler',
91 | num=512,
92 | pos_fraction=0.25,
93 | neg_pos_ub=-1,
94 | add_gt_as_proposals=True),
95 | pos_weight=-1,
96 | debug=False)),
97 | test_cfg=dict(
98 | img_rpn=dict(
99 | nms_across_levels=False,
100 | nms_pre=1000,
101 | nms_post=1000,
102 | max_per_img=1000,
103 | nms=dict(type='nms', iou_threshold=0.7),
104 | min_bbox_size=0),
105 | img_rcnn=dict(
106 | score_thr=0.05,
107 | nms=dict(type='nms', iou_threshold=0.5),
108 | max_per_img=100)))
109 |
--------------------------------------------------------------------------------
/mp3dbev/projects/configs/_base_/models/mask_rcnn_r50_fpn.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='MaskRCNN',
4 | pretrained='torchvision://resnet50',
5 | backbone=dict(
6 | type='ResNet',
7 | depth=50,
8 | num_stages=4,
9 | out_indices=(0, 1, 2, 3),
10 | frozen_stages=1,
11 | norm_cfg=dict(type='BN', requires_grad=True),
12 | norm_eval=True,
13 | style='pytorch'),
14 | neck=dict(
15 | type='FPN',
16 | in_channels=[256, 512, 1024, 2048],
17 | out_channels=256,
18 | num_outs=5),
19 | rpn_head=dict(
20 | type='RPNHead',
21 | in_channels=256,
22 | feat_channels=256,
23 | anchor_generator=dict(
24 | type='AnchorGenerator',
25 | scales=[8],
26 | ratios=[0.5, 1.0, 2.0],
27 | strides=[4, 8, 16, 32, 64]),
28 | bbox_coder=dict(
29 | type='DeltaXYWHBBoxCoder',
30 | target_means=[.0, .0, .0, .0],
31 | target_stds=[1.0, 1.0, 1.0, 1.0]),
32 | loss_cls=dict(
33 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
34 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
35 | roi_head=dict(
36 | type='StandardRoIHead',
37 | bbox_roi_extractor=dict(
38 | type='SingleRoIExtractor',
39 | roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
40 | out_channels=256,
41 | featmap_strides=[4, 8, 16, 32]),
42 | bbox_head=dict(
43 | type='Shared2FCBBoxHead',
44 | in_channels=256,
45 | fc_out_channels=1024,
46 | roi_feat_size=7,
47 | num_classes=80,
48 | bbox_coder=dict(
49 | type='DeltaXYWHBBoxCoder',
50 | target_means=[0., 0., 0., 0.],
51 | target_stds=[0.1, 0.1, 0.2, 0.2]),
52 | reg_class_agnostic=False,
53 | loss_cls=dict(
54 | type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
55 | loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
56 | mask_roi_extractor=dict(
57 | type='SingleRoIExtractor',
58 | roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
59 | out_channels=256,
60 | featmap_strides=[4, 8, 16, 32]),
61 | mask_head=dict(
62 | type='FCNMaskHead',
63 | num_convs=4,
64 | in_channels=256,
65 | conv_out_channels=256,
66 | num_classes=80,
67 | loss_mask=dict(
68 | type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
69 | # model training and testing settings
70 | train_cfg=dict(
71 | rpn=dict(
72 | assigner=dict(
73 | type='MaxIoUAssigner',
74 | pos_iou_thr=0.7,
75 | neg_iou_thr=0.3,
76 | min_pos_iou=0.3,
77 | match_low_quality=True,
78 | ignore_iof_thr=-1),
79 | sampler=dict(
80 | type='RandomSampler',
81 | num=256,
82 | pos_fraction=0.5,
83 | neg_pos_ub=-1,
84 | add_gt_as_proposals=False),
85 | allowed_border=-1,
86 | pos_weight=-1,
87 | debug=False),
88 | rpn_proposal=dict(
89 | nms_across_levels=False,
90 | nms_pre=2000,
91 | nms_post=1000,
92 | max_num=1000,
93 | nms_thr=0.7,
94 | min_bbox_size=0),
95 | rcnn=dict(
96 | assigner=dict(
97 | type='MaxIoUAssigner',
98 | pos_iou_thr=0.5,
99 | neg_iou_thr=0.5,
100 | min_pos_iou=0.5,
101 | match_low_quality=True,
102 | ignore_iof_thr=-1),
103 | sampler=dict(
104 | type='RandomSampler',
105 | num=512,
106 | pos_fraction=0.25,
107 | neg_pos_ub=-1,
108 | add_gt_as_proposals=True),
109 | mask_size=28,
110 | pos_weight=-1,
111 | debug=False)),
112 | test_cfg=dict(
113 | rpn=dict(
114 | nms_across_levels=False,
115 | nms_pre=1000,
116 | nms_post=1000,
117 | max_num=1000,
118 | nms_thr=0.7,
119 | min_bbox_size=0),
120 | rcnn=dict(
121 | score_thr=0.05,
122 | nms=dict(type='nms', iou_threshold=0.5),
123 | max_per_img=100,
124 | mask_thr_binary=0.5)))
125 |
--------------------------------------------------------------------------------
/mp3dbev/projects/configs/_base_/models/paconv_cuda_ssg.py:
--------------------------------------------------------------------------------
1 | _base_ = './paconv_ssg.py'
2 |
3 | model = dict(
4 | backbone=dict(
5 | sa_cfg=dict(
6 | type='PAConvCUDASAModule',
7 | scorenet_cfg=dict(mlp_channels=[8, 16, 16]))))
8 |
--------------------------------------------------------------------------------
/mp3dbev/projects/configs/_base_/models/paconv_ssg.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='EncoderDecoder3D',
4 | backbone=dict(
5 | type='PointNet2SASSG',
6 | in_channels=9, # [xyz, rgb, normalized_xyz]
7 | num_points=(1024, 256, 64, 16),
8 | radius=(None, None, None, None), # use kNN instead of ball query
9 | num_samples=(32, 32, 32, 32),
10 | sa_channels=((32, 32, 64), (64, 64, 128), (128, 128, 256), (256, 256,
11 | 512)),
12 | fp_channels=(),
13 | norm_cfg=dict(type='BN2d', momentum=0.1),
14 | sa_cfg=dict(
15 | type='PAConvSAModule',
16 | pool_mod='max',
17 | use_xyz=True,
18 | normalize_xyz=False,
19 | paconv_num_kernels=[16, 16, 16],
20 | paconv_kernel_input='w_neighbor',
21 | scorenet_input='w_neighbor_dist',
22 | scorenet_cfg=dict(
23 | mlp_channels=[16, 16, 16],
24 | score_norm='softmax',
25 | temp_factor=1.0,
26 | last_bn=False))),
27 | decode_head=dict(
28 | type='PAConvHead',
29 | # PAConv model's decoder takes skip connections from the backbone;
30 | # different from PointNet++, it also concatenates input features in the last
31 | # level of the decoder, leading to `128 + 6` as the channel number
32 | fp_channels=((768, 256, 256), (384, 256, 256), (320, 256, 128),
33 | (128 + 6, 128, 128, 128)),
34 | channels=128,
35 | dropout_ratio=0.5,
36 | conv_cfg=dict(type='Conv1d'),
37 | norm_cfg=dict(type='BN1d'),
38 | act_cfg=dict(type='ReLU'),
39 | loss_decode=dict(
40 | type='CrossEntropyLoss',
41 | use_sigmoid=False,
42 | class_weight=None, # should be modified with dataset
43 | loss_weight=1.0)),
44 | # correlation loss to regularize PAConv's kernel weights
45 | loss_regularization=dict(
46 | type='PAConvRegularizationLoss', reduction='sum', loss_weight=10.0),
47 | # model training and testing settings
48 | train_cfg=dict(),
49 | test_cfg=dict(mode='slide'))
50 |
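A quick sanity check of the decoder channel arithmetic described in the comments above (editorial sketch; the skip pairing follows the standard PointNet++ feature-propagation layout):

sa_out = (64, 128, 256, 512)      # last channel of each sa_channels tuple
fp_in = (768, 384, 320, 128 + 6)  # first channel of each fp_channels tuple
assert fp_in[0] == sa_out[3] + sa_out[2]  # deepest SA output + SA3 skip
assert fp_in[1] == 256 + sa_out[1]        # FP1 output + SA2 skip
assert fp_in[2] == 256 + sa_out[0]        # FP2 output + SA1 skip
assert fp_in[3] == 128 + 6                # FP3 output + 6 non-xyz input features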
--------------------------------------------------------------------------------
/mp3dbev/projects/configs/_base_/models/pointnet2_msg.py:
--------------------------------------------------------------------------------
1 | _base_ = './pointnet2_ssg.py'
2 |
3 | # model settings
4 | model = dict(
5 | backbone=dict(
6 | _delete_=True,
7 | type='PointNet2SAMSG',
8 | in_channels=6, # [xyz, rgb], should be modified with dataset
9 | num_points=(1024, 256, 64, 16),
10 | radii=((0.05, 0.1), (0.1, 0.2), (0.2, 0.4), (0.4, 0.8)),
11 | num_samples=((16, 32), (16, 32), (16, 32), (16, 32)),
12 | sa_channels=(((16, 16, 32), (32, 32, 64)), ((64, 64, 128), (64, 96,
13 | 128)),
14 | ((128, 196, 256), (128, 196, 256)), ((256, 256, 512),
15 | (256, 384, 512))),
16 | aggregation_channels=(None, None, None, None),
17 | fps_mods=(('D-FPS'), ('D-FPS'), ('D-FPS'), ('D-FPS')),
18 | fps_sample_range_lists=((-1), (-1), (-1), (-1)),
19 | dilated_group=(False, False, False, False),
20 | out_indices=(0, 1, 2, 3),
21 | sa_cfg=dict(
22 | type='PointSAModuleMSG',
23 | pool_mod='max',
24 | use_xyz=True,
25 | normalize_xyz=False)),
26 | decode_head=dict(
27 | fp_channels=((1536, 256, 256), (512, 256, 256), (352, 256, 128),
28 | (128, 128, 128, 128))))
29 |
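The MSG channel numbers follow the same pattern (editorial sketch): with `aggregation_channels=(None, ...)` each SA level outputs the concatenation of its branch channels, so every FP input is the deeper level's output plus the skip connection.

sa_out = (32 + 64, 128 + 128, 256 + 256, 512 + 512)  # (96, 256, 512, 1024)
assert sa_out[3] + sa_out[2] == 1536  # FP1 input
assert 256 + sa_out[1] == 512         # FP2 input: FP1 output + SA2 skip
assert 256 + sa_out[0] == 352         # FP3 input: FP2 output + SA1 skip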
--------------------------------------------------------------------------------
/mp3dbev/projects/configs/_base_/models/pointnet2_ssg.py:
--------------------------------------------------------------------------------
1 | # model settings
2 | model = dict(
3 | type='EncoderDecoder3D',
4 | backbone=dict(
5 | type='PointNet2SASSG',
6 | in_channels=6, # [xyz, rgb], should be modified with dataset
7 | num_points=(1024, 256, 64, 16),
8 | radius=(0.1, 0.2, 0.4, 0.8),
9 | num_samples=(32, 32, 32, 32),
10 | sa_channels=((32, 32, 64), (64, 64, 128), (128, 128, 256), (256, 256,
11 | 512)),
12 | fp_channels=(),
13 | norm_cfg=dict(type='BN2d'),
14 | sa_cfg=dict(
15 | type='PointSAModule',
16 | pool_mod='max',
17 | use_xyz=True,
18 | normalize_xyz=False)),
19 | decode_head=dict(
20 | type='PointNet2Head',
21 | fp_channels=((768, 256, 256), (384, 256, 256), (320, 256, 128),
22 | (128, 128, 128, 128)),
23 | channels=128,
24 | dropout_ratio=0.5,
25 | conv_cfg=dict(type='Conv1d'),
26 | norm_cfg=dict(type='BN1d'),
27 | act_cfg=dict(type='ReLU'),
28 | loss_decode=dict(
29 | type='CrossEntropyLoss',
30 | use_sigmoid=False,
31 | class_weight=None, # should be modified with dataset
32 | loss_weight=1.0)),
33 | # model training and testing settings
34 | train_cfg=dict(),
35 | test_cfg=dict(mode='slide'))
36 |
--------------------------------------------------------------------------------
/mp3dbev/projects/configs/_base_/models/votenet.py:
--------------------------------------------------------------------------------
1 | model = dict(
2 | type='VoteNet',
3 | backbone=dict(
4 | type='PointNet2SASSG',
5 | in_channels=4,
6 | num_points=(2048, 1024, 512, 256),
7 | radius=(0.2, 0.4, 0.8, 1.2),
8 | num_samples=(64, 32, 16, 16),
9 | sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256),
10 | (128, 128, 256)),
11 | fp_channels=((256, 256), (256, 256)),
12 | norm_cfg=dict(type='BN2d'),
13 | sa_cfg=dict(
14 | type='PointSAModule',
15 | pool_mod='max',
16 | use_xyz=True,
17 | normalize_xyz=True)),
18 | bbox_head=dict(
19 | type='VoteHead',
20 | vote_module_cfg=dict(
21 | in_channels=256,
22 | vote_per_seed=1,
23 | gt_per_seed=3,
24 | conv_channels=(256, 256),
25 | conv_cfg=dict(type='Conv1d'),
26 | norm_cfg=dict(type='BN1d'),
27 | norm_feats=True,
28 | vote_loss=dict(
29 | type='ChamferDistance',
30 | mode='l1',
31 | reduction='none',
32 | loss_dst_weight=10.0)),
33 | vote_aggregation_cfg=dict(
34 | type='PointSAModule',
35 | num_point=256,
36 | radius=0.3,
37 | num_sample=16,
38 | mlp_channels=[256, 128, 128, 128],
39 | use_xyz=True,
40 | normalize_xyz=True),
41 | pred_layer_cfg=dict(
42 | in_channels=128, shared_conv_channels=(128, 128), bias=True),
43 | conv_cfg=dict(type='Conv1d'),
44 | norm_cfg=dict(type='BN1d'),
45 | objectness_loss=dict(
46 | type='CrossEntropyLoss',
47 | class_weight=[0.2, 0.8],
48 | reduction='sum',
49 | loss_weight=5.0),
50 | center_loss=dict(
51 | type='ChamferDistance',
52 | mode='l2',
53 | reduction='sum',
54 | loss_src_weight=10.0,
55 | loss_dst_weight=10.0),
56 | dir_class_loss=dict(
57 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
58 | dir_res_loss=dict(
59 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
60 | size_class_loss=dict(
61 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
62 | size_res_loss=dict(
63 | type='SmoothL1Loss', reduction='sum', loss_weight=10.0 / 3.0),
64 | semantic_loss=dict(
65 | type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
66 | # model training and testing settings
67 | train_cfg=dict(
68 | pos_distance_thr=0.3, neg_distance_thr=0.6, sample_mod='vote'),
69 | test_cfg=dict(
70 | sample_mod='seed',
71 | nms_thr=0.25,
72 | score_thr=0.05,
73 | per_class_proposal=True))
74 |
--------------------------------------------------------------------------------
/mp3dbev/projects/configs/_base_/schedules/cosine.py:
--------------------------------------------------------------------------------
1 | # This schedule is mainly used by models with dynamic voxelization
2 | # optimizer
3 | lr = 0.003 # max learning rate
4 | optimizer = dict(
5 | type='AdamW',
6 | lr=lr,
7 | betas=(0.95, 0.99), # the momentum is changed during training
8 | weight_decay=0.001)
9 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
10 |
11 | lr_config = dict(
12 | policy='CosineAnnealing',
13 | warmup='linear',
14 | warmup_iters=1000,
15 | warmup_ratio=1.0 / 10,
16 | min_lr_ratio=1e-5)
17 |
18 | momentum_config = None
19 |
20 | runner = dict(type='EpochBasedRunner', max_epochs=40)
21 |
--------------------------------------------------------------------------------
/mp3dbev/projects/configs/_base_/schedules/cyclic_20e.py:
--------------------------------------------------------------------------------
1 | # For the nuScenes dataset, we usually evaluate the model at the end of training.
2 | # Since the models are trained for 24 epochs by default, we set the evaluation
3 | # interval to 20. Please change the interval accordingly if you do not
4 | # use a default schedule.
5 | # optimizer
6 | # This schedule is mainly used by models on nuScenes dataset
7 | optimizer = dict(type='AdamW', lr=1e-4, weight_decay=0.01)
8 | # max_norm=10 is better for SECOND
9 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
10 | lr_config = dict(
11 | policy='cyclic',
12 | target_ratio=(10, 1e-4),
13 | cyclic_times=1,
14 | step_ratio_up=0.4,
15 | )
16 | momentum_config = dict(
17 | policy='cyclic',
18 | target_ratio=(0.85 / 0.95, 1),
19 | cyclic_times=1,
20 | step_ratio_up=0.4,
21 | )
22 |
23 | # runtime settings
24 | runner = dict(type='EpochBasedRunner', max_epochs=20)
25 |
--------------------------------------------------------------------------------
/mp3dbev/projects/configs/_base_/schedules/cyclic_40e.py:
--------------------------------------------------------------------------------
1 | # The schedule is usually used by models trained on the KITTI dataset
2 |
3 | # The learning rate set in the cyclic schedule is the initial learning rate
4 | # rather than the max learning rate. Since the target_ratio is (10, 1e-4),
5 | # the learning rate will change from 0.0018 to 0.018, then decay to 0.0018*1e-4
6 | lr = 0.0018
7 | # The optimizer follows the setting in SECOND.Pytorch, but here we use
8 | # the official AdamW optimizer implemented by PyTorch.
9 | optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)
10 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
11 | # We use cyclic learning rate and momentum schedule following SECOND.Pytorch
12 | # https://github.com/traveller59/second.pytorch/blob/3aba19c9688274f75ebb5e576f65cfe54773c021/torchplus/train/learning_schedules_fastai.py#L69 # noqa
13 | # We implement them in mmcv, for more details, please refer to
14 | # https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/lr_updater.py#L327 # noqa
15 | # https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/momentum_updater.py#L130 # noqa
16 | lr_config = dict(
17 | policy='cyclic',
18 | target_ratio=(10, 1e-4),
19 | cyclic_times=1,
20 | step_ratio_up=0.4,
21 | )
22 | momentum_config = dict(
23 | policy='cyclic',
24 | target_ratio=(0.85 / 0.95, 1),
25 | cyclic_times=1,
26 | step_ratio_up=0.4,
27 | )
28 | # Although max_epochs is 40, this schedule is usually used with
29 | # RepeatDataset with repeat ratio N, so the actual max epoch
30 | # number could be N x 40.
31 | runner = dict(type='EpochBasedRunner', max_epochs=40)
32 |
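Worked numbers for the comment above (sketch only, using the values set in this file):

lr = 0.0018
peak_lr = lr * 10      # 0.018, reached after step_ratio_up = 40% of the steps
final_lr = lr * 1e-4   # 1.8e-07 at the end of the schedule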
--------------------------------------------------------------------------------
/mp3dbev/projects/configs/_base_/schedules/mmdet_schedule_1x.py:
--------------------------------------------------------------------------------
1 | # optimizer
2 | optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
3 | optimizer_config = dict(grad_clip=None)
4 | # learning policy
5 | lr_config = dict(
6 | policy='step',
7 | warmup='linear',
8 | warmup_iters=500,
9 | warmup_ratio=0.001,
10 | step=[8, 11])
11 | runner = dict(type='EpochBasedRunner', max_epochs=12)
12 |
--------------------------------------------------------------------------------
/mp3dbev/projects/configs/_base_/schedules/schedule_2x.py:
--------------------------------------------------------------------------------
1 | # optimizer
2 | # This schedule is mainly used by models on nuScenes dataset
3 | optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.01)
4 | # max_norm=10 is better for SECOND
5 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
6 | lr_config = dict(
7 | policy='step',
8 | warmup='linear',
9 | warmup_iters=1000,
10 | warmup_ratio=1.0 / 1000,
11 | step=[20, 23])
12 | momentum_config = None
13 | # runtime settings
14 | runner = dict(type='EpochBasedRunner', max_epochs=24)
15 |
--------------------------------------------------------------------------------
/mp3dbev/projects/configs/_base_/schedules/schedule_3x.py:
--------------------------------------------------------------------------------
1 | # optimizer
2 | # This schedule is mainly used by models on indoor dataset,
3 | # e.g., VoteNet on SUNRGBD and ScanNet
4 | lr = 0.008 # max learning rate
5 | optimizer = dict(type='AdamW', lr=lr, weight_decay=0.01)
6 | optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
7 | lr_config = dict(policy='step', warmup=None, step=[24, 32])
8 | # runtime settings
9 | runner = dict(type='EpochBasedRunner', max_epochs=36)
10 |
--------------------------------------------------------------------------------
/mp3dbev/projects/configs/_base_/schedules/seg_cosine_150e.py:
--------------------------------------------------------------------------------
1 | # optimizer
2 | # This schedule is mainly used on S3DIS dataset in segmentation task
3 | optimizer = dict(type='SGD', lr=0.2, weight_decay=0.0001, momentum=0.9)
4 | optimizer_config = dict(grad_clip=None)
5 | lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=0.002)
6 | momentum_config = None
7 |
8 | # runtime settings
9 | runner = dict(type='EpochBasedRunner', max_epochs=150)
10 |
--------------------------------------------------------------------------------
/mp3dbev/projects/configs/_base_/schedules/seg_cosine_200e.py:
--------------------------------------------------------------------------------
1 | # optimizer
2 | # This schedule is mainly used on ScanNet dataset in segmentation task
3 | optimizer = dict(type='Adam', lr=0.001, weight_decay=0.01)
4 | optimizer_config = dict(grad_clip=None)
5 | lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5)
6 | momentum_config = None
7 |
8 | # runtime settings
9 | runner = dict(type='EpochBasedRunner', max_epochs=200)
10 |
--------------------------------------------------------------------------------
/mp3dbev/projects/configs/_base_/schedules/seg_cosine_50e.py:
--------------------------------------------------------------------------------
1 | # optimizer
2 | # This schedule is mainly used on S3DIS dataset in segmentation task
3 | optimizer = dict(type='Adam', lr=0.001, weight_decay=0.001)
4 | optimizer_config = dict(grad_clip=None)
5 | lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5)
6 | momentum_config = None
7 |
8 | # runtime settings
9 | runner = dict(type='EpochBasedRunner', max_epochs=50)
10 |
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/__init__.py:
--------------------------------------------------------------------------------
1 | from .core.bbox.assigners.hungarian_assigner_3d import HungarianAssigner3D
2 | from .core.bbox.coders.nms_free_coder import NMSFreeCoder
3 | from .core.bbox.match_costs import BBox3DL1Cost
4 | from .core.evaluation.eval_hooks import CustomDistEvalHook
5 | from .datasets.pipelines import (
6 | PhotoMetricDistortionMultiViewImage, PadMultiViewImage,
7 | NormalizeMultiviewImage, CustomCollect3D)
8 | from .models.backbones.vovnet import VoVNet
9 | from .models.utils import *
10 | from .models.opt.adamw import AdamW2
11 | from .bevformer import *
12 |
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DefaultRui/BEV-Scene-Graph/b73acc223f9ded311d6bee8b8117fe36c212fa55/mp3dbev/projects/mmdet3d_plugin/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/bevformer/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from .dense_heads import *
3 | from .detectors import *
4 | from .modules import *
5 | from .runner import *
6 | from .hooks import *
7 |
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/bevformer/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DefaultRui/BEV-Scene-Graph/b73acc223f9ded311d6bee8b8117fe36c212fa55/mp3dbev/projects/mmdet3d_plugin/bevformer/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/bevformer/apis/__init__.py:
--------------------------------------------------------------------------------
1 | from .train import custom_train_model
2 | from .mmdet_train import custom_train_detector
3 | # from .test import custom_multi_gpu_test
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/bevformer/apis/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DefaultRui/BEV-Scene-Graph/b73acc223f9ded311d6bee8b8117fe36c212fa55/mp3dbev/projects/mmdet3d_plugin/bevformer/apis/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/bevformer/apis/__pycache__/mmdet_train.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DefaultRui/BEV-Scene-Graph/b73acc223f9ded311d6bee8b8117fe36c212fa55/mp3dbev/projects/mmdet3d_plugin/bevformer/apis/__pycache__/mmdet_train.cpython-38.pyc
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/bevformer/apis/__pycache__/test.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DefaultRui/BEV-Scene-Graph/b73acc223f9ded311d6bee8b8117fe36c212fa55/mp3dbev/projects/mmdet3d_plugin/bevformer/apis/__pycache__/test.cpython-38.pyc
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/bevformer/apis/__pycache__/train.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DefaultRui/BEV-Scene-Graph/b73acc223f9ded311d6bee8b8117fe36c212fa55/mp3dbev/projects/mmdet3d_plugin/bevformer/apis/__pycache__/train.cpython-38.pyc
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/bevformer/apis/train.py:
--------------------------------------------------------------------------------
1 | # ---------------------------------------------
2 | # Copyright (c) OpenMMLab. All rights reserved.
3 | # ---------------------------------------------
4 | # Modified by Zhiqi Li
5 | # ---------------------------------------------
6 |
7 | from .mmdet_train import custom_train_detector
8 | from mmseg.apis import train_segmentor
9 | from mmdet.apis import train_detector
10 |
11 | def custom_train_model(model,
12 | dataset,
13 | cfg,
14 | distributed=False,
15 | validate=False,
16 | timestamp=None,
17 | eval_model=None,
18 | meta=None):
19 | """A function wrapper for launching model training according to cfg.
20 |
21 | Because we need a different eval_hook in the runner. Should be deprecated
22 | in the future.
23 | """
24 | if cfg.model.type in ['EncoderDecoder3D']:
25 | assert False
26 | else:
27 | custom_train_detector(
28 | model,
29 | dataset,
30 | cfg,
31 | distributed=distributed,
32 | validate=validate,
33 | timestamp=timestamp,
34 | eval_model=eval_model,
35 | meta=meta)
36 |
37 |
38 | def train_model(model,
39 | dataset,
40 | cfg,
41 | distributed=False,
42 | validate=False,
43 | timestamp=None,
44 | meta=None):
45 | """A function wrapper for launching model training according to cfg.
46 |
47 | Because we need a different eval_hook in the runner. Should be deprecated
48 | in the future.
49 | """
50 | if cfg.model.type in ['EncoderDecoder3D']:
51 | train_segmentor(
52 | model,
53 | dataset,
54 | cfg,
55 | distributed=distributed,
56 | validate=validate,
57 | timestamp=timestamp,
58 | meta=meta)
59 | else:
60 | train_detector(
61 | model,
62 | dataset,
63 | cfg,
64 | distributed=distributed,
65 | validate=validate,
66 | timestamp=timestamp,
67 | meta=meta)
68 |
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/bevformer/dense_heads/__init__.py:
--------------------------------------------------------------------------------
1 | from .bevformer_head import BEVFormerHead
2 | from .bevformer_headmp import BEVFormerHeadmp
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/bevformer/dense_heads/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DefaultRui/BEV-Scene-Graph/b73acc223f9ded311d6bee8b8117fe36c212fa55/mp3dbev/projects/mmdet3d_plugin/bevformer/dense_heads/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/bevformer/dense_heads/__pycache__/bevformer_head.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DefaultRui/BEV-Scene-Graph/b73acc223f9ded311d6bee8b8117fe36c212fa55/mp3dbev/projects/mmdet3d_plugin/bevformer/dense_heads/__pycache__/bevformer_head.cpython-38.pyc
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/bevformer/dense_heads/__pycache__/bevformer_headmp.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DefaultRui/BEV-Scene-Graph/b73acc223f9ded311d6bee8b8117fe36c212fa55/mp3dbev/projects/mmdet3d_plugin/bevformer/dense_heads/__pycache__/bevformer_headmp.cpython-38.pyc
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/bevformer/detectors/__init__.py:
--------------------------------------------------------------------------------
1 | from .bevformer import BEVFormer
2 | from .bevformermp import BEVFormermp
3 | from .bevformer_fp16 import BEVFormer_fp16
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/bevformer/detectors/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DefaultRui/BEV-Scene-Graph/b73acc223f9ded311d6bee8b8117fe36c212fa55/mp3dbev/projects/mmdet3d_plugin/bevformer/detectors/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/bevformer/detectors/__pycache__/bevformer.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DefaultRui/BEV-Scene-Graph/b73acc223f9ded311d6bee8b8117fe36c212fa55/mp3dbev/projects/mmdet3d_plugin/bevformer/detectors/__pycache__/bevformer.cpython-38.pyc
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/bevformer/detectors/__pycache__/bevformer_fp16.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DefaultRui/BEV-Scene-Graph/b73acc223f9ded311d6bee8b8117fe36c212fa55/mp3dbev/projects/mmdet3d_plugin/bevformer/detectors/__pycache__/bevformer_fp16.cpython-38.pyc
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/bevformer/detectors/__pycache__/bevformermp.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DefaultRui/BEV-Scene-Graph/b73acc223f9ded311d6bee8b8117fe36c212fa55/mp3dbev/projects/mmdet3d_plugin/bevformer/detectors/__pycache__/bevformermp.cpython-38.pyc
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/bevformer/hooks/__init__.py:
--------------------------------------------------------------------------------
1 | from .custom_hooks import TransferWeight
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/bevformer/hooks/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DefaultRui/BEV-Scene-Graph/b73acc223f9ded311d6bee8b8117fe36c212fa55/mp3dbev/projects/mmdet3d_plugin/bevformer/hooks/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/bevformer/hooks/__pycache__/custom_hooks.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DefaultRui/BEV-Scene-Graph/b73acc223f9ded311d6bee8b8117fe36c212fa55/mp3dbev/projects/mmdet3d_plugin/bevformer/hooks/__pycache__/custom_hooks.cpython-38.pyc
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/bevformer/hooks/custom_hooks.py:
--------------------------------------------------------------------------------
1 | from mmcv.runner.hooks.hook import HOOKS, Hook
2 | from projects.mmdet3d_plugin.models.utils import run_time
3 |
4 |
5 | @HOOKS.register_module()
6 | class TransferWeight(Hook):
7 |
8 | def __init__(self, every_n_inters=1):
9 | self.every_n_inters = every_n_inters
10 |
11 | def after_train_iter(self, runner):
12 | if self.every_n_inner_iters(runner, self.every_n_inters):
13 | runner.eval_model.load_state_dict(runner.model.state_dict())
14 |
15 |
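A hypothetical usage sketch (not taken from this repo's configs): mmcv-style custom hooks are typically enabled from the config file. Note the hook assumes the runner exposes an `eval_model` attribute, as the (unused) EpochBasedRunner_video in this plugin would provide.

custom_hooks = [dict(type='TransferWeight', every_n_inters=1)]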
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/bevformer/modules/__init__.py:
--------------------------------------------------------------------------------
1 | from .transformer import PerceptionTransformer
2 | from .spatial_cross_attention import SpatialCrossAttention, MSDeformableAttention3D
3 | from .temporal_self_attention import TemporalSelfAttention
4 | from .encoder import BEVFormerEncoder, BEVFormerLayer
5 | from .decoder import DetectionTransformerDecoder
6 |
7 |
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/bevformer/modules/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DefaultRui/BEV-Scene-Graph/b73acc223f9ded311d6bee8b8117fe36c212fa55/mp3dbev/projects/mmdet3d_plugin/bevformer/modules/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/bevformer/modules/__pycache__/custom_base_transformer_layer.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DefaultRui/BEV-Scene-Graph/b73acc223f9ded311d6bee8b8117fe36c212fa55/mp3dbev/projects/mmdet3d_plugin/bevformer/modules/__pycache__/custom_base_transformer_layer.cpython-38.pyc
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/bevformer/modules/__pycache__/decoder.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DefaultRui/BEV-Scene-Graph/b73acc223f9ded311d6bee8b8117fe36c212fa55/mp3dbev/projects/mmdet3d_plugin/bevformer/modules/__pycache__/decoder.cpython-38.pyc
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/bevformer/modules/__pycache__/encoder.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DefaultRui/BEV-Scene-Graph/b73acc223f9ded311d6bee8b8117fe36c212fa55/mp3dbev/projects/mmdet3d_plugin/bevformer/modules/__pycache__/encoder.cpython-38.pyc
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/bevformer/modules/__pycache__/multi_scale_deformable_attn_function.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DefaultRui/BEV-Scene-Graph/b73acc223f9ded311d6bee8b8117fe36c212fa55/mp3dbev/projects/mmdet3d_plugin/bevformer/modules/__pycache__/multi_scale_deformable_attn_function.cpython-38.pyc
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/bevformer/modules/__pycache__/spatial_cross_attention.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DefaultRui/BEV-Scene-Graph/b73acc223f9ded311d6bee8b8117fe36c212fa55/mp3dbev/projects/mmdet3d_plugin/bevformer/modules/__pycache__/spatial_cross_attention.cpython-38.pyc
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/bevformer/modules/__pycache__/temporal_self_attention.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DefaultRui/BEV-Scene-Graph/b73acc223f9ded311d6bee8b8117fe36c212fa55/mp3dbev/projects/mmdet3d_plugin/bevformer/modules/__pycache__/temporal_self_attention.cpython-38.pyc
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/bevformer/modules/__pycache__/transformer.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DefaultRui/BEV-Scene-Graph/b73acc223f9ded311d6bee8b8117fe36c212fa55/mp3dbev/projects/mmdet3d_plugin/bevformer/modules/__pycache__/transformer.cpython-38.pyc
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/bevformer/runner/__init__.py:
--------------------------------------------------------------------------------
1 | # from .epoch_based_runner import EpochBasedRunner_video
2 | # it is not used
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/bevformer/runner/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DefaultRui/BEV-Scene-Graph/b73acc223f9ded311d6bee8b8117fe36c212fa55/mp3dbev/projects/mmdet3d_plugin/bevformer/runner/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/bevformer/runner/__pycache__/epoch_based_runner.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DefaultRui/BEV-Scene-Graph/b73acc223f9ded311d6bee8b8117fe36c212fa55/mp3dbev/projects/mmdet3d_plugin/bevformer/runner/__pycache__/epoch_based_runner.cpython-38.pyc
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/bevformer/runner/epoch_based_runner.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | # ---------------------------------------------
3 | # Modified by Zhiqi Li
4 | # ---------------------------------------------
5 |
6 | import os.path as osp
7 | import torch
8 | import mmcv
9 | from mmcv.runner.base_runner import BaseRunner
10 | from mmcv.runner.epoch_based_runner import EpochBasedRunner
11 | from mmcv.runner.builder import RUNNERS
12 | from mmcv.runner.checkpoint import save_checkpoint
13 | from mmcv.runner.utils import get_host_info
14 | from pprint import pprint
15 | from mmcv.parallel.data_container import DataContainer
16 |
17 | # it is not used
18 |
19 | # @RUNNERS.register_module()
20 | # class EpochBasedRunner_video(EpochBasedRunner):
21 |
22 | # '''
23 | # # basic logic
24 |
25 | # input_sequence = [a, b, c] # given a sequence of samples
26 |
27 | # prev_bev = None
28 | # for each in input_sequence[:-1]:
29 | # prev_bev = eval_model(each, prev_bev) # inference only.
30 |
31 | # model(input_sequence[-1], prev_bev) # train the last sample.
32 | # '''
33 |
34 | # def __init__(self,
35 | # model,
36 | # eval_model=None,
37 | # batch_processor=None,
38 | # optimizer=None,
39 | # work_dir=None,
40 | # logger=None,
41 | # meta=None,
42 | # keys=['gt_bboxes_3d', 'gt_labels_3d', 'img'],
43 | # max_iters=None,
44 | # max_epochs=None):
45 | # super().__init__(model,
46 | # batch_processor,
47 | # optimizer,
48 | # work_dir,
49 | # logger,
50 | # meta,
51 | # max_iters,
52 | # max_epochs)
53 | # keys.append('img_metas')
54 | # self.keys = keys
55 | # self.eval_model = eval_model
56 | # self.eval_model.eval()
57 |
58 | # def run_iter(self, data_batch, train_mode, **kwargs):
59 | # if self.batch_processor is not None:
60 | # assert False
61 | # # outputs = self.batch_processor(
62 | # # self.model, data_batch, train_mode=train_mode, **kwargs)
63 | # elif train_mode:
64 | # # import ipdb;ipdb.set_trace()
65 | # num_samples = data_batch['img'].data[0].size(1)
66 | # data_list = []
67 | # prev_bev = None
68 | # for i in range(num_samples):
69 | # data = {}
70 | # for key in self.keys:
71 | # if key not in ['img_metas', 'img', 'points']:
72 | # data[key] = data_batch[key]
73 | # else:
74 | # if key == 'img':
75 | # data['img'] = DataContainer(data=[data_batch['img'].data[0][:, i]], cpu_only=data_batch['img'].cpu_only, stack=True)
76 | # elif key == 'img_metas':
77 | # data['img_metas'] = DataContainer(data=[[each[i] for each in data_batch['img_metas'].data[0]]], cpu_only=data_batch['img_metas'].cpu_only)
78 | # else:
79 | # assert False
80 | # data_list.append(data)
81 | # with torch.no_grad():
82 | # for i in range(num_samples-1):
83 | # if data_list[i]['img_metas'].data[0][0]['prev_bev_exists']:
84 | # data_list[i]['prev_bev'] = DataContainer(data=[prev_bev], cpu_only=False)
85 | # prev_bev = self.eval_model.val_step(data_list[i], self.optimizer, **kwargs)
86 | # if data_list[-1]['img_metas'].data[0][0]['prev_bev_exists']:
87 | # data_list[-1]['prev_bev'] = DataContainer(data=[prev_bev], cpu_only=False)
88 | # outputs = self.model.train_step(data_list[-1], self.optimizer, **kwargs)
89 | # else:
90 | # assert False
91 | # # outputs = self.model.val_step(data_batch, self.optimizer, **kwargs)
92 |
93 | # if not isinstance(outputs, dict):
94 | # raise TypeError('"batch_processor()" or "model.train_step()"'
95 | # 'and "model.val_step()" must return a dict')
96 | # if 'log_vars' in outputs:
97 | # self.log_buffer.update(outputs['log_vars'], outputs['num_samples'])
98 | # self.outputs = outputs
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/core/bbox/__pycache__/util.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DefaultRui/BEV-Scene-Graph/b73acc223f9ded311d6bee8b8117fe36c212fa55/mp3dbev/projects/mmdet3d_plugin/core/bbox/__pycache__/util.cpython-38.pyc
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/core/bbox/assigners/__init__.py:
--------------------------------------------------------------------------------
1 | from .hungarian_assigner_3d import HungarianAssigner3D
2 |
3 | __all__ = ['HungarianAssigner3D']
4 |
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DefaultRui/BEV-Scene-Graph/b73acc223f9ded311d6bee8b8117fe36c212fa55/mp3dbev/projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/hungarian_assigner_3d.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DefaultRui/BEV-Scene-Graph/b73acc223f9ded311d6bee8b8117fe36c212fa55/mp3dbev/projects/mmdet3d_plugin/core/bbox/assigners/__pycache__/hungarian_assigner_3d.cpython-38.pyc
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/core/bbox/coders/__init__.py:
--------------------------------------------------------------------------------
1 | from .nms_free_coder import NMSFreeCoder
2 |
3 | __all__ = ['NMSFreeCoder']
4 |
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/core/bbox/coders/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DefaultRui/BEV-Scene-Graph/b73acc223f9ded311d6bee8b8117fe36c212fa55/mp3dbev/projects/mmdet3d_plugin/core/bbox/coders/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/core/bbox/coders/__pycache__/nms_free_coder.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DefaultRui/BEV-Scene-Graph/b73acc223f9ded311d6bee8b8117fe36c212fa55/mp3dbev/projects/mmdet3d_plugin/core/bbox/coders/__pycache__/nms_free_coder.cpython-38.pyc
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/core/bbox/match_costs/__init__.py:
--------------------------------------------------------------------------------
1 | from mmdet.core.bbox.match_costs import build_match_cost
2 | from .match_cost import BBox3DL1Cost
3 |
4 | __all__ = ['build_match_cost', 'BBox3DL1Cost']
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/core/bbox/match_costs/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DefaultRui/BEV-Scene-Graph/b73acc223f9ded311d6bee8b8117fe36c212fa55/mp3dbev/projects/mmdet3d_plugin/core/bbox/match_costs/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/core/bbox/match_costs/__pycache__/match_cost.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DefaultRui/BEV-Scene-Graph/b73acc223f9ded311d6bee8b8117fe36c212fa55/mp3dbev/projects/mmdet3d_plugin/core/bbox/match_costs/__pycache__/match_cost.cpython-38.pyc
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/core/bbox/match_costs/match_cost.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from mmdet.core.bbox.match_costs.builder import MATCH_COST
3 |
4 |
5 | @MATCH_COST.register_module()
6 | class BBox3DL1Cost(object):
7 | """BBox3DL1Cost.
8 | Args:
9 | weight (int | float, optional): loss_weight
10 | """
11 |
12 | def __init__(self, weight=1.):
13 | self.weight = weight
14 |
15 | def __call__(self, bbox_pred, gt_bboxes):
16 | """
17 | Args:
18 | bbox_pred (Tensor): Predicted boxes with normalized coordinates
19 | (cx, cy, w, h), which are all in range [0, 1]. Shape
20 | [num_query, 4].
21 | gt_bboxes (Tensor): Ground truth boxes with normalized
22 | coordinates (x1, y1, x2, y2). Shape [num_gt, 4].
23 | Returns:
24 | torch.Tensor: bbox_cost value with weight
25 | """
26 | bbox_cost = torch.cdist(bbox_pred, gt_bboxes, p=1)
27 | return bbox_cost * self.weight
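A minimal usage sketch (hypothetical shapes; torch.cdist accepts any box dimensionality, not just the 4-dim case in the docstring):

cost_fn = BBox3DL1Cost(weight=0.25)
bbox_pred = torch.rand(900, 10)  # e.g. 900 queries of normalized box params
gt_bboxes = torch.rand(7, 10)
cost = cost_fn(bbox_pred, gt_bboxes)
assert cost.shape == (900, 7)    # pairwise weighted L1 distances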
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/core/bbox/util.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | def normalize_bbox(bboxes, pc_range):
5 |
6 | cx = bboxes[..., 0:1]
7 | cy = bboxes[..., 1:2]
8 | cz = bboxes[..., 2:3]
9 | w = bboxes[..., 3:4].log()
10 | l = bboxes[..., 4:5].log()
11 | h = bboxes[..., 5:6].log()
12 |
13 | rot = bboxes[..., 6:7]
14 | if bboxes.size(-1) > 7:
15 | vx = bboxes[..., 7:8]
16 | vy = bboxes[..., 8:9]
17 | normalized_bboxes = torch.cat(
18 | (cx, cy, w, l, cz, h, rot.sin(), rot.cos(), vx, vy), dim=-1
19 | )
20 | else:
21 | normalized_bboxes = torch.cat(
22 | (cx, cy, w, l, cz, h, rot.sin(), rot.cos()), dim=-1
23 | )
24 | return normalized_bboxes
25 |
26 | def denormalize_bbox(normalized_bboxes, pc_range):
27 | # rotation
28 | rot_sine = normalized_bboxes[..., 6:7]
29 |
30 | rot_cosine = normalized_bboxes[..., 7:8]
31 | rot = torch.atan2(rot_sine, rot_cosine)
32 |
33 | # center in the bev
34 | cx = normalized_bboxes[..., 0:1]
35 | cy = normalized_bboxes[..., 1:2]
36 | cz = normalized_bboxes[..., 4:5]
37 |
38 | # size
39 | w = normalized_bboxes[..., 2:3]
40 | l = normalized_bboxes[..., 3:4]
41 | h = normalized_bboxes[..., 5:6]
42 |
43 | w = w.exp()
44 | l = l.exp()
45 | h = h.exp()
46 | if normalized_bboxes.size(-1) > 8:
47 | # velocity
48 | vx = normalized_bboxes[:, 8:9]
49 | vy = normalized_bboxes[:, 9:10]
50 | denormalized_bboxes = torch.cat([cx, cy, cz, w, l, h, rot, vx, vy], dim=-1)
51 | else:
52 | denormalized_bboxes = torch.cat([cx, cy, cz, w, l, h, rot], dim=-1)
53 | return denormalized_bboxes
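A round-trip sketch (editorial, not part of the original file): `denormalize_bbox` inverts `normalize_bbox` as long as the yaw lies in atan2's principal range (-pi, pi]; `pc_range` is accepted but unused by both functions.

boxes = torch.tensor([[1.0, 2.0, -0.5, 1.9, 4.5, 1.6, 0.3, 0.1, -0.2]])
restored = denormalize_bbox(normalize_bbox(boxes, None), None)
assert torch.allclose(restored, boxes, atol=1e-6)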
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/core/evaluation/__init__.py:
--------------------------------------------------------------------------------
1 | from .eval_hooks import CustomDistEvalHook
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/core/evaluation/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DefaultRui/BEV-Scene-Graph/b73acc223f9ded311d6bee8b8117fe36c212fa55/mp3dbev/projects/mmdet3d_plugin/core/evaluation/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/core/evaluation/__pycache__/eval_hooks.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DefaultRui/BEV-Scene-Graph/b73acc223f9ded311d6bee8b8117fe36c212fa55/mp3dbev/projects/mmdet3d_plugin/core/evaluation/__pycache__/eval_hooks.cpython-38.pyc
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/core/evaluation/eval_hooks.py:
--------------------------------------------------------------------------------
1 |
2 | # Note: Considering that MMCV's EvalHook updated its interface in v1.3.16,
3 | # in order to avoid a strong version dependency, we do not inherit directly
4 | # from EvalHook but from BaseDistEvalHook.
5 |
6 | import bisect
7 | import os.path as osp
8 |
9 | import mmcv
10 | import torch.distributed as dist
11 | from mmcv.runner import DistEvalHook as BaseDistEvalHook
12 | from mmcv.runner import EvalHook as BaseEvalHook
13 | from torch.nn.modules.batchnorm import _BatchNorm
14 | from mmdet.core.evaluation.eval_hooks import DistEvalHook
15 |
16 |
17 | def _calc_dynamic_intervals(start_interval, dynamic_interval_list):
18 | assert mmcv.is_list_of(dynamic_interval_list, tuple)
19 |
20 | dynamic_milestones = [0]
21 | dynamic_milestones.extend(
22 | [dynamic_interval[0] for dynamic_interval in dynamic_interval_list])
23 | dynamic_intervals = [start_interval]
24 | dynamic_intervals.extend(
25 | [dynamic_interval[1] for dynamic_interval in dynamic_interval_list])
26 | return dynamic_milestones, dynamic_intervals
27 |
28 |
29 | class CustomDistEvalHook(BaseDistEvalHook):
30 |
31 | def __init__(self, *args, dynamic_intervals=None, **kwargs):
32 | super(CustomDistEvalHook, self).__init__(*args, **kwargs)
33 | self.use_dynamic_intervals = dynamic_intervals is not None
34 | if self.use_dynamic_intervals:
35 | self.dynamic_milestones, self.dynamic_intervals = \
36 | _calc_dynamic_intervals(self.interval, dynamic_intervals)
37 |
38 | def _decide_interval(self, runner):
39 | if self.use_dynamic_intervals:
40 | progress = runner.epoch if self.by_epoch else runner.iter
41 | step = bisect.bisect(self.dynamic_milestones, (progress + 1))
42 | # Dynamically modify the evaluation interval
43 | self.interval = self.dynamic_intervals[step - 1]
44 |
45 | def before_train_epoch(self, runner):
46 | """Evaluate the model only at the start of training by epoch."""
47 | self._decide_interval(runner)
48 | super().before_train_epoch(runner)
49 |
50 | def before_train_iter(self, runner):
51 | self._decide_interval(runner)
52 | super().before_train_iter(runner)
53 |
54 | def _do_evaluate(self, runner):
55 | """perform evaluation and save ckpt."""
56 | # Synchronization of BatchNorm's buffers (running_mean
57 | # and running_var) is not supported in PyTorch's DDP,
58 | # which may cause inconsistent model performance across
59 | # ranks, so we broadcast the BatchNorm buffers
60 | # of rank 0 to the other ranks to avoid this.
61 | if self.broadcast_bn_buffer:
62 | model = runner.model
63 | for name, module in model.named_modules():
64 | if isinstance(module,
65 | _BatchNorm) and module.track_running_stats:
66 | dist.broadcast(module.running_var, 0)
67 | dist.broadcast(module.running_mean, 0)
68 |
69 | if not self._should_evaluate(runner):
70 | return
71 |
72 | tmpdir = self.tmpdir
73 | if tmpdir is None:
74 | tmpdir = osp.join(runner.work_dir, '.eval_hook')
75 |
76 | from projects.mmdet3d_plugin.bevformer.apis.test import custom_multi_gpu_test # imported here to avoid a circular import
77 |
78 | results = custom_multi_gpu_test(
79 | runner.model,
80 | self.dataloader,
81 | tmpdir=tmpdir,
82 | gpu_collect=self.gpu_collect)
83 | if runner.rank == 0:
84 | print('\n')
85 | runner.log_buffer.output['eval_iter_num'] = len(self.dataloader)
86 | # import ipdb;ipdb.set_trace()
87 | key_score = self.evaluate(runner, results)
88 |
89 | if self.save_best:
90 | self._save_ckpt(runner, key_score)
91 |
92 |
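A worked example of the dynamic-interval bookkeeping above (sketch): with a base interval of 5 and dynamic_intervals=[(40, 2), (45, 1)], evaluation runs every 5 epochs until epoch 40, every 2 until epoch 45, then every epoch.

milestones, intervals = _calc_dynamic_intervals(5, [(40, 2), (45, 1)])
assert milestones == [0, 40, 45] and intervals == [5, 2, 1]
step = bisect.bisect(milestones, 40 + 1)  # progress + 1, as in _decide_interval
assert intervals[step - 1] == 2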
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | from .nuscenes_dataset import CustomNuScenesDataset
2 | from .mp3d_dataset import MP3DDataset
3 | from .builder import custom_build_dataset
4 |
5 | __all__ = [
6 | 'CustomNuScenesDataset','MP3DDataset'
7 | ]
8 |
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/datasets/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DefaultRui/BEV-Scene-Graph/b73acc223f9ded311d6bee8b8117fe36c212fa55/mp3dbev/projects/mmdet3d_plugin/datasets/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/datasets/__pycache__/builder.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DefaultRui/BEV-Scene-Graph/b73acc223f9ded311d6bee8b8117fe36c212fa55/mp3dbev/projects/mmdet3d_plugin/datasets/__pycache__/builder.cpython-38.pyc
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/datasets/__pycache__/indoor_eval.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DefaultRui/BEV-Scene-Graph/b73acc223f9ded311d6bee8b8117fe36c212fa55/mp3dbev/projects/mmdet3d_plugin/datasets/__pycache__/indoor_eval.cpython-38.pyc
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/datasets/__pycache__/mp3d_dataset.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DefaultRui/BEV-Scene-Graph/b73acc223f9ded311d6bee8b8117fe36c212fa55/mp3dbev/projects/mmdet3d_plugin/datasets/__pycache__/mp3d_dataset.cpython-38.pyc
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/datasets/__pycache__/nuscenes_dataset.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DefaultRui/BEV-Scene-Graph/b73acc223f9ded311d6bee8b8117fe36c212fa55/mp3dbev/projects/mmdet3d_plugin/datasets/__pycache__/nuscenes_dataset.cpython-38.pyc
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/datasets/__pycache__/nuscnes_eval.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DefaultRui/BEV-Scene-Graph/b73acc223f9ded311d6bee8b8117fe36c212fa55/mp3dbev/projects/mmdet3d_plugin/datasets/__pycache__/nuscnes_eval.cpython-38.pyc
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/datasets/pipelines/__init__.py:
--------------------------------------------------------------------------------
1 | from .transform_3d import (
2 | PadMultiViewImage, NormalizeMultiviewImage,
3 | PhotoMetricDistortionMultiViewImage, CustomCollect3D, RandomScaleImageMultiViewImage,
4 | CustomMP3D)
5 | from .formating import CustomDefaultFormatBundle3D
6 | __all__ = [
7 | 'PadMultiViewImage', 'NormalizeMultiviewImage',
8 | 'PhotoMetricDistortionMultiViewImage', 'CustomDefaultFormatBundle3D', 'CustomCollect3D', 'RandomScaleImageMultiViewImage',
9 | 'CustomMP3D'
10 | ]
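
For orientation, these registered names are what the configs refer to by `type`. A minimal training-pipeline sketch (the transform parameters shown are assumptions based on the usual BEVFormer-style signatures, not values taken from this repo's configs):

    img_norm_cfg = dict(
        mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
    train_pipeline = [
        dict(type='PhotoMetricDistortionMultiViewImage'),
        dict(type='NormalizeMultiviewImage', **img_norm_cfg),
        dict(type='PadMultiViewImage', size_divisor=32),
        dict(type='CustomDefaultFormatBundle3D', class_names=['chair', 'table']),
        dict(type='CustomCollect3D', keys=['img', 'gt_bboxes_3d', 'gt_labels_3d']),
    ]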
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/datasets/pipelines/compose.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import collections
3 |
4 | from mmcv.utils import build_from_cfg
5 |
6 | from mmdet.datasets.builder import PIPELINES as MMDET_PIPELINES
7 | from ..builder import PIPELINES
8 |
9 |
10 | @PIPELINES.register_module()
11 | class Compose:
12 |     """Compose multiple transforms sequentially. The pipeline registry of
13 |     mmdet3d is separate from mmdet's; however, we sometimes need to use
14 |     mmdet's pipelines. This class is therefore rewritten so that transforms
15 |     can be looked up in both the mmdet3d and mmdet registries.
16 |     Args:
17 |         transforms (Sequence[dict | callable]): Sequence of transform
18 |             objects or config dicts to be composed.
19 |     """
20 |
21 | def __init__(self, transforms):
22 | assert isinstance(transforms, collections.abc.Sequence)
23 | self.transforms = []
24 | for transform in transforms:
25 | if isinstance(transform, dict):
26 | _, key = PIPELINES.split_scope_key(transform['type'])
27 | if key in PIPELINES._module_dict.keys():
28 | transform = build_from_cfg(transform, PIPELINES)
29 | else:
30 | transform = build_from_cfg(transform, MMDET_PIPELINES)
31 | self.transforms.append(transform)
32 | elif callable(transform):
33 | self.transforms.append(transform)
34 | else:
35 | raise TypeError('transform must be callable or a dict')
36 |
37 | def __call__(self, data):
38 | """Call function to apply transforms sequentially.
39 | Args:
40 | data (dict): A result dict contains the data to transform.
41 | Returns:
42 | dict: Transformed data.
43 | """
44 |
45 | for t in self.transforms:
46 | data = t(data)
47 | if data is None:
48 | return None
49 | return data
50 |
51 | def __repr__(self):
52 | format_string = self.__class__.__name__ + '('
53 | for t in self.transforms:
54 | format_string += '\n'
55 | format_string += f' {t}'
56 | format_string += '\n)'
57 | return format_string
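
A hedged usage sketch: because the lookup tries the plugin registry first and only falls back to mmdet's `MMDET_PIPELINES` when the key is missing, one pipeline can freely mix transforms from both. `RandomFlip` is a standard mmdet transform; the parameters are illustrative:

    pipeline = Compose([
        dict(type='PadMultiViewImage', size_divisor=32),  # resolved from the plugin registry
        dict(type='RandomFlip', flip_ratio=0.5),          # falls back to MMDET_PIPELINES
    ])
    # results = pipeline(results)  # each transform applied in order; None aborts the chain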
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/datasets/pipelines/formating.py:
--------------------------------------------------------------------------------
1 |
2 | # Copyright (c) OpenMMLab. All rights reserved.
3 | import numpy as np
4 | from mmcv.parallel import DataContainer as DC
5 |
6 | from mmdet3d.core.bbox import BaseInstance3DBoxes
7 | from mmdet3d.core.points import BasePoints
8 | from mmdet.datasets.builder import PIPELINES
9 | from mmdet.datasets.pipelines import to_tensor
10 | from mmdet3d.datasets.pipelines import DefaultFormatBundle3D
11 |
12 | @PIPELINES.register_module()
13 | class CustomDefaultFormatBundle3D(DefaultFormatBundle3D):
14 | """Default formatting bundle.
15 | It simplifies the pipeline of formatting common fields for voxels,
16 | including "proposals", "gt_bboxes", "gt_labels", "gt_masks" and
17 | "gt_semantic_seg".
18 | These fields are formatted as follows.
19 | - img: (1)transpose, (2)to tensor, (3)to DataContainer (stack=True)
20 | - proposals: (1)to tensor, (2)to DataContainer
21 | - gt_bboxes: (1)to tensor, (2)to DataContainer
22 | - gt_bboxes_ignore: (1)to tensor, (2)to DataContainer
23 | - gt_labels: (1)to tensor, (2)to DataContainer
24 | """
25 |
26 | def __call__(self, results):
27 | """Call function to transform and format common fields in results.
28 | Args:
29 | results (dict): Result dict contains the data to convert.
30 | Returns:
31 | dict: The result dict contains the data that is formatted with
32 | default bundle.
33 | """
34 | # Format 3D data
35 | results = super(CustomDefaultFormatBundle3D, self).__call__(results)
36 | results['gt_map_masks'] = DC(
37 | to_tensor(results['gt_map_masks']), stack=True)
38 |
39 | return results
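
In a config, this bundle would sit after whatever transform populates `results['gt_map_masks']`; it raises a KeyError otherwise. A minimal sketch, where `class_names` and the collected key list are placeholders for illustration:

    train_pipeline = [
        # ... earlier transforms must write results['gt_map_masks'] ...
        dict(type='CustomDefaultFormatBundle3D', class_names=class_names),
        dict(type='CustomCollect3D',
             keys=['img', 'gt_bboxes_3d', 'gt_labels_3d', 'gt_map_masks']),
    ]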
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/datasets/pipelines/loading.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DefaultRui/BEV-Scene-Graph/b73acc223f9ded311d6bee8b8117fe36c212fa55/mp3dbev/projects/mmdet3d_plugin/datasets/pipelines/loading.py
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/datasets/samplers/__init__.py:
--------------------------------------------------------------------------------
1 | from .group_sampler import DistributedGroupSampler
2 | from .distributed_sampler import DistributedSampler
3 | from .sampler import SAMPLER, build_sampler
4 |
5 |
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/datasets/samplers/distributed_sampler.py:
--------------------------------------------------------------------------------
1 | import math
2 |
3 | import torch
4 | from torch.utils.data import DistributedSampler as _DistributedSampler
5 | from .sampler import SAMPLER
6 |
7 |
8 | @SAMPLER.register_module()
9 | class DistributedSampler(_DistributedSampler):
10 |
11 | def __init__(self,
12 | dataset=None,
13 | num_replicas=None,
14 | rank=None,
15 | shuffle=True,
16 | seed=0):
17 | super().__init__(
18 | dataset, num_replicas=num_replicas, rank=rank, shuffle=shuffle)
19 |         # for compatibility with PyTorch 1.3+
20 | self.seed = seed if seed is not None else 0
21 |
22 | def __iter__(self):
23 | # deterministically shuffle based on epoch
24 |         if self.shuffle:
25 |             assert False, 'shuffle is not supported by this sampler'
26 | else:
27 | indices = torch.arange(len(self.dataset)).tolist()
28 |
29 | # add extra samples to make it evenly divisible
30 | # in case that indices is shorter than half of total_size
31 | indices = (indices *
32 | math.ceil(self.total_size / len(indices)))[:self.total_size]
33 | assert len(indices) == self.total_size
34 |
35 | # subsample
36 | per_replicas = self.total_size//self.num_replicas
37 | # indices = indices[self.rank:self.total_size:self.num_replicas]
38 | indices = indices[self.rank*per_replicas:(self.rank+1)*per_replicas]
39 | assert len(indices) == self.num_samples
40 |
41 | return iter(indices)
42 |
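
The commented-out strided split was replaced by a contiguous per-rank block, presumably so that each replica receives its samples in dataset order (useful when per-rank results are concatenated back in rank order at evaluation time). A plain-Python illustration of the difference for total_size=8, num_replicas=2:

    indices = list(range(8))
    per_replicas = 8 // 2
    rank0 = indices[0 * per_replicas:(0 + 1) * per_replicas]  # [0, 1, 2, 3] (contiguous)
    rank1 = indices[1 * per_replicas:(1 + 1) * per_replicas]  # [4, 5, 6, 7]
    strided_rank0 = indices[0:8:2]                            # [0, 2, 4, 6] (old scheme)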
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/datasets/samplers/group_sampler.py:
--------------------------------------------------------------------------------
1 |
2 | # Copyright (c) OpenMMLab. All rights reserved.
3 | import math
4 |
5 | import numpy as np
6 | import torch
7 | from mmcv.runner import get_dist_info
8 | from torch.utils.data import Sampler
9 | from .sampler import SAMPLER
10 | import random
11 | from IPython import embed
12 |
13 |
14 | @SAMPLER.register_module()
15 | class DistributedGroupSampler(Sampler):
16 | """Sampler that restricts data loading to a subset of the dataset.
17 | It is especially useful in conjunction with
18 | :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each
19 | process can pass a DistributedSampler instance as a DataLoader sampler,
20 | and load a subset of the original dataset that is exclusive to it.
21 | .. note::
22 | Dataset is assumed to be of constant size.
23 | Arguments:
24 | dataset: Dataset used for sampling.
25 | num_replicas (optional): Number of processes participating in
26 | distributed training.
27 | rank (optional): Rank of the current process within num_replicas.
28 |         seed (int, optional): random seed used to shuffle the sampler.
29 |             This number should be identical across all processes in the
30 |             distributed group. Default: 0.
31 | """
32 |
33 | def __init__(self,
34 | dataset,
35 | samples_per_gpu=1,
36 | num_replicas=None,
37 | rank=None,
38 | seed=0):
39 | _rank, _num_replicas = get_dist_info()
40 | if num_replicas is None:
41 | num_replicas = _num_replicas
42 | if rank is None:
43 | rank = _rank
44 | self.dataset = dataset
45 | self.samples_per_gpu = samples_per_gpu
46 | self.num_replicas = num_replicas
47 | self.rank = rank
48 | self.epoch = 0
49 | self.seed = seed if seed is not None else 0
50 |
51 | assert hasattr(self.dataset, 'flag')
52 | self.flag = self.dataset.flag
53 | self.group_sizes = np.bincount(self.flag)
54 |
55 | self.num_samples = 0
56 | for i, j in enumerate(self.group_sizes):
57 | self.num_samples += int(
58 | math.ceil(self.group_sizes[i] * 1.0 / self.samples_per_gpu /
59 | self.num_replicas)) * self.samples_per_gpu
60 | self.total_size = self.num_samples * self.num_replicas
61 |
62 | def __iter__(self):
63 | # deterministically shuffle based on epoch
64 | g = torch.Generator()
65 | g.manual_seed(self.epoch + self.seed)
66 |
67 | indices = []
68 | for i, size in enumerate(self.group_sizes):
69 | if size > 0:
70 | indice = np.where(self.flag == i)[0]
71 | assert len(indice) == size
72 | # add .numpy() to avoid bug when selecting indice in parrots.
73 | # TODO: check whether torch.randperm() can be replaced by
74 | # numpy.random.permutation().
75 | indice = indice[list(
76 | torch.randperm(int(size), generator=g).numpy())].tolist()
77 | extra = int(
78 | math.ceil(
79 | size * 1.0 / self.samples_per_gpu / self.num_replicas)
80 | ) * self.samples_per_gpu * self.num_replicas - len(indice)
81 | # pad indice
82 | tmp = indice.copy()
83 | for _ in range(extra // size):
84 | indice.extend(tmp)
85 | indice.extend(tmp[:extra % size])
86 | indices.extend(indice)
87 |
88 | assert len(indices) == self.total_size
89 |
90 | indices = [
91 | indices[j] for i in list(
92 | torch.randperm(
93 | len(indices) // self.samples_per_gpu, generator=g))
94 | for j in range(i * self.samples_per_gpu, (i + 1) *
95 | self.samples_per_gpu)
96 | ]
97 |
98 | # subsample
99 | offset = self.num_samples * self.rank
100 | indices = indices[offset:offset + self.num_samples]
101 | assert len(indices) == self.num_samples
102 |
103 | return iter(indices)
104 |
105 | def __len__(self):
106 | return self.num_samples
107 |
108 | def set_epoch(self, epoch):
109 | self.epoch = epoch
110 |
111 |
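
The size computation above pads every group to a multiple of samples_per_gpu * num_replicas, so each GPU batch is drawn from a single group. A worked example with illustrative numbers:

    import math

    group_sizes, samples_per_gpu, num_replicas = [5, 3], 2, 2
    num_samples = sum(
        int(math.ceil(g * 1.0 / samples_per_gpu / num_replicas)) * samples_per_gpu
        for g in group_sizes)                # ceil(5/4)*2 + ceil(3/4)*2 = 4 + 2 = 6
    total_size = num_samples * num_replicas  # 12: group 0 padded 5 -> 8, group 1 padded 3 -> 4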
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/datasets/samplers/sampler.py:
--------------------------------------------------------------------------------
1 | from mmcv.utils.registry import Registry, build_from_cfg
2 |
3 | SAMPLER = Registry('sampler')
4 |
5 |
6 | def build_sampler(cfg, default_args):
7 | return build_from_cfg(cfg, SAMPLER, default_args)
8 |
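
A hedged sketch of building a sampler from a config dict via this registry; `my_dataset` is a placeholder for any dataset object accepted by the chosen sampler class:

    from projects.mmdet3d_plugin.datasets.samplers import build_sampler

    sampler = build_sampler(
        dict(type='DistributedSampler', shuffle=False, seed=0),
        dict(dataset=my_dataset, num_replicas=2, rank=0))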
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/models/backbones/__init__.py:
--------------------------------------------------------------------------------
1 | from .vovnet import VoVNet
2 |
3 | __all__ = ['VoVNet']
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/models/hooks/__init__.py:
--------------------------------------------------------------------------------
1 | from .hooks import GradChecker
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/models/hooks/hooks.py:
--------------------------------------------------------------------------------
1 | from mmcv.runner.hooks.hook import HOOKS, Hook
2 | from projects.mmdet3d_plugin.models.utils import run_time
3 |
4 |
5 | @HOOKS.register_module()
6 | class GradChecker(Hook):
7 |
8 |     def after_train_iter(self, runner):
9 |         for key, val in runner.model.named_parameters():
10 |             if val.grad is None and val.requires_grad:
11 |                 print('WARNING: {key}\'s parameters were not used!'.format(key=key))
12 |
13 |
14 |
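
Since GradChecker is registered with mmcv's HOOKS, it can be enabled from a config without code changes; a minimal sketch:

    custom_hooks = [
        dict(type='GradChecker', priority='HIGHEST'),
    ]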
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/models/opt/__init__.py:
--------------------------------------------------------------------------------
1 | from .adamw import AdamW2
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/models/utils/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from .bricks import run_time
3 | from .grid_mask import GridMask
4 | from .position_embedding import RelPositionEmbedding
5 | from .visual import save_tensor
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/models/utils/bricks.py:
--------------------------------------------------------------------------------
1 | import functools
2 | import time
3 | from collections import defaultdict
4 | import torch
5 | time_maps = defaultdict(lambda: 0.)
6 | count_maps = defaultdict(lambda: 0.)
7 | def run_time(name):
8 |     def middle(fn):
9 |         @functools.wraps(fn)  # keep the timed function's name and docstring
10 |         def wrapper(*args, **kwargs):
11 |             torch.cuda.synchronize()
12 |             start = time.time()
13 |             res = fn(*args, **kwargs)
14 |             torch.cuda.synchronize()
15 |             time_maps['%s : %s' % (name, fn.__name__)] += time.time() - start
16 |             count_maps['%s : %s' % (name, fn.__name__)] += 1
17 |             print('%s : %s takes %f s on average' % (name, fn.__name__,
18 |                   time_maps['%s : %s' % (name, fn.__name__)] / count_maps['%s : %s' % (name, fn.__name__)]))
19 |             return res
20 |         return wrapper
21 |     return middle
22 |
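
A hedged usage sketch; the decorator prints a running average of wall time per call, and it requires CUDA because of the torch.cuda.synchronize() calls:

    import torch
    from projects.mmdet3d_plugin.models.utils import run_time

    @run_time('demo')
    def matmul(a, b):
        return a @ b

    out = matmul(torch.randn(512, 512).cuda(), torch.randn(512, 512).cuda())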
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/models/utils/grid_mask.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import numpy as np
4 | from PIL import Image
5 | from mmcv.runner import force_fp32, auto_fp16
6 |
7 | class Grid(object):
8 |     def __init__(self, use_h, use_w, rotate=1, offset=False, ratio=0.5, mode=0, prob=1.):
9 | self.use_h = use_h
10 | self.use_w = use_w
11 | self.rotate = rotate
12 | self.offset = offset
13 | self.ratio = ratio
14 | self.mode=mode
15 | self.st_prob = prob
16 | self.prob = prob
17 |
18 | def set_prob(self, epoch, max_epoch):
19 | self.prob = self.st_prob * epoch / max_epoch
20 |
21 | def __call__(self, img, label):
22 | if np.random.rand() > self.prob:
23 | return img, label
24 | h = img.size(1)
25 | w = img.size(2)
26 | self.d1 = 2
27 | self.d2 = min(h, w)
28 | hh = int(1.5*h)
29 | ww = int(1.5*w)
30 | d = np.random.randint(self.d1, self.d2)
31 | if self.ratio == 1:
32 | self.l = np.random.randint(1, d)
33 | else:
34 | self.l = min(max(int(d*self.ratio+0.5),1),d-1)
35 | mask = np.ones((hh, ww), np.float32)
36 | st_h = np.random.randint(d)
37 | st_w = np.random.randint(d)
38 | if self.use_h:
39 | for i in range(hh//d):
40 | s = d*i + st_h
41 | t = min(s+self.l, hh)
42 | mask[s:t,:] *= 0
43 | if self.use_w:
44 | for i in range(ww//d):
45 | s = d*i + st_w
46 | t = min(s+self.l, ww)
47 | mask[:,s:t] *= 0
48 |
49 | r = np.random.randint(self.rotate)
50 | mask = Image.fromarray(np.uint8(mask))
51 | mask = mask.rotate(r)
52 | mask = np.asarray(mask)
53 | mask = mask[(hh-h)//2:(hh-h)//2+h, (ww-w)//2:(ww-w)//2+w]
54 |
55 | mask = torch.from_numpy(mask).float()
56 | if self.mode == 1:
57 | mask = 1-mask
58 |
59 | mask = mask.expand_as(img)
60 | if self.offset:
61 | offset = torch.from_numpy(2 * (np.random.rand(h,w) - 0.5)).float()
62 | offset = (1 - mask) * offset
63 | img = img * mask + offset
64 | else:
65 | img = img * mask
66 |
67 | return img, label
68 |
69 |
70 | class GridMask(nn.Module):
71 |     def __init__(self, use_h, use_w, rotate=1, offset=False, ratio=0.5, mode=0, prob=1.):
72 | super(GridMask, self).__init__()
73 | self.use_h = use_h
74 | self.use_w = use_w
75 | self.rotate = rotate
76 | self.offset = offset
77 | self.ratio = ratio
78 | self.mode = mode
79 | self.st_prob = prob
80 | self.prob = prob
81 |         self.fp16_enabled = False
82 |     def set_prob(self, epoch, max_epoch):
83 |         self.prob = self.st_prob * epoch / max_epoch
84 | @auto_fp16()
85 | def forward(self, x):
86 | if np.random.rand() > self.prob or not self.training:
87 | return x
88 | n,c,h,w = x.size()
89 | x = x.view(-1,h,w)
90 | hh = int(1.5*h)
91 | ww = int(1.5*w)
92 | d = np.random.randint(2, h)
93 | self.l = min(max(int(d*self.ratio+0.5),1),d-1)
94 | mask = np.ones((hh, ww), np.float32)
95 | st_h = np.random.randint(d)
96 | st_w = np.random.randint(d)
97 | if self.use_h:
98 | for i in range(hh//d):
99 | s = d*i + st_h
100 | t = min(s+self.l, hh)
101 | mask[s:t,:] *= 0
102 | if self.use_w:
103 | for i in range(ww//d):
104 | s = d*i + st_w
105 | t = min(s+self.l, ww)
106 | mask[:,s:t] *= 0
107 |
108 | r = np.random.randint(self.rotate)
109 | mask = Image.fromarray(np.uint8(mask))
110 | mask = mask.rotate(r)
111 | mask = np.asarray(mask)
112 | mask = mask[(hh-h)//2:(hh-h)//2+h, (ww-w)//2:(ww-w)//2+w]
113 |
114 | mask = torch.from_numpy(mask).to(x.dtype).cuda()
115 | if self.mode == 1:
116 | mask = 1-mask
117 | mask = mask.expand_as(x)
118 | if self.offset:
119 | offset = torch.from_numpy(2 * (np.random.rand(h,w) - 0.5)).to(x.dtype).cuda()
120 | x = x * mask + offset * (1 - mask)
121 | else:
122 | x = x * mask
123 |
124 | return x.view(n,c,h,w)
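
A hedged usage sketch: the module is a no-op in eval mode or when the probability gate fails, and it expects CUDA tensors since the mask is created with .cuda() inside forward():

    import torch

    gm = GridMask(True, True, rotate=1, offset=False, ratio=0.5, mode=1, prob=0.7)
    gm.train()
    imgs = torch.randn(2, 3, 224, 224).cuda()
    out = gm(imgs)                        # same shape; grid cells kept/zeroed per `mode`
    gm.set_prob(epoch=5, max_epoch=24)    # linearly ramps the apply probability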
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/models/utils/position_embedding.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import math
4 |
5 | class RelPositionEmbedding(nn.Module):
6 | def __init__(self, num_pos_feats=64, pos_norm=True):
7 | super().__init__()
8 | self.num_pos_feats = num_pos_feats
9 | self.fc = nn.Linear(4, self.num_pos_feats,bias=False)
10 | #nn.init.orthogonal_(self.fc.weight)
11 | #self.fc.weight.requires_grad = False
12 | self.pos_norm = pos_norm
13 | if self.pos_norm:
14 | self.norm = nn.LayerNorm(self.num_pos_feats)
15 | def forward(self, tensor):
16 | #mask = nesttensor.mask
17 | B,C,H,W = tensor.shape
18 | #print('tensor.shape', tensor.shape)
19 | y_range = (torch.arange(H) / float(H - 1)).to(tensor.device)
20 | #y_axis = torch.stack((y_range, 1-y_range),dim=1)
21 | y_axis = torch.stack((torch.cos(y_range * math.pi), torch.sin(y_range * math.pi)), dim=1)
22 | y_axis = y_axis.reshape(H, 1, 2).repeat(1, W, 1).reshape(H * W, 2)
23 |
24 | x_range = (torch.arange(W) / float(W - 1)).to(tensor.device)
25 | #x_axis =torch.stack((x_range,1-x_range),dim=1)
26 | x_axis = torch.stack((torch.cos(x_range * math.pi), torch.sin(x_range * math.pi)), dim=1)
27 | x_axis = x_axis.reshape(1, W, 2).repeat(H, 1, 1).reshape(H * W, 2)
28 | x_pos = torch.cat((y_axis, x_axis), dim=1)
29 | x_pos = self.fc(x_pos)
30 |
31 | if self.pos_norm:
32 | x_pos = self.norm(x_pos)
33 | #print('xpos,', x_pos.max(),x_pos.min())
34 | return x_pos
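
The embedding depends only on the spatial size of the input, so the same (H*W, num_pos_feats) table is shared across the batch; a quick shape check:

    import torch

    emb = RelPositionEmbedding(num_pos_feats=64)
    feat = torch.randn(2, 256, 20, 30)  # (B, C, H, W); only H and W matter
    pos = emb(feat)
    print(pos.shape)                    # torch.Size([600, 64]), i.e. (H*W, num_pos_feats)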
--------------------------------------------------------------------------------
/mp3dbev/projects/mmdet3d_plugin/models/utils/visual.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torchvision.utils import make_grid
3 | import torchvision
4 | import matplotlib.pyplot as plt
5 | import cv2
6 |
7 |
8 | def convert_color(img_path):
9 | plt.figure()
10 | img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
11 | plt.imsave(img_path, img, cmap=plt.get_cmap('viridis'))
12 | plt.close()
13 |
14 |
15 | def save_tensor(tensor, path, pad_value=254.0):
16 | print('save_tensor', path)
17 |     if tensor.dtype == torch.bool:  # check before the float cast, which made this dead code
18 |         tensor = tensor * 255
19 |     tensor = tensor.to(torch.float).detach().cpu()
20 | if len(tensor.shape) == 3:
21 | tensor = tensor.unsqueeze(1)
22 | tensor = make_grid(tensor, pad_value=pad_value, normalize=False).permute(1, 2, 0).numpy().copy()
23 | torchvision.utils.save_image(torch.tensor(tensor).permute(2, 0, 1), path)
24 | convert_color(path)
25 |
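
A hedged usage sketch; save_tensor tiles the (N, C, H, W) input into one grid image, writes it to `path`, then recolors the file in place with the viridis colormap:

    import torch

    bev_masks = torch.rand(6, 1, 50, 50)       # e.g. six single-channel BEV maps
    save_tensor(bev_masks, '/tmp/bev_grid.png')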
--------------------------------------------------------------------------------
/mp3dbev/tools/analysis_tools/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DefaultRui/BEV-Scene-Graph/b73acc223f9ded311d6bee8b8117fe36c212fa55/mp3dbev/tools/analysis_tools/__init__.py
--------------------------------------------------------------------------------
/mp3dbev/tools/analysis_tools/benchmark.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import argparse
3 | import time
4 | import torch
5 | from mmcv import Config
6 | from mmcv.parallel import MMDataParallel
7 | from mmcv.runner import load_checkpoint, wrap_fp16_model
8 | import sys
9 | sys.path.append('.')
10 | from projects.mmdet3d_plugin.datasets.builder import build_dataloader
11 | from projects.mmdet3d_plugin.datasets import custom_build_dataset
12 | # from mmdet3d.datasets import build_dataloader, build_dataset
13 | from mmdet3d.models import build_detector
14 | #from tools.misc.fuse_conv_bn import fuse_module
15 |
16 |
17 | def parse_args():
18 | parser = argparse.ArgumentParser(description='MMDet benchmark a model')
19 | parser.add_argument('config', help='test config file path')
20 | parser.add_argument('--checkpoint', default=None, help='checkpoint file')
21 |     parser.add_argument('--samples', default=2000, type=int, help='samples to benchmark')
22 |     parser.add_argument(
23 |         '--log-interval', default=50, type=int, help='interval of logging')
24 | parser.add_argument(
25 | '--fuse-conv-bn',
26 | action='store_true',
27 |         help='Whether to fuse conv and bn, this will slightly increase '
28 |         'the inference speed')
29 | args = parser.parse_args()
30 | return args
31 |
32 |
33 | def main():
34 | args = parse_args()
35 |
36 | cfg = Config.fromfile(args.config)
37 | # set cudnn_benchmark
38 | if cfg.get('cudnn_benchmark', False):
39 | torch.backends.cudnn.benchmark = True
40 | cfg.model.pretrained = None
41 | cfg.data.test.test_mode = True
42 |
43 | # build the dataloader
44 | # TODO: support multiple images per gpu (only minor changes are needed)
45 | print(cfg.data.test)
46 | dataset = custom_build_dataset(cfg.data.test)
47 | data_loader = build_dataloader(
48 | dataset,
49 | samples_per_gpu=1,
50 | workers_per_gpu=cfg.data.workers_per_gpu,
51 | dist=False,
52 | shuffle=False)
53 |
54 | # build the model and load checkpoint
55 | cfg.model.train_cfg = None
56 | model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))
57 | fp16_cfg = cfg.get('fp16', None)
58 | if fp16_cfg is not None:
59 | wrap_fp16_model(model)
60 | if args.checkpoint is not None:
61 | load_checkpoint(model, args.checkpoint, map_location='cpu')
62 | #if args.fuse_conv_bn:
63 | # model = fuse_module(model)
64 |
65 | model = MMDataParallel(model, device_ids=[0])
66 |
67 | model.eval()
68 |
69 | # the first several iterations may be very slow so skip them
70 | num_warmup = 5
71 | pure_inf_time = 0
72 |
73 | # benchmark with several samples and take the average
74 | for i, data in enumerate(data_loader):
75 | torch.cuda.synchronize()
76 | start_time = time.perf_counter()
77 | with torch.no_grad():
78 | model(return_loss=False, rescale=True, **data)
79 |
80 | torch.cuda.synchronize()
81 | elapsed = time.perf_counter() - start_time
82 |
83 | if i >= num_warmup:
84 | pure_inf_time += elapsed
85 | if (i + 1) % args.log_interval == 0:
86 | fps = (i + 1 - num_warmup) / pure_inf_time
87 | print(f'Done image [{i + 1:<3}/ {args.samples}], '
88 | f'fps: {fps:.1f} img / s')
89 |
90 |         if (i + 1) == args.samples:
91 |             # `elapsed` has already been added to pure_inf_time above
92 |             fps = (i + 1 - num_warmup) / pure_inf_time
93 | print(f'Overall fps: {fps:.1f} img / s')
94 | break
95 |
96 |
97 | if __name__ == '__main__':
98 | main()
99 |
--------------------------------------------------------------------------------
/mp3dbev/tools/analysis_tools/get_params.py:
--------------------------------------------------------------------------------
1 | import torch
2 | file_path = './ckpts/bevformer_v4.pth'
3 | model = torch.load(file_path, map_location='cpu')
4 | total = 0  # avoid shadowing the builtin all()
5 | for key in list(model['state_dict'].keys()):
6 |     total += model['state_dict'][key].nelement()
7 | print(total)
8 |
9 | # smaller 63374123
10 | # v4 69140395
11 |
--------------------------------------------------------------------------------
/mp3dbev/tools/data_converter/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 |
--------------------------------------------------------------------------------
/mp3dbev/tools/data_converter/lyft_data_fixer.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import argparse
3 | import numpy as np
4 | import os
5 |
6 |
7 | def fix_lyft(root_folder='./data/lyft', version='v1.01'):
8 | # refer to https://www.kaggle.com/c/3d-object-detection-for-autonomous-vehicles/discussion/110000 # noqa
9 | lidar_path = 'lidar/host-a011_lidar1_1233090652702363606.bin'
10 | root_folder = os.path.join(root_folder, f'{version}-train')
11 | lidar_path = os.path.join(root_folder, lidar_path)
12 | assert os.path.isfile(lidar_path), f'Please download the complete Lyft ' \
13 | f'dataset and make sure {lidar_path} is present.'
14 | points = np.fromfile(lidar_path, dtype=np.float32, count=-1)
15 | try:
16 | points.reshape([-1, 5])
17 | print(f'This fix is not required for version {version}.')
18 | except ValueError:
19 | new_points = np.array(list(points) + [100.0, 1.0], dtype='float32')
20 | new_points.tofile(lidar_path)
21 | print(f'Appended 100.0 and 1.0 to the end of {lidar_path}.')
22 |
23 |
24 | parser = argparse.ArgumentParser(description='Lyft dataset fixer arg parser')
25 | parser.add_argument(
26 | '--root-folder',
27 | type=str,
28 | default='./data/lyft',
29 | help='specify the root path of Lyft dataset')
30 | parser.add_argument(
31 | '--version',
32 | type=str,
33 | default='v1.01',
34 | help='specify Lyft dataset version')
35 | args = parser.parse_args()
36 |
37 | if __name__ == '__main__':
38 | fix_lyft(root_folder=args.root_folder, version=args.version)
39 |
--------------------------------------------------------------------------------
/mp3dbev/tools/dist_test.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | CONFIG=$1
4 | CHECKPOINT=$2
5 | GPUS=$3
6 | PORT=${PORT:-28508}  # default master port, matching tools/fp16/dist_train.sh
7 |
8 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
9 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \
10 |     $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} --eval bbox
11 |
--------------------------------------------------------------------------------
/mp3dbev/tools/dist_train.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | CONFIG=$1
4 | GPUS=$2
5 | PORT=${PORT:-28508}  # default master port, matching tools/fp16/dist_train.sh
6 |
7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
8 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \
9 |     $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} --deterministic
10 |
--------------------------------------------------------------------------------
/mp3dbev/tools/fp16/dist_train.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | CONFIG=$1
4 | GPUS=$2
5 | PORT=${PORT:-28508}
6 |
7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
8 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \
9 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} --deterministic
10 |
--------------------------------------------------------------------------------
/mp3dbev/tools/misc/fuse_conv_bn.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import argparse
3 | import torch
4 | from mmcv.runner import save_checkpoint
5 | from torch import nn as nn
6 |
7 | from mmdet3d.apis import init_model
8 |
9 |
10 | def fuse_conv_bn(conv, bn):
11 |     """During inference, batch norm layers are effectively frozen: only the
12 |     per-channel running mean and variance are used, which makes it possible
13 |     to fuse them into the preceding conv layers, saving computation and
14 |     simplifying the network structure."""
15 | conv_w = conv.weight
16 | conv_b = conv.bias if conv.bias is not None else torch.zeros_like(
17 | bn.running_mean)
18 |
19 | factor = bn.weight / torch.sqrt(bn.running_var + bn.eps)
20 | conv.weight = nn.Parameter(conv_w *
21 | factor.reshape([conv.out_channels, 1, 1, 1]))
22 | conv.bias = nn.Parameter((conv_b - bn.running_mean) * factor + bn.bias)
23 | return conv
24 |
25 |
26 | def fuse_module(m):
27 | last_conv = None
28 | last_conv_name = None
29 |
30 | for name, child in m.named_children():
31 | if isinstance(child, (nn.BatchNorm2d, nn.SyncBatchNorm)):
32 | if last_conv is None: # only fuse BN that is after Conv
33 | continue
34 | fused_conv = fuse_conv_bn(last_conv, child)
35 | m._modules[last_conv_name] = fused_conv
36 | # To reduce changes, set BN as Identity instead of deleting it.
37 | m._modules[name] = nn.Identity()
38 | last_conv = None
39 | elif isinstance(child, nn.Conv2d):
40 | last_conv = child
41 | last_conv_name = name
42 | else:
43 | fuse_module(child)
44 | return m
45 |
46 |
47 | def parse_args():
48 | parser = argparse.ArgumentParser(
49 | description='fuse Conv and BN layers in a model')
50 | parser.add_argument('config', help='config file path')
51 | parser.add_argument('checkpoint', help='checkpoint file path')
52 | parser.add_argument('out', help='output path of the converted model')
53 | args = parser.parse_args()
54 | return args
55 |
56 |
57 | def main():
58 | args = parse_args()
59 | # build the model from a config file and a checkpoint file
60 | model = init_model(args.config, args.checkpoint)
61 | # fuse conv and bn layers of the model
62 | fused_model = fuse_module(model)
63 | save_checkpoint(fused_model, args.out)
64 |
65 |
66 | if __name__ == '__main__':
67 | main()
68 |
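
The fusion folds BN's normalization into the conv parameters per output channel: W' = W * gamma / sqrt(var + eps) and b' = (b - mean) * gamma / sqrt(var + eps) + beta. A quick numerical check (a sketch; both modules must be in eval mode so BN uses its running statistics):

    import torch
    from torch import nn

    conv = nn.Conv2d(3, 8, 3, padding=1).eval()
    bn = nn.BatchNorm2d(8).eval()
    with torch.no_grad():  # give BN non-trivial statistics and affine params
        bn.running_mean.uniform_(-1, 1)
        bn.running_var.uniform_(0.5, 2.0)
        bn.weight.uniform_(0.5, 1.5)
        bn.bias.uniform_(-0.5, 0.5)

    x = torch.randn(1, 3, 16, 16)
    ref = bn(conv(x))                 # output before fusion
    fused = fuse_conv_bn(conv, bn)    # mutates and returns `conv`
    assert torch.allclose(fused(x), ref, atol=1e-5)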
--------------------------------------------------------------------------------
/mp3dbev/tools/misc/print_config.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import argparse
3 | from mmcv import Config, DictAction
4 |
5 |
6 | def parse_args():
7 | parser = argparse.ArgumentParser(description='Print the whole config')
8 | parser.add_argument('config', help='config file path')
9 | parser.add_argument(
10 | '--options', nargs='+', action=DictAction, help='arguments in dict')
11 | args = parser.parse_args()
12 |
13 | return args
14 |
15 |
16 | def main():
17 | args = parse_args()
18 |
19 | cfg = Config.fromfile(args.config)
20 | if args.options is not None:
21 | cfg.merge_from_dict(args.options)
22 | print(f'Config:\n{cfg.pretty_text}')
23 |
24 |
25 | if __name__ == '__main__':
26 | main()
27 |
--------------------------------------------------------------------------------
/mp3dbev/tools/misc/visualize_results.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import argparse
3 | import mmcv
4 | from mmcv import Config
5 |
6 | from mmdet3d.datasets import build_dataset
7 |
8 |
9 | def parse_args():
10 | parser = argparse.ArgumentParser(
11 | description='MMDet3D visualize the results')
12 | parser.add_argument('config', help='test config file path')
13 | parser.add_argument('--result', help='results file in pickle format')
14 | parser.add_argument(
15 | '--show-dir', help='directory where visualize results will be saved')
16 | args = parser.parse_args()
17 |
18 | return args
19 |
20 |
21 | def main():
22 | args = parse_args()
23 |
24 | if args.result is not None and \
25 | not args.result.endswith(('.pkl', '.pickle')):
26 | raise ValueError('The results file must be a pkl file.')
27 |
28 | cfg = Config.fromfile(args.config)
29 | cfg.data.test.test_mode = True
30 |
31 | # build the dataset
32 | dataset = build_dataset(cfg.data.test)
33 | results = mmcv.load(args.result)
34 |
35 | if getattr(dataset, 'show', None) is not None:
36 | # data loading pipeline for showing
37 | eval_pipeline = cfg.get('eval_pipeline', {})
38 | if eval_pipeline:
39 | dataset.show(results, args.show_dir, pipeline=eval_pipeline)
40 | else:
41 | dataset.show(results, args.show_dir) # use default pipeline
42 | else:
43 | raise NotImplementedError(
44 | 'Show is not implemented for dataset {}!'.format(
45 | type(dataset).__name__))
46 |
47 |
48 | if __name__ == '__main__':
49 | main()
50 |
--------------------------------------------------------------------------------
/mp3dbev/tools/model_converters/publish_model.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import argparse
3 | import subprocess
4 | import torch
5 |
6 |
7 | def parse_args():
8 | parser = argparse.ArgumentParser(
9 | description='Process a checkpoint to be published')
10 | parser.add_argument('in_file', help='input checkpoint filename')
11 | parser.add_argument('out_file', help='output checkpoint filename')
12 | args = parser.parse_args()
13 | return args
14 |
15 |
16 | def process_checkpoint(in_file, out_file):
17 | checkpoint = torch.load(in_file, map_location='cpu')
18 | # remove optimizer for smaller file size
19 | if 'optimizer' in checkpoint:
20 | del checkpoint['optimizer']
21 | # if it is necessary to remove some sensitive data in checkpoint['meta'],
22 | # add the code here.
23 | torch.save(checkpoint, out_file)
24 | sha = subprocess.check_output(['sha256sum', out_file]).decode()
25 |     # note: rstrip('.pth') strips characters, not a suffix, so slice instead
26 |     stem = out_file[:-4] if out_file.endswith('.pth') else out_file
27 |     final_file = stem + '-{}.pth'.format(sha[:8])
28 |     subprocess.Popen(['mv', out_file, final_file])
29 |
30 |
31 | def main():
32 |     args = parse_args()
33 |     process_checkpoint(args.in_file, args.out_file)
34 |
35 |
36 | if __name__ == '__main__':
37 |     main()
38 |
--------------------------------------------------------------------------------
/mp3dbev/tools/model_converters/regnet2mmdet.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) OpenMMLab. All rights reserved.
2 | import argparse
3 | import torch
4 | from collections import OrderedDict
5 |
6 |
7 | def convert_stem(model_key, model_weight, state_dict, converted_names):
8 | new_key = model_key.replace('stem.conv', 'conv1')
9 | new_key = new_key.replace('stem.bn', 'bn1')
10 | state_dict[new_key] = model_weight
11 | converted_names.add(model_key)
12 | print(f'Convert {model_key} to {new_key}')
13 |
14 |
15 | def convert_head(model_key, model_weight, state_dict, converted_names):
16 | new_key = model_key.replace('head.fc', 'fc')
17 | state_dict[new_key] = model_weight
18 | converted_names.add(model_key)
19 | print(f'Convert {model_key} to {new_key}')
20 |
21 |
22 | def convert_reslayer(model_key, model_weight, state_dict, converted_names):
23 | split_keys = model_key.split('.')
24 | layer, block, module = split_keys[:3]
25 | block_id = int(block[1:])
26 | layer_name = f'layer{int(layer[1:])}'
27 | block_name = f'{block_id - 1}'
28 |
29 | if block_id == 1 and module == 'bn':
30 | new_key = f'{layer_name}.{block_name}.downsample.1.{split_keys[-1]}'
31 | elif block_id == 1 and module == 'proj':
32 | new_key = f'{layer_name}.{block_name}.downsample.0.{split_keys[-1]}'
33 | elif module == 'f':
34 | if split_keys[3] == 'a_bn':
35 | module_name = 'bn1'
36 | elif split_keys[3] == 'b_bn':
37 | module_name = 'bn2'
38 | elif split_keys[3] == 'c_bn':
39 | module_name = 'bn3'
40 | elif split_keys[3] == 'a':
41 | module_name = 'conv1'
42 | elif split_keys[3] == 'b':
43 | module_name = 'conv2'
44 | elif split_keys[3] == 'c':
45 | module_name = 'conv3'
46 | new_key = f'{layer_name}.{block_name}.{module_name}.{split_keys[-1]}'
47 | else:
48 | raise ValueError(f'Unsupported conversion of key {model_key}')
49 | print(f'Convert {model_key} to {new_key}')
50 | state_dict[new_key] = model_weight
51 | converted_names.add(model_key)
52 |
53 |
54 | def convert(src, dst):
55 | """Convert keys in pycls pretrained RegNet models to mmdet style."""
56 |     # load the pycls model
57 | regnet_model = torch.load(src)
58 | blobs = regnet_model['model_state']
59 | # convert to pytorch style
60 | state_dict = OrderedDict()
61 | converted_names = set()
62 | for key, weight in blobs.items():
63 | if 'stem' in key:
64 | convert_stem(key, weight, state_dict, converted_names)
65 | elif 'head' in key:
66 | convert_head(key, weight, state_dict, converted_names)
67 | elif key.startswith('s'):
68 | convert_reslayer(key, weight, state_dict, converted_names)
69 |
70 | # check if all layers are converted
71 | for key in blobs:
72 | if key not in converted_names:
73 | print(f'not converted: {key}')
74 | # save checkpoint
75 | checkpoint = dict()
76 | checkpoint['state_dict'] = state_dict
77 | torch.save(checkpoint, dst)
78 |
79 |
80 | def main():
81 | parser = argparse.ArgumentParser(description='Convert model keys')
82 | parser.add_argument('src', help='src detectron model path')
83 | parser.add_argument('dst', help='save path')
84 | args = parser.parse_args()
85 | convert(args.src, args.dst)
86 |
87 |
88 | if __name__ == '__main__':
89 | main()
90 |
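
A few hedged examples of the key mapping these helpers implement (derived from the branches above):

    # 'stem.conv.weight'   -> 'conv1.weight'                  (convert_stem)
    # 's1.b1.proj.weight'  -> 'layer1.0.downsample.0.weight'  (convert_reslayer)
    # 's2.b3.f.b_bn.bias'  -> 'layer2.2.bn2.bias'             (convert_reslayer)
    # 'head.fc.weight'     -> 'fc.weight'                     (convert_head)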
--------------------------------------------------------------------------------