├── .gitignore
├── LICENSE
├── README.md
├── config
│   ├── config_eval_json.py
│   ├── config_vis.py
│   └── nusc
│       ├── baseline
│       │   ├── baseline_120m.py
│       │   ├── baseline_120m_cam.py
│       │   ├── baseline_240m.py
│       │   ├── baseline_240m_cam.py
│       │   ├── baseline_60m.py
│       │   └── baseline_60m_cam.py
│       ├── hd_prior
│       │   ├── hd_120m.py
│       │   ├── hd_120m_cam.py
│       │   ├── hd_240m.py
│       │   ├── hd_240m_cam.py
│       │   ├── hd_60m.py
│       │   └── hd_60m_cam.py
│       ├── hd_prior_pretrain
│       │   ├── hd_pretrain_120m.py
│       │   ├── hd_pretrain_240m.py
│       │   └── hd_pretrain_60m.py
│       └── sd_prior
│           ├── sd_120m.py
│           ├── sd_120m_cam.py
│           ├── sd_240m.py
│           ├── sd_240m_cam.py
│           ├── sd_60m.py
│           └── sd_60m_cam.py
├── data_osm
│   ├── __init__.py
│   ├── av2_dataset.py
│   ├── av2map_extractor.py
│   ├── const.py
│   ├── dataset.py
│   ├── geo_opensfm.py
│   ├── image.py
│   ├── lidar.py
│   ├── osm
│   │   ├── boston-seaport.cpg
│   │   ├── boston-seaport.dbf
│   │   ├── boston-seaport.prj
│   │   ├── boston-seaport.shp
│   │   ├── boston-seaport.shx
│   │   ├── sd_map_data_ATX.pkl
│   │   ├── sd_map_data_DTW.pkl
│   │   ├── sd_map_data_MIA.pkl
│   │   ├── sd_map_data_PAO.pkl
│   │   ├── sd_map_data_PIT.pkl
│   │   ├── sd_map_data_WDC.pkl
│   │   ├── singapore-hollandvillage.cpg
│   │   ├── singapore-hollandvillage.dbf
│   │   ├── singapore-hollandvillage.prj
│   │   ├── singapore-hollandvillage.shp
│   │   ├── singapore-hollandvillage.shx
│   │   ├── singapore-onenorth.cpg
│   │   ├── singapore-onenorth.dbf
│   │   ├── singapore-onenorth.prj
│   │   ├── singapore-onenorth.shp
│   │   ├── singapore-onenorth.shx
│   │   ├── singapore-queenstown.cpg
│   │   ├── singapore-queenstown.dbf
│   │   ├── singapore-queenstown.prj
│   │   ├── singapore-queenstown.shp
│   │   └── singapore-queenstown.shx
│   ├── pipelines
│   │   ├── __init__.py
│   │   ├── formating.py
│   │   ├── loading.py
│   │   ├── transform.py
│   │   └── vectorize.py
│   ├── rasterize.py
│   ├── utils.py
│   └── vector_map.py
├── docs
│   ├── getting_started.md
│   ├── installation.md
│   └── visualization.md
├── environment.yml
├── figs
│   └── teaser.jpg
├── icon
│   ├── car.png
│   └── car_gray.png
├── model
│   ├── __init__.py
│   ├── hdmapnet.py
│   ├── lift_splat.py
│   ├── pmapnet_hd.py
│   ├── pmapnet_sd.py
│   └── utils
│       ├── VPN.py
│       ├── __init__.py
│       ├── base.py
│       ├── homography.py
│       ├── map_mae_head.py
│       ├── misc.py
│       ├── pointpillar.py
│       ├── position_encoding.py
│       ├── sdmap_cross_attn.py
│       ├── utils.py
│       └── voxel.py
├── requirements.txt
├── tools
│   ├── config.py
│   ├── eval.py
│   ├── evaluate_json.py
│   ├── evaluation
│   │   ├── AP.py
│   │   ├── __init__.py
│   │   ├── angle_diff.py
│   │   ├── chamfer_distance.py
│   │   ├── dataset.py
│   │   ├── iou.py
│   │   └── modules
│   │       ├── lpips.py
│   │       ├── networks.py
│   │       └── utils.py
│   ├── export_json.py
│   ├── loss.py
│   ├── postprocess
│   │   ├── __init__.py
│   │   ├── cluster.py
│   │   ├── connect.py
│   │   └── vectorize.py
│   ├── vis_map.py
│   ├── vis_video_av2.py
│   └── vis_video_nus.py
├── train.py
└── train_HDPrior_pretrain.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | *.ipynb
6 |
7 | # C extensions
8 | *.so
9 |
10 | # Distribution / packaging
11 | .Python
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .coverage
43 | .coverage.*
44 | .cache
45 | nosetests.xml
46 | coverage.xml
47 | *.cover
48 | .hypothesis/
49 | .pytest_cache/
50 |
51 | # Translations
52 | *.mo
53 | *.pot
54 |
55 | # Django stuff:
56 | *.log
57 | local_settings.py
58 | db.sqlite3
59 |
60 | # Flask stuff:
61 | instance/
62 | .webassets-cache
63 |
64 | # Scrapy stuff:
65 | .scrapy
66 |
67 | # Sphinx documentation
68 | docs/_build/
69 |
70 | # PyBuilder
71 | target/
72 |
73 | # Jupyter Notebook
74 | .ipynb_checkpoints
75 |
76 | # pyenv
77 | .python-version
78 |
79 | # celery beat schedule file
80 | celerybeat-schedule
81 |
82 | # SageMath parsed files
83 | *.sage.py
84 |
85 | # Environments
86 | .env
87 | .venv
88 | env/
89 | venv/
90 | ENV/
91 | env.bak/
92 | venv.bak/
93 |
94 | # Spyder project settings
95 | .spyderproject
96 | .spyproject
97 |
98 | # Rope project settings
99 | .ropeproject
100 |
101 | # mkdocs documentation
102 | /site
103 |
104 | # mypy
105 | .mypy_cache/
106 |
107 | # cython generated cpp
108 | data
109 | .vscode
110 | .idea
111 |
112 | # custom
113 | # *.pkl
114 | *.gif
115 | *.pkl.json
116 | *.log.json
117 | work_dirs/
118 | debug_img/
119 | model_file/
120 | exps/
121 | *~
122 | mmdet3d/.mim
123 | mmdetection3d
124 | # Pytorch
125 | *.pth
126 |
127 | # demo
128 | demo/
129 | *.obj
130 | *.ply
131 | *.zip
132 | *.tar
133 | *.tar.gz
134 | *.json
135 |
136 | # datasets
137 | /datasets
138 | /data_ann
139 |
140 | # softlinks
141 | av2
142 | nuScenes
143 | dataset
144 |
145 | Work_dir
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | # P-MapNet: Far-seeing Map Generator Enhanced by both SDMap and HDMap Priors
3 |
4 | [[RAL](https://ieeexplore.ieee.org/document/10643284)] [[Paper](https://arxiv.org/pdf/2403.10521.pdf)] [[Project Page](https://jike5.github.io/P-MapNet/)]
5 |
6 |
7 |
8 | ![teaser](figs/teaser.jpg)
9 | **Abstract:**
10 | Autonomous vehicles are gradually entering city roads today, with the help of high-definition maps (HDMaps). However, the reliance on HDMaps prevents autonomous vehicles from stepping into regions without this expensive digital infrastructure. This fact drives many researchers to study online HDMap construction algorithms, but the performance of these algorithms at far regions is still unsatisfying. We present P-MapNet, in which the letter P highlights the fact that we focus on incorporating map priors to improve model performance. Specifically, we exploit priors in both SDMap and HDMap. On one hand, we extract weakly aligned SDMap from OpenStreetMap, and encode it as an additional conditioning branch. Despite the misalignment challenge, our attention-based architecture adaptively attends to relevant SDMap skeletons and significantly improves performance. On the other hand, we exploit a masked autoencoder to capture the prior distribution of HDMap, which can serve as a refinement module to mitigate occlusions and artifacts. We benchmark on the nuScenes and Argoverse2 datasets.
11 | Through comprehensive experiments, we show that: (1) our SDMap prior can improve online map construction performance, using both rasterized (by up to +18.73 mIoU) and vectorized (by up to +8.50 mAP) output representations. (2) our HDMap prior can improve map perceptual metrics by up to 6.34%. (3)
12 | P-MapNet can be switched into different inference modes that cover different regions of the accuracy-efficiency trade-off landscape. (4) P-MapNet is a far-seeing solution that brings larger improvements at longer ranges.
13 |
14 | ## Model
15 |
16 | ### Results on nuScenes-val set
17 | We provide results on the nuScenes val set. Scores are per-class IoU (%); "L+C" denotes LiDAR + camera input.
18 |
19 | | Range (m) | Method | Modality | Div. | Ped. | Bound. | mIoU | Model | Config |
20 | |:-----------:|:--------:|:---:|:---:|:---:|:-----:|:--------:|:--------:|:--------:|
21 | | 60 × 30 | HDMapNet | L+C | 45.9 | 30.5 | 56.8 | 44.40 | [ckpt](https://drive.google.com/file/d/1yYCRk_as7Vhvi_rL5BxqVrmEf_u7mB3b/view?usp=drive_link) | [cfg](config/nusc/baseline/baseline_60m.py) |
22 | | 60 × 30 | P-MapNet(SD+HD Prio.) | L+C | **54.2** | **41.3** | **63.7** | **53.07** | [ckpt](https://drive.google.com/file/d/1hr9QNRDOWmiqZcW2L5WY_o_0aIZFIo0W/view?usp=drive_link) | [cfg](config/nusc/hd_prior/hd_60m.py) |
23 | | 120 × 60 | HDMapNet | L+C | 53.6 | 37.8 | 57.1 | 49.50 | [ckpt](https://drive.google.com/file/d/1L_3whc53FmEdGh8Fn1EVS7xquX0_xHZJ/view?usp=drive_link) | [cfg](config/nusc/baseline/baseline_120m.py) |
24 | | 120 × 60 | P-MapNet(SD+HD Prio.) | L+C | **65.3** | **52.0** | **68.0** | **61.77** | [ckpt](https://drive.google.com/file/d/1MG10vfqFDnf4sYiDqdO2274LlQB670ne/view?usp=drive_link) | [cfg](config/nusc/hd_prior/hd_120m.py) |
25 | | 240 × 60 | HDMapNet | L+C | 40.0 | 26.8 | 42.6 | 36.47 | [ckpt](https://drive.google.com/file/d/1oKjYPXVxu0MwDzrOJ97r-0b2GBnKxK12/view?usp=drive_link) | [cfg](config/nusc/baseline/baseline_240m.py) |
26 | | 240 × 60 | P-MapNet(SD+HD Prio.) | L+C | **53.0** | **42.6** | **54.2** | **49.93** | [ckpt](https://drive.google.com/file/d/1lcA9U9oWKYM9X20gblBaG16I2DBLt2yU/view?usp=drive_link) | [cfg](config/nusc/hd_prior/hd_240m.py) |
27 |
28 | > The model weights under **other settings** can be downloaded at [GoogleDrive](https://drive.google.com/drive/folders/1P6LuhsHy3yy4sGwlDCGT9tjVzYpcaqEb?usp=drive_link) or [Baidu Netdisk](https://pan.baidu.com/s/1OVI3aWgOGGg6_iGCs_gxDg?pwd=65aa).
29 |
30 | ## Getting Started
31 | - [Installation](docs/installation.md)
32 | - [Train and Eval](docs/getting_started.md)
33 | - [Visualization](docs/visualization.md)
34 |
35 |
36 |
37 | ### TODO
38 | - [ ] Add Argoverse2 dataset model
39 |
40 | ### Citation
41 | If you find this paper or codebase useful, please cite our paper:
42 | ```
43 | @ARTICLE{10643284,
44 | author={Jiang, Zhou and Zhu, Zhenxin and Li, Pengfei and Gao, Huan-ang and Yuan, Tianyuan and Shi, Yongliang and Zhao, Hang and Zhao, Hao},
45 | journal={IEEE Robotics and Automation Letters},
46 | title={P-MapNet: Far-Seeing Map Generator Enhanced by Both SDMap and HDMap Priors},
47 | year={2024},
48 | volume={9},
49 | number={10},
50 | pages={8539-8546},
51 | keywords={Feature extraction;Skeleton;Laser radar;Generators;Encoding;Point cloud compression;Autonomous vehicles;Computer vision for transportation;semantic scene understanding;intelligent transportation systems},
52 | doi={10.1109/LRA.2024.3447450}}
53 |
54 | ```
55 |
--------------------------------------------------------------------------------
/config/config_eval_json.py:
--------------------------------------------------------------------------------
1 | result_path = './120_sd.json'
2 | dataroot = './dataset/nuScenes'
3 | version= 'v1.0-trainval' #'v1.0-mini'
4 |
5 | CD_threshold = 5
6 | threshold_iou = 0.1
7 | xbound = [-60.0, 60.0, 0.3]
8 | ybound = [-30.0, 30.0, 0.3]
9 | batch_size = 4
10 | eval_set = 'val' #'train', 'val', 'test', 'mini_train', 'mini_val'
11 | thickness = 5
12 | max_channel = 3
13 | bidirectional = False
14 |
--------------------------------------------------------------------------------
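
> Note: the files under `config/` are flat Python modules; every top-level variable is a hyperparameter. Below is a minimal sketch of loading such a module into a namespace. The repo's own loader presumably lives in `tools/config.py`; the `load_config` helper here is only an illustrative assumption.

```python
import importlib.util
from types import SimpleNamespace

def load_config(path):
    """Execute a flat Python config file and expose its top-level variables as attributes."""
    spec = importlib.util.spec_from_file_location("cfg", path)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    # Keep plain config variables, drop module dunders.
    fields = {k: v for k, v in vars(module).items() if not k.startswith("__")}
    return SimpleNamespace(**fields)

cfg = load_config("config/config_eval_json.py")
print(cfg.dataroot, cfg.eval_set, cfg.xbound)
```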
/config/config_vis.py:
--------------------------------------------------------------------------------
1 | # DATA
2 | dataset='nuScenes'
3 | dataroot = './dataset/nuScenes'
4 | version= 'v1.0-trainval' #'v1.0-mini'
5 |
6 | xbound = [-60.0, 60.0, 0.3] #120m*60m, bev_size:400*200
7 | ybound = [-30.0, 30.0, 0.3]
8 |
9 | zbound = [-10.0, 10.0, 20.0]
10 | dbound = [4.0, 45.0, 1.0]
11 | image_size = [128, 352]
12 | thickness = 5
13 | # Path
14 | vis_path = './vis_map'
15 | sd_map_path='./data_osm/osm'
16 |
17 | # CHECK_POINTS
18 | modelf = 'ckpt/fusion_120_sd_model23.pt'
19 |
20 | # Model
21 | model = 'pmapnet_sd'
22 |
23 | # Morphological_process mode in the vectorized post-process
24 | morpho_mode='MORPH_CLOSE' # 'MORPH_OPEN', 'None'
25 |
26 | batch_size = 1
27 | nworkers = 20
28 | gpus = [0]
29 |
30 |
31 | direction_pred = True
32 | instance_seg = True
33 | embedding_dim = 16
34 | delta_v = 0.5
35 | delta_d = 3.0
36 | angle_class = 36
37 |
38 | # Mask config
39 | mask_flag = False
40 | mask_ratio = -1 # random ratio
41 | patch_h = 20
42 | patch_w = 20
43 |
44 |
45 |
46 |
47 |
--------------------------------------------------------------------------------
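
> The `xbound`/`ybound` triples in these configs are `[min, max, resolution]` in meters, and the `bev_size` comments follow directly from them. A standalone sanity-check sketch (not part of the repo):

```python
def bev_grid_shape(xbound, ybound):
    """Rows and columns of the rasterized BEV for [min, max, step] bounds in meters."""
    nx = round((xbound[1] - xbound[0]) / xbound[2])  # cells along x (driving direction)
    ny = round((ybound[1] - ybound[0]) / ybound[2])  # cells along y (lateral)
    return ny, nx

# 120 m x 60 m at 0.3 m/cell and 60 m x 30 m at 0.15 m/cell both give a 200 x 400 grid,
# matching the "bev_size:400*200" comments in the configs.
print(bev_grid_shape([-60.0, 60.0, 0.3], [-30.0, 30.0, 0.3]))    # (200, 400)
print(bev_grid_shape([-30.0, 30.0, 0.15], [-15.0, 15.0, 0.15]))  # (200, 400)
```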
/config/nusc/baseline/baseline_120m.py:
--------------------------------------------------------------------------------
1 | # DATA
2 | dataset='nuScenes'
3 | dataroot = './dataset/nuScenes'
4 | version= 'v1.0-trainval'
5 |
6 | xbound = [-60.0, 60.0, 0.3] #120m*60m, bev_size:400*200
7 | ybound = [-30.0, 30.0, 0.3]
8 |
9 | zbound = [-10.0, 10.0, 20.0]
10 | dbound = [4.0, 45.0, 1.0]
11 | image_size = [128, 352]
12 | thickness = 5
13 | # EXP
14 | logdir = './Work_dir/baseline_120'
15 | sd_map_path='./data_osm/osm'
16 | # TRAIN
17 | model = 'HDMapNet_fusion'
18 | nepochs = 30
19 | batch_size = 8
20 | nworkers = 20
21 | gpus = [0, 1, 2, 3]
22 |
23 | # OPT
24 | lr = 5e-4
25 | weight_decay = 1e-7
26 | max_grad_norm = 5.0
27 | pos_weight = 2.13
28 | steplr = 10
29 |
30 | # CHECK_POINTS
31 | modelf = None
32 |
33 | # LOSS
34 | scale_seg = 1.0
35 | scale_var = 0.1
36 | scale_dist = 0.1
37 | scale_direction = 0.1
38 |
39 | direction_pred = True
40 | instance_seg = True
41 | embedding_dim = 16
42 | delta_v = 0.5
43 | delta_d = 3.0
44 | angle_class = 36
45 |
46 | # Mask config
47 | mask_flag = False
48 | mask_ratio = -1 # random ratio
49 | patch_h = 20
50 | patch_w = 20
51 |
52 |
53 |
54 |
55 |
--------------------------------------------------------------------------------
/config/nusc/baseline/baseline_120m_cam.py:
--------------------------------------------------------------------------------
1 | # DATA
2 | dataset='nuScenes'
3 | dataroot = './dataset/nuScenes'
4 | version= 'v1.0-trainval'
5 |
6 | xbound = [-60.0, 60.0, 0.3] #120m*60m, bev_size:400*200
7 | ybound = [-30.0, 30.0, 0.3]
8 |
9 | zbound = [-10.0, 10.0, 20.0]
10 | dbound = [4.0, 45.0, 1.0]
11 | image_size = [128, 352]
12 | thickness = 5
13 | # EXP
14 | logdir = './Work_dir/baseline_cam_120'
15 | sd_map_path='./data_osm/osm'
16 | # TRAIN
17 | model = 'HDMapNet_cam'
18 | nepochs = 30
19 | batch_size = 8
20 | nworkers = 20
21 | gpus = [0, 1, 2, 3]
22 |
23 | # OPT
24 | lr = 5e-4
25 | weight_decay = 1e-7
26 | max_grad_norm = 5.0
27 | pos_weight = 2.13
28 | steplr = 10
29 |
30 | # CHECK_POINTS
31 | modelf = None
32 |
33 | # LOSS
34 | scale_seg = 1.0
35 | scale_var = 0.1
36 | scale_dist = 0.1
37 | scale_direction = 0.1
38 |
39 | direction_pred = True
40 | instance_seg = True
41 | embedding_dim = 16
42 | delta_v = 0.5
43 | delta_d = 3.0
44 | angle_class = 36
45 |
46 | # Mask config
47 | mask_flag = False
48 | mask_ratio = -1 # random ratio
49 | patch_h = 20
50 | patch_w = 20
51 |
52 |
53 |
54 |
55 |
--------------------------------------------------------------------------------
/config/nusc/baseline/baseline_240m.py:
--------------------------------------------------------------------------------
1 | # DATA
2 | dataset='nuScenes'
3 | dataroot = './dataset/nuScenes'
4 | version= 'v1.0-trainval'
5 |
6 | xbound = [-120.0, 120.0, 0.3] #240m*60m, bev_size:800*200
7 | ybound = [-30.0, 30.0, 0.3]
8 |
9 | zbound = [-10.0, 10.0, 20.0]
10 | dbound = [4.0, 45.0, 1.0]
11 | image_size = [128, 352]
12 | thickness = 5
13 | # EXP
14 | logdir = './Work_dir/baseline_240'
15 | sd_map_path='./data_osm/osm'
16 | # TRAIN
17 | model = 'HDMapNet_fusion'
18 | nepochs = 30
19 | batch_size = 8
20 | nworkers = 20
21 | gpus = [0, 1, 2, 3]
22 |
23 | # OPT
24 | lr = 5e-4
25 | weight_decay = 1e-7
26 | max_grad_norm = 5.0
27 | pos_weight = 2.13
28 | steplr = 10
29 |
30 | # CHECK_POINTS
31 | modelf = None
32 |
33 | # LOSS
34 | scale_seg = 1.0
35 | scale_var = 0.1
36 | scale_dist = 0.1
37 | scale_direction = 0.1
38 |
39 | direction_pred = True
40 | instance_seg = True
41 | embedding_dim = 16
42 | delta_v = 0.5
43 | delta_d = 3.0
44 | angle_class = 36
45 |
46 | # Mask config
47 | mask_flag = False
48 | mask_ratio = -1 # random ratio
49 | patch_h = 20
50 | patch_w = 20
51 |
52 |
53 |
54 |
55 |
--------------------------------------------------------------------------------
/config/nusc/baseline/baseline_240m_cam.py:
--------------------------------------------------------------------------------
1 | # DATA
2 | dataset='nuScenes'
3 | dataroot = './dataset/nuScenes'
4 | version= 'v1.0-trainval'
5 |
6 | xbound = [-120.0, 120.0, 0.3] #240m*60m, bev_size:800*200
7 | ybound = [-30.0, 30.0, 0.3]
8 |
9 | zbound = [-10.0, 10.0, 20.0]
10 | dbound = [4.0, 45.0, 1.0]
11 | image_size = [128, 352]
12 | thickness = 5
13 | # EXP
14 | logdir = './Work_dir/baseline_cam_240'
15 | sd_map_path='./data_osm/osm'
16 | # TRAIN
17 | model = 'HDMapNet_cam'
18 | nepochs = 30
19 | batch_size = 8
20 | nworkers = 20
21 | gpus = [0, 1, 2, 3]
22 |
23 | # OPT
24 | lr = 5e-4
25 | weight_decay = 1e-7
26 | max_grad_norm = 5.0
27 | pos_weight = 2.13
28 | steplr = 10
29 |
30 | # CHECK_POINTS
31 | modelf = None
32 |
33 | # LOSS
34 | scale_seg = 1.0
35 | scale_var = 0.1
36 | scale_dist = 0.1
37 | scale_direction = 0.1
38 |
39 | direction_pred = True
40 | instance_seg = True
41 | embedding_dim = 16
42 | delta_v = 0.5
43 | delta_d = 3.0
44 | angle_class = 36
45 |
46 | # Mask config
47 | mask_flag = False
48 | mask_ratio = -1 # random ratio
49 | patch_h = 20
50 | patch_w = 20
51 |
52 |
53 |
54 |
55 |
--------------------------------------------------------------------------------
/config/nusc/baseline/baseline_60m.py:
--------------------------------------------------------------------------------
1 | # DATA
2 | dataset='nuScenes'
3 | dataroot = './dataset/nuScenes'
4 | version= 'v1.0-trainval'
5 |
6 | xbound = [-30, 30.0, 0.15] # 60m*30m, bev_size:400*200
7 | ybound = [-15.0, 15.0, 0.15]
8 |
9 | zbound = [-10.0, 10.0, 20.0]
10 | dbound = [4.0, 45.0, 1.0]
11 | image_size = [128, 352]
12 | thickness = 5
13 | # EXP
14 | logdir = './Work_dir/baseline_60'
15 | sd_map_path='./data_osm/osm'
16 | # TRAIN
17 | model = 'HDMapNet_fusion'
18 | nepochs = 30
19 | batch_size = 16
20 | nworkers = 10
21 | gpus = [0, 1, 2, 3]
22 |
23 | # OPT
24 | lr = 5e-4
25 | weight_decay = 1e-7
26 | max_grad_norm = 5.0
27 | pos_weight = 2.13
28 | steplr = 10
29 |
30 | # CHECK_POINTS
31 | modelf = None
32 |
33 | # LOSS
34 | scale_seg = 1.0
35 | scale_var = 0.1
36 | scale_dist = 0.1
37 | scale_direction = 0.1
38 |
39 | direction_pred = True
40 | instance_seg = True
41 | embedding_dim = 16
42 | delta_v = 0.5
43 | delta_d = 3.0
44 | angle_class = 36
45 |
46 | # Mask config
47 | mask_flag = False
48 | mask_ratio = -1 # random ratio
49 | patch_h = 20
50 | patch_w = 20
51 |
52 |
53 |
54 |
55 |
--------------------------------------------------------------------------------
/config/nusc/baseline/baseline_60m_cam.py:
--------------------------------------------------------------------------------
1 | # DATA
2 | dataset='nuScenes'
3 | dataroot = './dataset/nuScenes'
4 | version= 'v1.0-trainval'
5 |
6 | xbound = [-30, 30.0, 0.15] # 60m*30m, bev_size:400*200
7 | ybound = [-15.0, 15.0, 0.15]
8 |
9 | zbound = [-10.0, 10.0, 20.0]
10 | dbound = [4.0, 45.0, 1.0]
11 | image_size = [128, 352]
12 | thickness = 5
13 | # EXP
14 | logdir = './Work_dir/baseline_cam_60'
15 | sd_map_path='./data_osm/osm'
16 | # TRAIN
17 | model = 'HDMapNet_cam'
18 | nepochs = 30
19 | batch_size = 8
20 | nworkers = 20
21 | gpus = [0, 1, 2, 3]
22 |
23 | # OPT
24 | lr = 5e-4
25 | weight_decay = 1e-7
26 | max_grad_norm = 5.0
27 | pos_weight = 2.13
28 | steplr = 10
29 |
30 | # CHECK_POINTS
31 | modelf = None
32 |
33 | # NETWORK
34 | use_aux = True
35 | griding_num = 200
36 | backbone = '18'
37 |
38 | # LOSS
39 | scale_seg = 1.0
40 | scale_var = 0.1
41 | scale_dist = 0.1
42 | scale_direction = 0.1
43 |
44 | direction_pred = True
45 | instance_seg = True
46 | embedding_dim = 16
47 | delta_v = 0.5
48 | delta_d = 3.0
49 | angle_class = 36
50 |
51 | # Mask config
52 | mask_flag = False
53 | mask_ratio = -1 # random ratio
54 | patch_h = 20
55 | patch_w = 20
56 |
57 |
58 |
59 |
60 |
--------------------------------------------------------------------------------
/config/nusc/hd_prior/hd_120m.py:
--------------------------------------------------------------------------------
1 | # DATA
2 | dataset='nuScenes'
3 | dataroot = './dataset/nuScenes'
4 | version= 'v1.0-trainval'
5 |
6 | xbound = [-60.0, 60.0, 0.3] #120m*60m, bev_size:400*200
7 | ybound = [-30.0, 30.0, 0.3]
8 |
9 | zbound = [-10.0, 10.0, 20.0]
10 | dbound = [4.0, 45.0, 1.0]
11 | image_size = [128, 352]
12 | thickness = 5
13 | # EXP
14 | logdir = './Work_dir/hd_120'
15 | sd_map_path='./data_osm/osm'
16 | # TRAIN
17 | model = 'pmapnet_hd'
18 | nepochs = 30
19 | batch_size = 8
20 | nworkers = 20
21 | gpus = [0, 1, 2, 3]
22 |
23 | # OPT
24 | lr = 5e-4
25 | weight_decay = 1e-7
26 | max_grad_norm = 5.0
27 | pos_weight = 2.13
28 | steplr = 10
29 |
30 | # CHECK_POINTS
31 | modelf = None
32 |
33 | # LOSS
34 | scale_seg = 1.0
35 | scale_var = 0.1
36 | scale_dist = 0.1
37 | scale_direction = 0.1
38 |
39 | direction_pred = True
40 | instance_seg = True
41 | embedding_dim = 16
42 | delta_v = 0.5
43 | delta_d = 3.0
44 | angle_class = 36
45 |
46 | # Mask config
47 | mask_flag = False
48 | mask_ratio = -1 # random ratio
49 | patch_h = 20
50 | patch_w = 20
51 |
52 |
53 |
54 |
55 |
--------------------------------------------------------------------------------
/config/nusc/hd_prior/hd_120m_cam.py:
--------------------------------------------------------------------------------
1 | # DATA
2 | dataset='nuScenes'
3 | dataroot = './dataset/nuScenes'
4 | version= 'v1.0-trainval'
5 |
6 | xbound = [-60.0, 60.0, 0.3] #120m*60m, bev_size:400*200
7 | ybound = [-30.0, 30.0, 0.3]
8 |
9 | zbound = [-10.0, 10.0, 20.0]
10 | dbound = [4.0, 45.0, 1.0]
11 | image_size = [128, 352]
12 | thickness = 5
13 | # EXP
14 | logdir = './Work_dir/hd_cam_120'
15 | sd_map_path='./data_osm/osm'
16 | # TRAIN
17 | model = 'pmapnet_hd_cam'
18 | nepochs = 30
19 | batch_size = 8
20 | nworkers = 20
21 | gpus = [0, 1, 2, 3]
22 |
23 | # OPT
24 | lr = 5e-4
25 | weight_decay = 1e-7
26 | max_grad_norm = 5.0
27 | pos_weight = 2.13
28 | steplr = 10
29 |
30 | # CHECK_POINTS
31 | modelf = None
32 |
33 | # LOSS
34 | scale_seg = 1.0
35 | scale_var = 0.1
36 | scale_dist = 0.1
37 | scale_direction = 0.1
38 |
39 | direction_pred = True
40 | instance_seg = True
41 | embedding_dim = 16
42 | delta_v = 0.5
43 | delta_d = 3.0
44 | angle_class = 36
45 |
46 | # Mask config
47 | mask_flag = False
48 | mask_ratio = -1 # random ratio
49 | patch_h = 20
50 | patch_w = 20
51 |
52 |
53 |
54 |
55 |
--------------------------------------------------------------------------------
/config/nusc/hd_prior/hd_240m.py:
--------------------------------------------------------------------------------
1 | # DATA
2 | dataset='nuScenes'
3 | dataroot = './dataset/nuScenes'
4 | version= 'v1.0-trainval'
5 |
6 | xbound = [-120.0, 120.0, 0.3] #240m*60m, bev_size:800*200
7 | ybound = [-30.0, 30.0, 0.3]
8 |
9 | zbound = [-10.0, 10.0, 20.0]
10 | dbound = [4.0, 45.0, 1.0]
11 | image_size = [128, 352]
12 | thickness = 5
13 | # EXP
14 | logdir = './Work_dir/hd_240'
15 | sd_map_path='./data_osm/osm'
16 | # TRAIN
17 | model = 'pmapnet_hd'
18 | nepochs = 30
19 | batch_size = 8
20 | nworkers = 20
21 | gpus = [0, 1, 2, 3]
22 |
23 | # OPT
24 | lr = 5e-4
25 | weight_decay = 1e-7
26 | max_grad_norm = 5.0
27 | pos_weight = 2.13
28 | steplr = 10
29 |
30 | # CHECK_POINTS
31 | modelf = None
32 |
33 | # LOSS
34 | scale_seg = 1.0
35 | scale_var = 0.1
36 | scale_dist = 0.1
37 | scale_direction = 0.1
38 |
39 | direction_pred = True
40 | instance_seg = True
41 | embedding_dim = 16
42 | delta_v = 0.5
43 | delta_d = 3.0
44 | angle_class = 36
45 |
46 | # Mask config
47 | mask_flag = False
48 | mask_ratio = -1 # random ratio
49 | patch_h = 20
50 | patch_w = 20
51 |
52 |
53 |
54 |
55 |
--------------------------------------------------------------------------------
/config/nusc/hd_prior/hd_240m_cam.py:
--------------------------------------------------------------------------------
1 | # DATA
2 | dataset='nuScenes'
3 | dataroot = './dataset/nuScenes'
4 | version= 'v1.0-trainval'
5 |
6 | xbound = [-120.0, 120.0, 0.3] #240m*60m, bev_size:800*200
7 | ybound = [-30.0, 30.0, 0.3]
8 |
9 | zbound = [-10.0, 10.0, 20.0]
10 | dbound = [4.0, 45.0, 1.0]
11 | image_size = [128, 352]
12 | thickness = 5
13 | # EXP
14 | logdir = './Work_dir/hd_cam_240'
15 | sd_map_path='./data_osm/osm'
16 | # TRAIN
17 | model = 'pmapnet_hd_cam'
18 | nepochs = 30
19 | batch_size = 8
20 | nworkers = 20
21 | gpus = [0, 1, 2, 3]
22 |
23 | # OPT
24 | lr = 5e-4
25 | weight_decay = 1e-7
26 | max_grad_norm = 5.0
27 | pos_weight = 2.13
28 | steplr = 10
29 |
30 | # CHECK_POINTS
31 | modelf = None
32 |
33 | # LOSS
34 | scale_seg = 1.0
35 | scale_var = 0.1
36 | scale_dist = 0.1
37 | scale_direction = 0.1
38 |
39 | direction_pred = True
40 | instance_seg = True
41 | embedding_dim = 16
42 | delta_v = 0.5
43 | delta_d = 3.0
44 | angle_class = 36
45 |
46 | # Mask config
47 | mask_flag = False
48 | mask_ratio = -1 # random ratio
49 | patch_h = 20
50 | patch_w = 20
51 |
52 |
53 |
54 |
55 |
--------------------------------------------------------------------------------
/config/nusc/hd_prior/hd_60m.py:
--------------------------------------------------------------------------------
1 | # DATA
2 | dataset='nuScenes'
3 | dataroot = './dataset/nuScenes'
4 | version= 'v1.0-trainval'
5 |
6 | xbound = [-30.0, 30.0, 0.15] # 60m*30m, bev_size:400*200
7 | ybound = [-15.0, 15.0, 0.15]
8 |
9 | zbound = [-10.0, 10.0, 20.0]
10 | dbound = [4.0, 45.0, 1.0]
11 | image_size = [128, 352]
12 | thickness = 5
13 | # EXP
14 | logdir = './Work_dir/hd_60'
15 | sd_map_path='./data_osm/osm'
16 | # TRAIN
17 | model = 'pmapnet_hd'
18 | nepochs = 30
19 | batch_size = 8
20 | nworkers = 20
21 | gpus = [0, 1, 2, 3]
22 |
23 | # OPT
24 | lr = 5e-4
25 | weight_decay = 1e-7
26 | max_grad_norm = 5.0
27 | pos_weight = 2.13
28 | steplr = 10
29 |
30 | # CHECK_POINTS
31 | modelf = None
32 |
33 | # LOSS
34 | scale_seg = 1.0
35 | scale_var = 0.1
36 | scale_dist = 0.1
37 | scale_direction = 0.1
38 |
39 | direction_pred = True
40 | instance_seg = True
41 | embedding_dim = 16
42 | delta_v = 0.5
43 | delta_d = 3.0
44 | angle_class = 36
45 |
46 | # Mask config
47 | mask_flag = False
48 | mask_ratio = -1 # random ratio
49 | patch_h = 20
50 | patch_w = 20
51 |
52 |
53 |
54 |
55 |
--------------------------------------------------------------------------------
/config/nusc/hd_prior/hd_60m_cam.py:
--------------------------------------------------------------------------------
1 | # DATA
2 | dataset='nuScenes'
3 | dataroot = './dataset/nuScenes'
4 | version= 'v1.0-trainval'
5 |
6 | xbound = [-30.0, 30.0, 0.15] #60m*30m, bev_size:400*200
7 | ybound = [-15.0, 15.0, 0.15]
8 |
9 | zbound = [-10.0, 10.0, 20.0]
10 | dbound = [4.0, 45.0, 1.0]
11 | image_size = [128, 352]
12 | thickness = 5
13 | # EXP
14 | logdir = './Work_dir/hd_cam_60'
15 | sd_map_path='./data_osm/osm'
16 | # TRAIN
17 | model = 'pmapnet_hd_cam'
18 | nepochs = 30
19 | batch_size = 8
20 | nworkers = 20
21 | gpus = [0, 1, 2, 3]
22 |
23 | # OPT
24 | lr = 5e-4
25 | weight_decay = 1e-7
26 | max_grad_norm = 5.0
27 | pos_weight = 2.13
28 | steplr = 10
29 |
30 | # CHECK_POINTS
31 | modelf = None
32 |
33 | # NETWORK
34 | use_aux = True
35 | griding_num = 200
36 | backbone = '18'
37 |
38 | # LOSS
39 | scale_seg = 1.0
40 | scale_var = 0.1
41 | scale_dist = 0.1
42 | scale_direction = 0.1
43 |
44 | direction_pred = True
45 | instance_seg = True
46 | embedding_dim = 16
47 | delta_v = 0.5
48 | delta_d = 3.0
49 | angle_class = 36
50 |
51 | # Mask config
52 | mask_flag = False
53 | mask_ratio = -1 # random ratio
54 | patch_h = 20
55 | patch_w = 20
56 |
57 |
58 |
59 |
60 |
--------------------------------------------------------------------------------
/config/nusc/hd_prior_pretrain/hd_pretrain_120m.py:
--------------------------------------------------------------------------------
1 | # DATA
2 | dataset='nuScenes'
3 | dataroot = './dataset/nuScenes'
4 | version= 'v1.0-trainval'
5 |
6 | xbound = [-60.0, 60.0, 0.3] #120m*60m, bev_size:400*200
7 | ybound = [-30.0, 30.0, 0.3]
8 |
9 | zbound = [-10.0, 10.0, 20.0]
10 | dbound = [4.0, 45.0, 1.0]
11 | image_size = [128, 352]
12 | thickness = 5
13 | # EXP
14 | logdir = './Work_dir/pretrain_120'
15 | sd_map_path='./data_osm/osm'
16 | # TRAIN
17 | model = 'hdmapnet_pretrain'
18 | nepochs = 30
19 | batch_size = 8
20 | nworkers = 20
21 | gpus = [0, 1, 2, 3]
22 |
23 | # OPT
24 | lr = 5e-4
25 | weight_decay = 1e-7
26 | max_grad_norm = 5.0
27 | pos_weight = 2.13
28 | steplr = 10
29 |
30 | # CHECK_POINTS
31 | modelf = None
32 |
33 | # LOSS
34 | scale_seg = 1.0
35 | scale_var = 0.1
36 | scale_dist = 0.1
37 | scale_direction = 0.1
38 |
39 | direction_pred = True
40 | instance_seg = True
41 | embedding_dim = 16
42 | delta_v = 0.5
43 | delta_d = 3.0
44 | angle_class = 36
45 |
46 | # Mask config
47 | mask_flag = True
48 | mask_ratio = -1 # random ratio
49 | patch_h = 20
50 | patch_w = 20
51 |
52 |
53 |
54 |
55 |
--------------------------------------------------------------------------------
/config/nusc/hd_prior_pretrain/hd_pretrain_240m.py:
--------------------------------------------------------------------------------
1 | # DATA
2 | dataset='nuScenes'
3 | dataroot = './dataset/nuScenes'
4 | version= 'v1.0-trainval'
5 |
6 | xbound = [-120.0, 120.0, 0.3] #240m*60m, bev_size:800*200
7 | ybound = [-30.0, 30.0, 0.3]
8 |
9 | zbound = [-10.0, 10.0, 20.0]
10 | dbound = [4.0, 45.0, 1.0]
11 | image_size = [128, 352]
12 | thickness = 5
13 | # EXP
14 | logdir = './Work_dir/pretrain_240'
15 | sd_map_path='./data_osm/osm'
16 | # TRAIN
17 | model = 'hdmapnet_pretrain'
18 | nepochs = 30
19 | batch_size = 8
20 | nworkers = 20
21 | gpus = [0, 1, 2, 3]
22 |
23 | # OPT
24 | lr = 5e-4
25 | weight_decay = 1e-7
26 | max_grad_norm = 5.0
27 | pos_weight = 2.13
28 | steplr = 10
29 |
30 | # CHECK_POINTS
31 | modelf = None
32 |
33 | # LOSS
34 | scale_seg = 1.0
35 | scale_var = 0.1
36 | scale_dist = 0.1
37 | scale_direction = 0.1
38 |
39 | direction_pred = True
40 | instance_seg = True
41 | embedding_dim = 16
42 | delta_v = 0.5
43 | delta_d = 3.0
44 | angle_class = 36
45 |
46 | # Mask config
47 | mask_flag = True
48 | mask_ratio = 0.5 # random ratio
49 | patch_h = 20
50 | patch_w = 20
51 |
52 |
53 |
54 |
55 |
--------------------------------------------------------------------------------
/config/nusc/hd_prior_pretrain/hd_pretrain_60m.py:
--------------------------------------------------------------------------------
1 | # DATA
2 | dataset='nuScenes'
3 | dataroot = './dataset/nuScenes'
4 | version= 'v1.0-trainval'
5 |
6 | xbound = [-30, 30.0, 0.15] # 60m*30m, bev_size:400*200
7 | ybound = [-15.0, 15.0, 0.15]
8 |
9 | zbound = [-10.0, 10.0, 20.0]
10 | dbound = [4.0, 45.0, 1.0]
11 | image_size = [128, 352]
12 | thickness = 5
13 | # EXP
14 | logdir = './Work_dir/pretrain_60'
15 | sd_map_path='./data_osm/osm'
16 | # TRAIN
17 | model = 'hdmapnet_pretrain'
18 | nepochs = 30
19 | batch_size = 8
20 | nworkers = 10
21 | gpus = [0, 1, 2, 3]
22 |
23 | # OPT
24 | lr = 5e-4
25 | weight_decay = 1e-7
26 | max_grad_norm = 5.0
27 | pos_weight = 2.13
28 | steplr = 10
29 |
30 | # CHECK_POINTS
31 | modelf = ".Work_dir/pretrain_60/model0.pt"
32 | vit_base = 'ckpt/mae_finetuned_vit_base.pth' # download link: https://dl.fbaipublicfiles.com/mae/finetune/mae_finetuned_vit_base.pth
33 | # LOSS
34 | scale_seg = 1.0
35 | scale_var = 0.1
36 | scale_dist = 0.1
37 | scale_direction = 0.1
38 |
39 | direction_pred = True
40 | instance_seg = True
41 | embedding_dim = 16
42 | delta_v = 0.5
43 | delta_d = 3.0
44 | angle_class = 36
45 |
46 | # Mask config
47 | mask_flag = True
48 | mask_ratio = 0.5 # random ratio
49 | patch_h = 20
50 | patch_w = 20
51 |
52 |
53 |
54 |
55 |
--------------------------------------------------------------------------------
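
> Every config ends with a `# Mask config` block; the `hd_prior_pretrain` configs switch `mask_flag` on so the MAE-style refinement module is trained on BEV maps with randomly dropped patches. Below is a minimal sketch of what patch masking with `patch_h`/`patch_w`/`mask_ratio` could look like. The actual implementation lives in the model code (e.g. `model/utils/map_mae_head.py`); this standalone version is an assumption.

```python
import torch

def random_patch_mask(bev, patch_h=20, patch_w=20, mask_ratio=0.5):
    """Zero out a random fraction of (patch_h x patch_w) patches of a (C, H, W) BEV map.

    mask_ratio = -1 in the configs is read here as "draw a random ratio per sample".
    H and W are assumed to be divisible by the patch size.
    """
    if mask_ratio < 0:
        mask_ratio = float(torch.rand(1))
    _, h, w = bev.shape
    keep = torch.rand(h // patch_h, w // patch_w) >= mask_ratio   # True = keep patch
    keep_full = keep.repeat_interleave(patch_h, 0).repeat_interleave(patch_w, 1)
    return bev * keep_full.unsqueeze(0), keep

masked_bev, kept = random_patch_mask(torch.ones(3, 200, 400))
```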
/config/nusc/sd_prior/sd_120m.py:
--------------------------------------------------------------------------------
1 | # DATA
2 | dataset='nuScenes'
3 | dataroot = './dataset/nuScenes'
4 | version= 'v1.0-trainval' #'v1.0-mini'
5 |
6 | xbound = [-60.0, 60.0, 0.3] #120m*60m, bev_size:400*200
7 | ybound = [-30.0, 30.0, 0.3]
8 |
9 | zbound = [-10.0, 10.0, 20.0]
10 | dbound = [4.0, 45.0, 1.0]
11 | image_size = [128, 352]
12 | thickness = 5
13 | # EXP
14 | logdir = './Work_dir/sd_120'
15 | sd_map_path='./data_osm/osm'
16 | # TRAIN
17 | model = 'pmapnet_sd'
18 | nepochs = 30
19 | batch_size = 4
20 | nworkers = 20
21 | gpus = [0]
22 |
23 | # OPT
24 | lr = 5e-4
25 | weight_decay = 1e-7
26 | max_grad_norm = 5.0
27 | pos_weight = 2.13
28 | steplr = 10
29 |
30 | # CHECK_POINTS
31 | modelf = None
32 |
33 | # LOSS
34 | scale_seg = 1.0
35 | scale_var = 0.1
36 | scale_dist = 0.1
37 | scale_direction = 0.1
38 |
39 | direction_pred = True
40 | instance_seg = True
41 | embedding_dim = 16
42 | delta_v = 0.5
43 | delta_d = 3.0
44 | angle_class = 36
45 |
46 | # Mask config
47 | mask_flag = False
48 | mask_ratio = -1 # random ratio
49 | patch_h = 20
50 | patch_w = 20
51 |
52 |
53 |
54 |
55 |
--------------------------------------------------------------------------------
/config/nusc/sd_prior/sd_120m_cam.py:
--------------------------------------------------------------------------------
1 | # DATA
2 | dataset='nuScenes'
3 | dataroot = './dataset/nuScenes'
4 | version= 'v1.0-trainval'
5 |
6 | xbound = [-60.0, 60.0, 0.3] #120m*60m, bev_size:400*200
7 | ybound = [-30.0, 30.0, 0.3]
8 |
9 | zbound = [-10.0, 10.0, 20.0]
10 | dbound = [4.0, 45.0, 1.0]
11 | image_size = [128, 352]
12 | thickness = 5
13 | # EXP
14 | logdir = './Work_dir/sd_cam_120'
15 | sd_map_path='./data_osm/osm'
16 | # TRAIN
17 | model = 'pmapnet_sd_cam'
18 | nepochs = 30
19 | batch_size = 8
20 | nworkers = 20
21 | gpus = [0, 1, 2, 3]
22 |
23 | # OPT
24 | lr = 5e-4
25 | weight_decay = 1e-7
26 | max_grad_norm = 5.0
27 | pos_weight = 2.13
28 | steplr = 10
29 |
30 | # CHECK_POINTS
31 | modelf = None
32 |
33 | # LOSS
34 | scale_seg = 1.0
35 | scale_var = 0.1
36 | scale_dist = 0.1
37 | scale_direction = 0.1
38 |
39 | direction_pred = True
40 | instance_seg = True
41 | embedding_dim = 16
42 | delta_v = 0.5
43 | delta_d = 3.0
44 | angle_class = 36
45 |
46 | # Mask config
47 | mask_flag = False
48 | mask_ratio = -1 # random ratio
49 | patch_h = 20
50 | patch_w = 20
51 |
52 |
53 |
54 |
55 |
--------------------------------------------------------------------------------
/config/nusc/sd_prior/sd_240m.py:
--------------------------------------------------------------------------------
1 | # DATA
2 | dataset='nuScenes'
3 | dataroot = './dataset/nuScenes'
4 | version= 'v1.0-trainval'
5 |
6 | xbound = [-120.0, 120.0, 0.3] #240m*60m, bev_size:800*200
7 | ybound = [-30.0, 30.0, 0.3]
8 |
9 | zbound = [-10.0, 10.0, 20.0]
10 | dbound = [4.0, 45.0, 1.0]
11 | image_size = [128, 352]
12 | thickness = 5
13 | # EXP
14 | logdir = './Work_dir/sd_240'
15 | sd_map_path='./data_osm/osm'
16 | # TRAIN
17 | model = 'pmapnet_sd'
18 | nepochs = 30
19 | batch_size = 8
20 | nworkers = 20
21 | gpus = [0, 1, 2, 3]
22 |
23 | # OPT
24 | lr = 5e-4
25 | weight_decay = 1e-7
26 | max_grad_norm = 5.0
27 | pos_weight = 2.13
28 | steplr = 10
29 |
30 | # CHECK_POINTS
31 | modelf = None
32 |
33 | # LOSS
34 | scale_seg = 1.0
35 | scale_var = 0.1
36 | scale_dist = 0.1
37 | scale_direction = 0.1
38 |
39 | direction_pred = True
40 | instance_seg = True
41 | embedding_dim = 16
42 | delta_v = 0.5
43 | delta_d = 3.0
44 | angle_class = 36
45 |
46 | # Mask config
47 | mask_flag = False
48 | mask_ratio = -1 # random ratio
49 | patch_h = 20
50 | patch_w = 20
51 |
52 |
53 |
54 |
55 |
--------------------------------------------------------------------------------
/config/nusc/sd_prior/sd_240m_cam.py:
--------------------------------------------------------------------------------
1 | # DATA
2 | dataset='nuScenes'
3 | dataroot = './dataset/nuScenes'
4 | version= 'v1.0-trainval'
5 |
6 | xbound = [-120.0, 120.0, 0.3] #240m*60m, bev_size:800*200
7 | ybound = [-30.0, 30.0, 0.3]
8 |
9 | zbound = [-10.0, 10.0, 20.0]
10 | dbound = [4.0, 45.0, 1.0]
11 | image_size = [128, 352]
12 | thickness = 5
13 | # EXP
14 | logdir = './Work_dir/sd_cam_240'
15 | sd_map_path='./data_osm/osm'
16 | # TRAIN
17 | model = 'pmapnet_sd_cam'
18 | nepochs = 30
19 | batch_size = 8
20 | nworkers = 20
21 | gpus = [0, 1, 2, 3]
22 |
23 | # OPT
24 | lr = 5e-4
25 | weight_decay = 1e-7
26 | max_grad_norm = 5.0
27 | pos_weight = 2.13
28 | steplr = 10
29 |
30 | # CHECK_POINTS
31 | modelf = None
32 |
33 | # LOSS
34 | scale_seg = 1.0
35 | scale_var = 0.1
36 | scale_dist = 0.1
37 | scale_direction = 0.1
38 |
39 | direction_pred = True
40 | instance_seg = True
41 | embedding_dim = 16
42 | delta_v = 0.5
43 | delta_d = 3.0
44 | angle_class = 36
45 |
46 | # Mask config
47 | mask_flag = False
48 | mask_ratio = -1 # random ratio
49 | patch_h = 20
50 | patch_w = 20
51 |
52 |
53 |
54 |
55 |
--------------------------------------------------------------------------------
/config/nusc/sd_prior/sd_60m.py:
--------------------------------------------------------------------------------
1 | # DATA
2 | dataset='nuScenes'
3 | dataroot = './dataset/nuScenes'
4 | version= 'v1.0-trainval'
5 |
6 | xbound = [-30, 30.0, 0.15] # 60m*30m, bev_size:400*200
7 | ybound = [-15.0, 15.0, 0.15]
8 |
9 | zbound = [-10.0, 10.0, 20.0]
10 | dbound = [4.0, 45.0, 1.0]
11 | image_size = [128, 352]
12 | thickness = 5
13 | # EXP
14 | logdir = './Work_dir/sd_60'
15 | sd_map_path='./data_osm/osm'
16 | # TRAIN
17 | model = 'pmapnet_sd'
18 | nepochs = 30
19 | batch_size = 2
20 | nworkers = 20
21 | gpus = [0]
22 |
23 | # OPT
24 | lr = 5e-4
25 | weight_decay = 1e-7
26 | max_grad_norm = 5.0
27 | pos_weight = 2.13
28 | steplr = 10
29 |
30 | # CHECK_POINTS
31 | modelf = "./Work_dir/sd_60m/model11--.pt"
32 |
33 | # LOSS
34 | scale_seg = 1.0
35 | scale_var = 0.1
36 | scale_dist = 0.1
37 | scale_direction = 0.1
38 |
39 | direction_pred = True
40 | instance_seg = True
41 | embedding_dim = 16
42 | delta_v = 0.5
43 | delta_d = 3.0
44 | angle_class = 36
45 |
46 | # Mask config
47 | mask_flag = False
48 | mask_ratio = -1 # random ratio
49 | patch_h = 20
50 | patch_w = 20
51 |
52 | # JSON
53 | result_path = './Work_dir/sd_60m/submission.json'
54 | max_channel = 3
55 | bidirectional = False
56 | CD_threshold = 5
57 | threshold_iou = 0.1
--------------------------------------------------------------------------------
/config/nusc/sd_prior/sd_60m_cam.py:
--------------------------------------------------------------------------------
1 | # DATA
2 | dataset='nuScenes'
3 | dataroot = './dataset/nuScenes'
4 | version= 'v1.0-trainval'
5 |
6 | xbound = [-60.0, 60.0, 0.3]
7 | ybound = [-30.0, 30.0, 0.3]
8 |
9 | zbound = [-10.0, 10.0, 20.0]
10 | dbound = [4.0, 45.0, 1.0]
11 | image_size = [128, 352]
12 | thickness = 5
13 | # EXP
14 | logdir = './Work_dir/sd_cam_60'
15 | sd_map_path='./data_osm/osm'
16 | # TRAIN
17 | model = 'pmapnet_sd_cam'
18 | nepochs = 30
19 | batch_size = 8
20 | nworkers = 20
21 | gpus = [0, 1, 2, 3]
22 |
23 | # OPT
24 | lr = 5e-4
25 | weight_decay = 1e-7
26 | max_grad_norm = 5.0
27 | pos_weight = 2.13
28 | steplr = 10
29 |
30 | # CHECK_POINTS
31 | modelf = "Work_dir/nus/sd_60m_cam/cam_60_sd_model15.pt"
32 |
33 | # NETWORK
34 | use_aux = True
35 | griding_num = 200
36 | backbone = '18'
37 |
38 | # LOSS
39 | scale_seg = 1.0
40 | scale_var = 0.1
41 | scale_dist = 0.1
42 | scale_direction = 0.1
43 |
44 | direction_pred = True
45 | instance_seg = True
46 | embedding_dim = 16
47 | delta_v = 0.5
48 | delta_d = 3.0
49 | angle_class = 36
50 |
51 | # Mask config
52 | mask_flag = False
53 | mask_ratio = -1 # random ratio
54 | patch_h = 20
55 | patch_w = 20
56 |
57 |
58 |
59 |
60 |
--------------------------------------------------------------------------------
/data_osm/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jike5/P-MapNet/b8b4cf2295ee75826046eef9cfa12b107fb43619/data_osm/__init__.py
--------------------------------------------------------------------------------
/data_osm/const.py:
--------------------------------------------------------------------------------
1 | MAP = ['boston-seaport', 'singapore-hollandvillage', 'singapore-onenorth', 'singapore-queenstown']
2 | CAMS = ['CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT']
3 | CLASS2LABEL = {
4 | 'road_divider': 0, # road divider
5 | 'lane_divider': 0, # lane divider
6 | 'ped_crossing': 1, # pedestrian crossing
7 | 'contours': 2, # contour / map boundary
8 | 'others': -1
9 | }
10 |
11 | NUM_CLASSES = 3
12 | IMG_ORIGIN_H = 900
13 | IMG_ORIGIN_W = 1600
14 |
--------------------------------------------------------------------------------
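
> `CLASS2LABEL` folds the nuScenes map layers into the three supervised classes counted by `NUM_CLASSES` (divider, pedestrian crossing, boundary), with `others` (-1) ignored. A small usage sketch, assuming it is run from the repository root:

```python
from data_osm.const import CLASS2LABEL, NUM_CLASSES

def layers_to_labels(layer_names):
    """Map nuScenes map layer names to training label ids, dropping ignored layers."""
    labels = (CLASS2LABEL.get(name, -1) for name in layer_names)
    return [label for label in labels if 0 <= label < NUM_CLASSES]

print(layers_to_labels(['road_divider', 'ped_crossing', 'others', 'contours']))  # [0, 1, 2]
```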
/data_osm/geo_opensfm.py:
--------------------------------------------------------------------------------
1 | """Copied from opensfm.geo to minimize hard dependencies."""
2 |
3 | from typing import Tuple
4 |
5 | import numpy as np
6 | from numpy import ndarray
7 |
8 | WGS84_a = 6378137.0
9 | WGS84_b = 6356752.314245
10 |
11 |
12 | def ecef_from_lla(lat, lon, alt: float) -> Tuple[float, ...]:
13 | """
14 | Compute ECEF XYZ from latitude, longitude and altitude.
15 |
16 | All using the WGS84 model.
17 | Altitude is the distance to the WGS84 ellipsoid.
18 | Check results here http://www.oc.nps.edu/oc2902w/coord/llhxyz.htm
19 |
20 | >>> lat, lon, alt = 10, 20, 30
21 | >>> x, y, z = ecef_from_lla(lat, lon, alt)
22 | >>> np.allclose(lla_from_ecef(x,y,z), [lat, lon, alt])
23 | True
24 | """
25 | a2 = WGS84_a**2
26 | b2 = WGS84_b**2
27 | lat = np.radians(lat)
28 | lon = np.radians(lon)
29 | L = 1.0 / np.sqrt(a2 * np.cos(lat) ** 2 + b2 * np.sin(lat) ** 2)
30 | x = (a2 * L + alt) * np.cos(lat) * np.cos(lon)
31 | y = (a2 * L + alt) * np.cos(lat) * np.sin(lon)
32 | z = (b2 * L + alt) * np.sin(lat)
33 | return x, y, z
34 |
35 |
36 | def lla_from_ecef(x, y, z):
37 | """
38 | Compute latitude, longitude and altitude from ECEF XYZ.
39 |
40 | All using the WGS84 model.
41 | Altitude is the distance to the WGS84 ellipsoid.
42 | """
43 | a = WGS84_a
44 | b = WGS84_b
45 | ea = np.sqrt((a**2 - b**2) / a**2)
46 | eb = np.sqrt((a**2 - b**2) / b**2)
47 | p = np.sqrt(x**2 + y**2)
48 | theta = np.arctan2(z * a, p * b)
49 | lon = np.arctan2(y, x)
50 | lat = np.arctan2(
51 | z + eb**2 * b * np.sin(theta) ** 3, p - ea**2 * a * np.cos(theta) ** 3
52 | )
53 | N = a / np.sqrt(1 - ea**2 * np.sin(lat) ** 2)
54 | alt = p / np.cos(lat) - N
55 | return np.degrees(lat), np.degrees(lon), alt
56 |
57 |
58 | def ecef_from_topocentric_transform(lat, lon, alt: float) -> ndarray:
59 | """
60 | Transformation from a topocentric frame at reference position to ECEF.
61 |
62 | The topocentric reference frame is a metric one with the origin
63 | at the given (lat, lon, alt) position, with the X axis heading east,
64 | the Y axis heading north and the Z axis vertical to the ellipsoid.
65 | >>> a = ecef_from_topocentric_transform(30, 20, 10)
66 | >>> b = ecef_from_topocentric_transform_finite_diff(30, 20, 10)
67 | >>> np.allclose(a, b)
68 | True
69 | """
70 | x, y, z = ecef_from_lla(lat, lon, alt)
71 | sa = np.sin(np.radians(lat))
72 | ca = np.cos(np.radians(lat))
73 | so = np.sin(np.radians(lon))
74 | co = np.cos(np.radians(lon))
75 | return np.array(
76 | [
77 | [-so, -sa * co, ca * co, x],
78 | [co, -sa * so, ca * so, y],
79 | [0, ca, sa, z],
80 | [0, 0, 0, 1],
81 | ]
82 | )
83 |
84 |
85 | def ecef_from_topocentric_transform_finite_diff(lat, lon, alt: float) -> ndarray:
86 | """
87 | Transformation from a topocentric frame at reference position to ECEF.
88 |
89 | The topocentric reference frame is a metric one with the origin
90 | at the given (lat, lon, alt) position, with the X axis heading east,
91 | the Y axis heading north and the Z axis vertical to the ellipsoid.
92 | """
93 | eps = 1e-2
94 | x, y, z = ecef_from_lla(lat, lon, alt)
95 | v1 = (
96 | (
97 | np.array(ecef_from_lla(lat, lon + eps, alt))
98 | - np.array(ecef_from_lla(lat, lon - eps, alt))
99 | )
100 | / 2
101 | / eps
102 | )
103 | v2 = (
104 | (
105 | np.array(ecef_from_lla(lat + eps, lon, alt))
106 | - np.array(ecef_from_lla(lat - eps, lon, alt))
107 | )
108 | / 2
109 | / eps
110 | )
111 | v3 = (
112 | (
113 | np.array(ecef_from_lla(lat, lon, alt + eps))
114 | - np.array(ecef_from_lla(lat, lon, alt - eps))
115 | )
116 | / 2
117 | / eps
118 | )
119 | v1 /= np.linalg.norm(v1)
120 | v2 /= np.linalg.norm(v2)
121 | v3 /= np.linalg.norm(v3)
122 | return np.array(
123 | [
124 | [v1[0], v2[0], v3[0], x],
125 | [v1[1], v2[1], v3[1], y],
126 | [v1[2], v2[2], v3[2], z],
127 | [0, 0, 0, 1],
128 | ]
129 | )
130 |
131 |
132 | def topocentric_from_lla(lat, lon, alt: float, reflat, reflon, refalt: float):
133 | """
134 | Transform from lat, lon, alt to topocentric XYZ.
135 |
136 | >>> lat, lon, alt = -10, 20, 100
137 | >>> np.allclose(topocentric_from_lla(lat, lon, alt, lat, lon, alt),
138 | ... [0,0,0])
139 | True
140 | >>> x, y, z = topocentric_from_lla(lat, lon, alt, 0, 0, 0)
141 | >>> np.allclose(lla_from_topocentric(x, y, z, 0, 0, 0),
142 | ... [lat, lon, alt])
143 | True
144 | """
145 | T = np.linalg.inv(ecef_from_topocentric_transform(reflat, reflon, refalt))
146 | x, y, z = ecef_from_lla(lat, lon, alt)
147 | tx = T[0, 0] * x + T[0, 1] * y + T[0, 2] * z + T[0, 3]
148 | ty = T[1, 0] * x + T[1, 1] * y + T[1, 2] * z + T[1, 3]
149 | tz = T[2, 0] * x + T[2, 1] * y + T[2, 2] * z + T[2, 3]
150 | return tx, ty, tz
151 |
152 |
153 | def lla_from_topocentric(x, y, z, reflat, reflon, refalt: float):
154 | """
155 | Transform from topocentric XYZ to lat, lon, alt.
156 | """
157 | T = ecef_from_topocentric_transform(reflat, reflon, refalt)
158 | ex = T[0, 0] * x + T[0, 1] * y + T[0, 2] * z + T[0, 3]
159 | ey = T[1, 0] * x + T[1, 1] * y + T[1, 2] * z + T[1, 3]
160 | ez = T[2, 0] * x + T[2, 1] * y + T[2, 2] * z + T[2, 3]
161 | return lla_from_ecef(ex, ey, ez)
162 |
163 |
164 | class TopocentricConverter(object):
165 | """Convert to and from a topocentric reference frame."""
166 |
167 | def __init__(self, reflat, reflon, refalt):
168 | """Init the converter given the reference origin."""
169 | self.lat = reflat
170 | self.lon = reflon
171 | self.alt = refalt
172 |
173 | def to_topocentric(self, lat, lon, alt):
174 | """Convert lat, lon, alt to topocentric x, y, z."""
175 | return topocentric_from_lla(lat, lon, alt, self.lat, self.lon, self.alt)
176 |
177 | def to_lla(self, x, y, z):
178 | """Convert topocentric x, y, z to lat, lon, alt."""
179 | return lla_from_topocentric(x, y, z, self.lat, self.lon, self.alt)
180 |
181 | def __eq__(self, o):
182 | return np.allclose([self.lat, self.lon, self.alt], (o.lat, o.lon, o.alt))
--------------------------------------------------------------------------------
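
> `TopocentricConverter` is what lets OSM/SD-map geometry given in latitude/longitude be expressed in a local metric frame around a reference origin. A short usage example (the reference coordinates below are illustrative, not taken from the repo):

```python
from data_osm.geo_opensfm import TopocentricConverter

# Reference origin somewhere in the boston-seaport area (illustrative values).
converter = TopocentricConverter(reflat=42.336, reflon=-71.058, refalt=0.0)

# lat/lon/alt -> local metric x (east), y (north), z (up).
x, y, z = converter.to_topocentric(42.337, -71.057, 0.0)

# The round trip recovers the inputs up to numerical error.
lat, lon, alt = converter.to_lla(x, y, z)
print((x, y, z), (lat, lon, alt))
```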
/data_osm/image.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from PIL import Image
3 |
4 | import torch
5 | import torchvision
6 |
7 | class NormalizeInverse(torchvision.transforms.Normalize):
8 | # https://discuss.pytorch.org/t/simple-way-to-inverse-transform-normalization/4821/8
9 | def __init__(self, mean, std):
10 | mean = torch.as_tensor(mean)
11 | std = torch.as_tensor(std)
12 | std_inv = 1 / (std + 1e-7)
13 | mean_inv = -mean * std_inv
14 | super().__init__(mean=mean_inv, std=std_inv)
15 |
16 | def __call__(self, tensor):
17 | return super().__call__(tensor.clone())
18 |
19 |
20 | normalize_img = torchvision.transforms.Compose((
21 | torchvision.transforms.ToTensor(),
22 | torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
23 | std=[0.229, 0.224, 0.225]),
24 | ))
25 |
26 | normalize_tensor_img = torchvision.transforms.Compose((
27 | # torchvision.transforms.ToTensor(),
28 | torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
29 | std=[0.229, 0.224, 0.225]),
30 | ))
31 |
32 | denormalize_img = torchvision.transforms.Compose((
33 | NormalizeInverse(mean=[0.485, 0.456, 0.406],
34 | std=[0.229, 0.224, 0.225]),
35 | torchvision.transforms.ToPILImage(),
36 | ))
37 |
38 |
39 | def img_transform(img, resize, resize_dims):
40 | post_rot2 = torch.eye(2)
41 | post_tran2 = torch.zeros(2)
42 |
43 | img = img.resize(resize_dims) # resize to 352*128
44 |
45 | rot_resize = torch.Tensor([[resize[0], 0],
46 | [0, resize[1]]])
47 | post_rot2 = rot_resize @ post_rot2
48 | post_tran2 = rot_resize @ post_tran2
49 |
50 | post_tran = torch.zeros(3)
51 | post_rot = torch.eye(3)
52 | post_tran[:2] = post_tran2
53 | post_rot[:2, :2] = post_rot2
54 | return img, post_rot, post_tran
55 |
56 |
57 | def get_rot(h):
58 | return torch.Tensor([
59 | [np.cos(h), np.sin(h)],
60 | [-np.sin(h), np.cos(h)],
61 | ])
62 |
63 | # def img_transform(img, resize, resize_dims, crop, flip, rotate):
64 | # post_rot2 = torch.eye(2)
65 | # post_tran2 = torch.zeros(2)
66 |
67 | # # adjust image
68 | # img = img.resize(resize_dims)
69 | # img = img.crop(crop)
70 | # if flip:
71 | # img = img.transpose(method=Image.FLIP_LEFT_RIGHT)
72 | # img = img.rotate(rotate)
73 |
74 | # # post-homography transformation
75 | # post_rot2 *= resize
76 | # post_tran2 -= torch.Tensor(crop[:2])
77 | # if flip:
78 | # A = torch.Tensor([[-1, 0], [0, 1]])
79 | # b = torch.Tensor([crop[2] - crop[0], 0])
80 | # post_rot2 = A.matmul(post_rot2)
81 | # post_tran2 = A.matmul(post_tran2) + b
82 | # A = get_rot(rotate/180*np.pi)
83 | # b = torch.Tensor([crop[2] - crop[0], crop[3] - crop[1]]) / 2
84 | # b = A.matmul(-b) + b
85 | # post_rot2 = A.matmul(post_rot2)
86 | # post_tran2 = A.matmul(post_tran2) + b
87 |
88 | # post_tran = torch.zeros(3)
89 | # post_rot = torch.eye(3)
90 | # post_tran[:2] = post_tran2
91 | # post_rot[:2, :2] = post_rot2
92 | # return img, post_rot, post_tran
93 |
94 |
--------------------------------------------------------------------------------
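
> `img_transform` only resizes (the crop/flip/rotate variant is kept commented out) and returns the scaling embedded in 3x3 `post_rot`/`post_tran` terms for the view transformer. A minimal usage sketch, assuming a 1600x900 nuScenes frame and the `image_size = [128, 352]` used by the configs:

```python
from PIL import Image
from data_osm.image import img_transform, normalize_img

img = Image.new('RGB', (1600, 900))   # stand-in for a nuScenes camera frame
target_h, target_w = 128, 352         # image_size in the configs is [H, W]
resize = (target_w / img.width, target_h / img.height)

img, post_rot, post_tran = img_transform(img, resize, resize_dims=(target_w, target_h))
tensor = normalize_img(img)           # (3, 128, 352), ImageNet-normalized
print(tensor.shape, post_rot.shape, post_tran.shape)
```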
/data_osm/lidar.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | from functools import reduce
4 |
5 | from pyquaternion import Quaternion
6 |
7 | from nuscenes.utils.data_classes import LidarPointCloud
8 | from nuscenes.utils.geometry_utils import transform_matrix
9 |
10 |
11 | def get_lidar_data(nusc, sample_rec, nsweeps, min_distance):
12 | """
13 | Returns at most nsweeps of lidar in the ego frame.
14 | The returned array is 5 (x, y, z, reflectance, dt) x N.
15 | Adapted from https://github.com/nutonomy/nuscenes-devkit/blob/master/python-sdk/nuscenes/utils/data_classes.py#L56
16 | """
17 | points = np.zeros((5, 0))
18 |
19 | # Get reference pose and timestamp.
20 | ref_sd_token = sample_rec['data']['LIDAR_TOP']
21 | ref_sd_rec = nusc.get('sample_data', ref_sd_token)
22 | ref_pose_rec = nusc.get('ego_pose', ref_sd_rec['ego_pose_token'])
23 | ref_cs_rec = nusc.get('calibrated_sensor', ref_sd_rec['calibrated_sensor_token'])
24 | ref_time = 1e-6 * ref_sd_rec['timestamp']
25 |
26 | # Homogeneous transformation matrix from global to _current_ ego car frame.
27 | car_from_global = transform_matrix(ref_pose_rec['translation'], Quaternion(ref_pose_rec['rotation']),
28 | inverse=True)
29 |
30 | # Aggregate current and previous sweeps.
31 | sample_data_token = sample_rec['data']['LIDAR_TOP']
32 | current_sd_rec = nusc.get('sample_data', sample_data_token)
33 | for _ in range(nsweeps):
34 | # Load up the pointcloud and remove points close to the sensor.
35 | current_pc = LidarPointCloud.from_file(os.path.join(nusc.dataroot, current_sd_rec['filename']))
36 | current_pc.remove_close(min_distance)
37 |
38 | # Get past pose.
39 | current_pose_rec = nusc.get('ego_pose', current_sd_rec['ego_pose_token'])
40 | global_from_car = transform_matrix(current_pose_rec['translation'],
41 | Quaternion(current_pose_rec['rotation']), inverse=False)
42 |
43 | # Homogeneous transformation matrix from sensor coordinate frame to ego car frame.
44 | current_cs_rec = nusc.get('calibrated_sensor', current_sd_rec['calibrated_sensor_token'])
45 | car_from_current = transform_matrix(current_cs_rec['translation'], Quaternion(current_cs_rec['rotation']),
46 | inverse=False)
47 |
48 | # Chain the transformation matrices into one and apply the transform.
49 | trans_matrix = reduce(np.dot, [car_from_global, global_from_car, car_from_current])
50 | current_pc.transform(trans_matrix)
51 |
52 | # Add time vector which can be used as a temporal feature.
53 | time_lag = ref_time - 1e-6 * current_sd_rec['timestamp']
54 | times = time_lag * np.ones((1, current_pc.nbr_points()))
55 |
56 | new_points = np.concatenate((current_pc.points, times), 0)
57 | points = np.concatenate((points, new_points), 1)
58 |
59 | # Abort if there are no previous sweeps.
60 | if current_sd_rec['prev'] == '':
61 | break
62 | else:
63 | current_sd_rec = nusc.get('sample_data', current_sd_rec['prev'])
64 |
65 | return points
--------------------------------------------------------------------------------
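
> `get_lidar_data` aggregates up to `nsweeps` LiDAR sweeps into the current ego frame and stamps each point with its time lag relative to the reference sweep. A hedged usage sketch (requires the nuScenes devkit and a local dataset copy; the path follows the configs, and `min_distance=2.2` is just an illustrative value):

```python
from nuscenes.nuscenes import NuScenes
from data_osm.lidar import get_lidar_data

nusc = NuScenes(version='v1.0-trainval', dataroot='./dataset/nuScenes', verbose=False)
sample_rec = nusc.sample[0]

# 5 x N array: x, y, z, reflectance, and per-point time lag.
points = get_lidar_data(nusc, sample_rec, nsweeps=3, min_distance=2.2)
print(points.shape)
```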
/data_osm/osm/boston-seaport.cpg:
--------------------------------------------------------------------------------
1 | UTF-8
2 |
--------------------------------------------------------------------------------
/data_osm/osm/boston-seaport.dbf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jike5/P-MapNet/b8b4cf2295ee75826046eef9cfa12b107fb43619/data_osm/osm/boston-seaport.dbf
--------------------------------------------------------------------------------
/data_osm/osm/boston-seaport.prj:
--------------------------------------------------------------------------------
1 | GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]]
2 |
--------------------------------------------------------------------------------
/data_osm/osm/boston-seaport.shp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jike5/P-MapNet/b8b4cf2295ee75826046eef9cfa12b107fb43619/data_osm/osm/boston-seaport.shp
--------------------------------------------------------------------------------
/data_osm/osm/boston-seaport.shx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jike5/P-MapNet/b8b4cf2295ee75826046eef9cfa12b107fb43619/data_osm/osm/boston-seaport.shx
--------------------------------------------------------------------------------
/data_osm/osm/sd_map_data_ATX.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jike5/P-MapNet/b8b4cf2295ee75826046eef9cfa12b107fb43619/data_osm/osm/sd_map_data_ATX.pkl
--------------------------------------------------------------------------------
/data_osm/osm/sd_map_data_DTW.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jike5/P-MapNet/b8b4cf2295ee75826046eef9cfa12b107fb43619/data_osm/osm/sd_map_data_DTW.pkl
--------------------------------------------------------------------------------
/data_osm/osm/sd_map_data_MIA.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jike5/P-MapNet/b8b4cf2295ee75826046eef9cfa12b107fb43619/data_osm/osm/sd_map_data_MIA.pkl
--------------------------------------------------------------------------------
/data_osm/osm/sd_map_data_PAO.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jike5/P-MapNet/b8b4cf2295ee75826046eef9cfa12b107fb43619/data_osm/osm/sd_map_data_PAO.pkl
--------------------------------------------------------------------------------
/data_osm/osm/sd_map_data_PIT.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jike5/P-MapNet/b8b4cf2295ee75826046eef9cfa12b107fb43619/data_osm/osm/sd_map_data_PIT.pkl
--------------------------------------------------------------------------------
/data_osm/osm/sd_map_data_WDC.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jike5/P-MapNet/b8b4cf2295ee75826046eef9cfa12b107fb43619/data_osm/osm/sd_map_data_WDC.pkl
--------------------------------------------------------------------------------
/data_osm/osm/singapore-hollandvillage.cpg:
--------------------------------------------------------------------------------
1 | UTF-8
2 |
--------------------------------------------------------------------------------
/data_osm/osm/singapore-hollandvillage.prj:
--------------------------------------------------------------------------------
1 | GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]]
2 |
--------------------------------------------------------------------------------
/data_osm/osm/singapore-hollandvillage.shp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jike5/P-MapNet/b8b4cf2295ee75826046eef9cfa12b107fb43619/data_osm/osm/singapore-hollandvillage.shp
--------------------------------------------------------------------------------
/data_osm/osm/singapore-hollandvillage.shx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jike5/P-MapNet/b8b4cf2295ee75826046eef9cfa12b107fb43619/data_osm/osm/singapore-hollandvillage.shx
--------------------------------------------------------------------------------
/data_osm/osm/singapore-onenorth.cpg:
--------------------------------------------------------------------------------
1 | UTF-8
2 |
--------------------------------------------------------------------------------
/data_osm/osm/singapore-onenorth.prj:
--------------------------------------------------------------------------------
1 | GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]]
2 |
--------------------------------------------------------------------------------
/data_osm/osm/singapore-onenorth.shp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jike5/P-MapNet/b8b4cf2295ee75826046eef9cfa12b107fb43619/data_osm/osm/singapore-onenorth.shp
--------------------------------------------------------------------------------
/data_osm/osm/singapore-onenorth.shx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jike5/P-MapNet/b8b4cf2295ee75826046eef9cfa12b107fb43619/data_osm/osm/singapore-onenorth.shx
--------------------------------------------------------------------------------
/data_osm/osm/singapore-queenstown.cpg:
--------------------------------------------------------------------------------
1 | UTF-8
2 |
--------------------------------------------------------------------------------
/data_osm/osm/singapore-queenstown.prj:
--------------------------------------------------------------------------------
1 | GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]]
2 |
--------------------------------------------------------------------------------
/data_osm/osm/singapore-queenstown.shp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jike5/P-MapNet/b8b4cf2295ee75826046eef9cfa12b107fb43619/data_osm/osm/singapore-queenstown.shp
--------------------------------------------------------------------------------
/data_osm/osm/singapore-queenstown.shx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jike5/P-MapNet/b8b4cf2295ee75826046eef9cfa12b107fb43619/data_osm/osm/singapore-queenstown.shx
--------------------------------------------------------------------------------
/data_osm/pipelines/__init__.py:
--------------------------------------------------------------------------------
1 | from .loading import LoadMultiViewImagesFromFiles
2 | from .formating import FormatBundleMap
3 | from .transform import ResizeMultiViewImages, PadMultiViewImages, Normalize3D, PhotoMetricDistortionMultiViewImage
4 | from .vectorize import VectorizeMap
5 |
--------------------------------------------------------------------------------
/data_osm/pipelines/formating.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from mmcv.parallel import DataContainer as DC
3 | import torch
4 | import mmcv
5 | from collections.abc import Sequence
6 | # from mmdet3d.core.points import BasePoints
7 | # from mmdet.datasets.pipelines import to_tensor
8 |
9 | # copied from mmdet: https://mmdetection.readthedocs.io/en/v2.0.0/_modules/mmdet/datasets/pipelines/formating.html
10 | def to_tensor(data):
11 | """Convert objects of various python types to :obj:`torch.Tensor`.
12 |
13 | Supported types are: :class:`numpy.ndarray`, :class:`torch.Tensor`,
14 | :class:`Sequence`, :class:`int` and :class:`float`.
15 | """
16 | if isinstance(data, torch.Tensor):
17 | return data
18 | elif isinstance(data, np.ndarray):
19 | return torch.from_numpy(data)
20 | elif isinstance(data, Sequence) and not mmcv.is_str(data):
21 | return torch.tensor(data)
22 | elif isinstance(data, int):
23 | return torch.LongTensor([data])
24 | elif isinstance(data, float):
25 | return torch.FloatTensor([data])
26 | else:
27 | raise TypeError(f'type {type(data)} cannot be converted to tensor.')
28 |
29 | class FormatBundleMap(object):
30 | """Format data for map tasks and then collect data for model input.
31 |
32 | These fields are formatted as follows.
33 |
34 | - img: (1) transpose, (2) to tensor, (3) to DataContainer (stack=True)
35 | - semantic_mask (if exists): (1) to tensor, (2) to DataContainer (stack=True)
36 | - vectors (if exists): (1) to DataContainer (cpu_only=True)
37 | - img_metas: (1) to DataContainer (cpu_only=True)
38 | """
39 |
40 | def __init__(self, process_img=True,
41 | keys=['img', 'semantic_mask', 'vectors'],
42 | meta_keys=['intrinsics', 'extrinsics']):
43 |
44 | self.process_img = process_img
45 | self.keys = keys
46 | self.meta_keys = meta_keys
47 |
48 | def __call__(self, results):
49 | """Call function to transform and format common fields in results.
50 |
51 | Args:
52 | results (dict): Result dict contains the data to convert.
53 |
54 | Returns:
55 | dict: The result dict contains the data that is formatted with
56 | default bundle.
57 | """
58 | # Format 3D data
59 | if 'points' in results:
60 |             # the mmdet3d BasePoints import above is commented out, so skip the type check
61 | results['points'] = DC(results['points'].tensor)
62 |
63 | for key in ['voxels', 'coors', 'voxel_centers', 'num_points']:
64 | if key not in results:
65 | continue
66 | results[key] = DC(to_tensor(results[key]), stack=False)
67 |
68 | if 'img' in results and self.process_img:
69 | if isinstance(results['img'], list):
70 | # process multiple imgs in single frame
71 | imgs = [img.transpose(2, 0, 1) for img in results['img']]
72 | imgs = np.ascontiguousarray(np.stack(imgs, axis=0))
73 | results['img'] = DC(to_tensor(imgs), stack=True)
74 | else:
75 | img = np.ascontiguousarray(results['img'].transpose(2, 0, 1))
76 | results['img'] = DC(to_tensor(img), stack=True)
77 |
78 | if 'semantic_mask' in results:
79 | results['semantic_mask'] = DC(to_tensor(results['semantic_mask']), stack=True)
80 |
81 | if 'vectors' in results:
82 | # vectors may have different sizes
83 | vectors = results['vectors']
84 | results['vectors'] = DC(vectors, stack=False, cpu_only=True)
85 |
86 | if 'polys' in results:
87 | results['polys'] = DC(results['polys'], stack=False, cpu_only=True)
88 |
89 | return results
90 |
91 | def __repr__(self):
92 | """str: Return a string that describes the module."""
93 | repr_str = self.__class__.__name__
94 |         repr_str += f'(process_img={self.process_img}, keys={self.keys}, meta_keys={self.meta_keys})'
95 | return repr_str
96 |
--------------------------------------------------------------------------------
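A minimal usage sketch for the `FormatBundleMap` bundle above, assuming the module is importable as `data_osm.pipelines.formating` and that `mmcv` is installed; the keys, shapes, and label ids below are placeholders, not the dataset's real layout.

```python
import numpy as np
from data_osm.pipelines.formating import FormatBundleMap  # assumed import path

# Fake per-frame results: six HxWx3 camera images, a 4-class semantic mask
# and one already-vectorized polyline (all shapes are placeholders).
results = {
    'img': [np.zeros((128, 352, 3), dtype=np.float32) for _ in range(6)],
    'semantic_mask': np.zeros((4, 200, 400), dtype=np.uint8),
    'vectors': {0: [np.zeros((20, 2), dtype=np.float32)]},
}

out = FormatBundleMap()(results)
print(out['img'].data.shape)            # torch.Size([6, 3, 128, 352])
print(out['semantic_mask'].data.shape)  # torch.Size([4, 200, 400])
print(type(out['vectors']).__name__)    # DataContainer (cpu_only, not stacked)
```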
/data_osm/pipelines/loading.py:
--------------------------------------------------------------------------------
1 | import mmcv
2 | import numpy as np
3 |
4 | class LoadMultiViewImagesFromFiles(object):
5 | """Load multi channel images from a list of separate channel files.
6 |
7 |     Expects results['img_filenames'] to be a list of filenames.
8 |
9 | Args:
10 | to_float32 (bool): Whether to convert the img to float32.
11 | Defaults to False.
12 | color_type (str): Color type of the file. Defaults to 'unchanged'.
13 | """
14 |
15 | def __init__(self, to_float32=False, color_type='unchanged'):
16 | self.to_float32 = to_float32
17 | self.color_type = color_type
18 |
19 | def __call__(self, results):
20 | """Call function to load multi-view image from files.
21 |
22 | Args:
23 | results (dict): Result dict containing multi-view image filenames.
24 |
25 | Returns:
26 | dict: The result dict containing the multi-view image data. \
27 | Added keys and values are described below.
28 |
29 | - filename (str): Multi-view image filenames.
30 | - img (np.ndarray): Multi-view image arrays.
31 | - img_shape (tuple[int]): Shape of multi-view image arrays.
32 | - ori_shape (tuple[int]): Shape of original image arrays.
33 | - pad_shape (tuple[int]): Shape of padded image arrays.
34 | - scale_factor (float): Scale factor.
35 | - img_norm_cfg (dict): Normalization configuration of images.
36 | """
37 | filename = results['img_filenames']
38 | img = [mmcv.imread(name, self.color_type) for name in filename]
39 | if self.to_float32:
40 | img = [i.astype(np.float32) for i in img]
41 | results['img'] = img
42 | results['img_shape'] = [i.shape for i in img]
43 | results['ori_shape'] = [i.shape for i in img]
44 | # Set initial values for default meta_keys
45 | results['pad_shape'] = [i.shape for i in img]
46 | # results['scale_factor'] = 1.0
47 | num_channels = 1 if len(img[0].shape) < 3 else img[0].shape[2]
48 | results['img_norm_cfg'] = dict(
49 | mean=np.zeros(num_channels, dtype=np.float32),
50 | std=np.ones(num_channels, dtype=np.float32),
51 | to_rgb=False)
52 | results['img_fields'] = ['img']
53 | return results
54 |
55 | def __repr__(self):
56 | """str: Return a string that describes the module."""
57 | return f'{self.__class__.__name__} (to_float32={self.to_float32}, '\
58 | f"color_type='{self.color_type}')"
59 |
--------------------------------------------------------------------------------
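A small self-contained sketch of `LoadMultiViewImagesFromFiles`, assuming the import path `data_osm.pipelines.loading`; the two JPEGs are dummies written on the spot so the snippet runs without nuScenes data.

```python
import mmcv
import numpy as np
from data_osm.pipelines.loading import LoadMultiViewImagesFromFiles  # assumed import path

# Write two dummy camera frames so the sketch does not need real data.
for name in ('cam_front.jpg', 'cam_back.jpg'):
    mmcv.imwrite(np.zeros((128, 352, 3), dtype=np.uint8), name)

loader = LoadMultiViewImagesFromFiles(to_float32=True)
out = loader({'img_filenames': ['cam_front.jpg', 'cam_back.jpg']})
print(len(out['img']))       # 2 views
print(out['img_shape'][0])   # (128, 352, 3)
print(out['img'][0].dtype)   # float32
```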
/data_osm/pipelines/vectorize.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from shapely.geometry import LineString
3 | from numpy.typing import NDArray
4 | from typing import List, Tuple, Union, Dict
5 |
6 |
7 | class VectorizeMap(object):
8 | """Generate vectoized map and put into `semantic_mask` key.
9 | Concretely, shapely geometry objects are converted into sample points (ndarray).
10 | We use args `sample_num`, `sample_dist`, `simplify` to specify sampling method.
11 |
12 | Args:
13 |         roi_size (tuple or list): BEV range.
14 | normalize (bool): whether to normalize points to range (0, 1).
15 | coords_dim (int): dimension of point coordinates.
16 |         simplify (bool): whether to use the simplify function. If true, `sample_num` \
17 | and `sample_dist` will be ignored.
18 | sample_num (int): number of points to interpolate from a polyline. Set to -1 to ignore.
19 | sample_dist (float): interpolate distance. Set to -1 to ignore.
20 | """
21 |
22 | def __init__(self,
23 | roi_size: Union[Tuple, List],
24 | normalize: bool,
25 | coords_dim: int,
26 | simplify: bool=False,
27 | sample_num: int=-1,
28 | sample_dist: float=-1,
29 | permute: bool=False
30 | ):
31 | self.coords_dim = coords_dim
32 | self.sample_num = sample_num
33 | self.sample_dist = sample_dist
34 | self.roi_size = np.array(roi_size)
35 | self.normalize = normalize
36 | self.simplify = simplify
37 | self.permute = permute
38 |
39 | if sample_dist > 0:
40 | assert sample_num < 0 and not simplify
41 | self.sample_fn = self.interp_fixed_dist
42 | elif sample_num > 0:
43 | assert sample_dist < 0 and not simplify
44 | self.sample_fn = self.interp_fixed_num
45 | else:
46 | assert simplify
47 |
48 | def interp_fixed_num(self, line: LineString) -> NDArray:
49 | ''' Interpolate a line to fixed number of points.
50 |
51 | Args:
52 | line (LineString): line
53 |
54 | Returns:
55 | points (array): interpolated points, shape (N, 2)
56 | '''
57 |
58 | distances = np.linspace(0, line.length, self.sample_num)
59 | sampled_points = np.array([list(line.interpolate(distance).coords)
60 | for distance in distances]).squeeze()
61 |
62 | return sampled_points
63 |
64 | def interp_fixed_dist(self, line: LineString) -> NDArray:
65 | ''' Interpolate a line at fixed interval.
66 |
67 | Args:
68 | line (LineString): line
69 |
70 | Returns:
71 | points (array): interpolated points, shape (N, 2)
72 | '''
73 |
74 | distances = list(np.arange(self.sample_dist, line.length, self.sample_dist))
75 | # make sure to sample at least two points when sample_dist > line.length
76 | distances = [0,] + distances + [line.length,]
77 |
78 | sampled_points = np.array([list(line.interpolate(distance).coords)
79 | for distance in distances]).squeeze()
80 |
81 | return sampled_points
82 |
83 | def get_vectorized_lines(self, map_geoms: Dict) -> Dict:
84 | ''' Vectorize map elements. Iterate over the input dict and apply the
85 |         specified sample function.
86 |
87 |         Args:
88 |             map_geoms (Dict): dict mapping class labels to lists of geometries.
89 |
90 |         Returns:
91 |             vectors (Dict): dict of vectorized map elements.
92 | '''
93 |
94 | vectors = {}
95 | for label, geom_list in map_geoms.items():
96 | vectors[label] = []
97 | for geom in geom_list:
98 | if geom.geom_type == 'LineString':
99 | if self.simplify:
100 | line = geom.simplify(0.2, preserve_topology=True)
101 | line = np.array(line.coords)
102 | else:
103 | line = self.sample_fn(geom)
104 | line = line[:, :self.coords_dim]
105 |
106 | if self.normalize:
107 | line = self.normalize_line(line)
108 | if self.permute:
109 | line = self.permute_line(line)
110 | vectors[label].append(line)
111 |
112 | elif geom.geom_type == 'Polygon':
113 | # polygon objects will not be vectorized
114 | continue
115 |
116 | else:
117 | raise ValueError('map geoms must be either LineString or Polygon!')
118 | return vectors
119 |
120 | def normalize_line(self, line: NDArray) -> NDArray:
121 | ''' Convert points to range (0, 1).
122 |
123 | Args:
124 |             line (array): polyline points, shape (N, coords_dim)
125 |
126 | Returns:
127 | normalized (array): normalized points.
128 | '''
129 |
130 | origin = -np.array([self.roi_size[0]/2, self.roi_size[1]/2])
131 |
132 | line[:, :2] = line[:, :2] - origin
133 |
134 | # transform from range [0, 1] to (0, 1)
135 | eps = 1e-5
136 | line[:, :2] = line[:, :2] / (self.roi_size + eps)
137 |
138 | return line
139 |
140 | def permute_line(self, line: np.ndarray, padding=1e5):
141 | '''
142 | (num_pts, 2) -> (num_permute, num_pts, 2)
143 | where num_permute = 2 * (num_pts - 1)
144 | '''
145 | is_closed = np.allclose(line[0], line[-1], atol=1e-3)
146 | num_points = len(line)
147 | permute_num = num_points - 1
148 | permute_lines_list = []
149 | if is_closed:
150 | pts_to_permute = line[:-1, :] # throw away replicate start end pts
151 | for shift_i in range(permute_num):
152 | permute_lines_list.append(np.roll(pts_to_permute, shift_i, axis=0))
153 | flip_pts_to_permute = np.flip(pts_to_permute, axis=0)
154 | for shift_i in range(permute_num):
155 | permute_lines_list.append(np.roll(flip_pts_to_permute, shift_i, axis=0))
156 | else:
157 | permute_lines_list.append(line)
158 | permute_lines_list.append(np.flip(line, axis=0))
159 |
160 | permute_lines_array = np.stack(permute_lines_list, axis=0)
161 |
162 | if is_closed:
163 | tmp = np.zeros((permute_num * 2, num_points, self.coords_dim))
164 | tmp[:, :-1, :] = permute_lines_array
165 | tmp[:, -1, :] = permute_lines_array[:, 0, :] # add replicate start end pts
166 | permute_lines_array = tmp
167 |
168 | else:
169 | # padding
170 | padding = np.full([permute_num * 2 - 2, num_points, self.coords_dim], padding)
171 | permute_lines_array = np.concatenate((permute_lines_array, padding), axis=0)
172 |
173 | return permute_lines_array
174 |
175 | def __call__(self, input_dict):
176 | map_geoms = input_dict['map_geoms']
177 | sd_map_data = input_dict.get('sd_vectors', None)
178 | input_dict['vectors'] = self.get_vectorized_lines(map_geoms)
179 | input_dict['sd_vectors'] = sd_map_data
180 | return input_dict
181 |
182 | def __repr__(self):
183 | repr_str = self.__class__.__name__
184 | repr_str += f'(simplify={self.simplify}, '
185 |         repr_str += f'sample_num={self.sample_num}, '
186 |         repr_str += f'sample_dist={self.sample_dist}, '
187 |         repr_str += f'roi_size={self.roi_size}, '
188 |         repr_str += f'normalize={self.normalize}, '
189 |         repr_str += f'coords_dim={self.coords_dim})'
190 |
191 | return repr_str
--------------------------------------------------------------------------------
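A usage sketch for `VectorizeMap` above, assuming the import path `data_osm.pipelines.vectorize`; the ROI size, label id, and polyline are placeholders chosen to match a 60 m x 30 m BEV range.

```python
from shapely.geometry import LineString
from data_osm.pipelines.vectorize import VectorizeMap  # assumed import path

# Resample every polyline to 20 evenly spaced points and normalize to (0, 1).
vectorizer = VectorizeMap(roi_size=(60, 30), normalize=True,
                          coords_dim=2, sample_num=20)

map_geoms = {0: [LineString([(-25.0, -10.0), (0.0, 0.0), (25.0, 10.0)])]}
vectors = vectorizer.get_vectorized_lines(map_geoms)

print(vectors[0][0].shape)                       # (20, 2)
print(vectors[0][0].min(), vectors[0][0].max())  # both within (0, 1)
```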
/docs/getting_started.md:
--------------------------------------------------------------------------------
1 | ## Getting Started
2 |
3 | ### Training
4 |
5 | Run `python train.py [config-file]`, for example:
6 |
7 | ```
8 | # Baseline model
9 | python train.py config/nusc/baseline/baseline_60m.py
10 | # SDMap Prior model
11 | python train.py config/nusc/sd_prior/sd_60m.py
12 | ```
13 |
14 | Explanation of some parameters in `[config-file]`:
15 | * `dataroot`: the path of your nuScenes data
16 | * `logdir`: the path where log files, checkpoints, etc., are saved
17 | * `model`: model name. Currently, the following models are supported: `HDMapNet_cam`, `HDMapNet_fusion`, `pmapnet_sd[_cam]`, `pmapnet_hd`, and `hdmapnet_pretrain`. You can find them in the [file](../model/__init__.py).
18 | * `batch_size`: the total number of samples per iteration across all GPUs; each GPU processes `batch_size` / `gpus` samples.
19 | * `gpus`: the number of GPUs you are using.
20 |
21 | ### Evaluation
22 |
23 | #### mIoU Metric
24 | To evaluate your model using the mIoU metric, you should first set the `modelf` in `[config-file]` to the path of your checkpoint, and then use the following command:
25 | ```
26 | python tools/eval.py [config-file]
27 | ```
28 |
29 | #### mAP Metric
30 |
31 | Before running the evaluation code, you should first obtain the `submission.json` file, which can be generated using the following command:
32 | ```
33 | python tools/export_json.py
34 | ```
35 | > Note: remember to set the value of `result_path` in `[config-file]`.
36 |
37 | Finally, run the evaluation script:
38 | ```
39 | python tools/evaluate_json.py
40 | ```
41 |
--------------------------------------------------------------------------------
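The parameters listed above live as plain Python variables in the files under `config/`. Below is a hedged sketch of what such a file roughly contains; the values are placeholders, see e.g. `config/nusc/sd_prior/sd_60m.py` for the shipped settings.

```python
# Placeholder config sketch -- not the shipped defaults.
dataroot = '/path/to/nuScenes'         # nuScenes data root
logdir = './work_dir/sd_60m'           # logs and checkpoints are written here
model = 'pmapnet_sd'                   # one of the names listed in model/__init__.py
batch_size = 8                         # total samples per step across all GPUs
gpus = 4                               # samples per GPU = batch_size / gpus = 2
modelf = './work_dir/sd_60m/model.pt'  # checkpoint path, needed for eval/vis
```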
/docs/installation.md:
--------------------------------------------------------------------------------
1 | ### Environment
2 |
3 | 1. Create conda environment
4 | ```
5 | conda env create -f environment.yml
6 | conda activate pmapnet
7 | ```
8 | 2. Install PyTorch
9 | ```
10 | pip install torch==1.9.0+cu111 torchvision==0.10.0+cu111 torchaudio==0.9.0 -f https://download.pytorch.org/whl/torch_stable.html
11 | ```
12 | 3. Install dependencies
13 | ```
14 | pip install -r requirements.txt
15 | ```
16 |
17 | ### Dataset preparation
18 | Download the [nuScenes dataset](https://www.nuscenes.org/) and place it in the `dataset/` folder.
--------------------------------------------------------------------------------
/docs/visualization.md:
--------------------------------------------------------------------------------
1 | # Visualization
2 |
3 | All visualization scripts are provided as `tools/vis_*.py`.
4 |
5 | ## Visualize prediction
6 |
7 | - Set `modelf = /path/to/experiment/ckpt` in the config file.
8 |
9 | ```shell
10 | python tools/vis_map.py /path/to/experiment/config
11 | ```
12 | **Notes**:
13 |
14 | - All visualization samples are saved to `P_MAPNET/Work_dir/experiment/vis` automatically. To customize the output path, add `vis_path = /customized_path` to the config file.
15 |
16 | ## Merge them into video
17 |
18 | We also provide scripts to merge the input, output, and ground truth into a video for qualitative benchmarking.
19 |
20 | ```shell
21 | # visualize nuscenes dataset
22 | python tools/vis_video_nus.py /path/to/experiment/config path/to/experiment/vis
23 | # visualize argoverse2 dataset
24 | python tools/vis_video_av2.py /path/to/experiment/config path/to/experiment/vis
25 | ```
26 | **Notes**:
27 | - The video will be saved to `P-MAPNET/Work_dir/experiment/demo.mp4`.
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
1 | name: pmapnet
2 | channels:
3 | - defaults
4 | dependencies:
5 | - _libgcc_mutex=0.1=main
6 | - _openmp_mutex=5.1=1_gnu
7 | - asttokens=2.0.5=pyhd3eb1b0_0
8 | - backcall=0.2.0=pyhd3eb1b0_0
9 | - ca-certificates=2023.08.22=h06a4308_0
10 | - decorator=5.1.1=pyhd3eb1b0_0
11 | - executing=0.8.3=pyhd3eb1b0_0
12 | - ld_impl_linux-64=2.38=h1181459_1
13 | - libffi=3.4.4=h6a678d5_0
14 | - libgcc-ng=11.2.0=h1234567_1
15 | - libgomp=11.2.0=h1234567_1
16 | - libstdcxx-ng=11.2.0=h1234567_1
17 | - matplotlib-inline=0.1.6=py38h06a4308_0
18 | - ncurses=6.4=h6a678d5_0
19 | - openssl=3.0.11=h7f8727e_2
20 | - parso=0.8.3=pyhd3eb1b0_0
21 | - pexpect=4.8.0=pyhd3eb1b0_3
22 | - pickleshare=0.7.5=pyhd3eb1b0_1003
23 | - pip=23.2.1=py38h06a4308_0
24 | - ptyprocess=0.7.0=pyhd3eb1b0_2
25 | - pure_eval=0.2.2=pyhd3eb1b0_0
26 | - pygments=2.15.1=py38h06a4308_1
27 | - python=3.8.18=h955ad1f_0
28 | - readline=8.2=h5eee18b_0
29 | - setuptools=68.0.0=py38h06a4308_0
30 | - six=1.16.0=pyhd3eb1b0_1
31 | - sqlite=3.41.2=h5eee18b_0
32 | - stack_data=0.2.0=pyhd3eb1b0_0
33 | - tk=8.6.12=h1ccaba5_0
34 | - traitlets=5.7.1=py38h06a4308_0
35 | - typing_extensions=4.7.1=py38h06a4308_0
36 | - wheel=0.41.2=py38h06a4308_0
37 | - xz=5.4.2=h5eee18b_0
38 | - zlib=1.2.13=h5eee18b_0
--------------------------------------------------------------------------------
/figs/teaser.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jike5/P-MapNet/b8b4cf2295ee75826046eef9cfa12b107fb43619/figs/teaser.jpg
--------------------------------------------------------------------------------
/icon/car.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jike5/P-MapNet/b8b4cf2295ee75826046eef9cfa12b107fb43619/icon/car.png
--------------------------------------------------------------------------------
/icon/car_gray.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jike5/P-MapNet/b8b4cf2295ee75826046eef9cfa12b107fb43619/icon/car_gray.png
--------------------------------------------------------------------------------
/model/__init__.py:
--------------------------------------------------------------------------------
1 | from .hdmapnet import HDMapNet
2 | from .lift_splat import LiftSplat
3 | from .pmapnet_sd import PMapNet_SD
4 | from .pmapnet_hd import PMapNet_HD, PMapNet_HD16, PMapNet_HD32
5 | from .utils.map_mae_head import vit_base_patch8, vit_base_patch16, vit_base_patch32
6 |
7 | def get_model(cfg, data_conf, instance_seg=True, embedded_dim=16, direction_pred=True, angle_class=36):
8 | patch_h = data_conf['ybound'][1] - data_conf['ybound'][0]
9 | patch_w = data_conf['xbound'][1] - data_conf['xbound'][0]
10 | canvas_h = int(patch_h / data_conf['ybound'][2])
11 | canvas_w = int(patch_w / data_conf['xbound'][2])
12 |
13 | method = cfg.model
14 | if "dataset" in cfg:
15 | if cfg.dataset == 'av2':
16 | data_conf.update({"num_cams":7})
17 |
18 | if method == 'lift_splat':
19 | model = LiftSplat(data_conf, instance_seg=instance_seg, embedded_dim=embedded_dim)
20 |
21 | # HDMapNet model
22 | elif method == 'HDMapNet_cam':
23 | model = HDMapNet(data_conf, instance_seg=instance_seg, embedded_dim=embedded_dim, direction_pred=direction_pred, direction_dim=angle_class, lidar=False)
24 | elif method == 'HDMapNet_fusion':
25 | model = HDMapNet(data_conf, instance_seg=instance_seg, embedded_dim=embedded_dim, direction_pred=direction_pred, direction_dim=angle_class, lidar=True)
26 |
27 | # P-MapNet sd prior model
28 | elif method == 'pmapnet_sd':
29 | model = PMapNet_SD(data_conf, instance_seg=instance_seg, embedded_dim=embedded_dim, direction_pred=direction_pred, direction_dim=angle_class, lidar=True)
30 | elif method == 'pmapnet_sd_cam':
31 | model = PMapNet_SD(data_conf, instance_seg=instance_seg, embedded_dim=embedded_dim, direction_pred=direction_pred, direction_dim=angle_class, lidar=False)
32 |
33 | # P-MapNet hd prior model
34 | elif method == 'pmapnet_hd':
35 | model = PMapNet_HD(data_conf, instance_seg=instance_seg, embedded_dim=embedded_dim, direction_pred=direction_pred, direction_dim=angle_class, lidar=True)
36 | elif method == 'pmapnet_hd16':
37 | model = PMapNet_HD16(data_conf, instance_seg=instance_seg, embedded_dim=embedded_dim, direction_pred=direction_pred, direction_dim=angle_class, lidar=True)
38 | elif method == 'pmapnet_hd32':
39 | model = PMapNet_HD32(data_conf, instance_seg=instance_seg, embedded_dim=embedded_dim, direction_pred=direction_pred, direction_dim=angle_class, lidar=True)
40 | elif method == 'pmapnet_hd_cam':
41 | model = PMapNet_HD(data_conf, instance_seg=instance_seg, embedded_dim=embedded_dim, direction_pred=direction_pred, direction_dim=angle_class, lidar=False)
42 | elif method == 'pmapnet_hd_cam16':
43 | model = PMapNet_HD16(data_conf, instance_seg=instance_seg, embedded_dim=embedded_dim, direction_pred=direction_pred, direction_dim=angle_class, lidar=False)
44 |
45 | # P-MapNet hd pretrain model
46 | elif method == "hdmapnet_pretrain":
47 | model = vit_base_patch8(data_conf=data_conf, instance_seg=instance_seg, embedded_dim=embedded_dim, direction_pred=direction_pred, direction_dim=angle_class, lidar=True, img_size=(canvas_h, canvas_w))
48 | elif method == "hdmapnet_pretrain16":
49 | model = vit_base_patch16(data_conf=data_conf, instance_seg=instance_seg, embedded_dim=embedded_dim, direction_pred=direction_pred, direction_dim=angle_class, lidar=True, img_size=(canvas_h, canvas_w))
50 | elif method == "hdmapnet_pretrain32":
51 | model = vit_base_patch32(data_conf=data_conf, instance_seg=instance_seg, embedded_dim=embedded_dim, direction_pred=direction_pred, direction_dim=angle_class, lidar=True, img_size=(canvas_h, canvas_w))
52 | else:
53 | raise NotImplementedError
54 |
55 | return model
56 |
--------------------------------------------------------------------------------
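A hedged sketch of calling the `get_model` factory above. The `Cfg` helper, the bound/resolution values, and the image size are placeholders (a 60 m x 30 m grid at 0.15 m is assumed); real runs pass an mmcv-style config, and building the camera encoder downloads EfficientNet-B0 weights.

```python
from model import get_model  # assumed import, run from the repo root

class Cfg(dict):
    """Tiny stand-in for the real config: supports cfg.model and "key" in cfg."""
    __getattr__ = dict.get

cfg = Cfg(model='pmapnet_sd')
data_conf = {
    'xbound': [-30.0, 30.0, 0.15],   # placeholder 60 m x 30 m BEV grid
    'ybound': [-15.0, 15.0, 0.15],
    'zbound': [-10.0, 10.0, 20.0],
    'image_size': (128, 352),
    'num_channels': 4,
}

net = get_model(cfg, data_conf, instance_seg=True, embedded_dim=16,
                direction_pred=True, angle_class=36)
print(sum(p.numel() for p in net.parameters()), 'parameters')
```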
/model/hdmapnet.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 |
4 | from .utils.homography import bilinear_sampler, IPM
5 | from .utils.utils import plane_grid_2d, get_rot_2d, cam_to_pixel
6 | from .utils.pointpillar import PointPillarEncoder
7 | from .utils.base import CamEncode, BevEncode
8 | from data_osm.utils import gen_dx_bx
9 |
10 |
11 | class ViewTransformation(nn.Module):
12 | def __init__(self, fv_size, bv_size, n_views=6):
13 | super(ViewTransformation, self).__init__()
14 | self.n_views = n_views
15 | self.hw_mat = []
16 | self.bv_size = bv_size
17 | fv_dim = fv_size[0] * fv_size[1]
18 | bv_dim = bv_size[0] * bv_size[1]
19 | for i in range(self.n_views):
20 | fc_transform = nn.Sequential(
21 | nn.Linear(fv_dim, bv_dim),
22 | nn.ReLU(),
23 | nn.Linear(bv_dim, bv_dim),
24 | nn.ReLU()
25 | )
26 | self.hw_mat.append(fc_transform)
27 | self.hw_mat = nn.ModuleList(self.hw_mat)
28 |
29 | def forward(self, feat):
30 | B, N, C, H, W = feat.shape
31 | feat = feat.view(B, N, C, H*W)
32 | outputs = []
33 | for i in range(N):
34 | output = self.hw_mat[i](feat[:, i]).view(B, C, self.bv_size[0], self.bv_size[1])
35 | outputs.append(output)
36 | outputs = torch.stack(outputs, 1)
37 | return outputs
38 |
39 |
40 | class HDMapNet(nn.Module):
41 | def __init__(self, data_conf, instance_seg=True, embedded_dim=16, direction_pred=True, direction_dim=36, lidar=False):
42 | super(HDMapNet, self).__init__()
43 | self.camC = 64
44 | self.downsample = 16
45 |
46 | dx, bx, nx = gen_dx_bx(data_conf['xbound'], data_conf['ybound'], data_conf['zbound'])
47 | final_H, final_W = nx[1].item(), nx[0].item()
48 |
49 | self.camencode = CamEncode(self.camC)
50 | fv_size = (data_conf['image_size'][0]//self.downsample, data_conf['image_size'][1]//self.downsample)
51 | bv_size = (final_H//5, final_W//5)
52 | num_cams = data_conf.get('num_cams', 6)
53 | print("num_cams: ", num_cams)
54 | self.view_fusion = ViewTransformation(fv_size=fv_size, bv_size=bv_size, n_views=num_cams)
55 |
56 | res_x = bv_size[1] * 3 // 4
57 | ipm_xbound = [-res_x, res_x, 4*res_x/final_W]
58 | ipm_ybound = [-res_x/2, res_x/2, 2*res_x/final_H]
59 | self.ipm = IPM(ipm_xbound, ipm_ybound, N=num_cams, C=self.camC, extrinsic=True)
60 | self.up_sampler = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
61 | # self.up_sampler = nn.Upsample(scale_factor=5, mode='bilinear', align_corners=True)
62 |
63 | self.lidar = lidar
64 | lidar_dim = 128
65 | if lidar:
66 | self.pp = PointPillarEncoder(lidar_dim, data_conf['xbound'], data_conf['ybound'], data_conf['zbound'])
67 | self.bevencode = BevEncode(inC=self.camC+lidar_dim, outC=data_conf['num_channels'], instance_seg=instance_seg, embedded_dim=embedded_dim, direction_pred=direction_pred, direction_dim=direction_dim+1)
68 | else:
69 | self.bevencode = BevEncode(inC=self.camC, outC=data_conf['num_channels'], instance_seg=instance_seg, embedded_dim=embedded_dim, direction_pred=direction_pred, direction_dim=direction_dim+1)
70 |
71 | def get_Ks_RTs_and_post_RTs(self, intrins, rots, trans, post_rots, post_trans):
72 | B, N, _, _ = intrins.shape
73 | Ks = torch.eye(4, device=intrins.device).view(1, 1, 4, 4).repeat(B, N, 1, 1)
74 |
75 | Rs = torch.eye(4, device=rots.device).view(1, 1, 4, 4).repeat(B, N, 1, 1)
76 | Rs[:, :, :3, :3] = rots.transpose(-1, -2).contiguous()
77 | Ts = torch.eye(4, device=trans.device).view(1, 1, 4, 4).repeat(B, N, 1, 1)
78 | Ts[:, :, :3, 3] = -trans
79 | RTs = Rs @ Ts
80 |
81 | post_RTs = None
82 |
83 | return Ks, RTs, post_RTs
84 |
85 | def get_cam_feats(self, x):
86 | B, N, C, imH, imW = x.shape
87 | x = x.view(B*N, C, imH, imW)
88 | x = self.camencode(x)
89 | x = x.view(B, N, self.camC, imH//self.downsample, imW//self.downsample)
90 | return x
91 |
92 | def forward(self, img, trans, rots, intrins, post_trans, post_rots, lidar_data, lidar_mask, car_trans, yaw_pitch_roll, osm):
93 | x = self.get_cam_feats(img)
94 | # import pdb; pdb.set_trace()
95 | x = self.view_fusion(x)
96 | Ks, RTs, post_RTs = self.get_Ks_RTs_and_post_RTs(intrins, rots, trans, post_rots, post_trans)
97 | topdown = self.ipm(x, Ks, RTs, car_trans, yaw_pitch_roll, post_RTs)
98 | topdown = self.up_sampler(topdown)
99 | if self.lidar:
100 | lidar_feature = self.pp(lidar_data, lidar_mask)
101 | topdown = torch.cat([topdown, lidar_feature], dim=1)
102 | return self.bevencode(topdown)
103 |
--------------------------------------------------------------------------------
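A small numeric check of the extrinsic composition in `get_Ks_RTs_and_post_RTs` above: with a camera-to-ego pose (R, t), `RTs = Rs @ Ts` maps an ego-frame point x to the camera frame as R^T (x - t). The pose values below are made up for illustration.

```python
import torch

R = torch.tensor([[0., -1., 0.],
                  [1.,  0., 0.],
                  [0.,  0., 1.]])           # camera-to-ego rotation (90 deg yaw)
t = torch.tensor([1.0, 2.0, 0.5])           # camera position in the ego frame

Rs = torch.eye(4); Rs[:3, :3] = R.T         # same construction as the method
Ts = torch.eye(4); Ts[:3, 3] = -t
RT = Rs @ Ts

x_ego = torch.tensor([2.0, 2.0, 0.5, 1.0])  # homogeneous ego-frame point
print(RT @ x_ego)                           # tensor([ 0., -1.,  0.,  1.]) == R^T (x - t)
```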
/model/lift_splat.py:
--------------------------------------------------------------------------------
1 | """
2 | Copyright (C) 2020 NVIDIA Corporation. All rights reserved.
3 | Licensed under the NVIDIA Source Code License. See LICENSE at https://github.com/nv-tlabs/lift-splat-shoot.
4 | Authors: Jonah Philion and Sanja Fidler
5 | """
6 |
7 | import torch
8 | from torch import nn
9 |
10 | from data_osm.utils import gen_dx_bx
11 | from .utils.base import CamEncode, BevEncode
12 |
13 |
14 | def cumsum_trick(x, geom_feats, ranks):
15 | x = x.cumsum(0)
16 | kept = torch.ones(x.shape[0], device=x.device, dtype=torch.bool)
17 | kept[:-1] = (ranks[1:] != ranks[:-1])
18 |
19 | x, geom_feats = x[kept], geom_feats[kept]
20 | x = torch.cat((x[:1], x[1:] - x[:-1]))
21 |
22 | return x, geom_feats
23 |
24 |
25 | class QuickCumsum(torch.autograd.Function):
26 | @staticmethod
27 | def forward(ctx, x, geom_feats, ranks):
28 | x = x.cumsum(0)
29 | kept = torch.ones(x.shape[0], device=x.device, dtype=torch.bool)
30 | kept[:-1] = (ranks[1:] != ranks[:-1])
31 |
32 | x, geom_feats = x[kept], geom_feats[kept]
33 | x = torch.cat((x[:1], x[1:] - x[:-1]))
34 |
35 | # save kept for backward
36 | ctx.save_for_backward(kept)
37 |
38 | # no gradient for geom_feats
39 | ctx.mark_non_differentiable(geom_feats)
40 |
41 | return x, geom_feats
42 |
43 | @staticmethod
44 | def backward(ctx, gradx, gradgeom):
45 | kept, = ctx.saved_tensors
46 | back = torch.cumsum(kept, 0)
47 | back[kept] -= 1
48 |
49 | val = gradx[back]
50 |
51 | return val, None, None
52 |
53 |
54 | class LiftSplat(nn.Module):
55 | def __init__(self, grid_conf, data_aug_conf, outC, instance_seg, embedded_dim):
56 | super(LiftSplat, self).__init__()
57 | self.grid_conf = grid_conf
58 | self.data_aug_conf = data_aug_conf
59 |
60 | dx, bx, nx = gen_dx_bx(self.grid_conf['xbound'],
61 | self.grid_conf['ybound'],
62 | self.grid_conf['zbound'],
63 | )
64 | self.dx = nn.Parameter(dx, requires_grad=False)
65 | self.bx = nn.Parameter(bx, requires_grad=False)
66 | self.nx = nn.Parameter(nx, requires_grad=False)
67 |
68 | self.downsample = 16
69 | self.camC = 64
70 | self.frustum = self.create_frustum()
71 |         # D x H/downsample x W/downsample x 3
72 | self.D, _, _, _ = self.frustum.shape
73 | self.camencode = CamEncode(self.D, self.camC, self.downsample)
74 | self.bevencode = BevEncode(inC=self.camC, outC=outC, instance_seg=instance_seg, embedded_dim=embedded_dim)
75 |
76 | # toggle using QuickCumsum vs. autograd
77 | self.use_quickcumsum = True
78 |
79 | def create_frustum(self):
80 | # make grid in image plane
81 | ogfH, ogfW = self.data_aug_conf['final_dim']
82 | fH, fW = ogfH // self.downsample, ogfW // self.downsample
83 | ds = torch.arange(*self.grid_conf['dbound'], dtype=torch.float).view(-1, 1, 1).expand(-1, fH, fW)
84 | D, _, _ = ds.shape
85 | xs = torch.linspace(0, ogfW - 1, fW, dtype=torch.float).view(1, 1, fW).expand(D, fH, fW)
86 | ys = torch.linspace(0, ogfH - 1, fH, dtype=torch.float).view(1, fH, 1).expand(D, fH, fW)
87 |
88 | # D x H x W x 3
89 | frustum = torch.stack((xs, ys, ds), -1)
90 | return nn.Parameter(frustum, requires_grad=False)
91 |
92 | def get_geometry(self, rots, trans, intrins, post_rots, post_trans):
93 | """Determine the (x,y,z) locations (in the ego frame)
94 | of the points in the point cloud.
95 | Returns B x N x D x H/downsample x W/downsample x 3
96 | """
97 | B, N, _ = trans.shape
98 |
99 | # *undo* post-transformation
100 | # B x N x D x H x W x 3
101 | points = self.frustum - post_trans.view(B, N, 1, 1, 1, 3)
102 | points = torch.inverse(post_rots).view(B, N, 1, 1, 1, 3, 3).matmul(points.unsqueeze(-1))
103 |
104 | # cam_to_ego
105 | points = torch.cat((points[:, :, :, :, :, :2] * points[:, :, :, :, :, 2:3],
106 | points[:, :, :, :, :, 2:3]
107 | ), 5)
108 | combine = rots.matmul(torch.inverse(intrins))
109 | points = combine.view(B, N, 1, 1, 1, 3, 3).matmul(points).squeeze(-1)
110 | points += trans.view(B, N, 1, 1, 1, 3)
111 |
112 | return points
113 |
114 | def get_cam_feats(self, x):
115 | """Return B x N x D x H/downsample x W/downsample x C
116 | """
117 | B, N, C, imH, imW = x.shape
118 |
119 | x = x.view(B*N, C, imH, imW)
120 | x = self.camencode(x)
121 | x = x.view(B, N, self.camC, self.D, imH//self.downsample, imW//self.downsample)
122 | x = x.permute(0, 1, 3, 4, 5, 2)
123 |
124 | return x
125 |
126 | def voxel_pooling(self, geom_feats, x):
127 | B, N, D, H, W, C = x.shape
128 | Nprime = B*N*D*H*W
129 |
130 | # flatten x
131 | x = x.reshape(Nprime, C)
132 |
133 | # flatten indices
134 | # B x N x D x H/downsample x W/downsample x 3
135 | geom_feats = ((geom_feats - (self.bx - self.dx/2.)) / self.dx).long()
136 | geom_feats = geom_feats.view(Nprime, 3)
137 | batch_ix = torch.cat([torch.full([Nprime//B, 1], ix, device=x.device, dtype=torch.long) for ix in range(B)])
138 | geom_feats = torch.cat((geom_feats, batch_ix), 1) # x, y, z, b
139 |
140 | # filter out points that are outside box
141 | kept = (geom_feats[:, 0] >= 0) & (geom_feats[:, 0] < self.nx[0])\
142 | & (geom_feats[:, 1] >= 0) & (geom_feats[:, 1] < self.nx[1])\
143 | & (geom_feats[:, 2] >= 0) & (geom_feats[:, 2] < self.nx[2])
144 | x = x[kept]
145 | geom_feats = geom_feats[kept]
146 |
147 | # get tensors from the same voxel next to each other
148 | ranks = geom_feats[:, 0] * (self.nx[1] * self.nx[2] * B)\
149 | + geom_feats[:, 1] * (self.nx[2] * B)\
150 | + geom_feats[:, 2] * B\
151 | + geom_feats[:, 3]
152 | sorts = ranks.argsort()
153 | x, geom_feats, ranks = x[sorts], geom_feats[sorts], ranks[sorts]
154 |
155 | # cumsum trick
156 | if not self.use_quickcumsum:
157 | x, geom_feats = cumsum_trick(x, geom_feats, ranks)
158 | else:
159 | x, geom_feats = QuickCumsum.apply(x, geom_feats, ranks)
160 |
161 |         # griddify (B x C x Z x Y x X)
162 | final = torch.zeros((B, C, self.nx[2], self.nx[1], self.nx[0]), device=x.device)
163 | final[geom_feats[:, 3], :, geom_feats[:, 2], geom_feats[:, 1], geom_feats[:, 0]] = x
164 |
165 | # collapse Z
166 | final = torch.cat(final.unbind(dim=2), 1)
167 |
168 | return final
169 |
170 | def get_voxels(self, x, rots, trans, intrins, post_rots, post_trans):
171 | # B x N x D x H/downsample x W/downsample x 3: (x,y,z) locations (in the ego frame)
172 | geom = self.get_geometry(rots, trans, intrins, post_rots, post_trans)
173 | # B x N x D x H/downsample x W/downsample x C: cam feats
174 | x = self.get_cam_feats(x)
175 |
176 | x = self.voxel_pooling(geom, x)
177 |
178 | return x
179 |
180 | def forward(self, points, points_mask, x, rots, trans, intrins, post_rots, post_trans, translation, yaw_pitch_roll):
181 | x = self.get_voxels(x, rots, trans, intrins, post_rots, post_trans)
182 | x = self.bevencode(x)
183 | return x
184 |
--------------------------------------------------------------------------------
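A toy illustration of the cumulative-sum pooling used by `cumsum_trick`/`QuickCumsum` above: four lifted features fall into two voxels, and keeping the last cumsum entry per rank and differencing neighbours yields the per-voxel sums without a Python loop. The import path is assumed.

```python
import torch
from model.lift_splat import cumsum_trick  # assumed import path

x = torch.tensor([[1.], [2.], [3.], [4.]])     # per-point features, sorted by rank
geom_feats = torch.tensor([[0, 0, 0, 0],       # (x, y, z, batch) voxel indices
                           [0, 0, 0, 0],
                           [1, 0, 0, 0],
                           [1, 0, 0, 0]])
ranks = torch.tensor([0, 0, 1, 1])             # voxel id of each point

pooled, voxels = cumsum_trick(x, geom_feats, ranks)
print(pooled.squeeze(1))  # tensor([3., 7.]) -> (1+2) and (3+4)
print(voxels)             # one index row per occupied voxel
```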
/model/pmapnet_sd.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 | from .utils.homography import IPM
4 | from .utils.pointpillar import PointPillarEncoder
5 | from .utils.base import CamEncode, BevEncode
6 | from data_osm.utils import gen_dx_bx
7 | from .utils.sdmap_cross_attn import SDMapCrossAttn
8 | from .utils.position_encoding import PositionEmbeddingSine
9 |
10 | class ViewTransformation(nn.Module):
11 | def __init__(self, fv_size, bv_size, n_views=6):
12 | super(ViewTransformation, self).__init__()
13 | self.n_views = n_views
14 | self.hw_mat = []
15 | self.bv_size = bv_size
16 | fv_dim = fv_size[0] * fv_size[1]
17 | bv_dim = bv_size[0] * bv_size[1]
18 | for i in range(self.n_views):
19 | fc_transform = nn.Sequential(
20 | nn.Linear(fv_dim, bv_dim),
21 | nn.ReLU(),
22 | nn.Linear(bv_dim, bv_dim),
23 | nn.ReLU()
24 | )
25 | self.hw_mat.append(fc_transform)
26 | self.hw_mat = nn.ModuleList(self.hw_mat)
27 |
28 | def forward(self, feat):
29 | B, N, C, H, W = feat.shape
30 | feat = feat.view(B, N, C, H*W)
31 | outputs = []
32 | for i in range(N):
33 | output = self.hw_mat[i](feat[:, i]).view(B, C, self.bv_size[0], self.bv_size[1])
34 | outputs.append(output)
35 | outputs = torch.stack(outputs, 1)
36 | return outputs
37 |
38 |
39 | class PMapNet_SD(nn.Module):
40 | def __init__(self, data_conf, instance_seg=True, embedded_dim=16, direction_pred=True, direction_dim=36, lidar=False):
41 | super(PMapNet_SD, self).__init__()
42 |
43 | self.lidar = lidar
44 | self.camC = 64
45 | self.LiDARC = 128
46 | self.downsample = 16
47 |
48 | #cross attn params
49 | hidden_dim = 64
50 | self.position_embedding = PositionEmbeddingSine(hidden_dim//2, normalize=True)
51 |
52 | if lidar:
53 | feat_numchannels = self.camC+self.LiDARC
54 | self.pp = PointPillarEncoder(self.LiDARC, data_conf['xbound'], data_conf['ybound'], data_conf['zbound'])
55 | else:
56 | feat_numchannels = self.camC
57 |
58 | self.input_proj = nn.Conv2d(feat_numchannels, hidden_dim, kernel_size=1)
59 |
60 | # sdmap_cross_attn
61 | self.sdmap_crossattn = SDMapCrossAttn(d_model=hidden_dim, num_decoder_layers=2, dropout=0.1)
62 |
63 | dx, bx, nx = gen_dx_bx(data_conf['xbound'], data_conf['ybound'], data_conf['zbound'])
64 | final_H, final_W = nx[1].item(), nx[0].item()
65 |
66 | self.camencode = CamEncode(self.camC)
67 | fv_size = (data_conf['image_size'][0]//self.downsample, data_conf['image_size'][1]//self.downsample)
68 | bv_size = (final_H//5, final_W//5)
69 | num_cams = data_conf.get('num_cams', 6)
70 | # import pdb; pdb.set_trace()
71 | self.view_fusion = ViewTransformation(fv_size=fv_size, bv_size=bv_size, n_views=num_cams)
72 |
73 | res_x = bv_size[1] * 3 // 4
74 | ipm_xbound = [-res_x, res_x, 4*res_x/final_W]
75 | ipm_ybound = [-res_x/2, res_x/2, 2*res_x/final_H]
76 | self.ipm = IPM(ipm_xbound, ipm_ybound, N=num_cams, C=self.camC, extrinsic=True)
77 | self.up_sampler = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
78 |
79 | self.pool = nn.AvgPool2d(kernel_size=10, stride=10)
80 | self.conv_osm = self.nn_Sequential(1, hidden_dim // 2, hidden_dim)
81 | self.conv_bev = self.nn_Sequential(feat_numchannels, feat_numchannels, feat_numchannels)
82 | self.conv_up = self.nn_Sequential_Transpose(hidden_dim, feat_numchannels, feat_numchannels)
83 |
84 | self.bevencode = BevEncode(inC=feat_numchannels, outC=data_conf['num_channels'], instance_seg=instance_seg, embedded_dim=embedded_dim, direction_pred=direction_pred, direction_dim=direction_dim+1)
85 |
86 | def get_Ks_RTs_and_post_RTs(self, intrins, rots, trans, post_rots, post_trans):
87 | B, N, _, _ = intrins.shape
88 | Ks = torch.eye(4, device=intrins.device).view(1, 1, 4, 4).repeat(B, N, 1, 1)
89 |
90 | Rs = torch.eye(4, device=rots.device).view(1, 1, 4, 4).repeat(B, N, 1, 1)
91 | Rs[:, :, :3, :3] = rots.transpose(-1, -2).contiguous()
92 | Ts = torch.eye(4, device=trans.device).view(1, 1, 4, 4).repeat(B, N, 1, 1)
93 | Ts[:, :, :3, 3] = -trans
94 | RTs = Rs @ Ts
95 |
96 | post_RTs = None
97 |
98 | return Ks, RTs, post_RTs
99 |
100 | def get_cam_feats(self, x):
101 | B, N, C, imH, imW = x.shape
102 | x = x.view(B*N, C, imH, imW)
103 | x = self.camencode(x)
104 | x = x.view(B, N, self.camC, imH//self.downsample, imW//self.downsample)
105 | return x
106 |
107 | def nn_Sequential(self, in_dim=192, mid_dim=192, out_dim=192):
108 | return nn.Sequential(
109 | nn.Conv2d(in_dim, out_channels=mid_dim, kernel_size=4, stride=2, padding=1),
110 | nn.ReLU(),
111 | nn.Conv2d(mid_dim, out_channels=out_dim, kernel_size=4, stride=2, padding=1),
112 | nn.ReLU(),
113 | nn.Conv2d(out_dim, out_dim, kernel_size=4, stride=2, padding=1, bias=False),
114 | nn.BatchNorm2d(out_dim),
115 | nn.ReLU(inplace=True),
116 | )
117 |
118 | def nn_Sequential_Transpose(self, in_dim=192, mid_dim=192, out_dim=192):
119 | return nn.Sequential(
120 | nn.ConvTranspose2d(in_dim, out_channels=mid_dim, kernel_size=4, stride=2, padding=1),
121 | nn.ReLU(),
122 | nn.ConvTranspose2d(mid_dim, out_channels=out_dim, kernel_size=4, stride=2, padding=1),
123 | nn.ReLU(),
124 | nn.ConvTranspose2d(out_dim, out_dim, kernel_size=4, stride=2, padding=1, bias=False),
125 | nn.BatchNorm2d(out_dim),
126 | nn.ReLU(inplace=True),
127 | )
128 | def forward(self, img, trans, rots, intrins, post_trans, post_rots, lidar_data, lidar_mask, car_trans, yaw_pitch_roll, osm):
129 | x = self.get_cam_feats(img)
130 | # import pdb; pdb.set_trace()
131 | x = self.view_fusion(x)
132 | Ks, RTs, post_RTs = self.get_Ks_RTs_and_post_RTs(intrins, rots, trans, post_rots, post_trans)
133 | topdown = self.ipm(x, Ks, RTs, car_trans, yaw_pitch_roll, post_RTs)
134 | topdown = self.up_sampler(topdown)
135 | if self.lidar:
136 | lidar_feature = self.pp(lidar_data, lidar_mask)
137 | topdown = torch.cat([topdown, lidar_feature], dim=1)
138 |
139 | bev_small = self.conv_bev(topdown)
140 |
141 | conv_osm = self.conv_osm(osm)
142 |
143 | bs,c,h,w = bev_small.shape
144 | self.mask = torch.zeros([1,h,w],dtype=torch.bool)
145 |
146 | pos = self.position_embedding(bev_small[-1], self.mask.to(bev_small.device)).to(bev_small.dtype)
147 | bs = bev_small.shape[0]
148 | pos = pos.repeat(bs, 1, 1, 1)
149 | bev_out = self.sdmap_crossattn(self.input_proj(bev_small), conv_osm, pos = pos)[0]
150 | bev_final = self.conv_up(bev_out)
151 | return self.bevencode(bev_final)
152 |
--------------------------------------------------------------------------------
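A shape sketch of the SD-prior fusion path above: the three stride-2 convolutions produced by `nn_Sequential` shrink the fused BEV grid by 8x before the cross-attention, and `nn_Sequential_Transpose` restores full resolution afterwards. A 200x400 canvas with 192 (= camC + LiDARC) channels is assumed; the real layer stacks also include ReLU/BatchNorm, which do not change the shapes.

```python
import torch
import torch.nn as nn

down = nn.Sequential(nn.Conv2d(192, 192, 4, 2, 1),
                     nn.Conv2d(192, 192, 4, 2, 1),
                     nn.Conv2d(192, 192, 4, 2, 1))
up = nn.Sequential(nn.ConvTranspose2d(192, 192, 4, 2, 1),
                   nn.ConvTranspose2d(192, 192, 4, 2, 1),
                   nn.ConvTranspose2d(192, 192, 4, 2, 1))

bev = torch.randn(1, 192, 200, 400)   # fused camera + LiDAR BEV features
small = down(bev)
print(small.shape)                    # torch.Size([1, 192, 25, 50])
print(up(small).shape)                # torch.Size([1, 192, 200, 400])
```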
/model/utils/VPN.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 | from .base import CamEncode, BevEncode
5 | from .pointpillar import PointPillarEncoder
6 |
7 |
8 | class TransformModule(nn.Module):
9 | def __init__(self, dim, num_view=6):
10 | super(TransformModule, self).__init__()
11 | self.num_view = num_view
12 | self.dim = dim
13 | self.mat_list = nn.ModuleList()
14 | for i in range(self.num_view):
15 | fc_transform = nn.Sequential(
16 | nn.Linear(dim * dim, dim * dim),
17 | nn.ReLU(),
18 | nn.Linear(dim * dim, dim * dim),
19 | nn.ReLU()
20 | )
21 | self.mat_list += [fc_transform]
22 |
23 | def forward(self, x):
24 | # shape x: B, V, C, H, W
25 | x = x.view(list(x.size()[:3]) + [self.dim * self.dim,])
26 | view_comb = self.mat_list[0](x[:, 0])
27 | for index in range(x.size(1))[1:]:
28 | view_comb += self.mat_list[index](x[:, index])
29 | view_comb = view_comb.view(list(view_comb.size()[:2]) + [self.dim, self.dim])
30 | return view_comb
31 |
32 |
33 | class VPNModel(nn.Module):
34 | def __init__(self, outC, camC=64, instance_seg=True, embedded_dim=16, extrinsic=False, lidar=False, xbound=None, ybound=None, zbound=None):
35 | super(VPNModel, self).__init__()
36 | self.camC = camC
37 | self.extrinsic = extrinsic
38 | self.downsample = 16
39 |
40 | self.camencode = CamEncode(camC)
41 | self.view_fusion = TransformModule(dim=(8, 22))
42 | self.up_sampler = nn.Upsample(size=(200, 400), mode='bilinear', align_corners=True)
43 | self.lidar = lidar
44 | if lidar:
45 | self.pp = PointPillarEncoder(128, xbound, ybound, zbound)
46 | self.bevencode = BevEncode(inC=camC+128, outC=outC, instance_seg=instance_seg, embedded_dim=embedded_dim)
47 | else:
48 | self.bevencode = BevEncode(inC=camC, outC=outC, instance_seg=instance_seg, embedded_dim=embedded_dim)
49 |
50 |
51 | def get_Ks_RTs_and_post_RTs(self, intrins, rots, trans, post_rots, post_trans):
52 | B, N, _, _ = intrins.shape
53 | Ks = torch.eye(4, device=intrins.device).view(1, 1, 4, 4).repeat(B, N, 1, 1)
54 | # Ks[:, :, :3, :3] = intrins
55 |
56 | Rs = torch.eye(4, device=rots.device).view(1, 1, 4, 4).repeat(B, N, 1, 1)
57 | Rs[:, :, :3, :3] = rots.transpose(-1, -2).contiguous()
58 | Ts = torch.eye(4, device=trans.device).view(1, 1, 4, 4).repeat(B, N, 1, 1)
59 | Ts[:, :, :3, 3] = -trans
60 | RTs = Rs @ Ts
61 |
62 | post_RTs = None
63 |
64 | return Ks, RTs, post_RTs
65 |
66 | def get_cam_feats(self, x):
67 | """Return B x N x D x H/downsample x W/downsample x C
68 | """
69 | B, N, C, imH, imW = x.shape
70 |
71 | x = x.view(B*N, C, imH, imW)
72 | x = self.camencode(x)
73 | x = x.view(B, N, self.camC, imH//self.downsample, imW//self.downsample)
74 | return x
75 |
76 | def forward(self, points, points_mask, x, rots, trans, intrins, post_rots, post_trans, translation, yaw_pitch_roll):
77 | x = self.get_cam_feats(x)
78 | x = self.view_fusion(x)
79 | topdown = x.mean(1)
80 | topdown = self.up_sampler(topdown)
81 | if self.lidar:
82 | lidar_feature = self.pp(points, points_mask)
83 | topdown = torch.cat([topdown, lidar_feature], dim=1)
84 | return self.bevencode(topdown)
85 |
--------------------------------------------------------------------------------
/model/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jike5/P-MapNet/b8b4cf2295ee75826046eef9cfa12b107fb43619/model/utils/__init__.py
--------------------------------------------------------------------------------
/model/utils/base.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 | from efficientnet_pytorch import EfficientNet
5 | from torchvision.models.resnet import resnet18,resnet50
6 |
7 | class Up(nn.Module):
8 | def __init__(self, in_channels, out_channels, scale_factor=2):
9 | super().__init__()
10 |
11 | self.up = nn.Upsample(scale_factor=scale_factor, mode='bilinear',
12 | align_corners=True)
13 |
14 | self.conv = nn.Sequential(
15 | nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, bias=False),
16 | nn.BatchNorm2d(out_channels),
17 | nn.ReLU(inplace=True),
18 | nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1, bias=False),
19 | nn.BatchNorm2d(out_channels),
20 | nn.ReLU(inplace=True)
21 | )
22 |
23 | def forward(self, x1, x2):
24 | x1 = self.up(x1)
25 | x1 = torch.cat([x2, x1], dim=1)
26 | return self.conv(x1)
27 |
28 | class CamEncode(nn.Module):
29 | def __init__(self, C):
30 | super(CamEncode, self).__init__()
31 | self.C = C
32 |
33 | self.trunk = EfficientNet.from_pretrained("efficientnet-b0")
34 | self.up1 = Up(320+112, self.C)
35 |
36 | def get_eff_depth(self, x):
37 | # adapted from https://github.com/lukemelas/EfficientNet-PyTorch/blob/master/efficientnet_pytorch/model.py#L231
38 | endpoints = dict()
39 |
40 | # Stem
41 | x = self.trunk._swish(self.trunk._bn0(self.trunk._conv_stem(x)))
42 | prev_x = x
43 |
44 | # Blocks
45 | for idx, block in enumerate(self.trunk._blocks):
46 | drop_connect_rate = self.trunk._global_params.drop_connect_rate
47 | if drop_connect_rate:
48 | drop_connect_rate *= float(idx) / len(self.trunk._blocks) # scale drop connect_rate
49 | x = block(x, drop_connect_rate=drop_connect_rate)
50 | if prev_x.size(2) > x.size(2):
51 | endpoints['reduction_{}'.format(len(endpoints)+1)] = prev_x
52 | prev_x = x
53 |
54 | # Head
55 | endpoints['reduction_{}'.format(len(endpoints)+1)] = x
56 | x = self.up1(endpoints['reduction_5'], endpoints['reduction_4'])
57 | return x
58 |
59 | def forward(self, x):
60 | return self.get_eff_depth(x)
61 |
62 | class maeDecode(nn.Module):
63 | def __init__(self, inC, outC, instance_seg=True, embedded_dim=16, direction_pred=True, direction_dim=37):
64 | super(maeDecode, self).__init__()
65 | trunk = resnet50(pretrained=False, zero_init_residual=True)
66 | self.conv1 = nn.Conv2d(inC, 64, kernel_size=7, stride=2, padding=3, bias=False)
67 |
68 | self.bn1 = trunk.bn1
69 | self.relu = trunk.relu
70 |
71 | self.layer1 = trunk.layer1
72 | self.layer2 = trunk.layer2
73 | self.layer3 = trunk.layer3
74 | # self.res50.conv1(),
75 | # self.res50.bn1(),
76 | # self.res50.relu(),
77 | # self.res50.maxpool(),
78 | # self.res50.layer1(),
79 | # self.res50.layer2(),
80 | # self.res50.layer3(),
81 | # self.res50.layer4(),
82 | # self.res50.avgpool()
83 | self.up1 = Up(1024 + 256, 256, scale_factor=4)
84 | self.up2 = nn.Sequential(
85 | nn.Upsample(scale_factor=2, mode='bilinear',
86 | align_corners=True),
87 | nn.Conv2d(256, 128, kernel_size=3, padding=1, bias=False),
88 | nn.BatchNorm2d(128),
89 | nn.ReLU(inplace=True),
90 | nn.Conv2d(128, outC, kernel_size=1, padding=0),
91 | )
92 |
93 | self.instance_seg = instance_seg
94 | if instance_seg:
95 | self.up1_embedded = Up(1024 + 256, 256, scale_factor=4)
96 | self.up2_embedded = nn.Sequential(
97 | nn.Upsample(scale_factor=2, mode='bilinear',
98 | align_corners=True),
99 | nn.Conv2d(256, 128, kernel_size=3, padding=1, bias=False),
100 | nn.BatchNorm2d(128),
101 | nn.ReLU(inplace=True),
102 | nn.Conv2d(128, embedded_dim, kernel_size=1, padding=0),
103 | )
104 |
105 | self.direction_pred = direction_pred
106 | if direction_pred:
107 | # self.up1_direction = Up(64 + 256, 256, scale_factor=4)
108 | self.up1_direction = Up(1024 + 256, 256, scale_factor=4)
109 | self.up2_direction = nn.Sequential(
110 | nn.Upsample(scale_factor=2, mode='bilinear',
111 | align_corners=True),
112 | nn.Conv2d(256, 128, kernel_size=3, padding=1, bias=False),
113 | nn.BatchNorm2d(128),
114 | nn.ReLU(inplace=True),
115 | nn.Conv2d(128, direction_dim, kernel_size=1, padding=0),
116 | )
117 |
118 | def forward(self, x): # x: torch.Size([bs, 128, 200, 400])
119 | x = self.conv1(x) # x: torch.Size([bs, 64, 100, 200])
120 | x = self.bn1(x)
121 | x = self.relu(x)
122 |
123 | x1 = self.layer1(x) # x1: torch.Size([bs, 256, 100, 200])
124 | x = self.layer2(x1) # x: torch.Size([bs, 512, 100, 200])
125 | x2 = self.layer3(x) # x2: torch.Size([bs, 1024, 25, 50])
126 |
127 | x = self.up1(x2, x1) # x: torch.Size([bs, 256, 100, 200])
128 | x = self.up2(x) # x: torch.Size([bs, 4, 200, 400])
129 |
130 | if self.instance_seg:
131 | x_embedded = self.up1_embedded(x2, x1)
132 | x_embedded = self.up2_embedded(x_embedded)
133 | else:
134 | x_embedded = None
135 |
136 | if self.direction_pred:
137 |             x_direction = self.up1_direction(x2, x1)
138 | x_direction = self.up2_direction(x_direction)
139 | else:
140 | x_direction = None
141 |
142 | return x, x_embedded, x_direction
143 |
144 | class BevEncode(nn.Module):
145 | def __init__(self, inC, outC, instance_seg=True, embedded_dim=16, direction_pred=True, direction_dim=37):
146 | super(BevEncode, self).__init__()
147 | trunk = resnet18(pretrained=False, zero_init_residual=True)
148 | self.conv1 = nn.Conv2d(inC, 64, kernel_size=7, stride=2, padding=3, bias=False)
149 | self.bn1 = trunk.bn1
150 | self.relu = trunk.relu
151 |
152 | self.layer1 = trunk.layer1
153 | self.layer2 = trunk.layer2
154 | self.layer3 = trunk.layer3
155 |
156 | self.up1 = Up(64 + 256, 256, scale_factor=4)
157 | self.up2 = nn.Sequential(
158 | nn.Upsample(scale_factor=2, mode='bilinear',
159 | align_corners=True),
160 | nn.Conv2d(256, 128, kernel_size=3, padding=1, bias=False),
161 | nn.BatchNorm2d(128),
162 | nn.ReLU(inplace=True),
163 | nn.Conv2d(128, outC, kernel_size=1, padding=0),
164 | )
165 |
166 | self.instance_seg = instance_seg
167 | if instance_seg:
168 | self.up1_embedded = Up(64 + 256, 256, scale_factor=4)
169 | self.up2_embedded = nn.Sequential(
170 | nn.Upsample(scale_factor=2, mode='bilinear',
171 | align_corners=True),
172 | nn.Conv2d(256, 128, kernel_size=3, padding=1, bias=False),
173 | nn.BatchNorm2d(128),
174 | nn.ReLU(inplace=True),
175 | nn.Conv2d(128, embedded_dim, kernel_size=1, padding=0),
176 | )
177 |
178 | self.direction_pred = direction_pred
179 | if direction_pred:
180 | self.up1_direction = Up(64 + 256, 256, scale_factor=4)
181 | self.up2_direction = nn.Sequential(
182 | nn.Upsample(scale_factor=2, mode='bilinear',
183 | align_corners=True),
184 | nn.Conv2d(256, 128, kernel_size=3, padding=1, bias=False),
185 | nn.BatchNorm2d(128),
186 | nn.ReLU(inplace=True),
187 | nn.Conv2d(128, direction_dim, kernel_size=1, padding=0),
188 | )
189 |
190 | def forward(self, x):
191 | x = self.conv1(x)
192 | x = self.bn1(x)
193 | x = self.relu(x)
194 |
195 | x1 = self.layer1(x)
196 | x = self.layer2(x1)
197 | x2 = self.layer3(x)
198 |
199 | x = self.up1(x2, x1)
200 | x = self.up2(x)
201 |
202 | if self.instance_seg:
203 | x_embedded = self.up1_embedded(x2, x1)
204 | x_embedded = self.up2_embedded(x_embedded)
205 | else:
206 | x_embedded = None
207 |
208 | if self.direction_pred:
209 | x_direction = self.up1_direction(x2, x1)
210 | x_direction = self.up2_direction(x_direction)
211 | else:
212 | x_direction = None
213 |
214 | return x, x_embedded, x_direction
215 |
216 | class BevEncode_bd(nn.Module):
217 | def __init__(self, inC, outC, instance_seg=True, embedded_dim=16, direction_pred=True, direction_dim=37):
218 | super(BevEncode_bd, self).__init__()
219 | trunk = resnet18(pretrained=False, zero_init_residual=True)
220 | self.conv1 = nn.Conv2d(inC, 64, kernel_size=7, stride=2, padding=3, bias=False)
221 | self.bn1 = trunk.bn1
222 | self.relu = trunk.relu
223 |
224 | self.layer1 = trunk.layer1
225 | self.layer2 = trunk.layer2
226 | self.layer3 = trunk.layer3
227 |
228 | self.up1 = Up(64 + 256, 256, scale_factor=4)
229 | self.up2 = nn.Sequential(
230 | nn.Upsample(scale_factor=2, mode='bilinear',
231 | align_corners=True),
232 | nn.Conv2d(256, 128, kernel_size=3, padding=1, bias=False),
233 | nn.BatchNorm2d(128),
234 | nn.ReLU(inplace=True),
235 | nn.Conv2d(128, outC, kernel_size=1, padding=0),
236 | )
237 |
238 |
239 | def forward(self, x):
240 | x = self.conv1(x)
241 | x = self.bn1(x)
242 | x = self.relu(x)
243 |
244 | x1 = self.layer1(x)
245 | x = self.layer2(x1)
246 | x2 = self.layer3(x)
247 |
248 | x = self.up1(x2, x1)
249 | x = self.up2(x)
250 |
251 | return x
252 |
253 |
--------------------------------------------------------------------------------
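A quick shape check of the `BevEncode` decoder above on a 200x400 BEV canvas, assuming the import path `model.utils.base` and that the repo's dependencies are installed; the three ResNet-18 stages plus the two `Up` blocks return full-resolution segmentation, instance-embedding, and direction outputs.

```python
import torch
from model.utils.base import BevEncode  # assumed import path

decoder = BevEncode(inC=64, outC=4, instance_seg=True, embedded_dim=16,
                    direction_pred=True, direction_dim=37)
x = torch.randn(1, 64, 200, 400)        # (B, inC, H, W) BEV feature map
seg, embedding, direction = decoder(x)

print(seg.shape)        # torch.Size([1, 4, 200, 400])
print(embedding.shape)  # torch.Size([1, 16, 200, 400])
print(direction.shape)  # torch.Size([1, 37, 200, 400])
```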
/model/utils/map_mae_head.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 | from functools import partial
4 | import timm.models.vision_transformer
5 | from .base import CamEncode, BevEncode
6 |
7 | class ConvBNReLU(nn.Module):
8 | def __init__(self, in_channel, out_channel, kernel_size=3, stride=1, padding=1,
9 | dilation=1, groups=1, bias=False, has_relu=True):
10 | super().__init__()
11 | self.conv = nn.Conv2d(in_channels=in_channel, out_channels=out_channel, kernel_size=kernel_size,
12 | stride=stride, padding=padding, dilation=dilation, groups=groups, bias=bias)
13 | self.bn = nn.BatchNorm2d(out_channel)
14 | self.relu = nn.ReLU(inplace=True)
15 | self.has_relu = has_relu
16 |
17 | def forward(self, x):
18 | feat = self.conv(x)
19 | feat = self.bn(feat)
20 | if self.has_relu:
21 | return self.relu(feat)
22 | return feat
23 |
24 | class PatchEmbed(nn.Module):
25 | """ Image to Patch Embedding
26 | """
27 | def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768):
28 | super().__init__()
29 | if isinstance(img_size, int):
30 | img_size = (img_size, img_size)
31 | if isinstance(patch_size, int):
32 | patch_size = (patch_size, patch_size)
33 | num_patches = (img_size[1] // patch_size[1]) * (img_size[0] // patch_size[0])
34 | self.img_size = img_size
35 | self.patch_size = patch_size
36 | self.num_patches = num_patches
37 |
38 | self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size)
39 |
40 | def forward(self, x):
41 | B, C, H, W = x.shape
42 | # FIXME look at relaxing size constraints
43 | assert H == self.img_size[0] and W == self.img_size[1], \
44 | f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})."
45 | x = self.proj(x)
46 | return x
47 |
48 |
49 | class MapVisionTransformer(timm.models.vision_transformer.VisionTransformer):
50 | """ Vision Transformer with support for global average pooling
51 | """
52 | def __init__(self,
53 | data_conf=None,
54 | instance_seg=True,
55 | embedded_dim=16,
56 | direction_pred=True,
57 | direction_dim=36,
58 | lidar=None,
59 | **kwargs):
60 | super(MapVisionTransformer, self).__init__(**kwargs)
61 | self.bev_head = BevEncode(inC=kwargs['embed_dim'],
62 | outC=data_conf['num_channels'],
63 | instance_seg=instance_seg,
64 | embedded_dim=embedded_dim,
65 | direction_pred=direction_pred,
66 | direction_dim=direction_dim+1)
67 | patch_h = data_conf['ybound'][1] - data_conf['ybound'][0] # 30.0
68 | patch_w = data_conf['xbound'][1] - data_conf['xbound'][0] # 60.0
69 | self.canvas_h = int(patch_h / data_conf['ybound'][2]) # 200
70 | self.canvas_w = int(patch_w / data_conf['xbound'][2]) # 400
71 | self.conv_up = nn.Sequential(
72 | nn.ConvTranspose2d(kwargs['embed_dim'], kwargs['embed_dim'], kernel_size=4, stride=2, padding=1),
73 | nn.ConvTranspose2d(kwargs['embed_dim'], kwargs['embed_dim'], kernel_size=4, stride=2, padding=1),
74 | nn.Upsample(size=(self.canvas_h, self.canvas_w), mode='bilinear', align_corners=False),
75 | ConvBNReLU(kwargs['embed_dim'], kwargs['embed_dim'], 1, stride=1, padding=0, has_relu=False),
76 | )
77 | self.map_patch_embed = PatchEmbed(kwargs['img_size'], kwargs['patch_size'], kwargs['in_chans'], kwargs['embed_dim'])
78 |
79 | def forward_features(self, x):
80 | B = x.shape[0] # (b,c,h,w)
81 | # import pdb; pdb.set_trace()
82 | x = self.map_patch_embed(x) # (b,dim,12,25)
83 |
84 | _, dim, h, w = x.shape
85 | x = x.flatten(2).transpose(1, 2) # (b,n,dim)
86 | x = x + self.pos_embed[:, :-1]  # add the first num_patches positional embeddings (no cls token is prepended here)
87 | x = self.pos_drop(x)
88 |
89 | for blk in self.blocks:
90 | x = blk(x)
91 |
92 | x = self.norm(x)
93 | outcome = x.permute(0,2,1).reshape(B, dim, h, w)
94 | outcome = self.conv_up(outcome)
95 | return outcome
96 |
97 | def forward(self, x):
98 | x = self.forward_features(x)
99 | x = self.bev_head(x)
100 | return x
101 |
102 |
103 | def vit_base_patch8(**kwargs):
104 | model = MapVisionTransformer(
105 | patch_size=8,
106 | embed_dim=768,
107 | depth=12,
108 | num_heads=12,
109 | mlp_ratio=4,
110 | qkv_bias=True,
111 | in_chans=4,
112 | **kwargs)
113 | return model
114 |
115 | def vit_base_patch16(**kwargs):
116 | model = MapVisionTransformer(
117 | patch_size=16,
118 | embed_dim=768,
119 | depth=12,
120 | num_heads=12,
121 | mlp_ratio=4,
122 | qkv_bias=True,
123 | in_chans=4,
124 | **kwargs)
125 | return model
126 |
127 | def vit_base_patch32(**kwargs):
128 | model = MapVisionTransformer(
129 | patch_size=32,
130 | embed_dim=768,
131 | depth=12,
132 | num_heads=12,
133 | mlp_ratio=4,
134 | qkv_bias=True,
135 | in_chans=4,
136 | **kwargs)
137 | return model
--------------------------------------------------------------------------------
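Note on map_mae_head.py above — a minimal usage sketch, assuming illustrative bounds and an img_size of (96, 200) so that patch_size=8 yields the 12x25 patch grid mentioned in forward_features; none of these values are taken from the repo configs:

    import torch
    from model.utils.map_mae_head import vit_base_patch8

    data_conf = {
        'num_channels': 4,               # assumed NUM_CLASSES + 1
        'xbound': [-30.0, 30.0, 0.15],   # 60 m range / 0.15 m -> canvas width 400
        'ybound': [-15.0, 15.0, 0.15],   # 30 m range / 0.15 m -> canvas height 200
    }
    model = vit_base_patch8(data_conf=data_conf, img_size=(96, 200))
    masked_bev = torch.zeros(1, 4, 96, 200)             # (B, in_chans, H, W); must equal img_size
    semantic, embedding, direction = model(masked_bev)  # semantic: (1, num_channels, 200, 400) on the BEV canvas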
/model/utils/pointpillar.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch_scatter
4 |
5 | from .voxel import points_to_voxels
6 |
7 |
8 | class PillarBlock(nn.Module):
9 | def __init__(self, idims=64, dims=64, num_layers=1,
10 | stride=1):
11 | super(PillarBlock, self).__init__()
12 | layers = []
13 | self.idims = idims
14 | self.stride = stride
15 | for i in range(num_layers):
16 | layers.append(nn.Conv2d(self.idims, dims, 3, stride=self.stride,
17 | padding=1, bias=False))
18 | layers.append(nn.BatchNorm2d(dims))
19 | layers.append(nn.ReLU(inplace=True))
20 | self.idims = dims
21 | self.stride = 1
22 | self.layers = nn.Sequential(*layers)
23 |
24 | def forward(self, x):
25 | return self.layers(x)
26 |
27 |
28 | class PointNet(nn.Module):
29 | def __init__(self, idims=64, odims=64):
30 | super(PointNet, self).__init__()
31 | self.pointnet = nn.Sequential(
32 | nn.Conv1d(idims, odims, kernel_size=1, bias=False),
33 | nn.BatchNorm1d(odims),
34 | nn.ReLU(inplace=True)
35 | )
36 |
37 | def forward(self, points_feature, points_mask):
38 | batch_size, num_points, num_dims = points_feature.shape
39 | points_feature = points_feature.permute(0, 2, 1)
40 | mask = points_mask.view(batch_size, 1, num_points)
41 | return self.pointnet(points_feature) * mask
42 |
43 |
44 | class PointPillar(nn.Module):
45 | def __init__(self, C, xbound, ybound, zbound, embedded_dim=16, direction_dim=37):
46 | super(PointPillar, self).__init__()
47 | self.xbound = xbound
48 | self.ybound = ybound
49 | self.zbound = zbound
50 | self.embedded_dim = embedded_dim
51 | self.pn = PointNet(15, 64)
52 | self.block1 = PillarBlock(64, dims=64, num_layers=2, stride=1)
53 | self.block2 = PillarBlock(64, dims=128, num_layers=3, stride=2)
54 | self.block3 = PillarBlock(128, 256, num_layers=3, stride=2)
55 | self.up1 = nn.Sequential(
56 | nn.Conv2d(64, 64, 3, padding=1, bias=False),
57 | nn.BatchNorm2d(64),
58 | nn.ReLU(inplace=True)
59 | )
60 | self.up2 = nn.Sequential(
61 | nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True),
62 | nn.Conv2d(128, 128, 3, stride=1, padding=1, bias=False),
63 | nn.BatchNorm2d(128),
64 | nn.ReLU(inplace=True)
65 | )
66 | self.up3 = nn.Sequential(
67 | nn.Upsample(scale_factor=4, mode='bilinear', align_corners=True),
68 | nn.Conv2d(256, 256, 3, stride=1, padding=1, bias=False),
69 | nn.BatchNorm2d(256),
70 | nn.ReLU(inplace=True)
71 | )
72 | self.conv_out = nn.Sequential(
73 | nn.Conv2d(448, 256, 3, padding=1, bias=False),
74 | nn.BatchNorm2d(256),
75 | nn.ReLU(inplace=True),
76 | nn.Conv2d(256, 128, 3, padding=1, bias=False),
77 | nn.BatchNorm2d(128),
78 | nn.ReLU(inplace=True),
79 | nn.Conv2d(128, C, 1),
80 | )
81 | self.instance_conv_out = nn.Sequential(
82 | nn.Conv2d(448, 256, 3, padding=1, bias=False),
83 | nn.BatchNorm2d(256),
84 | nn.ReLU(inplace=True),
85 | nn.Conv2d(256, 128, 3, padding=1, bias=False),
86 | nn.BatchNorm2d(128),
87 | nn.ReLU(inplace=True),
88 | nn.Conv2d(128, embedded_dim, 1),
89 | )
90 | self.direction_conv_out = nn.Sequential(
91 | nn.Conv2d(448, 256, 3, padding=1, bias=False),
92 | nn.BatchNorm2d(256),
93 | nn.ReLU(inplace=True),
94 | nn.Conv2d(256, 128, 3, padding=1, bias=False),
95 | nn.BatchNorm2d(128),
96 | nn.ReLU(inplace=True),
97 | nn.Conv2d(128, direction_dim, 1),
98 | )
99 |
100 | def forward(self, points, points_mask,
101 | x, rots, trans, intrins, post_rots, post_trans, translation, yaw_pitch_roll):
102 | points_xyz = points[:, :, :3]
103 | points_feature = points[:, :, 3:]
104 | voxels = points_to_voxels(
105 | points_xyz, points_mask, self.xbound, self.ybound, self.zbound
106 | )
107 | points_feature = torch.cat(
108 | [points, # 5
109 | torch.unsqueeze(voxels['voxel_point_count'], dim=-1), # 1
110 | voxels['local_points_xyz'], # 3
111 | voxels['point_centroids'], # 3
112 | points_xyz - voxels['voxel_centers'], # 3
113 | ], dim=-1
114 | )
115 | points_feature = self.pn(points_feature, voxels['points_mask'])
116 | voxel_feature = torch_scatter.scatter_mean(
117 | points_feature,
118 | torch.unsqueeze(voxels['voxel_indices'], dim=1),
119 | dim=2,
120 | dim_size=voxels['num_voxels'])
121 | batch_size = points.size(0)
122 | voxel_feature = voxel_feature.view(batch_size, -1, voxels['grid_size'][0], voxels['grid_size'][1])
123 | voxel_feature1 = self.block1(voxel_feature)
124 | voxel_feature2 = self.block2(voxel_feature1)
125 | voxel_feature3 = self.block3(voxel_feature2)
126 | voxel_feature1 = self.up1(voxel_feature1)
127 | voxel_feature2 = self.up2(voxel_feature2)
128 | voxel_feature3 = self.up3(voxel_feature3)
129 | voxel_feature = torch.cat([voxel_feature1, voxel_feature2, voxel_feature3], dim=1)
130 | return self.conv_out(voxel_feature).transpose(3, 2), self.instance_conv_out(voxel_feature).transpose(3, 2), self.direction_conv_out(voxel_feature).transpose(3, 2)
131 |
132 |
133 | class PointPillarEncoder(nn.Module):
134 | def __init__(self, C, xbound, ybound, zbound):
135 | super(PointPillarEncoder, self).__init__()
136 | self.xbound = xbound
137 | self.ybound = ybound
138 | self.zbound = zbound
139 | self.pn = PointNet(15, 64)
140 | self.block1 = PillarBlock(64, dims=64, num_layers=2, stride=1)
141 | self.block2 = PillarBlock(64, dims=128, num_layers=3, stride=2)
142 | self.block3 = PillarBlock(128, 256, num_layers=3, stride=2)
143 | self.up1 = nn.Sequential(
144 | nn.Conv2d(64, 64, 3, padding=1, bias=False),
145 | nn.BatchNorm2d(64),
146 | nn.ReLU(inplace=True)
147 | )
148 | self.up2 = nn.Sequential(
149 | nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True),
150 | nn.Conv2d(128, 128, 3, stride=1, padding=1, bias=False),
151 | nn.BatchNorm2d(128),
152 | nn.ReLU(inplace=True)
153 | )
154 | self.up3 = nn.Sequential(
155 | nn.Upsample(scale_factor=4, mode='bilinear', align_corners=True),
156 | nn.Conv2d(256, 256, 3, stride=1, padding=1, bias=False),
157 | nn.BatchNorm2d(256),
158 | nn.ReLU(inplace=True)
159 | )
160 | self.conv_out = nn.Sequential(
161 | nn.Conv2d(448, 256, 3, padding=1, bias=False),
162 | nn.BatchNorm2d(256),
163 | nn.ReLU(inplace=True),
164 | nn.Conv2d(256, 128, 3, padding=1, bias=False),
165 | nn.BatchNorm2d(128),
166 | nn.ReLU(inplace=True),
167 | nn.Conv2d(128, C, 1),
168 | )
169 |
170 | def forward(self, points, points_mask):
171 | points_xyz = points[:, :, :3]
172 | points_feature = points[:, :, 3:]
173 | voxels = points_to_voxels(
174 | points_xyz, points_mask, self.xbound, self.ybound, self.zbound
175 | )
176 | points_feature = torch.cat(
177 | [points, # 5
178 | torch.unsqueeze(voxels['voxel_point_count'], dim=-1), # 1
179 | voxels['local_points_xyz'], # 3
180 | voxels['point_centroids'], # 3
181 | points_xyz - voxels['voxel_centers'], # 3
182 | ], dim=-1
183 | )
184 | points_feature = self.pn(points_feature, voxels['points_mask'])
185 | voxel_feature = torch_scatter.scatter_mean(
186 | points_feature,
187 | torch.unsqueeze(voxels['voxel_indices'], dim=1),
188 | dim=2,
189 | dim_size=voxels['num_voxels'])
190 | batch_size = points.size(0)
191 | voxel_feature = voxel_feature.view(batch_size, -1, voxels['grid_size'][0], voxels['grid_size'][1])
192 | voxel_feature1 = self.block1(voxel_feature)
193 | voxel_feature2 = self.block2(voxel_feature1)
194 | voxel_feature3 = self.block3(voxel_feature2)
195 | voxel_feature1 = self.up1(voxel_feature1)
196 | voxel_feature2 = self.up2(voxel_feature2)
197 | voxel_feature3 = self.up3(voxel_feature3)
198 | voxel_feature = torch.cat([voxel_feature1, voxel_feature2, voxel_feature3], dim=1)
199 | return self.conv_out(voxel_feature).transpose(3, 2)
200 |
--------------------------------------------------------------------------------
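Note on pointpillar.py above — a minimal sketch with illustrative bounds; the 15-dim input expected by PointNet(15, 64) is the 5 raw point dims plus the 1 + 3 + 3 + 3 voxel features concatenated in forward():

    import torch
    from model.utils.pointpillar import PointPillarEncoder

    xbound, ybound, zbound = [-30.0, 30.0, 0.15], [-15.0, 15.0, 0.15], [-10.0, 10.0, 20.0]
    encoder = PointPillarEncoder(C=128, xbound=xbound, ybound=ybound, zbound=zbound)

    points = torch.randn(2, 1024, 5)   # x, y, z plus 2 extra lidar features per point
    mask = torch.ones(2, 1024)         # 1 = valid point, 0 = padding
    bev_feat = encoder(points, mask)   # (2, 128, 200, 400) after the final transpose(3, 2)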
/model/utils/position_encoding.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2 | """
3 | Various positional encodings for the transformer.
4 | """
5 | import math
6 | import torch
7 | from torch import nn
8 |
9 | from .misc import NestedTensor
10 |
11 |
12 | class PositionEmbeddingSine(nn.Module):
13 | """
14 | This is a more standard version of the position embedding, very similar to the one
15 | used by the Attention is all you need paper, generalized to work on images.
16 | """
17 | def __init__(self, num_pos_feats=64, temperature=10000, normalize=False, scale=None):
18 | super().__init__()
19 | self.num_pos_feats = num_pos_feats
20 | self.temperature = temperature
21 | self.normalize = normalize
22 | if scale is not None and normalize is False:
23 | raise ValueError("normalize should be True if scale is passed")
24 | if scale is None:
25 | scale = 2 * math.pi
26 | self.scale = scale
27 |
28 | def forward(self, x, mask):
29 |
30 | assert mask is not None
31 | not_mask = ~mask
32 | y_embed = not_mask.cumsum(1, dtype=torch.float32)
33 | x_embed = not_mask.cumsum(2, dtype=torch.float32)
34 | if self.normalize:
35 | eps = 1e-6
36 | y_embed = y_embed / (y_embed[:, -1:, :] + eps) * self.scale
37 | x_embed = x_embed / (x_embed[:, :, -1:] + eps) * self.scale
38 |
39 | dim_t = torch.arange(self.num_pos_feats, dtype=torch.float32, device=x.device)
40 | dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats)
41 |
42 | pos_x = x_embed[:, :, :, None] / dim_t
43 | pos_y = y_embed[:, :, :, None] / dim_t
44 | pos_x = torch.stack((pos_x[:, :, :, 0::2].sin(), pos_x[:, :, :, 1::2].cos()), dim=4).flatten(3)
45 | pos_y = torch.stack((pos_y[:, :, :, 0::2].sin(), pos_y[:, :, :, 1::2].cos()), dim=4).flatten(3)
46 | pos = torch.cat((pos_y, pos_x), dim=3).permute(0, 3, 1, 2)
47 | return pos
48 |
49 |
50 | class PositionEmbeddingLearned(nn.Module):
51 | """
52 | Absolute pos embedding, learned.
53 | """
54 | def __init__(self, num_pos_feats=256):
55 | super().__init__()
56 | self.row_embed = nn.Embedding(50, num_pos_feats)
57 | self.col_embed = nn.Embedding(50, num_pos_feats)
58 | self.reset_parameters()
59 |
60 | def reset_parameters(self):
61 | nn.init.uniform_(self.row_embed.weight)
62 | nn.init.uniform_(self.col_embed.weight)
63 |
64 | def forward(self, x):
65 | # x = tensor_list.tensors
66 | h, w = x.shape[-2:]
67 | i = torch.arange(w, device=x.device)
68 | j = torch.arange(h, device=x.device)
69 | x_emb = self.col_embed(i)
70 | y_emb = self.row_embed(j)
71 | pos = torch.cat([
72 | x_emb.unsqueeze(0).repeat(h, 1, 1),
73 | y_emb.unsqueeze(1).repeat(1, w, 1),
74 | ], dim=-1).permute(2, 0, 1).unsqueeze(0).repeat(x.shape[0], 1, 1, 1)
75 | return pos
76 |
77 |
78 | def build_position_encoding(args):
79 | N_steps = args.hidden_dim // 2
80 | if args.position_embedding in ('v2', 'sine'):
81 | # TODO find a better way of exposing other arguments
82 | position_embedding = PositionEmbeddingSine(N_steps, normalize=True)
83 | elif args.position_embedding in ('v3', 'learned'):
84 | position_embedding = PositionEmbeddingLearned(N_steps)
85 | else:
86 | raise ValueError(f"not supported {args.position_embedding}")
87 |
88 | return position_embedding
89 |
--------------------------------------------------------------------------------
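Note on position_encoding.py above — a small sketch of PositionEmbeddingSine on an illustrative BEV-sized feature map; with num_pos_feats=128 the 128 y-dims and 128 x-dims are concatenated to 256 channels:

    import torch
    from model.utils.position_encoding import PositionEmbeddingSine

    pos_enc = PositionEmbeddingSine(num_pos_feats=128, normalize=True)
    feat = torch.randn(2, 256, 25, 50)
    mask = torch.zeros(2, 25, 50, dtype=torch.bool)   # False = valid, nothing is padded
    pos = pos_enc(feat, mask)                         # (2, 256, 25, 50)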
/model/utils/sdmap_cross_attn.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2 | """
3 | DETR Transformer class.
4 |
5 | Copy-paste from torch.nn.Transformer with modifications:
6 | * positional encodings are passed in MHattention
7 | * extra LN at the end of encoder is removed
8 | * decoder returns a stack of activations from all decoding layers
9 | """
10 | import copy
11 | from typing import Optional, List
12 |
13 | import torch
14 | import torch.nn.functional as F
15 | from torch import nn, Tensor
16 |
17 |
18 | class SDMapCrossAttn(nn.Module):
19 |
20 | def __init__(self, d_model=256, nhead=8, num_encoder_layers=2,
21 | num_decoder_layers=2, dim_feedforward=192, dropout=0.1,
22 | activation="relu", normalize_before=False,
23 | return_intermediate_dec=False):
24 | super().__init__()
25 |
26 | self.return_intermediate = return_intermediate_dec
27 | self.norm = nn.LayerNorm(d_model)
28 |
29 |
30 |
31 | decoder_layer = SDMapCrossAttnLayer(d_model, nhead, dim_feedforward,
32 | dropout, activation)
33 |
34 | self.layers = _get_clones(decoder_layer, num_decoder_layers)
35 | self.num_layers = num_decoder_layers
36 |
37 | self._reset_parameters()
38 |
39 | self.d_model = d_model
40 | self.nhead = nhead
41 |
42 | def _reset_parameters(self):
43 | for p in self.parameters():
44 | if p.dim() > 1:
45 | nn.init.xavier_uniform_(p)
46 |
47 |
48 | def forward(self, bev, sdmap,
49 | tgt_mask: Optional[Tensor] = None,
50 | memory_mask: Optional[Tensor] = None,
51 | tgt_key_padding_mask: Optional[Tensor] = None,
52 | memory_key_padding_mask: Optional[Tensor] = None,
53 | pos: Optional[Tensor] = None,
54 | query_pos: Optional[Tensor] = None):
55 |
56 | assert bev.shape == sdmap.shape
57 | bs, c, h, w = bev.shape
58 | bev = bev.flatten(2).permute(2, 0, 1)
59 | sdmap = sdmap.flatten(2).permute(2, 0, 1)
60 | pos = pos.flatten(2).permute(2, 0, 1)
61 |
62 | output = bev
63 |
64 | intermediate = []
65 |
66 | for layer in self.layers:
67 | output = layer(output, sdmap, tgt_mask=tgt_mask,
68 | memory_mask=memory_mask,
69 | tgt_key_padding_mask=tgt_key_padding_mask,
70 | memory_key_padding_mask=memory_key_padding_mask,
71 | pos=pos, query_pos=query_pos)
72 | if self.return_intermediate:
73 | intermediate.append(self.norm(output))
74 |
75 | if self.norm is not None:
76 | output = self.norm(output)
77 | if self.return_intermediate:
78 | intermediate.pop()
79 | intermediate.append(output)
80 |
81 | if self.return_intermediate:
82 | return torch.stack(intermediate)
83 |
84 | bev_feat = output.view(h, w, bs, c).permute(2, 3, 0, 1)
85 |
86 | return bev_feat.unsqueeze(0)
87 |
88 |
89 | class SDMapCrossAttnLayer(nn.Module):
90 | def __init__(self, d_model, nhead, dim_feedforward=192, dropout=0.1,
91 | activation="relu"):
92 | super().__init__()
93 | self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)
94 | self.multihead_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)
95 | # Implementation of Feedforward model
96 | self.linear1 = nn.Linear(d_model, dim_feedforward)
97 | self.dropout = nn.Dropout(dropout)
98 | self.linear2 = nn.Linear(dim_feedforward, d_model)
99 |
100 | self.norm1 = nn.LayerNorm(d_model)
101 | self.norm2 = nn.LayerNorm(d_model)
102 | self.norm3 = nn.LayerNorm(d_model)
103 | self.dropout1 = nn.Dropout(dropout)
104 | self.dropout2 = nn.Dropout(dropout)
105 | self.dropout3 = nn.Dropout(dropout)
106 |
107 | self.activation = _get_activation_fn(activation)
108 |
109 | def with_pos_embed(self, tensor, pos: Optional[Tensor]):
110 | return tensor if pos is None else tensor + pos
111 |
112 | def forward_post(self, bev, sdmap,
113 | bev_mask: Optional[Tensor] = None,
114 | sdmap_mask: Optional[Tensor] = None,
115 | tgt_key_padding_mask: Optional[Tensor] = None,
116 | sdmap_key_padding_mask: Optional[Tensor] = None,
117 | pos: Optional[Tensor] = None,
118 | query_pos: Optional[Tensor] = None):
119 |
120 | q = k = self.with_pos_embed(bev, pos)
121 | bev2 = self.self_attn(q, k, value=bev, attn_mask=bev_mask,
122 | key_padding_mask=tgt_key_padding_mask)[0]
123 | bev = bev + self.dropout1(bev2)
124 | bev = self.norm1(bev)
125 |
126 | bev2 = self.multihead_attn(query=self.with_pos_embed(bev, pos),
127 | key=self.with_pos_embed(sdmap, pos),
128 | # key=sdmap,
129 | value=sdmap, attn_mask=sdmap_mask,
130 | key_padding_mask=sdmap_key_padding_mask)[0]
131 | bev = bev + self.dropout2(bev2)
132 | bev = self.norm2(bev)
133 |
134 | bev2 = self.linear2(self.dropout(self.activation(self.linear1(bev))))
135 | bev = bev + self.dropout3(bev2)
136 | bev = self.norm3(bev)
137 |
138 | return bev
139 |
140 |
141 | def forward(self, tgt, memory,
142 | tgt_mask: Optional[Tensor] = None,
143 | memory_mask: Optional[Tensor] = None,
144 | tgt_key_padding_mask: Optional[Tensor] = None,
145 | memory_key_padding_mask: Optional[Tensor] = None,
146 | pos: Optional[Tensor] = None,
147 | query_pos: Optional[Tensor] = None):
148 |
149 | return self.forward_post(tgt, memory, tgt_mask, memory_mask,
150 | tgt_key_padding_mask, memory_key_padding_mask, pos, query_pos)
151 |
152 |
153 | def _get_clones(module, N):
154 | return nn.ModuleList([copy.deepcopy(module) for i in range(N)])
155 |
156 |
157 | def build_transformer(args):
158 | return SDMapCrossAttn(
159 | d_model=args.hidden_dim,
160 | dropout=args.dropout,
161 | nhead=args.nheads,
162 | dim_feedforward=args.dim_feedforward,
163 | num_encoder_layers=args.enc_layers,
164 | num_decoder_layers=args.dec_layers,
165 | normalize_before=args.pre_norm,
166 | return_intermediate_dec=True,
167 | )
168 |
169 |
170 | def _get_activation_fn(activation):
171 | """Return an activation function given a string"""
172 | if activation == "relu":
173 | return F.relu
174 | if activation == "gelu":
175 | return F.gelu
176 | if activation == "glu":
177 | return F.glu
178 | raise RuntimeError(f"activation should be relu/gelu/glu, not {activation}.")
--------------------------------------------------------------------------------
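Note on sdmap_cross_attn.py above — a minimal fusion sketch with illustrative shapes, assuming the sine embedding from position_encoding.py as the positional input (the repo's models may wire this differently):

    import torch
    from model.utils.position_encoding import PositionEmbeddingSine
    from model.utils.sdmap_cross_attn import SDMapCrossAttn

    fuser = SDMapCrossAttn(d_model=256, nhead=8, num_decoder_layers=2)
    bev = torch.randn(2, 256, 25, 50)
    sd_map = torch.randn(2, 256, 25, 50)              # must match bev.shape (asserted in forward)
    mask = torch.zeros(2, 25, 50, dtype=torch.bool)
    pos = PositionEmbeddingSine(128, normalize=True)(bev, mask)

    fused = fuser(bev, sd_map, pos=pos)               # (1, 2, 256, 25, 50); squeeze dim 0 to use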
/model/utils/utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | def plane_grid_2d(xbound, ybound):
4 | xmin, xmax = xbound[0], xbound[1]
5 | num_x = int((xbound[1] - xbound[0]) / xbound[2])
6 | ymin, ymax = ybound[0], ybound[1]
7 | num_y = int((ybound[1] - ybound[0]) / ybound[2])
8 |
9 | y = torch.linspace(xmin, xmax, num_x).cuda()
10 | x = torch.linspace(ymin, ymax, num_y).cuda()
11 | y, x = torch.meshgrid(x, y)
12 | x = x.flatten()
13 | y = y.flatten()
14 |
15 | coords = torch.stack([x, y], axis=0)
16 | return coords
17 |
18 |
19 | def cam_to_pixel(points, xbound, ybound):
20 | new_points = torch.zeros_like(points)
21 | new_points[..., 0] = (points[..., 0] - xbound[0]) / xbound[2]
22 | new_points[..., 1] = (points[..., 1] - ybound[0]) / ybound[2]
23 | return new_points
24 |
25 |
26 | def get_rot_2d(yaw):
27 | sin_yaw = torch.sin(yaw)
28 | cos_yaw = torch.cos(yaw)
29 | rot = torch.zeros(list(yaw.shape) + [2, 2]).cuda()
30 | rot[..., 0, 0] = cos_yaw
31 | rot[..., 0, 1] = sin_yaw
32 | rot[..., 1, 0] = -sin_yaw
33 | rot[..., 1, 1] = cos_yaw
34 | return rot
35 |
36 |
37 |
--------------------------------------------------------------------------------
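Note on utils.py above — a tiny worked example of cam_to_pixel with assumed bounds (each coordinate is shifted by the lower bound and divided by the resolution); plane_grid_2d and get_rot_2d additionally require a CUDA device since they call .cuda():

    import torch
    from model.utils.utils import cam_to_pixel

    xbound, ybound = [-30.0, 30.0, 0.15], [-15.0, 15.0, 0.15]
    pts = torch.tensor([[-30.0, -15.0], [0.0, 0.0]])
    cam_to_pixel(pts, xbound, ybound)   # ~ tensor([[  0.,   0.], [200., 100.]])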
/model/utils/voxel.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | import torch_scatter
4 |
5 |
6 | def pad_or_trim_to_np(x, shape, pad_val=0):
7 | shape = np.asarray(shape)
8 | pad = shape - np.minimum(np.shape(x), shape)
9 | zeros = np.zeros_like(pad)
10 | x = np.pad(x, np.stack([zeros, pad], axis=1), constant_values=pad_val)
11 | return x[:shape[0], :shape[1]]
12 |
13 |
14 | def raval_index(coords, dims):
15 | dims = torch.cat((dims, torch.ones(1, device=dims.device)), dim=0)[1:]
16 | dims = torch.flip(dims, dims=[0])
17 | dims = torch.cumprod(dims, dim=0) / dims[0]
18 | multiplier = torch.flip(dims, dims=[0])
19 | indices = torch.sum(coords * multiplier, dim=1)
20 | return indices
21 |
22 |
23 | def points_to_voxels(
24 | points_xyz,
25 | points_mask,
26 | grid_range_x,
27 | grid_range_y,
28 | grid_range_z
29 | ):
30 | batch_size, num_points, _ = points_xyz.shape
31 | voxel_size_x = grid_range_x[2]
32 | voxel_size_y = grid_range_y[2]
33 | voxel_size_z = grid_range_z[2]
34 | grid_size = np.asarray([
35 | (grid_range_x[1]-grid_range_x[0]) / voxel_size_x,
36 | (grid_range_y[1]-grid_range_y[0]) / voxel_size_y,
37 | (grid_range_z[1]-grid_range_z[0]) / voxel_size_z
38 | ]).astype('int32')
39 | voxel_size = np.asarray([voxel_size_x, voxel_size_y, voxel_size_z])
40 | voxel_size = torch.Tensor(voxel_size).to(points_xyz.device)
41 | num_voxels = grid_size[0] * grid_size[1] * grid_size[2]
42 | grid_offset = torch.Tensor([grid_range_x[0], grid_range_y[0], grid_range_z[0]]).to(points_xyz.device)
43 | shifted_points_xyz = points_xyz - grid_offset
44 | voxel_xyz = shifted_points_xyz / voxel_size
45 | voxel_coords = voxel_xyz.int()
46 | grid_size = torch.from_numpy(grid_size).to(points_xyz.device)
47 | grid_size = grid_size.int()
48 | zeros = torch.zeros_like(grid_size)
49 | voxel_paddings = ((points_mask < 1.0) |
50 | torch.any((voxel_coords >= grid_size) |
51 | (voxel_coords < zeros), dim=-1))
52 | voxel_indices = raval_index(
53 | torch.reshape(voxel_coords, [batch_size * num_points, 3]), grid_size)
54 | voxel_indices = torch.reshape(voxel_indices, [batch_size, num_points])
55 | voxel_indices = torch.where(voxel_paddings,
56 | torch.zeros_like(voxel_indices),
57 | voxel_indices)
58 | voxel_centers = ((0.5 + voxel_coords.float()) * voxel_size + grid_offset)
59 | voxel_coords = torch.where(torch.unsqueeze(voxel_paddings, dim=-1),
60 | torch.zeros_like(voxel_coords),
61 | voxel_coords)
62 | voxel_xyz = torch.where(torch.unsqueeze(voxel_paddings, dim=-1),
63 | torch.zeros_like(voxel_xyz),
64 | voxel_xyz)
65 | voxel_paddings = voxel_paddings.float()
66 |
67 | voxel_indices = voxel_indices.long()
68 | points_per_voxel = torch_scatter.scatter_sum(
69 | torch.ones((batch_size, num_points), dtype=voxel_coords.dtype, device=voxel_coords.device) * (1-voxel_paddings),
70 | voxel_indices,
71 | dim=1,
72 | dim_size=num_voxels
73 | )
74 |
75 | voxel_point_count = torch.gather(points_per_voxel,
76 | dim=1,
77 | index=voxel_indices)
78 |
79 |
80 | voxel_centroids = torch_scatter.scatter_mean(
81 | points_xyz,
82 | voxel_indices,
83 | dim=1,
84 | dim_size=num_voxels)
85 | point_centroids = torch.gather(voxel_centroids, dim=1, index=torch.unsqueeze(voxel_indices, dim=-1).repeat(1, 1, 3))
86 | local_points_xyz = points_xyz - point_centroids
87 |
88 | result = {
89 | 'local_points_xyz': local_points_xyz,
90 | 'shifted_points_xyz': shifted_points_xyz,
91 | 'point_centroids': point_centroids,
92 | 'points_xyz': points_xyz,
93 | 'grid_offset': grid_offset,
94 | 'voxel_coords': voxel_coords,
95 | 'voxel_centers': voxel_centers,
96 | 'voxel_indices': voxel_indices,
97 | 'voxel_paddings': voxel_paddings,
98 | 'points_mask': 1 - voxel_paddings,
99 | 'num_voxels': num_voxels,
100 | 'grid_size': grid_size,
101 | 'voxel_xyz': voxel_xyz,
102 | 'voxel_size': voxel_size,
103 | 'voxel_point_count': voxel_point_count,
104 | 'points_per_voxel': points_per_voxel
105 | }
106 |
107 |
108 | return result
109 |
--------------------------------------------------------------------------------
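Note on voxel.py above — a small worked check of raval_index, which flattens integer voxel coordinates into a single row-major index the way np.ravel_multi_index does:

    import torch
    from model.utils.voxel import raval_index

    coords = torch.tensor([[1.0, 2.0, 3.0]])
    dims = torch.tensor([4.0, 5.0, 6.0])
    raval_index(coords, dims)   # tensor([45.]) == 1*(5*6) + 2*6 + 3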
/requirements.txt:
--------------------------------------------------------------------------------
1 | absl-py==2.0.0
2 | addict==2.4.0
3 | attrs==23.1.0
4 | black==23.11.0
5 | cachetools==5.3.2
6 | certifi==2023.11.17
7 | charset-normalizer==3.3.2
8 | click==8.1.7
9 | click-plugins==1.1.1
10 | cligj==0.7.2
11 | contourpy==1.1.1
12 | cycler==0.12.1
13 | descartes==1.1.0
14 | efficientnet-pytorch==0.7.1
15 | exceptiongroup==1.2.0
16 | filelock==3.13.1
17 | fiona==1.9.5
18 | fire==0.5.0
19 | flake8==6.1.0
20 | fonttools==4.45.1
21 | fsspec==2023.12.1
22 | geopandas==0.13.2
23 | google-auth==2.23.4
24 | google-auth-oauthlib==1.0.0
25 | grpcio==1.59.3
26 | huggingface-hub==0.19.4
27 | idna==3.6
28 | imageio==2.33.0
29 | importlib-metadata==6.8.0
30 | importlib-resources==6.1.1
31 | iniconfig==2.0.0
32 | ipython==8.12.2
33 | jedi==0.19.1
34 | joblib==1.3.2
35 | kiwisolver==1.4.5
36 | llvmlite==0.36.0
37 | lyft-dataset-sdk==0.0.8
38 | markdown==3.5.1
39 | markupsafe==2.1.3
40 | matplotlib==3.5.3
41 | mccabe==0.7.0
42 | mmcls==0.25.0
43 | mmcv-full==1.6.0
44 | mmdet==2.28.2
45 | mmsegmentation==0.30.0
46 | mypy-extensions==1.0.0
47 | networkx==2.2
48 | numba==0.53.0
49 | numpy==1.23.5
50 | nuscenes-devkit==1.1.11
51 | nvidia-ml-py==12.535.133
52 | nvitop==1.3.1
53 | oauthlib==3.2.2
54 | opencv-python==4.8.1.78
55 | packaging==23.2
56 | pandas==2.0.3
57 | pathspec==0.11.2
58 | pillow==10.1.0
59 | platformdirs==4.0.0
60 | plotly==5.18.0
61 | pluggy==1.3.0
62 | plyfile==1.0.2
63 | prettytable==3.9.0
64 | prompt-toolkit==3.0.41
65 | protobuf==4.25.1
66 | psutil==5.9.6
67 | pyasn1==0.5.1
68 | pyasn1-modules==0.3.0
69 | pycocotools==2.0.7
70 | pycodestyle==2.11.1
71 | pyflakes==3.1.0
72 | pyparsing==3.1.1
73 | pyproj==3.5.0
74 | pyquaternion==0.9.9
75 | pytest==7.4.3
76 | python-dateutil==2.8.2
77 | pytz==2023.3.post1
78 | pywavelets==1.4.1
79 | pyyaml==6.0.1
80 | requests==2.31.0
81 | requests-oauthlib==1.3.1
82 | rsa==4.9
83 | safetensors==0.4.1
84 | scikit-image==0.19.3
85 | scikit-learn==1.3.2
86 | scipy==1.10.1
87 | shapely==1.8.5.post1
88 | some-package==0.1
89 | tenacity==8.2.3
90 | tensorboard==2.13.0
91 | tensorboard-data-server==0.7.2
92 | tensorboardx==2.6.2.2
93 | termcolor==2.3.0
94 | terminaltables==3.1.10
95 | threadpoolctl==3.2.0
96 | tifffile==2023.7.10
97 | timm==0.9.12
98 | tomli==2.0.1
99 | torch-scatter==2.0.9
100 | tqdm==4.66.1
101 | trimesh==2.35.39
102 | tzdata==2023.3
103 | urllib3==2.1.0
104 | wcwidth==0.2.12
105 | werkzeug==3.0.1
106 | yapf==0.40.2
107 | zipp==3.17.0
--------------------------------------------------------------------------------
/tools/eval.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import tqdm
3 | import os
4 | import sys
5 | currentPath = os.path.split(os.path.realpath(__file__))[0]
6 | sys.path.append(currentPath + '/..')
7 | import torch
8 | from tools.config import Config
9 | from tools.evaluation.iou import get_batch_iou
10 | from tools.evaluation import lpips
11 | from data_osm.dataset import semantic_dataset
12 | from data_osm.const import NUM_CLASSES
13 | from model import get_model
14 | from tools.postprocess.vectorize import vectorize
15 | from collections import OrderedDict
16 | import matplotlib.pyplot as plt
17 | import numpy as np
18 | import warnings
19 | warnings.filterwarnings("ignore")
20 |
21 | def onehot_encoding(logits, dim=1):
22 | max_idx = torch.argmax(logits, dim, keepdim=True)
23 | one_hot = logits.new_full(logits.shape, 0)
24 | one_hot.scatter_(dim, max_idx, 1)
25 | return one_hot
26 |
27 | # evaluate only the pre-trained MAE module (takes the masked map as input)
28 | def eval_pretrain(bevencode_bd, val_loader):
29 | bevencode_bd.eval()
30 | total_intersects = 0
31 | total_union = 0
32 |
33 | with torch.no_grad():
34 | total_epe = 0
35 | index = 0
36 | for (imgs, trans, rots, intrins, post_trans, post_rots, lidar_data, lidar_mask, car_trans,
37 | yaw_pitch_roll, semantic_gt, instance_gt, direction_gt,osm_masks,
38 | osm_vectors, masked_map, timestamp, scene_id) in tqdm.tqdm(val_loader):
39 |
40 | semantic, embedding, direction = bevencode_bd(masked_map.cuda().float())
41 | semantic_gt = semantic_gt.cuda().float()
42 | intersects, union = get_batch_iou(onehot_encoding(semantic.cuda()), semantic_gt)
43 | total_intersects += intersects
44 | total_union += union
45 | index = index + 1
46 | return total_intersects / (total_union + 1e-7)
47 |
48 |
49 | def eval_iou(model, val_loader):
50 | model.eval()
51 | total_intersects = 0
52 | total_union = 0
53 | with torch.no_grad():
54 | for (imgs, trans, rots, intrins, post_trans, post_rots, lidar_data, lidar_mask, car_trans,
55 | yaw_pitch_roll, semantic_gt, instance_gt, direction_gt,osm_masks,
56 | osm_vectors, masked_map, timestamp, scene_id) in tqdm.tqdm(val_loader):
57 |
58 | semantic, embedding, direction = model(imgs.cuda(), trans.cuda(), rots.cuda(), intrins.cuda(),
59 | post_trans.cuda(), post_rots.cuda(), lidar_data.cuda(),
60 | lidar_mask.cuda(), car_trans.cuda(), yaw_pitch_roll.cuda(), osm_masks.float().cuda())
61 |
62 | semantic_gt = semantic_gt.cuda().float()
63 | device = semantic_gt.device
64 | if semantic.device != device:
65 | semantic = semantic.to(device)
66 | embedding = embedding.to(device)
67 | direction = direction.to(device)
68 |
69 | intersects, union = get_batch_iou(onehot_encoding(semantic), semantic_gt)
70 | total_intersects += intersects
71 | total_union += union
72 | return total_intersects / (total_union + 1e-7)
73 |
74 |
75 | def eval_all(model, val_loader):
76 | model.eval()
77 | total_intersects = 0
78 | total_union = 0
79 | i=0
80 | lpipss1 = []
81 | with torch.no_grad():
82 | for imgs, trans, rots, intrins, post_trans, post_rots, lidar_data, lidar_mask, car_trans, yaw_pitch_roll, semantic_gt, instance_gt, direction_gt,osm_masks, osm_vectors, masks_bd_osm, mask_bd, timestamp, scene_ids in tqdm.tqdm(val_loader):
83 |
84 |
85 | semantic, embedding, direction = model(imgs.cuda(), trans.cuda(), rots.cuda(), intrins.cuda(),
86 | post_trans.cuda(), post_rots.cuda(), lidar_data.cuda(),
87 | lidar_mask.cuda(), car_trans.cuda(), yaw_pitch_roll.cuda(), osm_masks.float().cuda())
88 |
89 | gt = semantic_gt[:,1:4,:,:].clone().cuda()
90 | pred = semantic[:,1:4,:,:].clone().cuda()
91 |
92 | lpipss1.append(lpips(pred.float(), gt.float(), net_type='alex') / pred.shape[0])
93 |
94 | semantic_gt = semantic_gt.cuda().float()
95 |
96 | device = semantic_gt.device
97 | if semantic.device != device:
98 | semantic = semantic.to(device)
99 | intersects, union = get_batch_iou(onehot_encoding(semantic), semantic_gt)
100 | total_intersects += intersects
101 | total_union += union
102 | i+=1
103 | print(" LPIPS1: {:>12.7f}".format(torch.tensor(lpipss1).mean(), ".5"))
104 | print(" IOU: {:>12.7f}".format((total_intersects / (total_union + 1e-7))), ".5")
105 | # return (total_intersects / (total_union + 1e-7))
106 |
107 |
108 | def main(args):
109 | data_conf = {
110 | 'num_channels': NUM_CLASSES + 1,
111 | 'image_size': cfg.image_size,
112 | 'xbound': cfg.xbound,
113 | 'ybound': cfg.ybound,
114 | 'zbound': cfg.zbound,
115 | 'dbound': cfg.dbound,
116 | 'thickness': cfg.thickness,
117 | 'angle_class': cfg.angle_class,
118 | 'patch_w': cfg.patch_w,
119 | 'patch_h': cfg.patch_h,
120 | 'mask_ratio': cfg.mask_ratio,
121 | 'mask_flag': cfg.mask_flag,
122 | 'sd_map_path': cfg.sd_map_path,
123 | }
124 |
125 | train_loader, val_loader = semantic_dataset(args, args.version, args.dataroot, data_conf,
126 | args.batch_size, args.nworkers, cfg.dataset)
127 | model = get_model(args, data_conf, args.instance_seg, args.embedding_dim, args.direction_pred, args.angle_class)
128 |
129 | state_dict_model = torch.load(args.modelf)
130 | new_state_dict = OrderedDict()
131 | for k, v in state_dict_model.items():
132 | name = k[7:]
133 | new_state_dict[name] = v
134 | model.load_state_dict(new_state_dict)
135 | model.cuda()
136 |
137 | if "pretrain" in str(args.config):
138 | print(eval_pretrain(model, val_loader))
139 | else:
140 | print(eval_iou(model, val_loader))
141 |
142 |
143 | if __name__ == '__main__':
144 | parser = argparse.ArgumentParser(description='Evaluate HDMap Construction Results.')
145 | parser.add_argument("config", help = 'path to config file', type=str, default=None)
146 | args = parser.parse_args()
147 | cfg = Config.fromfile(args.config)
148 |
149 | main(cfg)
150 |
--------------------------------------------------------------------------------
/tools/evaluate_json.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import tqdm
3 | import argparse
4 | from config import Config
5 | import sys
6 | import os
7 | currentPath = os.path.split(os.path.realpath(__file__))[0]
8 | sys.path.append(currentPath + '/..')
9 | from tools.evaluation.dataset import PMapNetEvalDataset
10 | from tools.evaluation.chamfer_distance import semantic_mask_chamfer_dist_cum
11 | from tools.evaluation.AP import instance_mask_AP
12 | from tools.evaluation.iou import get_batch_iou
13 |
14 | SAMPLED_RECALLS = torch.linspace(0.1, 1, 10)
15 | # THRESHOLDS = [0.2, 0.5, 1.0]
16 | THRESHOLDS = [0.5, 1.0, 1.5]
17 |
18 | def get_val_info(args):
19 | data_conf = {
20 | 'xbound': args.xbound,
21 | 'ybound': args.ybound,
22 | 'thickness': args.thickness,
23 | 'sd_map_path': args.sd_map_path
24 | }
25 |
26 | dataset = PMapNetEvalDataset(
27 | args.version, args.dataroot, 'val', args.result_path, data_conf)
28 |
29 | data_loader = torch.utils.data.DataLoader(
30 | dataset, batch_size=args.batch_size, shuffle=False, drop_last=False)
31 |
32 | total_CD1 = torch.zeros(args.max_channel).cuda()
33 | total_CD2 = torch.zeros(args.max_channel).cuda()
34 | total_CD_num1 = torch.zeros(args.max_channel).cuda()
35 | total_CD_num2 = torch.zeros(args.max_channel).cuda()
36 | total_intersect = torch.zeros(args.max_channel).cuda()
37 | total_union = torch.zeros(args.max_channel).cuda()
38 | AP_matrix = torch.zeros((args.max_channel, len(THRESHOLDS))).cuda()
39 | AP_count_matrix = torch.zeros((args.max_channel, len(THRESHOLDS))).cuda()
40 |
41 |
42 | print('running eval...')
43 | for pred_map, confidence_level, gt_map in tqdm.tqdm(data_loader):
44 |
45 | pred_map = pred_map.cuda()
46 | confidence_level = confidence_level.cuda()
47 | gt_map = gt_map.cuda()
48 |
49 |
50 | intersect, union = get_batch_iou(pred_map, gt_map)
51 | CD1, CD2, num1, num2 = semantic_mask_chamfer_dist_cum(
52 | pred_map, gt_map, args.xbound[2], args.ybound[2], threshold=args.CD_threshold)
53 |
54 | instance_mask_AP(AP_matrix, AP_count_matrix, pred_map, gt_map, args.xbound[2], args.ybound[2],
55 | confidence_level, THRESHOLDS, sampled_recalls=SAMPLED_RECALLS, bidirectional=args.bidirectional, threshold_iou=args.threshold_iou)
56 |
57 | total_intersect += intersect.cuda()
58 | total_union += union.cuda()
59 | total_CD1 += CD1
60 | total_CD2 += CD2
61 | total_CD_num1 += num1
62 | total_CD_num2 += num2
63 |
64 |
65 | CD_pred = total_CD1 / total_CD_num1
66 | CD_label = total_CD2 / total_CD_num2
67 | CD = (total_CD1 + total_CD2) / (total_CD_num1 + total_CD_num2)
68 | AP = AP_matrix / AP_count_matrix
69 |
70 | return {
71 | 'iou': total_intersect / total_union,
72 | 'CD_pred': CD_pred,
73 | 'CD_label': CD_label,
74 | 'CD': CD,
75 | 'AP': AP,
76 | }
77 |
78 |
79 | if __name__ == '__main__':
80 | parser = argparse.ArgumentParser(description='Evaluate Vectorized HDMap Construction Results.')
81 | parser.add_argument("config", help = 'path to config file', type=str, default=None)
82 |
83 | args = parser.parse_args()
84 | cfg = Config.fromfile(args.config)
85 |
86 | print(get_val_info(cfg))
87 |
88 |
--------------------------------------------------------------------------------
/tools/evaluation/__init__.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | from .modules.lpips import LPIPS
4 |
5 | def lpips(x: torch.Tensor,
6 | y: torch.Tensor,
7 | net_type: str = 'alex',
8 | version: str = '0.1'):
9 | r"""Function that measures
10 | Learned Perceptual Image Patch Similarity (LPIPS).
11 |
12 | Arguments:
13 | x, y (torch.Tensor): the input tensors to compare.
14 | net_type (str): the network type to compare the features:
15 | 'alex' | 'squeeze' | 'vgg'. Default: 'alex'.
16 | version (str): the version of LPIPS. Default: 0.1.
17 | """
18 | device = x.device
19 | criterion = LPIPS(net_type, version).to(device)
20 | return criterion(x, y)
--------------------------------------------------------------------------------
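Note on the lpips helper above — a minimal sketch with illustrative inputs; the backbone expects 3-channel maps (eval.py passes the three foreground classes), and the pretrained AlexNet plus LPIPS linear weights are downloaded on first use (see modules/utils.py):

    import torch
    from tools.evaluation import lpips

    pred = torch.rand(1, 3, 200, 400)
    gt = torch.rand(1, 3, 200, 400)
    score = lpips(pred, gt, net_type='alex')   # tensor of shape (1, 1, 1, 1)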
/tools/evaluation/angle_diff.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | def onehot_encoding_spread(logits, dim=1):
5 | max_idx = torch.argmax(logits, dim, keepdim=True)
6 | one_hot = logits.new_full(logits.shape, 0)
7 | one_hot.scatter_(dim, max_idx, 1)
8 | one_hot.scatter_(dim, torch.clamp(max_idx-1, min=0), 1)
9 | one_hot.scatter_(dim, torch.clamp(max_idx-2, min=0), 1)
10 | one_hot.scatter_(dim, torch.clamp(max_idx+1, max=logits.shape[dim]-1), 1)
11 | one_hot.scatter_(dim, torch.clamp(max_idx+2, max=logits.shape[dim]-1), 1)
12 |
13 | return one_hot
14 |
15 |
16 | def get_pred_top2_direction(direction, dim=1):
17 | direction = torch.softmax(direction, dim)
18 | idx1 = torch.argmax(direction, dim)
19 | idx1_onehot_spread = onehot_encoding_spread(direction, dim)
20 | idx1_onehot_spread = idx1_onehot_spread.bool()
21 | direction[idx1_onehot_spread] = 0
22 | idx2 = torch.argmax(direction, dim)
23 | direction = torch.stack([idx1, idx2], dim) - 1
24 | return direction
25 |
26 |
27 | def calc_angle_diff(pred_mask, gt_mask, angle_class):
28 | per_angle = float(360. / angle_class)
29 | eval_mask = 1 - gt_mask[:, 0]
30 | pred_direction = get_pred_top2_direction(pred_mask, dim=1).float()
31 | gt_direction = (torch.topk(gt_mask, 2, dim=1)[1] - 1).float()
32 |
33 | pred_direction *= per_angle
34 | gt_direction *= per_angle
35 | pred_direction = pred_direction[:, :, None, :, :].repeat(1, 1, 2, 1, 1)
36 | gt_direction = gt_direction[:, None, :, :, :].repeat(1, 2, 1, 1, 1)
37 | diff_mask = torch.abs(pred_direction - gt_direction)
38 | diff_mask = torch.min(diff_mask, 360 - diff_mask)
39 | diff_mask = torch.min(diff_mask[:, 0, 0] + diff_mask[:, 1, 1], diff_mask[:, 1, 0] + diff_mask[:, 0, 1]) / 2
40 | return ((eval_mask * diff_mask).sum() / (eval_mask.sum() + 1e-6)).item()
41 |
--------------------------------------------------------------------------------
/tools/evaluation/chamfer_distance.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | def chamfer_distance(source_pc, target_pc, threshold, cum=False, bidirectional=True):
5 | dist = torch.cdist(source_pc.float(), target_pc.float())
6 | dist1, _ = torch.min(dist, 2)
7 | dist2, _ = torch.min(dist, 1)
8 | if cum:
9 | len1 = dist1.shape[-1]
10 | len2 = dist2.shape[-1]
11 | dist1 = dist1.sum(-1)
12 | dist2 = dist2.sum(-1)
13 | return dist1, dist2, len1, len2
14 | dist1 = dist1.mean(-1)
15 | dist2 = dist2.mean(-1)
16 | if bidirectional:
17 | return min((dist1 + dist2) / 2, threshold)
18 | else:
19 | #return min(dist1, threshold), min(dist2, threshold)
20 | return min(dist1, threshold)
21 |
22 |
23 | def semantic_mask_chamfer_dist_cum(seg_pred, seg_label, scale_x, scale_y, threshold):
24 | # seg_label: N, C, H, W
25 | # seg_pred: N, C, H, W
26 | N, C, H, W = seg_label.shape
27 |
28 | cum_CD1 = torch.zeros(C, device=seg_label.device)
29 | cum_CD2 = torch.zeros(C, device=seg_label.device)
30 | cum_num1 = torch.zeros(C, device=seg_label.device)
31 | cum_num2 = torch.zeros(C, device=seg_label.device)
32 | for n in range(N):
33 | for c in range(C):
34 | pred_pc_x, pred_pc_y = torch.where(seg_pred[n, c] != 0)
35 | label_pc_x, label_pc_y = torch.where(seg_label[n, c] != 0)
36 | pred_pc_x = pred_pc_x.float() * scale_x
37 | pred_pc_y = pred_pc_y.float() * scale_y
38 | label_pc_x = label_pc_x.float() * scale_x
39 | label_pc_y = label_pc_y.float() * scale_y
40 | if len(pred_pc_x) == 0 and len(label_pc_x) == 0:
41 | continue
42 |
43 | if len(label_pc_x) == 0:
44 | cum_CD1[c] += len(pred_pc_x) * threshold
45 | cum_num1[c] += len(pred_pc_x)
46 | continue
47 |
48 | if len(pred_pc_x) == 0:
49 | cum_CD2[c] += len(label_pc_x) * threshold
50 | cum_num2[c] += len(label_pc_x)
51 | continue
52 |
53 | pred_pc_coords = torch.stack([pred_pc_x, pred_pc_y], -1).float()
54 | label_pc_coords = torch.stack([label_pc_x, label_pc_y], -1).float()
55 | CD1, CD2, len1, len2 = chamfer_distance(pred_pc_coords[None], label_pc_coords[None], threshold=threshold, cum=True)
56 | cum_CD1[c] += CD1.item()
57 | cum_CD2[c] += CD2.item()
58 | cum_num1[c] += len1
59 | cum_num2[c] += len2
60 | return cum_CD1, cum_CD2, cum_num1, cum_num2
61 |
62 | def semantic_mask_turn_cal(seg_pred, seg_label, scale_x, scale_y, threshold):
63 | # seg_label: N, C, H, W
64 | # seg_pred: N, C, H, W
65 | N, C, H, W = seg_label.shape
66 | print("N: ", N)
67 | print("C: ", C)
68 |
69 | for n in range(N):
70 | for c in range(C):
71 | label_pc_x, label_pc_y = torch.where(seg_label[n, c] != 0)
72 | label_pc_x = label_pc_x.float() * scale_x
73 | label_pc_y = label_pc_y.float() * scale_y
74 | if len(label_pc_x) == 0:
75 | continue
76 |
77 | label_pc_coords = torch.stack([label_pc_x, label_pc_y], -1).float()
78 | print("label_pc_coords.shape: ", label_pc_coords.shape)
79 | print("label_pc_coords[0]: ", label_pc_coords[0])
80 | print("label_pc_coords[-1]: ", label_pc_coords[-1])
81 | # CD1, CD2, len1, len2 = chamfer_distance(pred_pc_coords[None], label_pc_coords[None], threshold=threshold, cum=True)
82 | # cum_CD1[c] += CD1.item()
83 | # cum_CD2[c] += CD2.item()
84 | # cum_num1[c] += len1
85 | # cum_num2[c] += len2
86 | # return cum_CD1, cum_CD2, cum_num1, cum_num2
87 | return None
--------------------------------------------------------------------------------
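Note on chamfer_distance.py above — a small sketch on two tiny (1, N, 2) point sets; values are illustrative:

    import torch
    from tools.evaluation.chamfer_distance import chamfer_distance

    pred = torch.tensor([[[0.0, 0.0], [1.0, 0.0]]])
    gt = torch.tensor([[[0.0, 1.0], [1.0, 1.0]]])
    chamfer_distance(pred, gt, threshold=5.0)            # tensor([1.]): mean of both directions
    chamfer_distance(pred, gt, threshold=5.0, cum=True)  # (tensor([2.]), tensor([2.]), 2, 2): sums and point counts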
/tools/evaluation/dataset.py:
--------------------------------------------------------------------------------
1 | import json
2 | import numpy as np
3 |
4 | import torch
5 |
6 | from data_osm.dataset import PMapNetDataset
7 | from data_osm.rasterize import rasterize_map
8 | from data_osm.const import NUM_CLASSES
9 | from nuscenes.utils.splits import create_splits_scenes
10 |
11 |
12 | class PMapNetEvalDataset(PMapNetDataset):
13 | def __init__(self, version, dataroot, eval_set, result_path, data_conf, max_line_count=300):
14 | self.eval_set = eval_set
15 | super(PMapNetEvalDataset, self).__init__(version, dataroot, data_conf, is_train=False)
16 | with open(result_path, 'r') as f:
17 | self.prediction = json.load(f)
18 | self.max_line_count = max_line_count
19 | self.thickness = data_conf['thickness']
20 |
21 | def get_scenes(self, version, is_train):
22 | return create_splits_scenes()[self.eval_set]
23 |
24 | def __len__(self):
25 | return len(self.samples)
26 |
27 | def __getitem__(self, idx):
28 | rec = self.samples[idx]
29 | location = self.nusc.get('log', self.nusc.get('scene', rec['scene_token'])['log_token'])['location']
30 | ego_pose = self.nusc.get('ego_pose', self.nusc.get('sample_data', rec['data']['LIDAR_TOP'])['ego_pose_token'])
31 | gt_vectors, polygon_geom, osm_vectors = self.vector_map.gen_vectorized_samples(location, ego_pose['translation'], ego_pose['rotation'])
32 | # import pdb; pdb.set_trace()
33 | gt_map, _ = rasterize_map(gt_vectors, self.patch_size, self.canvas_size, NUM_CLASSES, self.thickness)
34 | if self.prediction['meta']['vector']:
35 | pred_vectors = self.prediction['results'][rec['token']]
36 | pred_map, confidence_level = rasterize_map(pred_vectors, self.patch_size, self.canvas_size, NUM_CLASSES, self.thickness)
37 | else:
38 | pred_map = np.array(self.prediction['results'][rec['token']]['map'])
39 | confidence_level = self.prediction['results'][rec['token']]['confidence_level']
40 |
41 | confidence_level = torch.tensor(confidence_level + [-1] * (self.max_line_count - len(confidence_level)))
42 |
43 | return pred_map, confidence_level, gt_map
44 |
--------------------------------------------------------------------------------
/tools/evaluation/iou.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | def get_batch_iou(pred_map, gt_map):
5 | intersects = []
6 | unions = []
7 | with torch.no_grad():
8 | pred_map = pred_map.bool()
9 | gt_map = gt_map.bool()
10 |
11 | for i in range(pred_map.shape[1]):
12 | pred = pred_map[:, i]
13 | tgt = gt_map[:, i]
14 | # import pdb; pdb.set_trace()
15 | intersect = (pred & tgt).sum().float()
16 | union = (pred | tgt).sum().float()
17 | intersects.append(intersect)
18 | unions.append(union)
19 | return torch.tensor(intersects), torch.tensor(unions)
20 |
--------------------------------------------------------------------------------
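Note on iou.py above — a tiny worked example; get_batch_iou returns per-class intersection and union counts, which eval.py accumulates before dividing:

    import torch
    from tools.evaluation.iou import get_batch_iou

    pred = torch.tensor([[[[1, 1, 0, 0]]]])   # (N=1, C=1, H=1, W=4)
    gt = torch.tensor([[[[0, 1, 1, 0]]]])
    get_batch_iou(pred, gt)                   # (tensor([1.]), tensor([3.])) -> IoU 1/3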
/tools/evaluation/modules/lpips.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 | from .networks import get_network, LinLayers
5 | from .utils import get_state_dict
6 |
7 |
8 | class LPIPS(nn.Module):
9 | r"""Creates a criterion that measures
10 | Learned Perceptual Image Patch Similarity (LPIPS).
11 |
12 | Arguments:
13 | net_type (str): the network type to compare the features:
14 | 'alex' | 'squeeze' | 'vgg'. Default: 'alex'.
15 | version (str): the version of LPIPS. Default: 0.1.
16 | """
17 | def __init__(self, net_type: str = 'alex', version: str = '0.1'):
18 |
19 | assert version in ['0.1'], 'v0.1 is only supported now'
20 |
21 | super(LPIPS, self).__init__()
22 |
23 | # pretrained network
24 | self.net = get_network(net_type)
25 |
26 | # linear layers
27 | self.lin = LinLayers(self.net.n_channels_list)
28 | self.lin.load_state_dict(get_state_dict(net_type, version))
29 |
30 | def forward(self, x: torch.Tensor, y: torch.Tensor):
31 | feat_x, feat_y = self.net(x), self.net(y)
32 |
33 | diff = [(fx - fy) ** 2 for fx, fy in zip(feat_x, feat_y)]
34 | res = [l(d).mean((2, 3), True) for d, l in zip(diff, self.lin)]
35 |
36 | return torch.sum(torch.cat(res, 0), 0, True)
37 |
--------------------------------------------------------------------------------
/tools/evaluation/modules/networks.py:
--------------------------------------------------------------------------------
1 | from typing import Sequence
2 |
3 | from itertools import chain
4 |
5 | import torch
6 | import torch.nn as nn
7 | from torchvision import models
8 |
9 | from .utils import normalize_activation
10 |
11 |
12 | def get_network(net_type: str):
13 | if net_type == 'alex':
14 | return AlexNet()
15 | elif net_type == 'squeeze':
16 | return SqueezeNet()
17 | elif net_type == 'vgg':
18 | return VGG16()
19 | else:
20 | raise NotImplementedError('choose net_type from [alex, squeeze, vgg].')
21 |
22 |
23 | class LinLayers(nn.ModuleList):
24 | def __init__(self, n_channels_list: Sequence[int]):
25 | super(LinLayers, self).__init__([
26 | nn.Sequential(
27 | nn.Identity(),
28 | nn.Conv2d(nc, 1, 1, 1, 0, bias=False)
29 | ) for nc in n_channels_list
30 | ])
31 |
32 | for param in self.parameters():
33 | param.requires_grad = False
34 |
35 |
36 | class BaseNet(nn.Module):
37 | def __init__(self):
38 | super(BaseNet, self).__init__()
39 |
40 | # register buffer
41 | self.register_buffer(
42 | 'mean', torch.Tensor([-.030, -.088, -.188])[None, :, None, None])
43 | self.register_buffer(
44 | 'std', torch.Tensor([.458, .448, .450])[None, :, None, None])
45 |
46 | def set_requires_grad(self, state: bool):
47 | for param in chain(self.parameters(), self.buffers()):
48 | param.requires_grad = state
49 |
50 | def z_score(self, x: torch.Tensor):
51 | return (x - self.mean) / self.std
52 |
53 | def forward(self, x: torch.Tensor):
54 | x = self.z_score(x)
55 |
56 | output = []
57 | for i, (_, layer) in enumerate(self.layers._modules.items(), 1):
58 | x = layer(x)
59 | if i in self.target_layers:
60 | output.append(normalize_activation(x))
61 | if len(output) == len(self.target_layers):
62 | break
63 | return output
64 |
65 |
66 | class SqueezeNet(BaseNet):
67 | def __init__(self):
68 | super(SqueezeNet, self).__init__()
69 |
70 | self.layers = models.squeezenet1_1(weights=models.SqueezeNet1_1_Weights.IMAGENET1K_V1).features
71 | self.target_layers = [2, 5, 8, 10, 11, 12, 13]
72 | self.n_channels_list = [64, 128, 256, 384, 384, 512, 512]
73 |
74 | self.set_requires_grad(False)
75 |
76 |
77 | class AlexNet(BaseNet):
78 | def __init__(self):
79 | super(AlexNet, self).__init__()
80 |
81 | self.layers = models.alexnet(weights=models.AlexNet_Weights.IMAGENET1K_V1).features
82 | self.target_layers = [2, 5, 8, 10, 12]
83 | self.n_channels_list = [64, 192, 384, 256, 256]
84 |
85 | self.set_requires_grad(False)
86 |
87 |
88 | class VGG16(BaseNet):
89 | def __init__(self):
90 | super(VGG16, self).__init__()
91 |
92 | self.layers = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1).features
93 | self.target_layers = [4, 9, 16, 23, 30]
94 | self.n_channels_list = [64, 128, 256, 512, 512]
95 |
96 | self.set_requires_grad(False)
97 |
--------------------------------------------------------------------------------
/tools/evaluation/modules/utils.py:
--------------------------------------------------------------------------------
1 | from collections import OrderedDict
2 |
3 | import torch
4 |
5 |
6 | def normalize_activation(x, eps=1e-10):
7 | norm_factor = torch.sqrt(torch.sum(x ** 2, dim=1, keepdim=True))
8 | return x / (norm_factor + eps)
9 |
10 |
11 | def get_state_dict(net_type: str = 'alex', version: str = '0.1'):
12 | # build url
13 | url = 'https://raw.githubusercontent.com/richzhang/PerceptualSimilarity/' \
14 | + f'master/lpips/weights/v{version}/{net_type}.pth'
15 |
16 | # download
17 | old_state_dict = torch.hub.load_state_dict_from_url(
18 | url, progress=True,
19 | map_location=None if torch.cuda.is_available() else torch.device('cpu')
20 | )
21 |
22 | # rename keys
23 | new_state_dict = OrderedDict()
24 | for key, val in old_state_dict.items():
25 | new_key = key
26 | new_key = new_key.replace('lin', '')
27 | new_key = new_key.replace('model.', '')
28 | new_state_dict[new_key] = val
29 |
30 | return new_state_dict
31 |
--------------------------------------------------------------------------------
/tools/export_json.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import tqdm
3 | import torch
4 | import mmcv
5 | from config import Config
6 | import sys
7 | import os
8 | currentPath = os.path.split(os.path.realpath(__file__))[0]
9 | sys.path.append(currentPath + '/..')
10 | from data_osm.dataset import semantic_dataset
11 | from data_osm.const import NUM_CLASSES
12 | from model import get_model
13 | from postprocess.vectorize import vectorize
14 | from collections import OrderedDict
15 | from tools.evaluation.iou import get_batch_iou
16 | import warnings
17 | warnings.filterwarnings("ignore")
18 | import matplotlib.pyplot as plt
19 | import os
20 | from PIL import Image
21 |
22 |
23 |
24 | def gen_dx_bx(xbound, ybound):
25 | dx = [row[2] for row in [xbound, ybound]]
26 | bx = [row[0] + row[2] / 2.0 for row in [xbound, ybound]]
27 | nx = [(row[1] - row[0]) / row[2] for row in [xbound, ybound]]
28 | return dx, bx, nx
29 | def onehot_encoding(logits, dim=1):
30 | max_idx = torch.argmax(logits, dim, keepdim=True)
31 | one_hot = logits.new_full(logits.shape, 0)
32 | one_hot.scatter_(dim, max_idx, 1)
33 | return one_hot
34 |
35 | def export_to_json(model, val_loader, angle_class, args):
36 | submission = {
37 | "meta": {
38 | "use_camera": True,
39 | "use_lidar": False,
40 | "use_radar": False,
41 | "use_external": False,
42 | "vector": True,
43 | },
44 | "results": {}
45 | } # todo: add mode
46 |
47 | dx, bx, nx = gen_dx_bx(args.xbound, args.ybound)
48 | count = 0
49 | model.eval()
50 | with torch.no_grad():
51 | for batchi, (imgs, trans, rots, intrins, post_trans, post_rots, lidar_data, lidar_mask, car_trans,
52 | yaw_pitch_roll, semantic_gt, instance_gt, direction_gt, osm_masks, osm_vectors, masked_map, timestamp,scene_id) in enumerate(tqdm.tqdm(val_loader)):
53 |
54 | segmentation, embedding, direction = model(imgs.cuda(), trans.cuda(), rots.cuda(), intrins.cuda(),
55 | post_trans.cuda(), post_rots.cuda(), lidar_data.cuda(),
56 | lidar_mask.cuda(), car_trans.cuda(), yaw_pitch_roll.cuda(), osm_masks.float().cuda())
57 |
58 | for si in range(segmentation.shape[0]):
59 | coords, confidences, line_types = vectorize(segmentation[si], embedding[si], direction[si], angle_class)
60 | count += 1
61 | vectors = []
62 | for coord, confidence, line_type in zip(coords, confidences, line_types):
63 | vector = {'pts': coord * dx + bx, 'pts_num': len(coord), "type": line_type, "confidence_level": confidence}
64 | vectors.append(vector)
65 | rec = val_loader.dataset.samples[batchi * val_loader.batch_size + si]
66 | submission['results'][rec['token']] = vectors
67 | mmcv.dump(submission, args.result_path)
68 |
69 |
70 | def main(args):
71 | data_conf = {
72 | 'num_channels': NUM_CLASSES + 1,
73 | 'image_size': cfg.image_size,
74 | 'xbound': cfg.xbound,
75 | 'ybound': cfg.ybound,
76 | 'zbound': cfg.zbound,
77 | 'dbound': cfg.dbound,
78 | 'thickness': cfg.thickness,
79 | 'angle_class': cfg.angle_class,
80 | 'patch_w': cfg.patch_w,
81 | 'patch_h': cfg.patch_h,
82 | 'mask_ratio': cfg.mask_ratio,
83 | 'mask_flag': cfg.mask_flag,
84 | 'sd_map_path': cfg.sd_map_path,
85 | }
86 |
87 | train_loader, val_loader = semantic_dataset(args, args.version, args.dataroot, data_conf,
88 | args.batch_size, args.nworkers, cfg.dataset)
89 | model = get_model(args, data_conf, args.instance_seg, args.embedding_dim, args.direction_pred, args.angle_class)
90 | # import pdb; pdb.set_trace()
91 | state_dict_model_120 = torch.load(args.modelf)
92 | new_state_dict_120 = OrderedDict()
93 | for k, v in state_dict_model_120.items():
94 | name = k[7:]
95 | new_state_dict_120[name] = v
96 | model.load_state_dict(new_state_dict_120, strict=True)
97 | model.cuda()
98 |
99 | export_to_json(model, val_loader, args.angle_class, args)
100 |
101 |
102 | if __name__ == '__main__':
103 | parser = argparse.ArgumentParser(description='Export vector results to json.')
104 | parser.add_argument("config", help = 'path to config file', type=str, default=None)
105 | args = parser.parse_args()
106 | cfg = Config.fromfile(args.config)
107 | print("cfg: ", cfg)
108 | main(cfg)
109 |
--------------------------------------------------------------------------------
/tools/loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 |
6 | class FocalLoss(nn.Module):
7 | def __init__(self, alpha=1, gamma=2, reduce='mean'):
8 | super(FocalLoss, self).__init__()
9 | self.alpha = alpha
10 | self.gamma = gamma
11 | self.reduce = reduce
12 |
13 | def forward(self, inputs, targets):
14 | BCE_loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction='none')
15 | pt = torch.exp(-BCE_loss)
16 | F_loss = self.alpha * (1 - pt) ** self.gamma * BCE_loss
17 |
18 | if self.reduce == 'mean':
19 | return torch.mean(F_loss)
20 | elif self.reduce == 'sum':
21 | return torch.sum(F_loss)
22 | else:
23 | raise NotImplementedError
24 |
25 |
26 | class SimpleLoss(torch.nn.Module):
27 | def __init__(self, pos_weight):
28 | super(SimpleLoss, self).__init__()
29 | self.loss_fn = torch.nn.BCEWithLogitsLoss(pos_weight=torch.Tensor([pos_weight]))
30 |
31 | def forward(self, ypred, ytgt):
32 | loss = self.loss_fn(ypred, ytgt)
33 | return loss
34 |
35 |
36 | class DiscriminativeLoss(nn.Module):
37 | def __init__(self, embed_dim, delta_v, delta_d):
38 | super(DiscriminativeLoss, self).__init__()
39 | self.embed_dim = embed_dim
40 | self.delta_v = delta_v
41 | self.delta_d = delta_d
42 |
43 | def forward(self, embedding, seg_gt):
44 | if embedding is None:
45 | return 0, 0, 0
46 | bs = embedding.shape[0]
47 |
48 | var_loss = torch.tensor(0, dtype=embedding.dtype, device=embedding.device)
49 | dist_loss = torch.tensor(0, dtype=embedding.dtype, device=embedding.device)
50 | reg_loss = torch.tensor(0, dtype=embedding.dtype, device=embedding.device)
51 |
52 | for b in range(bs):
53 | embedding_b = embedding[b] # (embed_dim, H, W)
54 | seg_gt_b = seg_gt[b]
55 |
56 | labels = torch.unique(seg_gt_b)
57 | labels = labels[labels != 0]
58 | num_lanes = len(labels)
59 | if num_lanes == 0:
60 | # please refer to issue here: https://github.com/harryhan618/LaneNet/issues/12
61 | _nonsense = embedding.sum()
62 | _zero = torch.zeros_like(_nonsense)
63 | var_loss = var_loss + _nonsense * _zero
64 | dist_loss = dist_loss + _nonsense * _zero
65 | reg_loss = reg_loss + _nonsense * _zero
66 | continue
67 |
68 | centroid_mean = []
69 | for lane_idx in labels:
70 | seg_mask_i = (seg_gt_b == lane_idx)
71 | if not seg_mask_i.any():
72 | continue
73 | embedding_i = embedding_b[:, seg_mask_i]
74 |
75 | mean_i = torch.mean(embedding_i, dim=1)
76 | centroid_mean.append(mean_i)
77 |
78 | # ---------- var_loss -------------
79 | var_loss = var_loss + torch.mean(F.relu(torch.norm(embedding_i-mean_i.reshape(self.embed_dim, 1), dim=0) - self.delta_v) ** 2) / num_lanes
80 | centroid_mean = torch.stack(centroid_mean) # (n_lane, embed_dim)
81 |
82 | if num_lanes > 1:
83 | centroid_mean1 = centroid_mean.reshape(-1, 1, self.embed_dim)
84 | centroid_mean2 = centroid_mean.reshape(1, -1, self.embed_dim)
85 | dist = torch.norm(centroid_mean1-centroid_mean2, dim=2) # shape (num_lanes, num_lanes)
86 | dist = dist + torch.eye(num_lanes, dtype=dist.dtype, device=dist.device) * self.delta_d # diagonal elements are 0, now mask above delta_d
87 |
88 | # divided by two for double calculated loss above, for implementation convenience
89 | dist_loss = dist_loss + torch.sum(F.relu(-dist + self.delta_d)**2) / (num_lanes * (num_lanes-1)) / 2
90 |
91 | # reg_loss is not used in original paper
92 | # reg_loss = reg_loss + torch.mean(torch.norm(centroid_mean, dim=1))
93 |
94 | var_loss = var_loss / bs
95 | dist_loss = dist_loss / bs
96 | reg_loss = reg_loss / bs
97 | return var_loss, dist_loss, reg_loss
98 |
99 |
100 | def calc_loss():
101 | pass
102 |
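As a quick smoke test (illustrative only; the import path assumes the repo root is on sys.path, and the delta / weight values are made up rather than taken from any config), the three losses above can be exercised with random tensors:

import torch
from tools.loss import SimpleLoss, DiscriminativeLoss, FocalLoss

seg_logits = torch.randn(2, 4, 200, 400)                 # (B, num_channels, H, W)
seg_gt = torch.randint(0, 2, (2, 4, 200, 400)).float()
embedding = torch.randn(2, 2, 200, 400)                  # (B, embed_dim, H, W)
instance_gt = torch.randint(0, 5, (2, 200, 400))         # 0 = background, 1..4 = lane instances

print(SimpleLoss(pos_weight=2.13)(seg_logits, seg_gt))
print(FocalLoss()(seg_logits, seg_gt))
var_loss, dist_loss, reg_loss = DiscriminativeLoss(embed_dim=2, delta_v=0.5, delta_d=3.0)(embedding, instance_gt)
print(var_loss, dist_loss, reg_loss)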
--------------------------------------------------------------------------------
/tools/postprocess/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jike5/P-MapNet/b8b4cf2295ee75826046eef9cfa12b107fb43619/tools/postprocess/__init__.py
--------------------------------------------------------------------------------
/tools/postprocess/cluster.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | # @Time : 18-5-30 上午10:04
4 | # @Author : MaybeShewill-CV
5 | # @Site : https://github.com/MaybeShewill-CV/lanenet-lane-detection
6 | # @File : lanenet_postprocess.py
7 | # @IDE: PyCharm Community Edition
8 | """
9 | LaneNet model post process
10 | """
11 | import matplotlib.pyplot as plt
12 | import cv2
13 | import numpy as np
14 |
15 | from sklearn.cluster import DBSCAN
16 | from sklearn.preprocessing import StandardScaler
17 |
18 |
19 | def _morphological_process(image, mode='MORPH_CLOSE', kernel_size=5):
20 | """
21 |     morphological process (close or open, selected by mode) to fill holes in the binary segmentation result
22 |     :param image: single-channel binary image
23 |     :param kernel_size: structuring element size
24 |     :return: processed image
25 | """
26 | if len(image.shape) == 3:
27 | raise ValueError('Binary segmentation result image should be a single channel image')
28 |
29 |     if image.dtype != np.uint8:
30 | image = np.array(image, np.uint8)
31 |
32 |     # close operation to fill holes
33 | kernel = cv2.getStructuringElement(shape=cv2.MORPH_ELLIPSE, ksize=(kernel_size, kernel_size))
34 | if mode == 'MORPH_CLOSE':
35 | closing = cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel, iterations=1)
36 | elif mode == 'MORPH_OPEN':
37 | closing = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel, iterations=1)
38 | else:
39 | closing = image
40 | return closing
41 |
42 |
43 | def _connect_components_analysis(image):
44 | """
45 |     connected components analysis to remove small components
46 | :param image:
47 | :return:
48 | """
49 | if len(image.shape) == 3:
50 | gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
51 | else:
52 | gray_image = image
53 |
54 | return cv2.connectedComponentsWithStats(gray_image, connectivity=8, ltype=cv2.CV_32S)
55 |
56 |
57 | class _LaneFeat(object):
58 |     """
59 |     Container for a single lane: embedding features, pixel coordinates and class id
60 |     """
61 | def __init__(self, feat, coord, class_id=-1):
62 | """
63 | lane feat object
64 | :param feat: lane embeddng feats [feature_1, feature_2, ...]
65 | :param coord: lane coordinates [x, y]
66 | :param class_id: lane class id
67 | """
68 | self._feat = feat
69 | self._coord = coord
70 | self._class_id = class_id
71 |
72 | @property
73 | def feat(self):
74 | """
75 |
76 | :return:
77 | """
78 | return self._feat
79 |
80 | @feat.setter
81 | def feat(self, value):
82 | """
83 |
84 | :param value:
85 | :return:
86 | """
87 | if not isinstance(value, np.ndarray):
88 | value = np.array(value, dtype=np.float64)
89 |
90 |         if value.dtype != np.float64:
91 | value = np.array(value, dtype=np.float64)
92 |
93 | self._feat = value
94 |
95 | @property
96 | def coord(self):
97 | """
98 |
99 | :return:
100 | """
101 | return self._coord
102 |
103 | @coord.setter
104 | def coord(self, value):
105 | """
106 |
107 | :param value:
108 | :return:
109 | """
110 | if not isinstance(value, np.ndarray):
111 | value = np.array(value)
112 |
113 | if value.dtype != np.int32:
114 | value = np.array(value, dtype=np.int32)
115 |
116 | self._coord = value
117 |
118 | @property
119 | def class_id(self):
120 | """
121 |
122 | :return:
123 | """
124 | return self._class_id
125 |
126 | @class_id.setter
127 | def class_id(self, value):
128 | """
129 |
130 | :param value:
131 | :return:
132 | """
133 | if not isinstance(value, np.int64):
134 | raise ValueError('Class id must be integer')
135 |
136 | self._class_id = value
137 |
138 |
139 | class _LaneNetCluster(object):
140 | """
141 | Instance segmentation result cluster
142 | """
143 |
144 | def __init__(self, dbscan_eps=0.35, postprocess_min_samples=200):
145 |         """
146 |         Initialize DBSCAN parameters (eps and min_samples) used for instance clustering
147 |         """
148 | self.dbscan_eps = dbscan_eps
149 | self.postprocess_min_samples = postprocess_min_samples
150 |
151 | def _embedding_feats_dbscan_cluster(self, embedding_image_feats):
152 | """
153 | dbscan cluster
154 | :param embedding_image_feats:
155 | :return:
156 | """
157 | from sklearn.cluster import MeanShift
158 |
159 | db = DBSCAN(eps=self.dbscan_eps, min_samples=self.postprocess_min_samples)
160 | # db = MeanShift()
161 | try:
162 | features = StandardScaler().fit_transform(embedding_image_feats)
163 | db.fit(features)
164 | except Exception as err:
165 | # print(err)
166 | ret = {
167 | 'origin_features': None,
168 | 'cluster_nums': 0,
169 | 'db_labels': None,
170 | 'unique_labels': None,
171 | 'cluster_center': None
172 | }
173 | return ret
174 | db_labels = db.labels_
175 | unique_labels = np.unique(db_labels)
176 |
177 | num_clusters = len(unique_labels)
178 | # cluster_centers = db.components_
179 |
180 | ret = {
181 | 'origin_features': features,
182 | 'cluster_nums': num_clusters,
183 | 'db_labels': db_labels,
184 | 'unique_labels': unique_labels,
185 | # 'cluster_center': cluster_centers
186 | }
187 |
188 | return ret
189 |
190 | @staticmethod
191 | def _get_lane_embedding_feats(binary_seg_ret, instance_seg_ret):
192 | """
193 |         get lane embedding features according to the binary seg result
194 | :param binary_seg_ret:
195 | :param instance_seg_ret:
196 | :return:
197 | """
198 | idx = np.where(binary_seg_ret == 255)
199 | lane_embedding_feats = instance_seg_ret[idx]
200 | # idx_scale = np.vstack((idx[0] / 256.0, idx[1] / 512.0)).transpose()
201 | # lane_embedding_feats = np.hstack((lane_embedding_feats, idx_scale))
202 | lane_coordinate = np.vstack((idx[1], idx[0])).transpose()
203 |
204 | assert lane_embedding_feats.shape[0] == lane_coordinate.shape[0]
205 |
206 | ret = {
207 | 'lane_embedding_feats': lane_embedding_feats,
208 | 'lane_coordinates': lane_coordinate
209 | }
210 |
211 | return ret
212 |
213 | def apply_lane_feats_cluster(self, binary_seg_result, instance_seg_result):
214 | """
215 |
216 | :param binary_seg_result:
217 | :param instance_seg_result:
218 | :return:
219 | """
220 | # get embedding feats and coords
221 | get_lane_embedding_feats_result = self._get_lane_embedding_feats(
222 | binary_seg_ret=binary_seg_result,
223 | instance_seg_ret=instance_seg_result
224 | )
225 |
226 | # dbscan cluster
227 | dbscan_cluster_result = self._embedding_feats_dbscan_cluster(
228 | embedding_image_feats=get_lane_embedding_feats_result['lane_embedding_feats']
229 | )
230 |
231 |         mask = np.zeros(shape=[binary_seg_result.shape[0], binary_seg_result.shape[1]], dtype=np.int64)
232 | db_labels = dbscan_cluster_result['db_labels']
233 | unique_labels = dbscan_cluster_result['unique_labels']
234 | coord = get_lane_embedding_feats_result['lane_coordinates']
235 |
236 | if db_labels is None:
237 | return None, None
238 |
239 | lane_coords = []
240 |
241 | for index, label in enumerate(unique_labels.tolist()):
242 | if label == -1:
243 | continue
244 | idx = np.where(db_labels == label)
245 | pix_coord_idx = tuple((coord[idx][:, 1], coord[idx][:, 0]))
246 | mask[pix_coord_idx] = label + 1
247 | lane_coords.append(coord[idx])
248 |
249 | return mask, lane_coords
250 |
251 |
252 | class LaneNetPostProcessor(object):
253 | """
254 | lanenet post process for lane generation
255 | """
256 | def __init__(self, dbscan_eps=0.35, postprocess_min_samples=200):
257 | """
258 |         :param dbscan_eps: DBSCAN eps used for embedding clustering
259 |         :param postprocess_min_samples: DBSCAN min_samples used for embedding clustering
260 | """
261 |
262 | self._cluster = _LaneNetCluster(dbscan_eps, postprocess_min_samples)
263 |
264 | def postprocess(self, binary_seg_result, mode, instance_seg_result=None, min_area_threshold=100):
265 | """
266 |
267 |         :param binary_seg_result: binary segmentation mask
268 |         :param mode: morphological mode, 'MORPH_CLOSE' or 'MORPH_OPEN'
269 |         :param instance_seg_result: per-pixel instance embedding map
270 |         :param min_area_threshold: connected components smaller than this are removed
271 |         :return: (mask_image, lane_coords)
272 |         :rtype: tuple
273 |         """
274 | # convert binary_seg_result
275 | binary_seg_result = np.array(binary_seg_result * 255, dtype=np.uint8)
276 |
277 |         # apply image morphology operation to fill in the holes and reduce the small areas
278 | morphological_ret = _morphological_process(binary_seg_result, mode, kernel_size=5)
279 |
280 | connect_components_analysis_ret = _connect_components_analysis(image=morphological_ret)
281 |
282 | labels = connect_components_analysis_ret[1]
283 | stats = connect_components_analysis_ret[2]
284 | for index, stat in enumerate(stats):
285 | if stat[4] <= min_area_threshold:
286 | idx = np.where(labels == index)
287 | morphological_ret[idx] = 0
288 |
289 | # apply embedding features cluster
290 | mask_image, lane_coords = self._cluster.apply_lane_feats_cluster(
291 | binary_seg_result=morphological_ret,
292 | instance_seg_result=instance_seg_result
293 | )
294 |
295 | return mask_image, lane_coords
296 |
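A standalone toy example of the clustering path above (all shapes and values are illustrative; the import assumes the repo root is on sys.path): a single elongated blob with a constant 2-dim embedding comes back as one instance.

import numpy as np
from tools.postprocess.cluster import LaneNetPostProcessor

binary = np.zeros((200, 400), dtype=np.uint8)
binary[50:60, 50:350] = 1                               # one lane-like blob, well above min_area_threshold
embedding = np.zeros((200, 400, 2), dtype=np.float32)   # constant embedding -> a single DBSCAN cluster

processor = LaneNetPostProcessor(dbscan_eps=1.5, postprocess_min_samples=50)
mask, lane_coords = processor.postprocess(binary, 'MORPH_CLOSE', embedding)
print(mask.shape, len(lane_coords), lane_coords[0].shape)   # e.g. (200, 400) 1 (3000, 2)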
--------------------------------------------------------------------------------
/tools/postprocess/connect.py:
--------------------------------------------------------------------------------
1 | import math
2 | import random
3 | import numpy as np
4 | from copy import deepcopy
5 |
6 | import torch
7 |
8 |
9 | def sort_points_by_dist(coords):
10 | coords = coords.astype('float')
11 | num_points = coords.shape[0]
12 | diff_matrix = np.repeat(coords[:, None], num_points, 1) - coords
13 | # x_range = np.max(np.abs(diff_matrix[..., 0]))
14 | # y_range = np.max(np.abs(diff_matrix[..., 1]))
15 | # diff_matrix[..., 1] *= x_range / y_range
16 | dist_matrix = np.sqrt(((diff_matrix) ** 2).sum(-1))
17 | dist_matrix_full = deepcopy(dist_matrix)
18 | direction_matrix = diff_matrix / (dist_matrix.reshape(num_points, num_points, 1) + 1e-6)
19 |
20 | sorted_points = [coords[0]]
21 | sorted_indices = [0]
22 | dist_matrix[:, 0] = np.inf
23 |
24 | last_direction = np.array([0, 0])
25 | for i in range(num_points - 1):
26 | last_idx = sorted_indices[-1]
27 | dist_metric = dist_matrix[last_idx] - 0 * (last_direction * direction_matrix[last_idx]).sum(-1)
28 | idx = np.argmin(dist_metric) % num_points
29 | new_direction = direction_matrix[last_idx, idx]
30 | if dist_metric[idx] > 3 and min(dist_matrix_full[idx][sorted_indices]) < 5:
31 | dist_matrix[:, idx] = np.inf
32 | continue
33 | if dist_metric[idx] > 10 and i > num_points * 0.9:
34 | break
35 | sorted_points.append(coords[idx])
36 | sorted_indices.append(idx)
37 | dist_matrix[:, idx] = np.inf
38 | last_direction = new_direction
39 |
40 | return np.stack(sorted_points, 0)
41 |
42 |
43 | def connect_by_step(coords, direction_mask, sorted_points, taken_direction, step=5, per_deg=10):
44 | while True:
45 | last_point = tuple(np.flip(sorted_points[-1]))
46 | if not taken_direction[last_point][0]:
47 | direction = direction_mask[last_point][0]
48 | taken_direction[last_point][0] = True
49 | elif not taken_direction[last_point][1]:
50 | direction = direction_mask[last_point][1]
51 | taken_direction[last_point][1] = True
52 | else:
53 | break
54 |
55 | if direction == -1:
56 | continue
57 |
58 | deg = per_deg * direction
59 | vector_to_target = step * np.array([np.cos(np.deg2rad(deg)), np.sin(np.deg2rad(deg))])
60 | last_point = deepcopy(sorted_points[-1])
61 |
62 | # NMS
63 | coords = coords[np.linalg.norm(coords - last_point, axis=-1) > step-1]
64 |
65 | if len(coords) == 0:
66 | break
67 |
68 | target_point = np.array([last_point[0] + vector_to_target[0], last_point[1] + vector_to_target[1]])
69 | dist_metric = np.linalg.norm(coords - target_point, axis=-1)
70 | idx = np.argmin(dist_metric)
71 |
72 | if dist_metric[idx] > 50:
73 | continue
74 |
75 | sorted_points.append(deepcopy(coords[idx]))
76 |
77 | vector_to_next = coords[idx] - last_point
78 | deg = np.rad2deg(math.atan2(vector_to_next[1], vector_to_next[0]))
79 | inverse_deg = (180 + deg) % 360
80 | target_direction = per_deg * direction_mask[tuple(np.flip(sorted_points[-1]))]
81 | tmp = np.abs(target_direction - inverse_deg)
82 | tmp = torch.min(tmp, 360 - tmp)
83 | taken = np.argmin(tmp)
84 | taken_direction[tuple(np.flip(sorted_points[-1]))][taken] = True
85 |
86 |
87 | def connect_by_direction(coords, direction_mask, step=5, per_deg=10):
88 | sorted_points = [deepcopy(coords[random.randint(0, coords.shape[0]-1)])]
89 |     taken_direction = np.zeros_like(direction_mask, dtype=bool)
90 |
91 | connect_by_step(coords, direction_mask, sorted_points, taken_direction, step, per_deg)
92 | sorted_points.reverse()
93 | connect_by_step(coords, direction_mask, sorted_points, taken_direction, step, per_deg)
94 | return np.stack(sorted_points, 0)
95 |
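A tiny illustration (not from the repo; the import path assumes the repo root is on sys.path) of sort_points_by_dist: shuffled collinear points come back ordered by a greedy nearest-neighbour walk starting from the first point.

import numpy as np
from tools.postprocess.connect import sort_points_by_dist

pts = np.array([[0, 0], [4, 0], [1, 0], [3, 0], [2, 0]])
print(sort_points_by_dist(pts))
# [[0. 0.]
#  [1. 0.]
#  [2. 0.]
#  [3. 0.]
#  [4. 0.]]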
--------------------------------------------------------------------------------
/tools/postprocess/vectorize.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | import torch.nn as nn
4 |
5 | from .cluster import LaneNetPostProcessor
6 | from .connect import sort_points_by_dist, connect_by_direction
7 |
8 |
9 | def onehot_encoding(logits, dim=0):
10 | max_idx = torch.argmax(logits, dim, keepdim=True)
11 | one_hot = logits.new_full(logits.shape, 0)
12 | one_hot.scatter_(dim, max_idx, 1)
13 | return one_hot
14 |
15 |
16 | def onehot_encoding_spread(logits, dim=1):
17 | max_idx = torch.argmax(logits, dim, keepdim=True)
18 | one_hot = logits.new_full(logits.shape, 0)
19 | one_hot.scatter_(dim, max_idx, 1)
20 | one_hot.scatter_(dim, torch.clamp(max_idx-1, min=0), 1)
21 | one_hot.scatter_(dim, torch.clamp(max_idx-2, min=0), 1)
22 | one_hot.scatter_(dim, torch.clamp(max_idx+1, max=logits.shape[dim]-1), 1)
23 | one_hot.scatter_(dim, torch.clamp(max_idx+2, max=logits.shape[dim]-1), 1)
24 |
25 | return one_hot
26 |
27 |
28 | def get_pred_top2_direction(direction, dim=1):
29 | direction = torch.softmax(direction, dim)
30 | idx1 = torch.argmax(direction, dim)
31 | idx1_onehot_spread = onehot_encoding_spread(direction, dim)
32 | idx1_onehot_spread = idx1_onehot_spread.bool()
33 | direction[idx1_onehot_spread] = 0
34 | idx2 = torch.argmax(direction, dim)
35 | direction = torch.stack([idx1, idx2], dim) - 1 #torch.Size([200, 400, 2])
36 | return direction
37 |
38 |
39 | def vectorize(segmentation, embedding, direction, angle_class, morpho_mode='MORPH_CLOSE'):
40 | segmentation = segmentation.softmax(0)
41 | embedding = embedding.cpu()
42 | direction = direction.permute(1, 2, 0).cpu()
43 | direction = get_pred_top2_direction(direction, dim=-1)
44 |
45 | max_pool_1 = nn.MaxPool2d((1, 5), padding=(0, 2), stride=1)
46 | avg_pool_1 = nn.AvgPool2d((9, 5), padding=(4, 2), stride=1)
47 | max_pool_2 = nn.MaxPool2d((5, 1), padding=(2, 0), stride=1)
48 | avg_pool_2 = nn.AvgPool2d((5, 9), padding=(2, 4), stride=1)
49 | post_processor = LaneNetPostProcessor(dbscan_eps=1.5, postprocess_min_samples=50)
50 |
51 | oh_pred = onehot_encoding(segmentation).cpu().numpy()
52 | confidences = []
53 | line_types = []
54 | simplified_coords = []
55 | for i in range(1, oh_pred.shape[0]):
56 | single_mask = oh_pred[i].astype('uint8')
57 | single_embedding = embedding.permute(1, 2, 0)
58 |
59 | single_class_inst_mask, single_class_inst_coords = post_processor.postprocess(single_mask, morpho_mode, single_embedding)
60 | if single_class_inst_mask is None:
61 | continue
62 |
63 | num_inst = len(single_class_inst_coords)
64 |
65 | prob = segmentation[i]
66 | prob[single_class_inst_mask == 0] = 0
67 | nms_mask_1 = ((max_pool_1(prob.unsqueeze(0))[0] - prob) < 0.0001).cpu().numpy()
68 | avg_mask_1 = avg_pool_1(prob.unsqueeze(0))[0].cpu().numpy()
69 | nms_mask_2 = ((max_pool_2(prob.unsqueeze(0))[0] - prob) < 0.0001).cpu().numpy()
70 | avg_mask_2 = avg_pool_2(prob.unsqueeze(0))[0].cpu().numpy()
71 | vertical_mask = avg_mask_1 > avg_mask_2
72 | horizontal_mask = ~vertical_mask
73 | nms_mask = (vertical_mask & nms_mask_1) | (horizontal_mask & nms_mask_2)
74 |
75 | for j in range(1, num_inst + 1):
76 | full_idx = np.where((single_class_inst_mask == j))
77 | full_lane_coord = np.vstack((full_idx[1], full_idx[0])).transpose()
78 | confidence = prob[single_class_inst_mask == j].mean().item()
79 |
80 | idx = np.where(nms_mask & (single_class_inst_mask == j))
81 | if len(idx[0]) == 0:
82 | continue
83 | lane_coordinate = np.vstack((idx[1], idx[0])).transpose()
84 |
85 | range_0 = np.max(full_lane_coord[:, 0]) - np.min(full_lane_coord[:, 0])
86 | range_1 = np.max(full_lane_coord[:, 1]) - np.min(full_lane_coord[:, 1])
87 | if range_0 > range_1:
88 | lane_coordinate = sorted(lane_coordinate, key=lambda x: x[0])
89 | else:
90 | lane_coordinate = sorted(lane_coordinate, key=lambda x: x[1])
91 |
92 | lane_coordinate = np.stack(lane_coordinate)
93 | lane_coordinate = sort_points_by_dist(lane_coordinate)
94 | lane_coordinate = lane_coordinate.astype('int32')
95 | lane_coordinate = connect_by_direction(lane_coordinate, direction, step=7, per_deg=360 / angle_class)
96 |
97 | simplified_coords.append(lane_coordinate)
98 | confidences.append(confidence)
99 | line_types.append(i-1)
100 |
101 | return simplified_coords, confidences, line_types
102 |
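A quick sanity check (illustrative only; the import assumes the repo root is on sys.path) of the onehot_encoding helper used above: per pixel, the argmax channel becomes 1 and all other channels 0.

import torch
from tools.postprocess.vectorize import onehot_encoding

logits = torch.tensor([[[0.1]], [[2.0]], [[-1.0]]])   # (C=3, H=1, W=1)
print(onehot_encoding(logits, dim=0).squeeze())       # tensor([0., 1., 0.])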
--------------------------------------------------------------------------------
/tools/vis_map.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.append('.')
3 | import argparse
4 | import tqdm
5 | import os
6 | import cv2
7 | import torch
8 | from tools.evaluation.iou import get_batch_iou
9 | from tools.config import Config
10 | from data_osm.dataset import semantic_dataset
11 | from data_osm.const import NUM_CLASSES
12 | from model import get_model
13 | from postprocess.vectorize import vectorize
14 | from collections import OrderedDict
15 | import matplotlib.pyplot as plt
16 | import numpy as np
17 | from PIL import Image
18 | from tools.evaluation import lpips
19 | from data_osm.image import denormalize_img
20 | import warnings
21 | warnings.filterwarnings("ignore")
22 |
23 |
24 | Nu_SCENE_CANDIDATE = [
25 | 'scene-0555', 'scene-0556', 'scene-0557', 'scene-0558',
26 |     'scene-1065', 'scene-1066', 'scene-1067', 'scene-1068',
27 | 'scene-0275', 'scene-0276', 'scene-0277', 'scene-0278',
28 | 'scene-0519', 'scene-0520', 'scene-0521', 'scene-0522',
29 | 'scene-0911', 'scene-0912', 'scene-0913', 'scene-0914',
30 | ]
31 |
32 | AV2_SCENE_CANDIDATE = [
33 | 'f1275002-842e-3571-8f7d-05816bc7cf56',
34 | 'ba67827f-6b99-3d2a-96ab-7c829eb999bb',
35 | 'bf360aeb-1bbd-3c1e-b143-09cf83e4f2e4',
36 | 'ded5ef6e-46ea-3a66-9180-18a6fa0a2db4',
37 | 'e8c9fd64-fdd2-422d-a2a2-6f47500d1d12',
38 | '1f434d15-8745-3fba-9c3e-ccb026688397',
39 | '6f128f23-ee40-3ea9-8c50-c9cdb9d3e8b6',
40 | ]
41 |
42 | SCENE_CANDIDATE = None
43 |
44 | def onehot_encoding(logits, dim=1):
45 | max_idx = torch.argmax(logits, dim, keepdim=True)
46 | one_hot = logits.new_full(logits.shape, 0)
47 | one_hot.scatter_(dim, max_idx, 1)
48 | return one_hot
49 |
50 |
51 | def vis(semantic, semantic_gt, sd_map, time, scene_id, save_path, with_gt=False):
52 | car_img = Image.open('icon/car_gray.png')
53 | semantic = onehot_encoding(semantic)
54 | semantic = semantic.clone().cpu().numpy()
55 | semantic[semantic < 0.1] = np.nan
56 | semantic_gt_mask = semantic_gt.clone().cpu().numpy()
57 | semantic_gt_mask[semantic_gt < 0.1] = np.nan
58 | sd_map = sd_map.cpu().numpy()
59 | sd_map[sd_map < 0.1] = np.nan
60 |
61 | b, c, h, w = semantic.shape
62 | alpha = 0.8
63 | dpi = 600
64 | divier = 'Blues'
65 | ped_crossing = 'Greens'
66 | boundary = 'Purples'
67 | vmax = 1
68 | for i in range(semantic.shape[0]):
69 | if scene_id[i] not in SCENE_CANDIDATE:
70 | continue
71 | save_path_seg = os.path.join(save_path, f'{scene_id[i]}', f'{time[i]}')
72 | if not os.path.exists(save_path_seg):
73 | os.makedirs(save_path_seg)
74 | # vis hdmap gt with sd map
75 | imname = os.path.join(save_path_seg, 'gt_sd_map.png')
76 | if not os.path.exists(imname):
77 | plt.figure(figsize=(w*2/100, 4))
78 | plt.imshow(semantic_gt_mask[i][1]*0.5, vmin=0, cmap= divier, vmax=vmax, alpha=alpha)
79 | plt.imshow(semantic_gt_mask[i][2]*0.5, vmin=0, cmap= ped_crossing, vmax=vmax, alpha=alpha)
80 | plt.imshow(semantic_gt_mask[i][3]*0.5, vmin=0, cmap=boundary, vmax=vmax, alpha=alpha)
81 | plt.imshow(sd_map[i][0]*0.8, vmin=0, cmap='Greys', vmax=1, alpha=0.9)
82 | plt.xlim(0, w)
83 | plt.ylim(0, h)
84 | plt.axis('off')
85 | plt.tight_layout()
86 | print('saving', imname)
87 | plt.savefig(imname, bbox_inches='tight', format='png', dpi=dpi)
88 | plt.close()
89 |
90 | imname = os.path.join(save_path_seg, 'sd_map.png')
91 | if not os.path.exists(imname):
92 | plt.figure(figsize=(w*2/100, 4))
93 | plt.imshow(sd_map[i][0]*0.8, vmin=0, cmap='Greys', vmax=1, alpha=0.9)
94 | plt.xlim(0, w)
95 | plt.ylim(0, h)
96 | plt.axis('off')
97 | plt.tight_layout()
98 | print('saving', imname)
99 | plt.savefig(imname, bbox_inches='tight', format='png', dpi=dpi)
100 | plt.close()
101 |
102 | # vis pred hdmap
103 | imname = os.path.join(save_path_seg, 'pred_map.png')
104 | if not os.path.exists(imname):
105 | plt.figure(figsize=(w*2/100, 4))
106 | plt.imshow(semantic[i][1]*0.5, vmin=0, cmap= divier, vmax=vmax, alpha=alpha)
107 | plt.imshow(semantic[i][2]*0.5, vmin=0, cmap= ped_crossing, vmax=vmax, alpha=alpha)
108 | plt.imshow(semantic[i][3]*0.5, vmin=0, cmap=boundary, vmax=vmax, alpha=alpha)
109 | plt.xlim(0, w)
110 | plt.ylim(0, h)
111 | plt.imshow(car_img, extent=[w//2-15, w//2+15, h//2-12, h//2+12])
112 | plt.axis('off')
113 | plt.tight_layout()
114 | print('saving', imname)
115 | plt.savefig(imname, bbox_inches='tight', format='png', dpi=dpi)
116 | plt.close()
117 |
118 | if with_gt:
119 | # vis hdmap gt
120 | imname = os.path.join(save_path_seg, 'gt_map.png')
121 | if not os.path.exists(imname):
122 | plt.figure(figsize=(w*2/100, 4))
123 | plt.imshow(semantic_gt_mask[i][1]*0.5, vmin=0, cmap=divier, vmax=vmax, alpha=alpha)
124 | plt.imshow(semantic_gt_mask[i][2]*0.5, vmin=0, cmap=ped_crossing, vmax=vmax, alpha=alpha)
125 | plt.imshow(semantic_gt_mask[i][3]*0.5, vmin=0, cmap=boundary, vmax=vmax, alpha=alpha)
126 | plt.xlim(0, w)
127 | plt.ylim(0, h)
128 | plt.imshow(car_img, extent=[w//2-15, w//2+15, h//2-12, h//2+12])
129 | plt.axis('off')
130 | plt.tight_layout()
131 | print('saving ', imname)
132 | plt.savefig(imname, bbox_inches='tight', format='png', dpi=dpi)
133 | plt.close()
134 |
135 |
136 | def vis_vec(coords, timestamp, scene_id, save_path, h, w):
137 | save_path_vec = os.path.join(save_path, 'vec', f'{scene_id}')
138 | if not os.path.exists(save_path_vec):
139 | os.makedirs(save_path_vec)
140 |
141 | car_img = Image.open('icon/car_gray.png')
142 |
143 | plt.figure(figsize=(w*2/100, 2))
144 | for coord in coords:
145 | plt.plot(coord[:, 0], coord[:, 1], linewidth=2)
146 |
147 | plt.xlim((0, w))
148 | plt.ylim((0, h))
149 | plt.axis('off')
150 | plt.grid(False)
151 | plt.imshow(car_img, extent=[w//2-15, w//2+15, h//2-12, h//2+12])
152 |
153 | img_name = os.path.join(save_path_vec, f'{timestamp}_vecz_.jpg')
154 | print('saving', img_name)
155 | plt.savefig(img_name)
156 | plt.close()
157 |
158 |
159 | def eval_vis_all(model, save_path, val_loader):
160 | model.eval()
161 | total_intersects = 0
162 | total_union = 0
163 | i=0
164 | with torch.no_grad():
165 | for (imgs, trans, rots, intrins, post_trans, post_rots, lidar_data, lidar_mask, car_trans,
166 | yaw_pitch_roll, semantic_gt, instance_gt, direction_gt, osm_masks, osm_vectors, masked_map, timestamps, scene_ids) in tqdm.tqdm(val_loader):
167 | # import pdb; pdb.set_trace()
168 | semantic, embedding, direction = model(imgs.cuda(), trans.cuda(), rots.cuda(), intrins.cuda(),
169 | post_trans.cuda(), post_rots.cuda(), lidar_data.cuda(),
170 | lidar_mask.cuda(), car_trans.cuda(), yaw_pitch_roll.cuda(), osm_masks.float().cuda())
171 |
172 | semantic_gt = semantic_gt.cuda().float()
173 | device = semantic_gt.device
174 | if semantic.device != device:
175 | semantic = semantic.to(device)
176 | intersects, union = get_batch_iou(onehot_encoding(semantic), semantic_gt)
177 | total_intersects += intersects
178 | total_union += union
179 | vis(semantic.cpu().float(), semantic_gt.cpu().float(), osm_masks.float(), timestamps, scene_ids, save_path, with_gt=True)
180 | i+=1
181 | return (total_intersects / (total_union + 1e-7))
182 |
183 | def main(cfg):
184 | # import pdb; pdb.set_trace()
185 | global SCENE_CANDIDATE
186 | SCENE_CANDIDATE = Nu_SCENE_CANDIDATE
187 | if 'dataset' in cfg:
188 | if cfg.dataset == 'av2':
189 | SCENE_CANDIDATE = AV2_SCENE_CANDIDATE
190 |
191 | data_conf = {
192 | 'num_channels': NUM_CLASSES + 1,
193 | 'image_size': cfg.image_size,
194 | 'xbound': cfg.xbound,
195 | 'ybound': cfg.ybound,
196 | 'zbound': cfg.zbound,
197 | 'dbound': cfg.dbound,
198 | 'thickness': cfg.thickness,
199 | 'angle_class': cfg.angle_class,
200 | 'patch_w': cfg.patch_w,
201 | 'patch_h': cfg.patch_h,
202 | 'mask_ratio': cfg.mask_ratio,
203 | 'mask_flag': cfg.mask_flag,
204 | 'sd_map_path': cfg.sd_map_path,
205 | }
206 |
207 | train_loader, val_loader = semantic_dataset(cfg, cfg.version, cfg.dataroot, data_conf,
208 | cfg.batch_size, cfg.nworkers, cfg.dataset)
209 | model = get_model(cfg, data_conf, cfg.instance_seg, cfg.embedding_dim, cfg.direction_pred, cfg.angle_class)
210 |
211 | state_dict_model = torch.load(cfg.modelf)
212 | new_state_dict = OrderedDict()
213 | for k, v in state_dict_model.items():
214 | name = k[7:]
215 | new_state_dict[name] = v
216 | # import pdb; pdb.set_trace()
217 | model.load_state_dict(new_state_dict, strict=True)
218 | model.cuda()
219 | if "vis_path" not in cfg:
220 | cfg.vis_path = os.path.join(cfg.logdir, "vis")
221 | eval_vis_all(model, cfg.vis_path, val_loader)
222 |
223 | if __name__ == '__main__':
224 |     parser = argparse.ArgumentParser(description='P-MapNet map visualization.')
225 | parser.add_argument("config", help = 'path to config file', type=str, default=None)
226 | args = parser.parse_args()
227 | cfg = Config.fromfile(args.config)
228 | main(cfg)
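The overlay plots in vis() rely on NaN-masking: pixels below a threshold are set to NaN so imshow leaves them blank, letting several class layers stack on one axis. A standalone illustration of that trick (the array, threshold and output path are made up):

import numpy as np
import matplotlib.pyplot as plt

layer = np.random.rand(50, 100)
layer[layer < 0.5] = np.nan        # same idea as `semantic[semantic < 0.1] = np.nan`
plt.imshow(layer, cmap='Blues', vmin=0, vmax=1, alpha=0.8)
plt.axis('off')
plt.savefig('overlay_demo.png', bbox_inches='tight', dpi=150)   # hypothetical output file
plt.close()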
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import sys
4 | import logging
5 | import time
6 | from tensorboardX import SummaryWriter
7 | import argparse
8 | import matplotlib.pyplot as plt
9 | import torch
10 | import torch.nn as nn
11 | from tools.config import Config
12 | from torch.optim.lr_scheduler import StepLR
13 | from tools.loss import SimpleLoss, DiscriminativeLoss
14 | from data_osm.dataset import semantic_dataset
15 | from data_osm.const import NUM_CLASSES
16 | from tools.evaluation.iou import get_batch_iou
17 | from tools.evaluation.angle_diff import calc_angle_diff
18 | from tools.eval import onehot_encoding, eval_iou
19 | from model.utils.map_mae_head import vit_base_patch8
20 | import warnings
21 | warnings.filterwarnings("ignore")
22 |
23 | import tqdm
24 | import pdb
25 | from PIL import Image
26 | from model import get_model
27 |
28 | from collections import OrderedDict
29 | import torch.nn.functional as F
30 | from sklearn import metrics
31 |
32 |
33 | def write_log(writer, ious, title, counter):
34 | writer.add_scalar(f'{title}/iou', torch.mean(ious[1:]), counter)
35 |
36 | for i, iou in enumerate(ious):
37 | writer.add_scalar(f'{title}/class_{i}/iou', iou, counter)
38 |
39 | def train(cfg):
40 | if not os.path.exists(cfg.logdir):
41 | os.makedirs(cfg.logdir)
42 | logname = time.strftime("%Y-%m-%d-%H_%M_%S", time.localtime(time.time()))
43 | logging.basicConfig(filename=os.path.join(cfg.logdir, logname+'.log'),
44 | filemode='w',
45 | format='%(asctime)s: %(message)s',
46 | datefmt='%Y-%m-%d %H:%M:%S',
47 | level=logging.INFO)
48 | logging.getLogger('shapely.geos').setLevel(logging.CRITICAL)
49 |
50 | logger = logging.getLogger()
51 | logger.addHandler(logging.StreamHandler(sys.stdout))
52 |
53 | data_conf = {
54 | 'num_channels': NUM_CLASSES + 1,
55 | 'image_size': cfg.image_size,
56 | 'xbound': cfg.xbound,
57 | 'ybound': cfg.ybound,
58 | 'zbound': cfg.zbound,
59 | 'dbound': cfg.dbound,
60 | 'thickness': cfg.thickness,
61 | 'angle_class': cfg.angle_class,
62 | 'patch_w': cfg.patch_w,
63 | 'patch_h': cfg.patch_h,
64 | 'mask_ratio': cfg.mask_ratio,
65 | 'mask_flag': cfg.mask_flag,
66 | 'sd_map_path': cfg.sd_map_path,
67 | }
68 |
69 | model = get_model(cfg, data_conf, cfg.instance_seg, cfg.embedding_dim, cfg.direction_pred, cfg.angle_class)
70 | # import pdb; pdb.set_trace()
71 | if "hd" in cfg.model:
72 | cfg.modelf_map = cfg.modelf_map if "modelf_map" in cfg else None
73 | cfg.modelf_mae = cfg.modelf_mae if "modelf_mae" in cfg else None
74 | if cfg.modelf_map:
75 | state_dict_model = torch.load(cfg.modelf_map)
76 | new_state_dict = OrderedDict()
77 | for k, v in state_dict_model.items():
78 | name = k[7:]
79 | new_state_dict[name] = v
80 | model.load_state_dict(new_state_dict, strict=False)
81 |
82 | if cfg.modelf_mae:
83 | state_dict_model = torch.load(cfg.modelf_mae)
84 | new_state_dict = OrderedDict()
85 | for k, v in state_dict_model.items():
86 | name = k.replace('module', 'mae_head')
87 | new_state_dict[name] = v
88 | model.load_state_dict(new_state_dict, strict=False)
89 |
90 | cfg.freeze_backbone = cfg.freeze_backbone if "freeze_backbone" in cfg else None
91 | if cfg.freeze_backbone:
92 | for name, param in model.named_parameters():
93 | if 'mae_head' not in name:
94 | param.requires_grad = False
95 |
96 | if 'resume' in cfg and cfg.resume is not None:
97 | print("Loading checkpoint from cfg.resume: ", cfg.resume)
98 | state_dict_model = torch.load(cfg.resume)
99 | new_state_dict = OrderedDict()
100 | for k, v in state_dict_model.items():
101 | name = k[7:]
102 | new_state_dict[name] = v
103 | model.load_state_dict(new_state_dict, strict=False)
104 |
105 | model = nn.DataParallel(model, device_ids=cfg.gpus)
106 | model.cuda(device=cfg.gpus[0])
107 | # import pdb; pdb.set_trace()
108 | train_loader, val_loader = semantic_dataset(cfg, cfg.version, cfg.dataroot, data_conf,
109 | cfg.batch_size, cfg.nworkers, cfg.dataset)
110 |
111 | opt = torch.optim.Adam(model.parameters(), lr=cfg.lr, weight_decay=cfg.weight_decay)
112 | sched = StepLR(opt, 3, 0.1)
113 | writer = SummaryWriter(logdir=cfg.logdir)
114 |
115 | loss_fn = SimpleLoss(cfg.pos_weight).cuda()
116 | embedded_loss_fn = DiscriminativeLoss(cfg.embedding_dim, cfg.delta_v, cfg.delta_d).cuda()
117 | direction_loss_fn = torch.nn.BCELoss(reduction='none')
118 |
119 | counter = 0
120 | last_idx = len(train_loader) - 1
121 | for epoch in range(cfg.nepochs):
122 | for batchi, (imgs, trans, rots, intrins, post_trans, post_rots, lidar_data, lidar_mask, car_trans,
123 | yaw_pitch_roll, semantic_gt, instance_gt, direction_gt, osm_masks, osm_vectors, masked_map, timestamps, scene_ids) in enumerate(train_loader):
124 | # import pdb; pdb.set_trace()
125 | t0 = time.time()
126 | opt.zero_grad()
127 | semantic, embedding, direction = model(imgs.cuda(), trans.cuda(), rots.cuda(), intrins.cuda(),
128 | post_trans.cuda(), post_rots.cuda(), lidar_data.cuda(),
129 | lidar_mask.cuda(), car_trans.cuda(), yaw_pitch_roll.cuda(), osm_masks.float().cuda())
130 |
131 | semantic_gt = semantic_gt.cuda().float()
132 | instance_gt = instance_gt.cuda()
133 |
134 | device = semantic_gt.device
135 | if semantic.device != device:
136 | semantic = semantic.to(device)
137 | embedding = embedding.to(device)
138 | direction = direction.to(device)
139 |
140 | seg_loss = loss_fn(semantic, semantic_gt)
141 | if cfg.instance_seg:
142 | var_loss, dist_loss, reg_loss = embedded_loss_fn(embedding, instance_gt)
143 | else:
144 | var_loss = 0
145 | dist_loss = 0
146 | reg_loss = 0
147 |
148 | if cfg.direction_pred:
149 | direction_gt = direction_gt.cuda()
150 | lane_mask = (1 - direction_gt[:, 0]).unsqueeze(1)
151 | direction_loss = direction_loss_fn(torch.softmax(direction, 1), direction_gt)
152 | direction_loss = (direction_loss * lane_mask).sum() / (lane_mask.sum() * direction_loss.shape[1] + 1e-6)
153 | angle_diff = calc_angle_diff(direction, direction_gt, cfg.angle_class)
154 | else:
155 | direction_loss = 0
156 | angle_diff = 0
157 |
158 | final_loss = seg_loss * cfg.scale_seg + var_loss * cfg.scale_var + dist_loss * cfg.scale_dist + direction_loss * cfg.scale_direction
159 | final_loss.backward()
160 | torch.nn.utils.clip_grad_norm_(model.parameters(), cfg.max_grad_norm)
161 | opt.step()
162 | counter += 1
163 | t1 = time.time()
164 |
165 | if counter % 100 == 0:
166 | intersects, union = get_batch_iou(onehot_encoding(semantic), semantic_gt)
167 | iou = intersects / (union + 1e-7)
168 | logger.info(f"TRAIN[{epoch:>3d}]: [{batchi:>4d}/{last_idx}] "
169 | f"Time: {t1-t0:>7.4f} "
170 | f"Loss: {final_loss.item():>7.4f} "
171 | f"IOU: {np.array2string(iou[1:].numpy(), precision=3, floatmode='fixed')}")
172 |
173 | write_log(writer, iou, 'train', counter)
174 | writer.add_scalar('train/step_time', t1 - t0, counter)
175 | writer.add_scalar('train/seg_loss', seg_loss, counter)
176 | writer.add_scalar('train/var_loss', var_loss, counter)
177 | writer.add_scalar('train/dist_loss', dist_loss, counter)
178 | writer.add_scalar('train/reg_loss', reg_loss, counter)
179 | writer.add_scalar('train/direction_loss', direction_loss, counter)
180 | writer.add_scalar('train/final_loss', final_loss, counter)
181 | writer.add_scalar('train/angle_diff', angle_diff, counter)
182 |
183 | model_name = os.path.join(cfg.logdir, f"model{epoch}.pt")
184 | torch.save(model.state_dict(), model_name)
185 | logger.info(f"{model_name} saved")
186 |
187 | iou = eval_iou(model, val_loader)
188 | logger.info(f"EVAL[{epoch:>2d}]: "
189 | f"IOU: {np.array2string(iou[1:].numpy(), precision=3, floatmode='fixed')}")
190 | write_log(writer, iou, 'eval', counter)
191 | model.train()
192 | sched.step()
193 |
194 | if __name__ == '__main__':
195 | parser = argparse.ArgumentParser(description='P-MapNet training with HD Prior.')
196 | parser.add_argument("config", help = 'path to config file', type=str, default=None)
197 | args = parser.parse_args()
198 | cfg = Config.fromfile(args.config)
199 |
200 | if not os.path.exists(cfg.logdir):
201 | os.makedirs(cfg.logdir)
202 | with open(os.path.join(cfg.logdir, 'config.txt'), 'w') as f:
203 | argsDict = cfg.__dict__
204 | for eachArg, value in argsDict.items():
205 |             f.write(eachArg + " : " + str(value) + "\n")
206 | train(cfg)
207 |
208 |
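A note on the checkpoint key remapping seen in the loaders above (`name = k[7:]`): train() saves the state_dict of the nn.DataParallel wrapper, so every parameter key carries a 'module.' prefix that must be stripped before loading into a bare model. A minimal demonstration with a toy module (not the project's model):

import torch.nn as nn
from collections import OrderedDict

wrapped = nn.DataParallel(nn.Linear(4, 2))
print(list(wrapped.state_dict().keys()))    # ['module.weight', 'module.bias']

# strip the 'module.' prefix, equivalent to the k[7:] slicing used above
stripped = OrderedDict((k[len('module.'):], v) for k, v in wrapped.state_dict().items())
nn.Linear(4, 2).load_state_dict(stripped)   # loads cleanly into the unwrapped module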
--------------------------------------------------------------------------------
/train_HDPrior_pretrain.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import sys
4 | import logging
5 | import time
6 | from tensorboardX import SummaryWriter
7 | import argparse
8 | import matplotlib.pyplot as plt
9 | import torch
10 | import torch.nn as nn
11 | from tools.config import Config
12 | from torch.optim.lr_scheduler import StepLR
13 | from tools.loss import SimpleLoss, DiscriminativeLoss
14 | from data_osm.dataset import semantic_dataset
15 | from data_osm.const import NUM_CLASSES
16 | from tools.evaluation.iou import get_batch_iou
17 | from tools.evaluation.angle_diff import calc_angle_diff
18 | from tools.eval import onehot_encoding, eval_pretrain
19 | from model.utils.map_mae_head import vit_base_patch8
20 | from model import get_model
21 |
22 | import warnings
23 | warnings.filterwarnings("ignore")
24 | from collections import OrderedDict
25 |
26 | def write_log(writer, ious, title, counter):
27 | writer.add_scalar(f'{title}/iou', torch.mean(ious[1:]), counter)
28 | for i, iou in enumerate(ious):
29 | writer.add_scalar(f'{title}/class_{i}/iou', iou, counter)
30 |
31 | def train(cfg):
32 | if not os.path.exists(cfg.logdir):
33 | os.makedirs(cfg.logdir)
34 | logging.basicConfig(filename=os.path.join(cfg.logdir, "results.log"),
35 | filemode='w',
36 | format='%(asctime)s: %(message)s',
37 | datefmt='%Y-%m-%d %H:%M:%S',
38 | level=logging.INFO)
39 | logging.getLogger('shapely.geos').setLevel(logging.CRITICAL)
40 |
41 | logger = logging.getLogger()
42 | logger.addHandler(logging.StreamHandler(sys.stdout))
43 |
44 | data_conf = {
45 | 'num_channels': NUM_CLASSES + 1,
46 | 'image_size': cfg.image_size,
47 | 'xbound': cfg.xbound,
48 | 'ybound': cfg.ybound,
49 | 'zbound': cfg.zbound,
50 | 'dbound': cfg.dbound,
51 | 'thickness': cfg.thickness,
52 | 'angle_class': cfg.angle_class,
53 | 'patch_w': cfg.patch_w,
54 | 'patch_h': cfg.patch_h,
55 | 'mask_ratio': cfg.mask_ratio,
56 | 'mask_flag': cfg.mask_flag,
57 | 'sd_map_path': cfg.sd_map_path,
58 | }
59 |
60 | train_loader, val_loader = semantic_dataset(cfg, cfg.version, cfg.dataroot, data_conf,
61 | cfg.batch_size, cfg.nworkers, cfg.dataset)
62 | patch_h = data_conf['ybound'][1] - data_conf['ybound'][0]
63 | patch_w = data_conf['xbound'][1] - data_conf['xbound'][0]
64 | canvas_h = int(patch_h / data_conf['ybound'][2])
65 | canvas_w = int(patch_w / data_conf['xbound'][2])
66 |
67 | # # TODO: add to cfg and add support for patch32
68 | # model = vit_base_patch8(data_conf=data_conf,
69 | # instance_seg=cfg.instance_seg,
70 | # embedded_dim=cfg.embedding_dim,
71 | # direction_pred=cfg.direction_pred,
72 | # direction_dim=cfg.angle_class,
73 | # lidar=True,
74 | # img_size=(canvas_h, canvas_w))
75 | model = get_model(cfg, data_conf, cfg.instance_seg, cfg.embedding_dim, cfg.direction_pred, cfg.angle_class)
76 |
77 | if 'vit_base' in cfg and cfg.vit_base is not None:
78 | state_dict_model = torch.load(cfg.vit_base)
79 | model.load_state_dict(state_dict_model, strict=False)
80 | model = nn.DataParallel(model, device_ids=cfg.gpus)
81 | opt = torch.optim.Adam(model.parameters(), lr=cfg.lr, weight_decay=cfg.weight_decay)
82 | sched = StepLR(opt, 3, 0.1)
83 | writer = SummaryWriter(logdir=cfg.logdir)
84 |
85 | loss_fn = SimpleLoss(cfg.pos_weight).cuda()
86 | embedded_loss_fn = DiscriminativeLoss(cfg.embedding_dim, cfg.delta_v, cfg.delta_d).cuda()
87 | direction_loss_fn = torch.nn.BCELoss(reduction='none')
88 |
89 | model.cuda(device=cfg.gpus[0])
90 | model.train()
91 |
92 | counter = 0
93 | last_idx = len(train_loader) - 1
94 |
95 | for epoch in range(cfg.nepochs):
96 | for batchi, (imgs, trans, rots, intrins, post_trans, post_rots,
97 | lidar_data, lidar_mask, car_trans, yaw_pitch_roll,
98 | semantic_gt, instance_gt, direction_gt, osm_masks,
99 | osm_vectors, masked_map, timestamps, scene_ids) in enumerate(train_loader):
100 | t0 = time.time()
101 | opt.zero_grad()
102 | semantic, embedding, direction = model(masked_map.float())
103 | semantic_gt = semantic_gt.cuda().float()
104 | instance_gt = instance_gt.cuda()
105 |
106 | device = semantic_gt.device
107 | if semantic.device != device:
108 | semantic = semantic.to(device)
109 | embedding = embedding.to(device)
110 | direction = direction.to(device)
111 |
112 | seg_loss = loss_fn(semantic, semantic_gt)
113 | if cfg.instance_seg:
114 | var_loss, dist_loss, reg_loss = embedded_loss_fn(embedding, instance_gt)
115 | else:
116 | var_loss = 0
117 | dist_loss = 0
118 | reg_loss = 0
119 |
120 | if cfg.direction_pred:
121 | direction_gt = direction_gt.cuda()
122 | lane_mask = (1 - direction_gt[:, 0]).unsqueeze(1)
123 | direction_loss = direction_loss_fn(torch.softmax(direction, 1), direction_gt)
124 | direction_loss = (direction_loss * lane_mask).sum() / (lane_mask.sum() * direction_loss.shape[1] + 1e-6)
125 | angle_diff = calc_angle_diff(direction, direction_gt, cfg.angle_class)
126 | else:
127 | direction_loss = 0
128 | angle_diff = 0
129 |
130 | final_loss = seg_loss * cfg.scale_seg + var_loss * cfg.scale_var + dist_loss * cfg.scale_dist + direction_loss * cfg.scale_direction
131 | final_loss.backward()
132 | torch.nn.utils.clip_grad_norm_(model.parameters(), cfg.max_grad_norm)
133 | opt.step()
134 | counter += 1
135 | t1 = time.time()
136 | if counter % 100 == 0:
137 | intersects, union = get_batch_iou(onehot_encoding(semantic), semantic_gt)
138 | iou = intersects / (union + 1e-7)
139 | logger.info(f"TRAIN[{epoch:>3d}]: [{batchi:>4d}/{last_idx}] "
140 | f"Time: {t1-t0:>7.4f} "
141 | f"Loss: {final_loss.item():>7.4f} "
142 | f"IOU: {np.array2string(iou[1:].numpy(), precision=3, floatmode='fixed')}")
143 |
144 | write_log(writer, iou, 'train', counter)
145 | writer.add_scalar('train/step_time', t1 - t0, counter)
146 | writer.add_scalar('train/seg_loss', seg_loss, counter)
147 | writer.add_scalar('train/var_loss', var_loss, counter)
148 | writer.add_scalar('train/dist_loss', dist_loss, counter)
149 | writer.add_scalar('train/reg_loss', reg_loss, counter)
150 | writer.add_scalar('train/direction_loss', direction_loss, counter)
151 | writer.add_scalar('train/final_loss', final_loss, counter)
152 | writer.add_scalar('train/angle_diff', angle_diff, counter)
153 | cur_lr = opt.state_dict()['param_groups'][0]['lr']
154 | writer.add_scalar('train/lr', cur_lr, counter)
155 |
156 | model_name = os.path.join(cfg.logdir, f"model{epoch}.pt")
157 | torch.save(model.state_dict(), model_name)
158 |
159 | logger.info(f"{model_name} saved")
160 |
161 | iou = eval_pretrain(model, val_loader)
162 |
163 | logger.info(f"EVAL[{epoch:>2d}]: "
164 | f"IOU: {np.array2string(iou[1:].numpy(), precision=3, floatmode='fixed')}")
165 |
166 | write_log(writer, iou, 'eval', counter)
167 |
168 | model.train()
169 |
170 | sched.step()
171 |
172 |
173 | if __name__ == '__main__':
174 | parser = argparse.ArgumentParser(description='P-MapNet pre-train HD Prior.')
175 | parser.add_argument("config", help = 'path to config file', type=str, default=None)
176 | args = parser.parse_args()
177 | cfg = Config.fromfile(args.config)
178 |
179 | if not os.path.exists(cfg.logdir):
180 | os.makedirs(cfg.logdir)
181 | with open(os.path.join(cfg.logdir, 'config.txt'), 'w') as f:
182 | argsDict = cfg.__dict__
183 | for eachArg, value in argsDict.items():
184 |             f.write(eachArg + " : " + str(value) + "\n")
185 | train(cfg)
186 |
--------------------------------------------------------------------------------