├── .gitignore ├── .gitmodules ├── LICENSE ├── README.md ├── docs ├── CVPR_2019_poster.jpg ├── CVPR_2019_poster.pdf ├── cvpr2018-pipeline.png ├── index.html ├── main_result.png ├── pdf_thumbnail.jpg └── table1_caption.png ├── figures ├── cvpr2018-pipeline.png └── result.png ├── preprocessing ├── generate_disp.py ├── generate_lidar.py ├── kitti_process_RANSAC.py └── kitti_util.py ├── psmnet ├── README.md ├── dataloader │ ├── KITTILoader.py │ ├── KITTILoader3D.py │ ├── KITTILoader_dataset3d.py │ ├── KITTI_submission_loader.py │ ├── KITTI_submission_loader2012.py │ ├── KITTIloader2012.py │ ├── KITTIloader2015.py │ ├── SecenFlowLoader.py │ ├── __init__.py │ ├── listflowfile.py │ ├── preprocess.py │ └── readpfm.py ├── finetune_3d.py ├── logger.py ├── models │ ├── __init__.py │ ├── basic.py │ ├── stackhourglass.py │ └── submodule.py ├── submission.py └── utils │ ├── __init__.py │ ├── preprocess.py │ └── readpfm.py └── visualization ├── 000012.bin ├── pyntcloud.ipynb └── pyntcloud.png /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "avod"] 2 | path = avod 3 | url = git@github.com:mileyan/avod.git 4 | 5 | [submodule "frustum-pointnets"] 6 | path = frustum-pointnets 7 | url = git@github.com:charlesq34/frustum-pointnets.git 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Yan (Eric) Wang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the 
"Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Pseudo-LiDAR from Visual Depth Estimation: Bridging the Gap in 3D Object Detection for Autonomous Driving 2 | This paper has been accpeted by Conference on Computer Vision and Pattern Recognition ([CVPR](http://cvpr2019.thecvf.com/)) 2019. 3 | 4 | [ 5 | Pseudo-LiDAR from Visual Depth Estimation: Bridging the Gap in 3D Object Detection for Autonomous Driving](https://arxiv.org/abs/1812.07179) 6 | 7 | by [Yan Wang](https://www.cs.cornell.edu/~yanwang/), [Wei-Lun Chao](http://www-scf.usc.edu/~weilunc/), [Divyansh Garg](http://divyanshgarg.com/), [Bharath Hariharan](http://home.bharathh.info/), [Mark Campbell](https://campbell.mae.cornell.edu/) and [Kilian Q. Weinberger](http://kilian.cs.cornell.edu/) 8 | 9 | ![Figure](figures/cvpr2018-pipeline.png) 10 | ### Citation 11 | ``` 12 | @inproceedings{wang2019pseudo, 13 | title={Pseudo-LiDAR from Visual Depth Estimation: Bridging the Gap in 3D Object Detection for Autonomous Driving}, 14 | author={Wang, Yan and Chao, Wei-Lun and Garg, Divyansh and Hariharan, Bharath and Campbell, Mark and Weinberger, Kilian}, 15 | booktitle={CVPR}, 16 | year={2019} 17 | } 18 | ``` 19 | ## Update 20 | * 2nd July 2020: Add a jupyter script to visualize point cloud. It is in ./visualization folder. 21 | * 29th July 2019: `submission.py` will save the disparity to the numpy file, not png file. And fix the `generate_lidar.py`. 22 | * I have modifed the official avod a little bit. Now you can directly train and test pseudo-lidar with avod. Please check the code https://github.com/mileyan/avod_pl. 23 | 24 | ## Contents 25 | 26 | - [Introduction](#introduction) 27 | - [Usage](#usage) 28 | - [Results](#results) 29 | - [Contacts](#contacts) 30 | 31 | ## Introduction 32 | 3D object detection is an essential task in autonomous driving. Recent techniques excel with highly accurate detection rates, provided the 3D input data is obtained from precise but expensive LiDAR technology. Approaches based on cheaper monocular or stereo imagery data have, until now, resulted in drastically lower accuracies --- a gap that is commonly attributed to poor image-based depth estimation. However, in this paper we argue that data representation (rather than its quality) accounts for the majority of the difference. 
Taking the inner workings of convolutional neural networks into consideration, we propose to convert image-based depth maps to pseudo-LiDAR representations --- essentially mimicking LiDAR signal. With this representation we can apply different existing LiDAR-based detection algorithms. On the popular KITTI benchmark, our approach achieves impressive improvements over the existing state-of-the-art in image-based performance --- raising the detection accuracy of objects within the 30m range from the previous state-of-the-art of 22% to an unprecedented 74%. At the time of submission our algorithm holds the highest entry on the KITTI 3D object detection leaderboard for stereo image based approaches.
33 | 
34 | ## Usage
35 | 
36 | ### 1. Overview
37 | 
38 | We provide guidance and code to train the stereo depth estimator and 3D object detectors using the [KITTI object detection benchmark](http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d). We also provide our pre-trained models.
39 | 
40 | ### 2. Stereo depth estimation models
41 | We provide our pretrained [PSMNet](http://openaccess.thecvf.com/content_cvpr_2018/papers/Chang_Pyramid_Stereo_Matching_CVPR_2018_paper.pdf) model, trained on the Scene Flow dataset and the 3,712 training images of the KITTI detection benchmark.
42 | - [Pretrained PSMNet](https://drive.google.com/file/d/1sWjsIO9Fuy92wT3gLkHF3PA7SP8QZBzu/view?usp=sharing)
43 | 
44 | We also directly provide the pseudo-LiDAR point clouds and the ground planes of the training and testing images, estimated by this pre-trained model.
45 | - [training/pseudo-lidar_velodyne](https://drive.google.com/file/d/10txZOtKk_aY3B7AhHjJPMCiRf5pP62nV/view?usp=sharing)
46 | - [testing/pseudo-lidar_velodyne](https://drive.google.com/file/d/1XRAWYpMJeaVVXNN442xDgXnAa3pLBUvv/view?usp=sharing)
47 | - [training/pseudo-lidar_planes](https://drive.google.com/file/d/1NBN85o9Jl7FjV5HwldmBv_9T4LeoNiwV/view?usp=sharing)
48 | - [testing/pseudo-lidar_planes](https://drive.google.com/file/d/1G5_5VHbygssrKOzz1zEirNlKjVnMc5tz/view?usp=sharing)
49 | 
50 | We also provide code to train your own stereo depth estimator and to prepare the point clouds and ground planes. **If you want to use our pseudo-LiDAR data for 3D object detection, you may skip the following contents and directly move on to the object detection models.**
51 | 
52 | #### 2.1 Dependencies
53 | - Python 3.5+
54 | - numpy, scikit-learn, scipy
55 | - KITTI 3D object detection dataset
56 | 
57 | #### 2.2 Download the dataset
58 | You need to download the KITTI dataset from [here](http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d), including the left and right color images, Velodyne point clouds, camera calibration matrices, and training labels. You also need to download the image set files from [here](https://github.com/charlesq34/frustum-pointnets/tree/master/kitti/image_sets). Then organize the data in the following way.
59 | ```
60 | KITTI/object/
61 | 
62 |     train.txt
63 |     val.txt
64 |     test.txt
65 | 
66 |     training/
67 |         calib/
68 |         image_2/   # left images
69 |         image_3/   # right images
70 |         label_2/
71 |         velodyne/
72 | 
73 |     testing/
74 |         calib/
75 |         image_2/
76 |         image_3/
77 |         velodyne/
78 | ```
79 | The Velodyne point clouds (from LiDAR) are used **ONLY** as the ground truth to train a stereo depth estimator (e.g., PSMNet).
80 | #### 2.3 Generate ground-truth image disparities
81 | Use the script `./preprocessing/generate_disp.py` to process all velodyne files that appear in `train.txt`. This is our **training ground truth**. Or you can directly download them from [disparity](https://drive.google.com/file/d/1JqtPdYnajNhDNxucuQYmD-79rl7MIXoZ/view?usp=sharing). Name this folder `disparity` and put it inside the `training` folder.
82 | ```bash
83 | python generate_disp.py --data_path ./KITTI/object/training/ --split_file ./KITTI/object/train.txt
84 | ```
85 | 
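For intuition, the geometry behind this step (and behind turning predicted disparities back into depth and 3D points in step 2.5) is the standard stereo relation `disparity = focal_length * baseline / depth`, with the KITTI stereo baseline of 0.54 m that is hard-coded in `preprocessing/generate_disp.py` and `preprocessing/generate_lidar.py`. The sketch below is only for illustration and is not part of the pipeline; the focal length and principal point values are made-up typical numbers, whereas the real scripts read them from the KITTI calibration files via `kitti_util.Calibration`.

```python
import numpy as np

BASELINE = 0.54  # KITTI stereo baseline in meters (same constant as in the preprocessing scripts)

def depth_to_disparity(depth, f_u, baseline=BASELINE):
    """Metric depth (m) -> disparity (pixels); used to build the training ground truth."""
    return f_u * baseline / depth

def disparity_to_depth(disparity, f_u, baseline=BASELINE):
    """Predicted disparity (pixels) -> metric depth (m); the inverse of the relation above."""
    return f_u * baseline / disparity

def depth_to_points(depth_map, f_u, f_v, c_u, c_v):
    """Back-project a dense depth map (H x W) into an N x 3 'pseudo-LiDAR' point cloud
    in the camera frame (x right, y down, z forward)."""
    rows, cols = depth_map.shape
    u, v = np.meshgrid(np.arange(cols), np.arange(rows))  # per-pixel image coordinates
    x = (u - c_u) * depth_map / f_u
    y = (v - c_v) * depth_map / f_v
    return np.stack([x, y, depth_map], axis=-1).reshape(-1, 3)

# Illustrative numbers only: with a typical KITTI focal length of ~721 px,
# a car 30 m away corresponds to a disparity of about 721 * 0.54 / 30 = ~13 px.
points = depth_to_points(np.full((375, 1242), 10.0), f_u=721.0, f_v=721.0, c_u=621.0, c_v=187.0)
print(points.shape)  # (465750, 3)
```

The actual conversion in `generate_lidar.py` additionally transforms these camera-frame points into the Velodyne frame and keeps only points in front of the car and below the `--max_high` threshold.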
86 | #### 2.4 Train the stereo model
87 | You can train any stereo disparity model you want. Here we give an example of training PSMNet. The modified code is saved in the subfolder `psmnet`. Make sure you follow the `README` inside this folder to install the correct Python version and libraries. We strongly suggest using `conda env` to manage the Python environments, since the different components require different Python versions. Download the PSMNet model pretrained on the Scene Flow dataset from [here](https://drive.google.com/file/d/1D-OcFbrQXNl3iSOeBnMBGd87pNXp0RT1/view?usp=sharing).
88 | 
89 | ```bash
90 | # train PSMNet with 4 TITAN X GPUs
91 | python ./psmnet/finetune_3d.py --maxdisp 192 \
92 |     --model stackhourglass \
93 |     --datapath ./KITTI/object/training/ \
94 |     --split_file ./KITTI/object/train.txt \
95 |     --epochs 300 \
96 |     --lr_scale 50 \
97 |     --loadmodel ./pretrained_sceneflow.tar \
98 |     --savemodel ./psmnet/kitti_3d/ --btrain 12
99 | ```
100 | 
101 | #### 2.5 Predict the point clouds
102 | ##### Predict the disparities.
103 | ```bash
104 | # training
105 | python ./psmnet/submission.py \
106 |     --loadmodel ./psmnet/kitti_3d/finetune_300.tar \
107 |     --datapath ./KITTI/object/training/ \
108 |     --save_path ./KITTI/object/training/predict_disparity
109 | # testing
110 | python ./psmnet/submission.py \
111 |     --loadmodel ./psmnet/kitti_3d/finetune_300.tar \
112 |     --datapath ./KITTI/object/testing/ \
113 |     --save_path ./KITTI/object/testing/predict_disparity
114 | ```
115 | ##### Convert the disparities to point clouds.
116 | ```bash
117 | # training
118 | python ./preprocessing/generate_lidar.py \
119 |     --calib_dir ./KITTI/object/training/calib/ \
120 |     --save_dir ./KITTI/object/training/pseudo-lidar_velodyne/ \
121 |     --disparity_dir ./KITTI/object/training/predict_disparity \
122 |     --max_high 1
123 | # testing
124 | python ./preprocessing/generate_lidar.py \
125 |     --calib_dir ./KITTI/object/testing/calib/ \
126 |     --save_dir ./KITTI/object/testing/pseudo-lidar_velodyne/ \
127 |     --disparity_dir ./KITTI/object/testing/predict_disparity \
128 |     --max_high 1
129 | ```
130 | If you want to generate point clouds from depth maps (e.g., from DORN) instead of disparities, add `--is_depth` to the command.
131 | 
132 | #### 2.6 Generate ground planes
133 | If you want to train an [AVOD](https://github.com/kujason/avod) model for 3D object detection, you need to generate ground planes from the pseudo-LiDAR point clouds.
134 | ```bash
135 | # training
136 | python ./preprocessing/kitti_process_RANSAC.py \
137 |     --calib_dir ./KITTI/object/training/calib/ \
138 |     --lidar_dir ./KITTI/object/training/pseudo-lidar_velodyne/ \
139 |     --planes_dir ./KITTI/object/training/pseudo-lidar_planes/
140 | # testing
141 | python ./preprocessing/kitti_process_RANSAC.py \
142 |     --calib_dir ./KITTI/object/testing/calib/ \
143 |     --lidar_dir ./KITTI/object/testing/pseudo-lidar_velodyne/ \
144 |     --planes_dir ./KITTI/object/testing/pseudo-lidar_planes/
145 | ```
146 | ### 3. Object Detection models
147 | #### AVOD model
148 | Download the code from [https://github.com/kujason/avod](https://github.com/kujason/avod) and install the Python dependencies.
149 | 
150 | Follow their README to prepare the data and then replace (1) the files in `velodyne` with those in `pseudo-lidar_velodyne` and (2) the files in `planes` with those in `pseudo-lidar_planes`. Note that you should still keep the folder names as `velodyne` and `planes`.
151 | 
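If you prefer to script this folder swap rather than copying files by hand, a minimal sketch is below. It is not part of the official AVOD setup: the `KITTI_ROOT` path is only an assumption about where your AVOD data lives, so adjust it to your layout and keep a backup of the original LiDAR data.

```python
import os

# Hypothetical data root; point this at wherever your AVOD KITTI data lives.
KITTI_ROOT = './Kitti/object/training'

# (pseudo-LiDAR folder, folder name the detector expects)
SWAPS = [('pseudo-lidar_velodyne', 'velodyne'),
         ('pseudo-lidar_planes', 'planes')]

for pseudo_name, expected_name in SWAPS:
    pseudo = os.path.abspath(os.path.join(KITTI_ROOT, pseudo_name))
    expected = os.path.join(KITTI_ROOT, expected_name)
    if os.path.islink(expected):
        os.remove(expected)                      # drop an old symlink
    elif os.path.exists(expected):
        os.rename(expected, expected + '_real')  # keep the real LiDAR data around
    os.symlink(pseudo, expected)                 # the detector still sees 'velodyne' and 'planes'
    print('{} -> {}'.format(expected, pseudo))
```

Copying the files instead of symlinking works just as well; the only requirement, as stated above, is that the folder names the detector reads remain `velodyne` and `planes`.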
152 | Follow their README to train the `pyramid_cars_with_aug_example` model. You can also download our pretrained model and evaluate it directly. If you want to submit your results to the leaderboard, however, you need to train it on `trainval.txt`.
153 | 
154 | - [pretrained AVOD](https://drive.google.com/file/d/1wuMykUDx8tcCfxpqnprmzrgUyheQV42F/view?usp=sharing) (trained only on train.txt)
155 | 
156 | 
157 | 
158 | #### Frustum-PointNets model
159 | Download the code from [https://github.com/charlesq34/frustum-pointnets](https://github.com/charlesq34/frustum-pointnets) and install the Python dependencies.
160 | 
161 | Follow their README to prepare the data and then replace the files in `velodyne` with those in `pseudo-lidar_velodyne`. Note that you should still keep the folder name as `velodyne`.
162 | 
163 | Follow their README to train the v1 model. You can also download our pretrained model and evaluate it directly.
164 | 
165 | - [pretrained Frustum_V1](https://drive.google.com/file/d/1qhCxw6uHqQ4SAkxIuBi-QCKqLmTGiNhP/view?usp=sharing) (trained only on train.txt)
166 | 
167 | ## Results
168 | The main results of our pseudo-LiDAR method on the KITTI validation set are shown below.
169 | ![Figure](figures/result.png)
170 | 
171 | You can download the AVOD validation results from [HERE](https://drive.google.com/file/d/13nOhBCmj8rzjMHDEw3syROuqHsoxWIKJ/view?usp=sharing).
172 | 
173 | 
174 | ## Contact
175 | If you have any questions, please feel free to email us.
176 | 
177 | Yan Wang (yw763@cornell.edu), Harry Chao (weilunchao760414@gmail.com), Div Garg (dg595@cornell.edu)
178 | 
--------------------------------------------------------------------------------
/docs/CVPR_2019_poster.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mileyan/pseudo_lidar/032c7a0d73c3fdf84e934af3f57f8eb489a52906/docs/CVPR_2019_poster.jpg
--------------------------------------------------------------------------------
/docs/CVPR_2019_poster.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mileyan/pseudo_lidar/032c7a0d73c3fdf84e934af3f57f8eb489a52906/docs/CVPR_2019_poster.pdf
--------------------------------------------------------------------------------
/docs/cvpr2018-pipeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mileyan/pseudo_lidar/032c7a0d73c3fdf84e934af3f57f8eb489a52906/docs/cvpr2018-pipeline.png
--------------------------------------------------------------------------------
/docs/index.html:
--------------------------------------------------------------------------------
[The HTML markup of this page was stripped during extraction; only its text content survives. Lines 1-15 are the page head, whose only recoverable text is the page title: "Pseudo-LiDAR from Visual Depth Estimation: Bridging the Gap in 3D Object Detection for Autonomous Driving".]
[Lines 16-118 are the page body; its recoverable text content is:]

  Pseudo-LiDAR from Visual Depth Estimation: Bridging the Gap in 3D Object Detection for Autonomous Driving
  Yan Wang, Wei-Lun Chao, Divyansh Garg, Bharath Hariharan, Mark Campbell, Kilian Q. Weinberger
  Cornell University, Ithaca, NY

  Section headings: Abstract (the same text as the README introduction above), Architecture, Experiment Results, Paper, Poster, Citation

  Citation (BibTeX):
    @article{wang2018pseudo,
      title={Pseudo-LiDAR from Visual Depth Estimation: Bridging the Gap in 3D Object Detection for Autonomous Driving},
      author={Wang, Yan and Chao, Wei-Lun and Garg, Divyansh and Hariharan, Bharath and Campbell, Mark and Weinberger, Kilian Q.},
      journal={arXiv preprint arXiv:1812.07179},
      year={2018}
    }

[The numbered lines that follow (119-135) held the page's closing markup, which was also lost.]
119 | 120 | 121 | 122 | 125 | 128 | 131 | 132 | 133 | 134 | 135 | -------------------------------------------------------------------------------- /docs/main_result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mileyan/pseudo_lidar/032c7a0d73c3fdf84e934af3f57f8eb489a52906/docs/main_result.png -------------------------------------------------------------------------------- /docs/pdf_thumbnail.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mileyan/pseudo_lidar/032c7a0d73c3fdf84e934af3f57f8eb489a52906/docs/pdf_thumbnail.jpg -------------------------------------------------------------------------------- /docs/table1_caption.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mileyan/pseudo_lidar/032c7a0d73c3fdf84e934af3f57f8eb489a52906/docs/table1_caption.png -------------------------------------------------------------------------------- /figures/cvpr2018-pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mileyan/pseudo_lidar/032c7a0d73c3fdf84e934af3f57f8eb489a52906/figures/cvpr2018-pipeline.png -------------------------------------------------------------------------------- /figures/result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mileyan/pseudo_lidar/032c7a0d73c3fdf84e934af3f57f8eb489a52906/figures/result.png -------------------------------------------------------------------------------- /preprocessing/generate_disp.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import numpy as np 5 | import scipy.misc as ssc 6 | 7 | import kitti_util 8 | 9 | 10 | def generate_dispariy_from_velo(pc_velo, height, width, calib): 11 | pts_2d = calib.project_velo_to_image(pc_velo) 12 | fov_inds = (pts_2d[:, 0] < width - 1) & (pts_2d[:, 0] >= 0) & \ 13 | (pts_2d[:, 1] < height - 1) & (pts_2d[:, 1] >= 0) 14 | fov_inds = fov_inds & (pc_velo[:, 0] > 2) 15 | imgfov_pc_velo = pc_velo[fov_inds, :] 16 | imgfov_pts_2d = pts_2d[fov_inds, :] 17 | imgfov_pc_rect = calib.project_velo_to_rect(imgfov_pc_velo) 18 | depth_map = np.zeros((height, width)) - 1 19 | imgfov_pts_2d = np.round(imgfov_pts_2d).astype(int) 20 | for i in range(imgfov_pts_2d.shape[0]): 21 | depth = imgfov_pc_rect[i, 2] 22 | depth_map[int(imgfov_pts_2d[i, 1]), int(imgfov_pts_2d[i, 0])] = depth 23 | baseline = 0.54 24 | 25 | disp_map = (calib.f_u * baseline) / depth_map 26 | return disp_map 27 | 28 | 29 | if __name__ == '__main__': 30 | parser = argparse.ArgumentParser(description='Generate Disparity') 31 | parser.add_argument('--data_path', type=str, default='~/Kitti/object/training/') 32 | parser.add_argument('--split_file', type=str, default='~/Kitti/object/train.txt') 33 | args = parser.parse_args() 34 | 35 | assert os.path.isdir(args.data_path) 36 | lidar_dir = args.data_path + '/velodyne/' 37 | calib_dir = args.data_path + '/calib/' 38 | image_dir = args.data_path + '/image_2/' 39 | disparity_dir = args.data_path + '/disparity/' 40 | 41 | assert os.path.isdir(lidar_dir) 42 | assert os.path.isdir(calib_dir) 43 | assert os.path.isdir(image_dir) 44 | 45 | if not os.path.isdir(disparity_dir): 46 | os.makedirs(disparity_dir) 47 | 48 | lidar_files = [x for x in os.listdir(lidar_dir) if x[-3:] == 'bin'] 49 | 
lidar_files = sorted(lidar_files) 50 | 51 | assert os.path.isfile(args.split_file) 52 | with open(args.split_file, 'r') as f: 53 | file_names = [x.strip() for x in f.readlines()] 54 | 55 | for fn in lidar_files: 56 | predix = fn[:-4] 57 | if predix not in file_names: 58 | continue 59 | calib_file = '{}/{}.txt'.format(calib_dir, predix) 60 | calib = kitti_util.Calibration(calib_file) 61 | # load point cloud 62 | lidar = np.fromfile(lidar_dir + '/' + fn, dtype=np.float32).reshape((-1, 4))[:, :3] 63 | image_file = '{}/{}.png'.format(image_dir, predix) 64 | image = ssc.imread(image_file) 65 | height, width = image.shape[:2] 66 | disp = generate_dispariy_from_velo(lidar, height, width, calib) 67 | np.save(disparity_dir + '/' + predix, disp) 68 | print('Finish Disparity {}'.format(predix)) 69 | -------------------------------------------------------------------------------- /preprocessing/generate_lidar.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import numpy as np 5 | import scipy.misc as ssc 6 | 7 | import kitti_util 8 | 9 | 10 | def project_disp_to_points(calib, disp, max_high): 11 | disp[disp < 0] = 0 12 | baseline = 0.54 13 | mask = disp > 0 14 | depth = calib.f_u * baseline / (disp + 1. - mask) 15 | rows, cols = depth.shape 16 | c, r = np.meshgrid(np.arange(cols), np.arange(rows)) 17 | points = np.stack([c, r, depth]) 18 | points = points.reshape((3, -1)) 19 | points = points.T 20 | points = points[mask.reshape(-1)] 21 | cloud = calib.project_image_to_velo(points) 22 | valid = (cloud[:, 0] >= 0) & (cloud[:, 2] < max_high) 23 | return cloud[valid] 24 | 25 | def project_depth_to_points(calib, depth, max_high): 26 | rows, cols = depth.shape 27 | c, r = np.meshgrid(np.arange(cols), np.arange(rows)) 28 | points = np.stack([c, r, depth]) 29 | points = points.reshape((3, -1)) 30 | points = points.T 31 | cloud = calib.project_image_to_velo(points) 32 | valid = (cloud[:, 0] >= 0) & (cloud[:, 2] < max_high) 33 | return cloud[valid] 34 | 35 | if __name__ == '__main__': 36 | parser = argparse.ArgumentParser(description='Generate Libar') 37 | parser.add_argument('--calib_dir', type=str, 38 | default='~/Kitti/object/training/calib') 39 | parser.add_argument('--disparity_dir', type=str, 40 | default='~/Kitti/object/training/predicted_disparity') 41 | parser.add_argument('--save_dir', type=str, 42 | default='~/Kitti/object/training/predicted_velodyne') 43 | parser.add_argument('--max_high', type=int, default=1) 44 | parser.add_argument('--is_depth', action='store_true') 45 | 46 | args = parser.parse_args() 47 | 48 | assert os.path.isdir(args.disparity_dir) 49 | assert os.path.isdir(args.calib_dir) 50 | 51 | if not os.path.isdir(args.save_dir): 52 | os.makedirs(args.save_dir) 53 | 54 | disps = [x for x in os.listdir(args.disparity_dir) if x[-3:] == 'png' or x[-3:] == 'npy'] 55 | disps = sorted(disps) 56 | 57 | for fn in disps: 58 | predix = fn[:-4] 59 | calib_file = '{}/{}.txt'.format(args.calib_dir, predix) 60 | calib = kitti_util.Calibration(calib_file) 61 | # disp_map = ssc.imread(args.disparity_dir + '/' + fn) / 256. 62 | if fn[-3:] == 'png': 63 | disp_map = ssc.imread(args.disparity_dir + '/' + fn) 64 | elif fn[-3:] == 'npy': 65 | disp_map = np.load(args.disparity_dir + '/' + fn) 66 | else: 67 | assert False 68 | if not args.is_depth: 69 | disp_map = (disp_map*256).astype(np.uint16)/256. 70 | lidar = project_disp_to_points(calib, disp_map, args.max_high) 71 | else: 72 | disp_map = (disp_map).astype(np.float32)/256. 
73 | lidar = project_depth_to_points(calib, disp_map, args.max_high) 74 | # pad 1 in the indensity dimension 75 | lidar = np.concatenate([lidar, np.ones((lidar.shape[0], 1))], 1) 76 | lidar = lidar.astype(np.float32) 77 | lidar.tofile('{}/{}.bin'.format(args.save_dir, predix)) 78 | print('Finish Depth {}'.format(predix)) 79 | -------------------------------------------------------------------------------- /preprocessing/kitti_process_RANSAC.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import numpy as np 5 | from sklearn.linear_model import RANSACRegressor 6 | 7 | import kitti_util as utils 8 | 9 | 10 | def extract_ransac(calib_dir, lidar_dir, planes_dir): 11 | data_idx_list = [x[:-4] for x in os.listdir(lidar_dir) if x[-4:] == '.bin'] 12 | 13 | if not os.path.isdir(planes_dir): 14 | os.makedirs(planes_dir) 15 | 16 | for data_idx in data_idx_list: 17 | 18 | print('------------- ', data_idx) 19 | calib = calib_dir + '/' + data_idx + '.txt' 20 | calib = utils.Calibration(calib) 21 | pc_velo = lidar_dir + '/' + data_idx + '.bin' 22 | pc_velo = np.fromfile(pc_velo, dtype=np.float32).reshape(-1, 4) 23 | pc_rect = calib.project_velo_to_rect(pc_velo[:, :3]) 24 | valid_loc = (pc_rect[:, 1] > 1.5) & \ 25 | (pc_rect[:, 1] < 1.86) & \ 26 | (pc_rect[:, 2] > 0) & \ 27 | (pc_rect[:, 2] < 40) & \ 28 | (pc_rect[:, 0] > -15) & \ 29 | (pc_rect[:, 0] < 15) 30 | pc_rect = pc_rect[valid_loc] 31 | if len(pc_rect) < 1: 32 | w = [0, -1, 0] 33 | h = 1.65 34 | else: 35 | reg = RANSACRegressor().fit(pc_rect[:, [0, 2]], pc_rect[:, 1]) 36 | w = np.zeros(3) 37 | w[0] = reg.estimator_.coef_[0] 38 | w[2] = reg.estimator_.coef_[1] 39 | w[1] = -1.0 40 | h = reg.estimator_.intercept_ 41 | w = w / np.linalg.norm(w) 42 | print(w) 43 | print(h) 44 | 45 | lines = ['# Plane', 'Width 4', 'Height 1'] 46 | 47 | plane_file = os.path.join(planes_dir, data_idx + '.txt') 48 | result_lines = lines[:3] 49 | result_lines.append("{:e} {:e} {:e} {:e}".format(w[0], w[1], w[2], h)) 50 | result_str = '\n'.join(result_lines) 51 | with open(plane_file, 'w') as f: 52 | f.write(result_str) 53 | 54 | 55 | if __name__ == '__main__': 56 | parser = argparse.ArgumentParser() 57 | parser.add_argument('--calib_dir', default='KITTI/object/training/calib') 58 | parser.add_argument('--lidar_dir', default='KITTI/object/training/velodyne') 59 | parser.add_argument('--planes_dir', default='KITTI/object/training/velodyne_planes') 60 | args = parser.parse_args() 61 | 62 | extract_ransac(args.calib_dir, args.lidar_dir, args.planes_dir) 63 | -------------------------------------------------------------------------------- /preprocessing/kitti_util.py: -------------------------------------------------------------------------------- 1 | """ Helper methods for loading and parsing KITTI data. 2 | 3 | Author: Charles R. Qi 4 | Date: September 2017 5 | """ 6 | from __future__ import print_function 7 | 8 | import numpy as np 9 | 10 | 11 | class Calibration(object): 12 | ''' Calibration matrices and utils 13 | 3d XYZ in