├── .gitignore
├── LICENSE
├── README.md
├── assets
│   ├── images
│   │   ├── semanticdepth.jpg
│   │   └── test_munich
│   │       ├── test_3.jpg
│   │       ├── test_3_ALL.jpg
│   │       ├── test_3_output.jpg
│   │       └── test_3_planes.jpg
│   └── pdfs
│       └── semanticDepthPabloRodriguezPalafox.pdf
├── data
│   ├── roborace750_mockup
│   │   ├── gtFine
│   │   │   ├── test
│   │   │   │   └── berlin
│   │   │   │       ├── berlin_00125_gtFine_labelIds.png
│   │   │   │       ├── berlin_00125_gtFine_polygons.json
│   │   │   │       ├── berlin_00126_gtFine_labelIds.png
│   │   │   │       └── berlin_00126_gtFine_polygons.json
│   │   │   ├── train
│   │   │   │   ├── montreal
│   │   │   │   │   ├── montreal_00000_gtFine_labelIds.png
│   │   │   │   │   ├── montreal_00000_gtFine_polygons.json
│   │   │   │   │   ├── montreal_00001_gtFine_labelIds.png
│   │   │   │   │   ├── montreal_00001_gtFine_polygons.json
│   │   │   │   │   ├── montreal_00002_gtFine_labelIds.png
│   │   │   │   │   └── montreal_00002_gtFine_polygons.json
│   │   │   │   └── newyork
│   │   │   │       ├── newyork_00000_gtFine_labelIds.png
│   │   │   │       ├── newyork_00000_gtFine_polygons.json
│   │   │   │       ├── newyork_00001_gtFine_labelIds.png
│   │   │   │       ├── newyork_00001_gtFine_polygons.json
│   │   │   │       ├── newyork_00002_gtFine_labelIds.png
│   │   │   │       └── newyork_00002_gtFine_polygons.json
│   │   │   └── val
│   │   │       └── berlin
│   │   │           ├── berlin_00000_gtFine_labelIds.png
│   │   │           ├── berlin_00000_gtFine_polygons.json
│   │   │           ├── berlin_00001_gtFine_labelIds.png
│   │   │           └── berlin_00001_gtFine_polygons.json
│   │   └── leftImg8bit
│   │       ├── test
│   │       │   └── berlin
│   │       │       ├── berlin_00125_leftImg8bit.png
│   │       │       └── berlin_00126_leftImg8bit.png
│   │       ├── train
│   │       │   ├── montreal
│   │       │   │   ├── montreal_00000_leftImg8bit.png
│   │       │   │   ├── montreal_00001_leftImg8bit.png
│   │       │   │   └── montreal_00002_leftImg8bit.png
│   │       │   └── newyork
│   │       │       ├── newyork_00000_leftImg8bit.png
│   │       │       ├── newyork_00001_leftImg8bit.png
│   │       │       └── newyork_00002_leftImg8bit.png
│   │       └── val
│   │           └── berlin
│   │               ├── berlin_00000_leftImg8bit.png
│   │               └── berlin_00001_leftImg8bit.png
│   └── test_images_munich
│       ├── test_1.png
│       ├── test_2.png
│       ├── test_3.png
│       ├── test_4.png
│       └── test_5.png
├── fcn8s
│   ├── fcn.py
│   ├── helper.py
│   └── segment_video_robo.py
├── models
│   ├── get_monodepth_model.sh
│   ├── get_sem_seg_models.md
│   └── stuttgart_video
│       └── README.md
├── monodepth_lib
│   └── README.md
├── requirements.txt
├── semantic_depth.py
├── semantic_depth_cityscapes_sequence.py
├── semantic_depth_lib
│   ├── __init__.py
│   ├── pcl.py
│   └── point_cloud_2_ply.py
└── utils
    ├── create_video_from_frames.py
    ├── outlier_removal.py
    └── render_ply.py

/.gitignore:
--------------------------------------------------------------------------------
data/roborace750
data/vgg
data/stuttgart_video_test
models/sem_seg
models/monodepth
__pycache__/
fcn8s/log
fcn8s/runs
fcn8s/times.txt
media
results
.venv
*.DS_Store
README_old.md
.gitignore
*.json
.open3d
data/stuttgart_tmp
backproject.py
fence_overlaid.png
road.png
stuttgart_02_000000_005176_leftImg8bit_raw.ply
monodepth_lib/*.py
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# SemanticDepth

### [Paper](https://www.mdpi.com/1424-8220/19/14/3224/htm) | [Video](https://youtu.be/0yBb6kJ3mgQ)

Fusing Semantic Segmentation and Monocular Depth Estimation for Enabling Autonomous Driving in Roads without Lane Lines

|  |  |
|:-------------------------:|:-------------------------:|
| ![test_3](assets/images/test_munich/test_3.jpg) | ![test_3_output](assets/images/test_munich/test_3_output.jpg) |
| ![test_3_ALL](assets/images/test_munich/test_3_ALL.jpg) | ![test_3_planes](assets/images/test_munich/test_3_planes.jpg) |

---

Click on the image below to
watch a VIDEO demonstrating the system on Cityscapes:

[![STUTTGART SEQUENCE](http://img.youtube.com/vi/0yBb6kJ3mgQ/0.jpg)](https://youtu.be/0yBb6kJ3mgQ)

---

SemanticDepth is a deep-learning-based computer vision pipeline that computes the width of the road at a certain depth in front of a car.

It does so by fusing two deep-learning architectures, namely a **semantic segmentation** network ([FCN-8s](https://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Long_Fully_Convolutional_Networks_2015_CVPR_paper.pdf)) and a **monocular depth estimation** network ([monodepth](https://github.com/mrharicot/monodepth)).

We have two ways of computing the width of the road at a certain depth, where depth means distance along the viewing direction in front of the camera (see the sketch after this list):

* The __road's width__ (rw) itself. We extract the point cloud corresponding to the road in front of the camera and then compute the distance between its furthest point to the left and its furthest point to the right at the chosen depth.

* The __fence-to-fence distance__ (f2f). Here we additionally extract the point clouds corresponding to the fences/walls to each side of the road (assuming they exist). We then fit planes to the road's point cloud and to those of the left and right fences, and intersect the road's plane with the left fence's plane and with the right fence's plane. This yields two intersection lines; the width of the road at a chosen depth is then the distance between these two lines.
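To make the two measures concrete, here is a minimal numpy sketch of the geometry involved. This is only an illustration, not the code in this repository; it assumes each point cloud is an Nx3 array in camera coordinates (x to the right, z as depth in front of the camera) and that a fitted plane comes as coefficients `(a, b, c, d)` of `ax + by + cz + d = 0`:

```python
import numpy as np

def road_width_at_depth(road_points, depth, slab=0.5):
    """rw: widest left-to-right extent of the road point cloud
    inside a thin slab centered at the requested depth."""
    mask = np.abs(road_points[:, 2] - depth) < slab
    xs = road_points[mask, 0]
    if xs.size == 0:
        return None  # no road points at this depth
    return xs.max() - xs.min()

def plane_intersection_line(plane_a, plane_b):
    """Intersection line of two (non-parallel) planes (a, b, c, d).
    Returns a point on the line and the unit line direction, which is
    the cross product of the two plane normals."""
    n_a = np.asarray(plane_a[:3], dtype=float)
    n_b = np.asarray(plane_b[:3], dtype=float)
    direction = np.cross(n_a, n_b)
    # Pin down one point on the line with a third, independent constraint.
    A = np.vstack([n_a, n_b, direction])
    b = -np.array([plane_a[3], plane_b[3], 0.0])
    return np.linalg.solve(A, b), direction / np.linalg.norm(direction)

def fence_to_fence_at_depth(road_plane, left_plane, right_plane, depth):
    """f2f: distance, at the given depth, between the road/left-fence and
    road/right-fence intersection lines (assumes the lines point forward,
    i.e. their direction has a nonzero z component)."""
    points = []
    for fence_plane in (left_plane, right_plane):
        p, d = plane_intersection_line(road_plane, fence_plane)
        t = (depth - p[2]) / d[2]  # walk along the line until z == depth
        points.append(p + t * d)
    return float(np.linalg.norm(points[0] - points[1]))
```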

![pipeline](assets/images/semanticdepth.jpg)

## Citation
If you find our work useful in your research, please consider citing:

    @article{palafox2019semanticdepth,
      title={Semanticdepth: Fusing semantic segmentation and monocular depth estimation for enabling autonomous driving in roads without lane lines},
      author={Palafox, Pablo R and Betz, Johannes and Nobis, Felix and Riedl, Konstantin and Lienkamp, Markus},
      journal={Sensors},
      volume={19},
      number={14},
      pages={3224},
      year={2019},
      publisher={Multidisciplinary Digital Publishing Institute}
    }

## 1. Requirements (& Installation tips)
This code was tested with TensorFlow 1.0, CUDA 8.0 and Ubuntu 16.04.

First off, install python3-tk:

`$ sudo apt-get install python3-tk`

Clone this repo and change into the cloned directory:

```bash
$ git clone https://github.com/pablopalafox/semantic-depth.git
$ cd semantic-depth
```

Using virtual environments is always a good idea, so we will need pip and virtualenv installed:

`$ sudo apt-get install python3-pip python3.5-dev python-virtualenv`

To create a new virtualenv, run the following from inside the root directory of the cloned repository:

`$ virtualenv --no-site-packages -p python3.5 .venv`

We now have a clean python3.5 virtual environment, isolated from the packages of your system installation. Activate the virtualenv like so:

`$ source .venv/bin/activate`

Inside the virtualenv, run:

`$ pip install -r requirements.txt`

to get the [dependencies](requirements.txt) needed.

## 2. Datasets

### Datasets for Semantic Segmentation on classes _fence_ and _road_

For the semantic segmentation task, we labeled 750 [Roborace](https://roborace.com/) images with the classes fence, road and background. For labelling our own images, we used the [cityscapesScripts](https://github.com/mcordts/cityscapesScripts).

We cannot make the whole dataset public, as the original images are property of the [Roborace](https://roborace.com/) competition. A mockup of this dataset can be found [here](data/roborace750_mockup); it follows the same structure as the Cityscapes dataset. If you would like to get more images, join the [Roborace](https://roborace.com/) competition and you'll get tons of data from the racetracks.

Another option is training on [Cityscapes](https://www.cityscapes-dataset.com/) on the classes _fence_ and _road_ (and _background_). If your goal is participating in the Roborace competition, this can still get you decent results when running inference on Roborace images.

### Datasets for Monocular Depth Estimation

[MonoDepth](https://github.com/mrharicot/monodepth), the unsupervised single-image depth prediction network we make use of in our work, can be trained on [Kitti](http://www.cvlibs.net/datasets/kitti/eval_depth_all.php) or [Cityscapes](https://www.cityscapes-dataset.com/).

We directly use a model pre-trained on Cityscapes, which you can get from the Models section of the [monodepth](https://github.com/mrharicot/monodepth) repo. Alternatively, follow the instructions in section [Monodepth model](#monodepth).

### Munich Test Set

This is a set of 5 images of the streets of Munich on which you can test the whole pipeline. You can find it [here](data/test_images_munich). Section [Test SemanticDepth on our Munich test set](#test_pipeline) lists the commands for testing our whole pipeline on these images.

## 3. SemanticDepth - The whole pipeline
SemanticDepth merges [semantic segmentation](#sem_seg) and [monocular depth estimation](#monodepth) to compute the distance from the left fence to the right fence in a Formula E-like circuit (where the Roborace competition takes place). We have also found that a semantic segmentation model trained on Roborace images for fence and road classification, combined with a [monodepth](https://github.com/mrharicot/monodepth) model pre-trained on Cityscapes, makes our pipeline generalize to city environments, like those featured in our [Munich test set](data/test_images_munich).

### Test SemanticDepth

By running the command below, SemanticDepth will be applied to the [Munich test set](data/test_images_munich) using different focal lengths. By default, the list of focal lengths to try is `[380, 580]`. The reason for trying different focal lengths is that we are using a [monodepth model trained on the Cityscapes dataset](#monodepth), and Cityscapes comprises images with a certain focal length. As the author (Godard) puts it in this [discussion](https://github.com/mrharicot/monodepth/issues/190), the behaviour of monodepth is undefined when applied to images whose aspect ratio and focal length differ from those the model was trained on, since the network really only saw one type of image. Applying the same model to our own images therefore requires tuning the focal length so that computing depth from disparity yields reasonable numbers (again, see the [discussion on the topic](https://github.com/mrharicot/monodepth/issues/190)); the sketch below illustrates the conversion.
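To make the role of the focal length concrete, this is essentially the conversion at stake. It is a sketch under two assumptions that are not taken from this repository's code: monodepth-style disparities are normalized by image width, and the stereo baseline is Cityscapes-like at roughly 0.22 m:

```python
import numpy as np

def disparity_to_depth(disp, image_width, focal_length_px, baseline_m=0.22):
    """Classic stereo relation: depth = f * B / disparity.
    `disp` is a disparity map normalized by image width, so it is
    first converted back to pixels before applying the relation."""
    disp_px = np.maximum(disp * image_width, 1e-6)  # avoid division by zero
    return focal_length_px * baseline_m / disp_px
```

Since depth scales linearly with the focal length, a wrong focal length scales every estimated distance by a constant factor, which is why the pipeline sweeps over several candidate values.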
`$ python semantic_depth.py --save_data`

Results will be stored inside a newly created folder called **results**. Inside this folder, two more directories, namely **380** and **580**, will have been created, each containing the results for the 5 test images on which we have applied SemanticDepth. Also, a file _data.txt_ will have been generated, in which every line but the last refers to a test image. For every test image, we save the following:

`real_distance|road_width|fence2fence|abs(real_distance-road_width)|abs(real_distance-fence2fence)`

The last line of this _data.txt_ contains the Mean Absolute Error (MAE) of the absolute differences between the estimated distance and the real distance at a depth of x meters -- in our experiments, we set x = 10 m. We compute the MAE both for the road's width and for the fence-to-fence distance (see the [Introduction](#intro) for an explanation of these two approaches); a sketch of this aggregation is shown below.
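For reference, a _data.txt_ written in the pipe-separated format above could be aggregated like this (a hypothetical reader, not the repository's own evaluation code):

```python
def mae_from_data_txt(path):
    """Recompute the MAE of the rw and f2f estimates from a data.txt file.
    Every line except the last is assumed to hold:
    real_distance|road_width|fence2fence|abs_err_rw|abs_err_f2f"""
    with open(path) as f:
        lines = [line.strip() for line in f if line.strip()]
    rows = [list(map(float, line.split("|"))) for line in lines[:-1]]  # drop summary line
    mae_rw = sum(row[3] for row in rows) / len(rows)
    mae_f2f = sum(row[4] for row in rows) / len(rows)
    return mae_rw, mae_f2f

# e.g., mae_rw, mae_f2f = mae_from_data_txt("results/580/data.txt")
```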
After having run the previous python script with the `--save_data` argument set, we can now find the following inside the folders **380** and **580**:

* **\*\_output.ply** contains the reconstructed 3D scene, featuring only the road, the walls and the [road's width and fence-to-fence distance](#intro) (red and green lines, respectively). You can use [MeshLab](http://www.meshlab.net/) to open a PLY file.

* **\*\_output.png** features the segmented scene with the computed distances at the top.

* **\*\_output_dips.png** is the disparity map that [monodepth](https://github.com/mrharicot/monodepth) predicts for the given input image.

* **\*\_output_distances.txt** is a plain text file containing the [road's width and the fence-to-fence distance](#intro).

* **\*\_output_times.txt** is a plain text file containing the inference times for each task of the pipeline.

The rest of the files can be disregarded; they are only generated for sanity checks.

Note that you can set the `--verbose` option when running the previous command to get more info during execution, like so:

`$ python semantic_depth.py --save_data --verbose`

#### Other functionalities

Note as well that running the python script without any arguments

`$ python semantic_depth.py`

will only generate the following files:

* **\*\_output_distances.txt**, a plain text file containing the [road's width and fence-to-fence distance](#intro).

* **\*\_output_times.txt**, a plain text file containing the inference times for each task of the pipeline.

That is, no backend info is saved (no 3D point clouds, which are only used internally to compute the distances).

Also, by running the following, SemanticDepth will be applied using the focal length provided as an argument:

`$ python semantic_depth.py --f=360`

Other params:

* `--input_frame=`: If set, the pipeline will only be applied to the indicated image
* `--aproach=both`: If set to _both_, the road's width (rw) and the fence-to-fence distance (f2f) are computed. By setting it to _rw_, only the road's width will be computed.
* `--is_city`: Must be set when we want to process an image from Cityscapes. It helps set the correct intrinsic camera parameters.

#### I just want to test the system on a single image!

Simply run:

`python semantic_depth.py --input_frame=media/images/bielefeld_018644.png --save_data --is_city --f=580`

The `--is_city` flag indicates to the system that we are processing a Cityscapes frame.

### Test SemanticDepth on the Stuttgart video sequence from Cityscapes

Download the Stuttgart sequence from [Cityscapes](https://www.cityscapes-dataset.com/login/). Extract all the _png_ images from the sequence (or just a subset of it) into *data/stuttgart_video_test*. Then run:

`$ python semantic_depth_cityscapes_sequence.py --verbose`

By default, the _road's width_ will be computed, since the Stuttgart sequence does not have walls/fences at each side of the road (as a Formula-E-like racetrack would) on which to compute our _fence-to-fence distance_.

In the **results** folder (which will have been created in the root if you didn't have one yet) you will find a new folder named **stuttgart_video** containing two other directories, namely **result_sequence_imgs** and **result_sequence_ply**. The former contains the output images with the computed distances written on the frame; the latter contains the masked 3D point cloud on which we compute the road's width at a certain depth.

You can then use the script [_create_video_from_frames.py_](utils/create_video_from_frames.py) inside **utils** to convert the list of images just created (**result_sequence_imgs**) into _mp4_ format; a minimal sketch of such a conversion is shown below.
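If you would rather do that conversion by hand, an OpenCV sketch along these lines works (the frame rate and codec here are illustrative assumptions, not the settings of the bundled script):

```python
import glob
import os
import cv2

def frames_to_mp4(frames_dir, out_path, fps=17):
    """Stitch the alphabetically sorted PNG frames of a directory into an mp4."""
    frames = sorted(glob.glob(os.path.join(frames_dir, "*.png")))
    height, width = cv2.imread(frames[0]).shape[:2]
    writer = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*"mp4v"),
                             fps, (width, height))
    for frame_path in frames:
        writer.write(cv2.imread(frame_path))
    writer.release()

# e.g., frames_to_mp4("results/stuttgart_video/result_sequence_imgs", "stuttgart.mp4")
```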
## 4. Semantic Segmentation Network

The source files for the semantic segmentation network are under the folder [fcn8s](fcn8s). There you can find an implementation of the [FCN-8s](https://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Long_Fully_Convolutional_Networks_2015_CVPR_paper.pdf) semantic segmentation architecture.

### To train a new model we need to:

* Make sure that your virtualenv is activated. Otherwise, run the following inside the root directory of your project (or wherever you have your virtual environment):

`source .venv/bin/activate`

* Then, change directories to [fcn8s](fcn8s) and execute the **fcn.py** file to train our FCN-8s implementation on a specified dataset (e.g., roborace750_mockup or Cityscapes) like so:

```bash
$ cd fcn8s
$ python fcn.py --dataset=roborace750_mockup --epochs=100
```

* After training is done, the following folders will have been created:

  - **../models/sem_seg**: contains the model that has just been trained

  - **log** (inside [fcn8s](fcn8s)): contains logging info about training:
    - loss vs epochs for training and validation sets
    - IoU vs epochs for training and validation sets

### Pretrained Model for Semantic Segmentation on _fences_ and _road_

Available upon request at pablo.rodriguez-palafox@tum.de. See [models/get_sem_seg_models.md](models/get_sem_seg_models.md) for further details on how to get them.

### Test a model on the Roborace dataset's test set:

Check that you are inside the [fcn8s](fcn8s) directory.

Within the virtual environment, execute the following to run inference on the test set of the dataset indicated in the `--dataset` argument, using a previously trained model whose name you will be prompted for after running the command:

`$ python fcn.py --mode=test --dataset=roborace750_mockup`

`Enter the name of the model you want to use in the format -Epochs-, e.g., 100-Epochs-roborace750`

After testing is done, the following folder and files will have appeared in the same folder as the fcn.py file:

* **runs**: contains the segmented images
* **log//iou/test_set_iou_.txt**: contains the IoU metric for each image of the test set
* **times.txt**: inference times for each image of the test set

## 5. Monocular Depth Estimation Network (monodepth)

We use the network developed by Godard et al., called [MonoDepth](https://github.com/mrharicot/monodepth) (Copyright © Niantic, Inc. 2018. Patent Pending. All rights reserved.).

### Monodepth model (monocular depth estimation model trained on Cityscapes by [Godard](https://github.com/mrharicot/monodepth))

To download the [monodepth model](https://github.com/mrharicot/monodepth) trained on Cityscapes by [Godard](https://github.com/mrharicot/monodepth), go to the [monodepth repo](https://github.com/mrharicot/monodepth) or run the following:

```bash
$ cd models
$ sudo chmod +x get_monodepth_model.sh
$ ./get_monodepth_model.sh model_cityscapes ./monodepth/model_cityscapes
```
## 6. License

Files [fcn8s/fcn.py](fcn8s/fcn.py) and [fcn8s/helper.py](fcn8s/helper.py) are based on the [FCN-8s implementation by Udacity](https://github.com/udacity/CarND-Semantic-Segmentation), released under the [MIT License](https://opensource.org/licenses/MIT).

The rest of the files in this project are released under a [GPLv3 License](https://www.gnu.org/licenses/gpl-3.0.en.html).

Check the [LICENSE](LICENSE) for a detailed explanation of the licenses under which this work is released.
--------------------------------------------------------------------------------
/assets/images/semanticdepth.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pablopalafox/semantic-depth/c55b4f6822f7ee7cc22b3a7f052de5204e3a037e/assets/images/semanticdepth.jpg
--------------------------------------------------------------------------------
/assets/images/test_munich/test_3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pablopalafox/semantic-depth/c55b4f6822f7ee7cc22b3a7f052de5204e3a037e/assets/images/test_munich/test_3.jpg
--------------------------------------------------------------------------------
/assets/images/test_munich/test_3_ALL.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pablopalafox/semantic-depth/c55b4f6822f7ee7cc22b3a7f052de5204e3a037e/assets/images/test_munich/test_3_ALL.jpg
--------------------------------------------------------------------------------
/assets/images/test_munich/test_3_output.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pablopalafox/semantic-depth/c55b4f6822f7ee7cc22b3a7f052de5204e3a037e/assets/images/test_munich/test_3_output.jpg
--------------------------------------------------------------------------------
/assets/images/test_munich/test_3_planes.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pablopalafox/semantic-depth/c55b4f6822f7ee7cc22b3a7f052de5204e3a037e/assets/images/test_munich/test_3_planes.jpg
--------------------------------------------------------------------------------
/assets/pdfs/semanticDepthPabloRodriguezPalafox.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pablopalafox/semantic-depth/c55b4f6822f7ee7cc22b3a7f052de5204e3a037e/assets/pdfs/semanticDepthPabloRodriguezPalafox.pdf
--------------------------------------------------------------------------------
/data/roborace750_mockup/gtFine/test/berlin/berlin_00125_gtFine_labelIds.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pablopalafox/semantic-depth/c55b4f6822f7ee7cc22b3a7f052de5204e3a037e/data/roborace750_mockup/gtFine/test/berlin/berlin_00125_gtFine_labelIds.png
--------------------------------------------------------------------------------
/data/roborace750_mockup/gtFine/test/berlin/berlin_00125_gtFine_polygons.json:
--------------------------------------------------------------------------------
1 | { 2 | "imgHeight": 1200, 3 | "imgWidth": 1600, 4 | "objects": [ 5 | { 6 | "date": "24-Feb-2018 17:32:53", 7 | "deleted": 0, 8 | "draw": true, 9 | "id": 0, 10 | "label": "road", 11 | "polygon": [ 12 | [ 13 | 0.0, 14 | 559.0308370044053 15 | ], 16 | [ 17 |
161.23348017621146, 18 | 552.4229074889868 19 | ], 20 | [ 21 | 1599.0, 22 | 582.8193832599119 23 | ], 24 | [ 25 | 1599.0, 26 | 1103.5242290748897 27 | ], 28 | [ 29 | 1517.180616740088, 30 | 1199.0 31 | ], 32 | [ 33 | 0.0, 34 | 1199.0 35 | ] 36 | ], 37 | "user": "pablo", 38 | "verified": 0 39 | }, 40 | { 41 | "date": "24-Feb-2018 17:33:53", 42 | "deleted": 0, 43 | "draw": true, 44 | "id": 1, 45 | "label": "fence", 46 | "polygon": [ 47 | [ 48 | 0.0, 49 | 613.215859030837 50 | ], 51 | [ 52 | 29.074889867841406, 53 | 605.2863436123348 54 | ], 55 | [ 56 | 50.220264317180614, 57 | 600.0 58 | ], 59 | [ 60 | 64.75770925110132, 61 | 596.0352422907489 62 | ], 63 | [ 64 | 71.36563876651982, 65 | 589.4273127753304 66 | ], 67 | [ 68 | 75.33039647577093, 69 | 584.1409691629956 70 | ], 71 | [ 72 | 75.33039647577093, 73 | 562.9955947136564 74 | ], 75 | [ 76 | 163.87665198237883, 77 | 562.9955947136564 78 | ], 79 | [ 80 | 229.95594713656385, 81 | 564.3171806167401 82 | ], 83 | [ 84 | 343.6123348017621, 85 | 565.6387665198238 86 | ], 87 | [ 88 | 453.30396475770925, 89 | 566.9603524229075 90 | ], 91 | [ 92 | 453.30396475770925, 93 | 589.4273127753304 94 | ], 95 | [ 96 | 490.3083700440528, 97 | 593.3920704845815 98 | ], 99 | [ 100 | 874.8898678414097, 101 | 625.1101321585903 102 | ], 103 | [ 104 | 892.0704845814978, 105 | 626.431718061674 106 | ], 107 | [ 108 | 1525.1101321585902, 109 | 671.3656387665197 110 | ], 111 | [ 112 | 1599.0, 113 | 675.3303964757708 114 | ], 115 | [ 116 | 1599.0, 117 | 0.0 118 | ], 119 | [ 120 | 853.7444933920705, 121 | 0.0 122 | ], 123 | [ 124 | 704.4052863436123, 125 | 67.40088105726872 126 | ], 127 | [ 128 | 674.0088105726871, 129 | 108.37004405286343 130 | ], 131 | [ 132 | 637.0044052863436, 133 | 130.83700440528634 134 | ], 135 | [ 136 | 623.7885462555066, 137 | 133.48017621145374 138 | ], 139 | [ 140 | 615.8590308370044, 141 | 457.2687224669603 142 | ], 143 | [ 144 | 615.8590308370044, 145 | 499.55947136563873 146 | ], 147 | [ 148 | 395.1541850220264, 149 | 515.4185022026431 150 | ], 151 | [ 152 | 120.26431718061673, 153 | 535.2422907488987 154 | ], 155 | [ 156 | 75.33039647577093, 157 | 533.920704845815 158 | ], 159 | [ 160 | 75.33039647577093, 161 | 519.3832599118942 162 | ], 163 | [ 164 | 72.68722466960352, 165 | 514.0969162995594 166 | ], 167 | [ 168 | 70.04405286343612, 169 | 504.84581497797353 170 | ], 171 | [ 172 | 72.68722466960352, 173 | 459.9118942731277 174 | ], 175 | [ 176 | 87.22466960352422, 177 | 429.51541850220264 178 | ], 179 | [ 180 | 88.54625550660792, 181 | 360.7929515418502 182 | ], 183 | [ 184 | 50.220264317180614, 185 | 268.2819383259912 186 | ], 187 | [ 188 | 0.0, 189 | 208.81057268722466 190 | ] 191 | ], 192 | "user": "pablo", 193 | "verified": 0 194 | } 195 | ] 196 | } -------------------------------------------------------------------------------- /data/roborace750_mockup/gtFine/test/berlin/berlin_00126_gtFine_labelIds.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pablopalafox/semantic-depth/c55b4f6822f7ee7cc22b3a7f052de5204e3a037e/data/roborace750_mockup/gtFine/test/berlin/berlin_00126_gtFine_labelIds.png -------------------------------------------------------------------------------- /data/roborace750_mockup/gtFine/test/berlin/berlin_00126_gtFine_polygons.json: -------------------------------------------------------------------------------- 1 | { 2 | "imgHeight": 1200, 3 | "imgWidth": 1600, 4 | "objects": [ 5 | { 6 | "date": "24-Feb-2018 17:34:25", 7 | "deleted": 0, 8 | 
"draw": true, 9 | "id": 0, 10 | "label": "road", 11 | "polygon": [ 12 | [ 13 | 0.0, 14 | 552.4229074889868 15 | ], 16 | [ 17 | 594.7136563876652, 18 | 549.7797356828194 19 | ], 20 | [ 21 | 806.1674008810572, 22 | 553.7444933920705 23 | ], 24 | [ 25 | 1599.0, 26 | 551.1013215859031 27 | ], 28 | [ 29 | 1599.0, 30 | 1103.5242290748897 31 | ], 32 | [ 33 | 1523.7885462555066, 34 | 1199.0 35 | ], 36 | [ 37 | 0.0, 38 | 1199.0 39 | ] 40 | ], 41 | "user": "pablo", 42 | "verified": 0 43 | }, 44 | { 45 | "date": "24-Feb-2018 17:36:42", 46 | "deleted": 0, 47 | "draw": true, 48 | "id": 1, 49 | "label": "fence", 50 | "polygon": [ 51 | [ 52 | 0.0, 53 | 581.4977973568282 54 | ], 55 | [ 56 | 15.859030837004404, 57 | 580.1762114537445 58 | ], 59 | [ 60 | 15.859030837004404, 61 | 573.568281938326 62 | ], 63 | [ 64 | 29.074889867841406, 65 | 572.2466960352423 66 | ], 67 | [ 68 | 130.83700440528634, 69 | 569.6035242290749 70 | ], 71 | [ 72 | 215.41850220264317, 73 | 568.2819383259912 74 | ], 75 | [ 76 | 294.7136563876652, 77 | 566.9603524229075 78 | ], 79 | [ 80 | 396.4757709251101, 81 | 564.3171806167401 82 | ], 83 | [ 84 | 434.80176211453744, 85 | 562.9955947136564 86 | ], 87 | [ 88 | 454.62555066079295, 89 | 562.9955947136564 90 | ], 91 | [ 92 | 494.2731277533039, 93 | 561.6740088105727 94 | ], 95 | [ 96 | 531.2775330396476, 97 | 560.352422907489 98 | ], 99 | [ 100 | 570.9251101321586, 101 | 557.7092511013216 102 | ], 103 | [ 104 | 582.8193832599119, 105 | 556.3876651982379 106 | ], 107 | [ 108 | 592.0704845814978, 109 | 555.0660792951542 110 | ], 111 | [ 112 | 654.1850220264316, 113 | 555.0660792951542 114 | ], 115 | [ 116 | 679.2951541850219, 117 | 555.0660792951542 118 | ], 119 | [ 120 | 692.511013215859, 121 | 556.3876651982379 122 | ], 123 | [ 124 | 742.7312775330396, 125 | 557.7092511013216 126 | ], 127 | [ 128 | 745.374449339207, 129 | 560.352422907489 130 | ], 131 | [ 132 | 798.237885462555, 133 | 564.3171806167401 134 | ], 135 | [ 136 | 800.8810572687224, 137 | 564.3171806167401 138 | ], 139 | [ 140 | 873.568281938326, 141 | 569.6035242290749 142 | ], 143 | [ 144 | 886.784140969163, 145 | 572.2466960352423 146 | ], 147 | [ 148 | 1071.806167400881, 149 | 593.3920704845815 150 | ], 151 | [ 152 | 1599.0, 153 | 654.1850220264316 154 | ], 155 | [ 156 | 1599.0, 157 | 0.0 158 | ], 159 | [ 160 | 1379.735682819383, 161 | 0.0 162 | ], 163 | [ 164 | 1173.568281938326, 165 | 157.26872246696036 166 | ], 167 | [ 168 | 1050.6607929515417, 169 | 269.6035242290749 170 | ], 171 | [ 172 | 823.3480176211453, 173 | 471.806167400881 174 | ], 175 | [ 176 | 794.2731277533039, 177 | 488.9867841409691 178 | ], 179 | [ 180 | 758.590308370044, 181 | 495.5947136563876 182 | ], 183 | [ 184 | 672.6872246696034, 185 | 525.9911894273127 186 | ], 187 | [ 188 | 580.1762114537445, 189 | 527.3127753303964 190 | ], 191 | [ 192 | 561.6740088105727, 193 | 519.3832599118942 194 | ], 195 | [ 196 | 504.84581497797353, 197 | 504.84581497797353 198 | ], 199 | [ 200 | 333.03964757709247, 201 | 469.1629955947136 202 | ], 203 | [ 204 | 15.859030837004404, 205 | 422.90748898678413 206 | ], 207 | [ 208 | 14.537444933920703, 209 | 407.0484581497797 210 | ], 211 | [ 212 | 31.71806167400881, 213 | 383.2599118942731 214 | ], 215 | [ 216 | 1.3215859030837003, 217 | 374.0088105726872 218 | ] 219 | ], 220 | "user": "pablo", 221 | "verified": 0 222 | } 223 | ] 224 | } -------------------------------------------------------------------------------- /data/roborace750_mockup/gtFine/train/montreal/montreal_00000_gtFine_labelIds.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/pablopalafox/semantic-depth/c55b4f6822f7ee7cc22b3a7f052de5204e3a037e/data/roborace750_mockup/gtFine/train/montreal/montreal_00000_gtFine_labelIds.png -------------------------------------------------------------------------------- /data/roborace750_mockup/gtFine/train/montreal/montreal_00000_gtFine_polygons.json: -------------------------------------------------------------------------------- 1 | { 2 | "imgHeight": 1200, 3 | "imgWidth": 1600, 4 | "objects": [ 5 | { 6 | "date": "15-Jan-2018 14:19:52", 7 | "deleted": 0, 8 | "draw": true, 9 | "id": 0, 10 | "label": "road", 11 | "polygon": [ 12 | [ 13 | 1599.0, 14 | 633.3333333333333 15 | ], 16 | [ 17 | 933.3333333333333, 18 | 558.974358974359 19 | ], 20 | [ 21 | 387.1794871794872, 22 | 538.4615384615385 23 | ], 24 | [ 25 | 0.0, 26 | 548.7179487179487 27 | ], 28 | [ 29 | 0.0, 30 | 1199.0 31 | ], 32 | [ 33 | 1516.6666666666665, 34 | 1199.0 35 | ], 36 | [ 37 | 1599.0, 38 | 1089.7435897435896 39 | ] 40 | ], 41 | "user": "pablo", 42 | "verified": 0 43 | }, 44 | { 45 | "date": "15-Jan-2018 14:20:34", 46 | "deleted": 0, 47 | "draw": true, 48 | "id": 1, 49 | "label": "fence", 50 | "polygon": [ 51 | [ 52 | 1599.0, 53 | 671.7948717948718 54 | ], 55 | [ 56 | 983.3333333333333, 57 | 598.7179487179487 58 | ], 59 | [ 60 | 310.2564102564102, 61 | 584.6153846153846 62 | ], 63 | [ 64 | 0.0, 65 | 589.7435897435897 66 | ], 67 | [ 68 | 0.0, 69 | 546.1538461538462 70 | ], 71 | [ 72 | 330.7692307692308, 73 | 542.3076923076923 74 | ], 75 | [ 76 | 332.05128205128204, 77 | 423.07692307692304 78 | ], 79 | [ 80 | 451.28205128205127, 81 | 407.6923076923077 82 | ], 83 | [ 84 | 973.0769230769231, 85 | 405.12820512820514 86 | ], 87 | [ 88 | 964.1025641025641, 89 | 396.15384615384613 90 | ], 91 | [ 92 | 1573.076923076923, 93 | 1.282051282051282 94 | ], 95 | [ 96 | 1599.0, 97 | 0.0 98 | ] 99 | ], 100 | "user": "pablo", 101 | "verified": 0 102 | } 103 | ] 104 | } -------------------------------------------------------------------------------- /data/roborace750_mockup/gtFine/train/montreal/montreal_00001_gtFine_labelIds.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pablopalafox/semantic-depth/c55b4f6822f7ee7cc22b3a7f052de5204e3a037e/data/roborace750_mockup/gtFine/train/montreal/montreal_00001_gtFine_labelIds.png -------------------------------------------------------------------------------- /data/roborace750_mockup/gtFine/train/montreal/montreal_00001_gtFine_polygons.json: -------------------------------------------------------------------------------- 1 | { 2 | "imgHeight": 1200, 3 | "imgWidth": 1600, 4 | "objects": [ 5 | { 6 | "date": "15-Jan-2018 14:20:57", 7 | "deleted": 0, 8 | "draw": true, 9 | "id": 0, 10 | "label": "road", 11 | "polygon": [ 12 | [ 13 | 0.0, 14 | 555.1282051282051 15 | ], 16 | [ 17 | 1599.0, 18 | 575.6410256410256 19 | ], 20 | [ 21 | 1599.0, 22 | 1110.2564102564102 23 | ], 24 | [ 25 | 1521.7948717948718, 26 | 1199.0 27 | ], 28 | [ 29 | 0.0, 30 | 1199.0 31 | ] 32 | ], 33 | "user": "pablo", 34 | "verified": 0 35 | }, 36 | { 37 | "date": "15-Jan-2018 14:21:22", 38 | "deleted": 0, 39 | "draw": true, 40 | "id": 1, 41 | "label": "fence", 42 | "polygon": [ 43 | [ 44 | 0.0, 45 | 516.6666666666666 46 | ], 47 | [ 48 | 0.0, 49 | 596.1538461538462 50 | ], 51 | [ 52 | 591.025641025641, 53 | 606.4102564102564 54 | ], 55 | [ 56 | 614.1025641025641, 57 | 616.6666666666666 58 | ], 59 
| [ 60 | 1024.3589743589744, 61 | 605.1282051282051 62 | ], 63 | [ 64 | 1175.6410256410256, 65 | 600.0 66 | ], 67 | [ 68 | 1599.0, 69 | 587.1794871794872 70 | ], 71 | [ 72 | 1599.0, 73 | 387.1794871794872 74 | ], 75 | [ 76 | 1369.2307692307693, 77 | 379.48717948717945 78 | ], 79 | [ 80 | 683.3333333333333, 81 | 429.48717948717945 82 | ] 83 | ], 84 | "user": "pablo", 85 | "verified": 0 86 | } 87 | ] 88 | } -------------------------------------------------------------------------------- /data/roborace750_mockup/gtFine/train/montreal/montreal_00002_gtFine_labelIds.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pablopalafox/semantic-depth/c55b4f6822f7ee7cc22b3a7f052de5204e3a037e/data/roborace750_mockup/gtFine/train/montreal/montreal_00002_gtFine_labelIds.png -------------------------------------------------------------------------------- /data/roborace750_mockup/gtFine/train/montreal/montreal_00002_gtFine_polygons.json: -------------------------------------------------------------------------------- 1 | { 2 | "imgHeight": 1200, 3 | "imgWidth": 1600, 4 | "objects": [ 5 | { 6 | "date": "15-Jan-2018 14:21:48", 7 | "deleted": 0, 8 | "draw": true, 9 | "id": 0, 10 | "label": "road", 11 | "polygon": [ 12 | [ 13 | 0.0, 14 | 576.9230769230769 15 | ], 16 | [ 17 | 1557.6923076923076, 18 | 588.4615384615385 19 | ], 20 | [ 21 | 1562.8205128205127, 22 | 594.8717948717948 23 | ], 24 | [ 25 | 1562.8205128205127, 26 | 591.025641025641 27 | ], 28 | [ 29 | 1576.923076923077, 30 | 592.3076923076923 31 | ], 32 | [ 33 | 1593.5897435897436, 34 | 593.5897435897435 35 | ], 36 | [ 37 | 1599.0, 38 | 594.8717948717948 39 | ], 40 | [ 41 | 1599.0, 42 | 1107.6923076923076 43 | ], 44 | [ 45 | 1505.128205128205, 46 | 1199.0 47 | ], 48 | [ 49 | 0.0, 50 | 1199.0 51 | ] 52 | ], 53 | "user": "pablo", 54 | "verified": 0 55 | }, 56 | { 57 | "date": "15-Jan-2018 14:22:33", 58 | "deleted": 0, 59 | "draw": true, 60 | "id": 1, 61 | "label": "fence", 62 | "polygon": [ 63 | [ 64 | 0.0, 65 | 605.1282051282051 66 | ], 67 | [ 68 | 106.41025641025641, 69 | 601.2820512820513 70 | ], 71 | [ 72 | 233.33333333333331, 73 | 602.5641025641025 74 | ], 75 | [ 76 | 334.6153846153846, 77 | 601.2820512820513 78 | ], 79 | [ 80 | 562.8205128205128, 81 | 598.7179487179487 82 | ], 83 | [ 84 | 982.051282051282, 85 | 606.4102564102564 86 | ], 87 | [ 88 | 1364.102564102564, 89 | 607.6923076923076 90 | ], 91 | [ 92 | 1488.4615384615383, 93 | 601.2820512820513 94 | ], 95 | [ 96 | 1519.2307692307693, 97 | 601.2820512820513 98 | ], 99 | [ 100 | 1541.0256410256409, 101 | 601.2820512820513 102 | ], 103 | [ 104 | 1553.8461538461538, 105 | 601.2820512820513 106 | ], 107 | [ 108 | 1558.974358974359, 109 | 600.0 110 | ], 111 | [ 112 | 1564.102564102564, 113 | 589.7435897435897 114 | ], 115 | [ 116 | 1561.5384615384614, 117 | 552.5641025641025 118 | ], 119 | [ 120 | 1562.8205128205127, 121 | 550.0 122 | ], 123 | [ 124 | 1561.5384615384614, 125 | 546.1538461538462 126 | ], 127 | [ 128 | 1557.6923076923076, 129 | 543.5897435897435 130 | ], 131 | [ 132 | 1248.7179487179487, 133 | 502.56410256410254 134 | ], 135 | [ 136 | 1247.4358974358975, 137 | 455.12820512820514 138 | ], 139 | [ 140 | 856.4102564102564, 141 | 397.4358974358974 142 | ], 143 | [ 144 | 694.8717948717948, 145 | 375.64102564102564 146 | ], 147 | [ 148 | 462.8205128205128, 149 | 356.4102564102564 150 | ], 151 | [ 152 | 208.97435897435898, 153 | 343.5897435897436 154 | ], 155 | [ 156 | 0.0, 157 | 343.5897435897436 158 | ] 159 | ], 160 | 
"user": "pablo", 161 | "verified": 0 162 | }, 163 | { 164 | "date": "16-Jan-2018 15:16:32", 165 | "deleted": 0, 166 | "draw": true, 167 | "id": 2, 168 | "label": "fence", 169 | "polygon": [ 170 | [ 171 | 1564.102564102564, 172 | 591.025641025641 173 | ], 174 | [ 175 | 1571.7948717948718, 176 | 592.3076923076923 177 | ], 178 | [ 179 | 1583.3333333333333, 180 | 593.5897435897435 181 | ], 182 | [ 183 | 1599.0, 184 | 593.5897435897435 185 | ], 186 | [ 187 | 1599.0, 188 | 593.5897435897435 189 | ], 190 | [ 191 | 1599.0, 192 | 565.3846153846154 193 | ], 194 | [ 195 | 1575.6410256410256, 196 | 564.1025641025641 197 | ], 198 | [ 199 | 1567.9487179487178, 200 | 564.1025641025641 201 | ], 202 | [ 203 | 1562.8205128205127, 204 | 564.1025641025641 205 | ] 206 | ], 207 | "user": "pablo", 208 | "verified": 0 209 | } 210 | ] 211 | } -------------------------------------------------------------------------------- /data/roborace750_mockup/gtFine/train/newyork/newyork_00000_gtFine_labelIds.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pablopalafox/semantic-depth/c55b4f6822f7ee7cc22b3a7f052de5204e3a037e/data/roborace750_mockup/gtFine/train/newyork/newyork_00000_gtFine_labelIds.png -------------------------------------------------------------------------------- /data/roborace750_mockup/gtFine/train/newyork/newyork_00000_gtFine_polygons.json: -------------------------------------------------------------------------------- 1 | { 2 | "imgHeight": 1200, 3 | "imgWidth": 1600, 4 | "objects": [ 5 | { 6 | "date": "19-Jan-2018 13:01:05", 7 | "deleted": 0, 8 | "draw": true, 9 | "id": 0, 10 | "label": "road", 11 | "polygon": [ 12 | [ 13 | 0.0, 14 | 570.5128205128204 15 | ], 16 | [ 17 | 405.12820512820514, 18 | 538.4615384615385 19 | ], 20 | [ 21 | 702.5641025641025, 22 | 537.1794871794872 23 | ], 24 | [ 25 | 1200.0, 26 | 552.5641025641025 27 | ], 28 | [ 29 | 1599.0, 30 | 562.8205128205128 31 | ], 32 | [ 33 | 1599.0, 34 | 1085.8974358974358 35 | ], 36 | [ 37 | 1539.7435897435896, 38 | 1199.0 39 | ], 40 | [ 41 | 0.0, 42 | 1199.0 43 | ] 44 | ], 45 | "user": "pablo", 46 | "verified": 0 47 | }, 48 | { 49 | "date": "19-Jan-2018 13:01:42", 50 | "deleted": 0, 51 | "draw": true, 52 | "id": 1, 53 | "label": "fence", 54 | "polygon": [ 55 | [ 56 | 0.0, 57 | 638.4615384615385 58 | ], 59 | [ 60 | 374.35897435897436, 61 | 621.7948717948718 62 | ], 63 | [ 64 | 405.12820512820514, 65 | 611.5384615384615 66 | ], 67 | [ 68 | 492.30769230769226, 69 | 607.6923076923076 70 | ], 71 | [ 72 | 403.8461538461538, 73 | 594.8717948717948 74 | ], 75 | [ 76 | 402.56410256410254, 77 | 417.94871794871796 78 | ], 79 | [ 80 | 394.87179487179486, 81 | 411.53846153846155 82 | ], 83 | [ 84 | 410.2564102564102, 85 | 217.94871794871793 86 | ], 87 | [ 88 | 421.79487179487177, 89 | 171.7948717948718 90 | ], 91 | [ 92 | 170.5128205128205, 93 | 152.56410256410257 94 | ], 95 | [ 96 | 0.0, 97 | 147.43589743589743 98 | ] 99 | ], 100 | "user": "pablo", 101 | "verified": 0 102 | }, 103 | { 104 | "date": "19-Jan-2018 13:02:23", 105 | "deleted": 0, 106 | "draw": true, 107 | "id": 2, 108 | "label": "fence", 109 | "polygon": [ 110 | [ 111 | 1599.0, 112 | 630.7692307692307 113 | ], 114 | [ 115 | 1191.0256410256409, 116 | 602.5641025641025 117 | ], 118 | [ 119 | 958.9743589743589, 120 | 591.025641025641 121 | ], 122 | [ 123 | 907.6923076923076, 124 | 588.4615384615385 125 | ], 126 | [ 127 | 761.5384615384615, 128 | 587.1794871794872 129 | ], 130 | [ 131 | 594.8717948717948, 132 | 582.051282051282 133 
| ], 134 | [ 135 | 489.7435897435897, 136 | 578.2051282051282 137 | ], 138 | [ 139 | 430.7692307692308, 140 | 576.9230769230769 141 | ], 142 | [ 143 | 403.8461538461538, 144 | 575.6410256410256 145 | ], 146 | [ 147 | 405.12820512820514, 148 | 417.94871794871796 149 | ], 150 | [ 151 | 515.3846153846154, 152 | 412.8205128205128 153 | ], 154 | [ 155 | 637.1794871794872, 156 | 414.1025641025641 157 | ], 158 | [ 159 | 750.0, 160 | 408.97435897435895 161 | ], 162 | [ 163 | 867.9487179487179, 164 | 407.6923076923077 165 | ], 166 | [ 167 | 870.5128205128204, 168 | 405.12820512820514 169 | ], 170 | [ 171 | 870.5128205128204, 172 | 362.8205128205128 173 | ], 174 | [ 175 | 878.2051282051282, 176 | 366.66666666666663 177 | ], 178 | [ 179 | 1002.5641025641025, 180 | 369.2307692307692 181 | ], 182 | [ 183 | 1032.051282051282, 184 | 361.53846153846155 185 | ], 186 | [ 187 | 1598.7179487179487, 188 | 192.3076923076923 189 | ] 190 | ], 191 | "user": "pablo", 192 | "verified": 0 193 | } 194 | ] 195 | } -------------------------------------------------------------------------------- /data/roborace750_mockup/gtFine/train/newyork/newyork_00001_gtFine_labelIds.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pablopalafox/semantic-depth/c55b4f6822f7ee7cc22b3a7f052de5204e3a037e/data/roborace750_mockup/gtFine/train/newyork/newyork_00001_gtFine_labelIds.png -------------------------------------------------------------------------------- /data/roborace750_mockup/gtFine/train/newyork/newyork_00001_gtFine_polygons.json: -------------------------------------------------------------------------------- 1 | { 2 | "imgHeight": 1200, 3 | "imgWidth": 1600, 4 | "objects": [ 5 | { 6 | "date": "19-Jan-2018 13:02:40", 7 | "deleted": 0, 8 | "draw": true, 9 | "id": 0, 10 | "label": "road", 11 | "polygon": [ 12 | [ 13 | 0.0, 14 | 561.5384615384615 15 | ], 16 | [ 17 | 788.4615384615385, 18 | 566.6666666666666 19 | ], 20 | [ 21 | 835.8974358974359, 22 | 565.3846153846154 23 | ], 24 | [ 25 | 892.3076923076923, 26 | 569.2307692307692 27 | ], 28 | [ 29 | 978.2051282051282, 30 | 570.5128205128204 31 | ], 32 | [ 33 | 1599.0, 34 | 578.2051282051282 35 | ], 36 | [ 37 | 1599.0, 38 | 1082.051282051282 39 | ], 40 | [ 41 | 1529.4871794871794, 42 | 1199.0 43 | ], 44 | [ 45 | 0.0, 46 | 1199.0 47 | ] 48 | ], 49 | "user": "pablo", 50 | "verified": 0 51 | }, 52 | { 53 | "date": "19-Jan-2018 13:03:31", 54 | "deleted": 0, 55 | "draw": true, 56 | "id": 1, 57 | "label": "fence", 58 | "polygon": [ 59 | [ 60 | 0.0, 61 | 741.025641025641 62 | ], 63 | [ 64 | 734.6153846153846, 65 | 589.7435897435897 66 | ], 67 | [ 68 | 748.7179487179487, 69 | 587.1794871794872 70 | ], 71 | [ 72 | 767.9487179487179, 73 | 582.051282051282 74 | ], 75 | [ 76 | 775.6410256410256, 77 | 582.051282051282 78 | ], 79 | [ 80 | 783.3333333333333, 81 | 579.4871794871794 82 | ], 83 | [ 84 | 791.025641025641, 85 | 576.9230769230769 86 | ], 87 | [ 88 | 791.025641025641, 89 | 575.6410256410256 90 | ], 91 | [ 92 | 873.0769230769231, 93 | 574.3589743589744 94 | ], 95 | [ 96 | 896.1538461538461, 97 | 576.9230769230769 98 | ], 99 | [ 100 | 897.4358974358975, 101 | 576.9230769230769 102 | ], 103 | [ 104 | 897.4358974358975, 105 | 565.3846153846154 106 | ], 107 | [ 108 | 888.4615384615385, 109 | 564.1025641025641 110 | ], 111 | [ 112 | 874.3589743589744, 113 | 562.8205128205128 114 | ], 115 | [ 116 | 867.9487179487179, 117 | 561.5384615384615 118 | ], 119 | [ 120 | 867.9487179487179, 121 | 553.8461538461538 122 | ], 123 | [ 
124 | 825.6410256410256, 125 | 551.2820512820513 126 | ], 127 | [ 128 | 789.7435897435897, 129 | 550.0 130 | ], 131 | [ 132 | 788.4615384615385, 133 | 539.7435897435897 134 | ], 135 | [ 136 | 793.5897435897435, 137 | 535.8974358974359 138 | ], 139 | [ 140 | 789.7435897435897, 141 | 532.051282051282 142 | ], 143 | [ 144 | 782.051282051282, 145 | 523.0769230769231 146 | ], 147 | [ 148 | 773.0769230769231, 149 | 521.7948717948718 150 | ], 151 | [ 152 | 769.2307692307692, 153 | 520.5128205128204 154 | ], 155 | [ 156 | 769.2307692307692, 157 | 497.4358974358974 158 | ], 159 | [ 160 | 771.7948717948718, 161 | 493.5897435897436 162 | ], 163 | [ 164 | 775.6410256410256, 165 | 488.46153846153845 166 | ], 167 | [ 168 | 757.6923076923076, 169 | 457.6923076923077 170 | ], 171 | [ 172 | 739.7435897435897, 173 | 419.2307692307692 174 | ], 175 | [ 176 | 735.8974358974359, 177 | 407.6923076923077 178 | ], 179 | [ 180 | 728.2051282051282, 181 | 385.8974358974359 182 | ], 183 | [ 184 | 724.3589743589744, 185 | 383.3333333333333 186 | ], 187 | [ 188 | 664.1025641025641, 189 | 247.43589743589743 190 | ], 191 | [ 192 | 552.5641025641025, 193 | 0.0 194 | ], 195 | [ 196 | 0.0, 197 | 0.0 198 | ] 199 | ], 200 | "user": "pablo", 201 | "verified": 0 202 | }, 203 | { 204 | "date": "19-Jan-2018 13:04:05", 205 | "deleted": 0, 206 | "draw": true, 207 | "id": 2, 208 | "label": "fence", 209 | "polygon": [ 210 | [ 211 | 1599.0, 212 | 621.7948717948718 213 | ], 214 | [ 215 | 1260.2564102564102, 216 | 602.5641025641025 217 | ], 218 | [ 219 | 1038.4615384615383, 220 | 589.7435897435897 221 | ], 222 | [ 223 | 1000.0, 224 | 585.8974358974359 225 | ], 226 | [ 227 | 944.8717948717948, 228 | 582.051282051282 229 | ], 230 | [ 231 | 941.025641025641, 232 | 580.7692307692307 233 | ], 234 | [ 235 | 919.2307692307692, 236 | 578.2051282051282 237 | ], 238 | [ 239 | 898.7179487179487, 240 | 578.2051282051282 241 | ], 242 | [ 243 | 898.7179487179487, 244 | 547.4358974358975 245 | ], 246 | [ 247 | 893.5897435897435, 248 | 546.1538461538462 249 | ], 250 | [ 251 | 894.8717948717948, 252 | 533.3333333333334 253 | ], 254 | [ 255 | 902.5641025641025, 256 | 533.3333333333334 257 | ], 258 | [ 259 | 903.8461538461538, 260 | 539.7435897435897 261 | ], 262 | [ 263 | 928.2051282051282, 264 | 532.051282051282 265 | ], 266 | [ 267 | 989.7435897435897, 268 | 515.3846153846154 269 | ], 270 | [ 271 | 1001.2820512820513, 272 | 511.53846153846155 273 | ], 274 | [ 275 | 1051.2820512820513, 276 | 496.15384615384613 277 | ], 278 | [ 279 | 1599.0, 280 | 321.79487179487177 281 | ] 282 | ], 283 | "user": "pablo", 284 | "verified": 0 285 | } 286 | ] 287 | } -------------------------------------------------------------------------------- /data/roborace750_mockup/gtFine/train/newyork/newyork_00002_gtFine_labelIds.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pablopalafox/semantic-depth/c55b4f6822f7ee7cc22b3a7f052de5204e3a037e/data/roborace750_mockup/gtFine/train/newyork/newyork_00002_gtFine_labelIds.png -------------------------------------------------------------------------------- /data/roborace750_mockup/gtFine/train/newyork/newyork_00002_gtFine_polygons.json: -------------------------------------------------------------------------------- 1 | { 2 | "imgHeight": 1200, 3 | "imgWidth": 1600, 4 | "objects": [ 5 | { 6 | "date": "19-Jan-2018 13:04:29", 7 | "deleted": 0, 8 | "draw": true, 9 | "id": 0, 10 | "label": "road", 11 | "polygon": [ 12 | [ 13 | 0.0, 14 | 615.3846153846154 15 | ], 16 | [ 
17 | 320.5128205128205, 18 | 567.9487179487179 19 | ], 20 | [ 21 | 507.6923076923077, 22 | 558.974358974359 23 | ], 24 | [ 25 | 771.7948717948718, 26 | 516.6666666666666 27 | ], 28 | [ 29 | 1426.923076923077, 30 | 537.1794871794872 31 | ], 32 | [ 33 | 1599.0, 34 | 588.4615384615385 35 | ], 36 | [ 37 | 1599.0, 38 | 1083.3333333333333 39 | ], 40 | [ 41 | 1523.076923076923, 42 | 1199.0 43 | ], 44 | [ 45 | 0.0, 46 | 1199.0 47 | ] 48 | ], 49 | "user": "pablo", 50 | "verified": 0 51 | }, 52 | { 53 | "date": "19-Jan-2018 13:05:16", 54 | "deleted": 0, 55 | "draw": true, 56 | "id": 1, 57 | "label": "fence", 58 | "polygon": [ 59 | [ 60 | 0.0, 61 | 660.2564102564103 62 | ], 63 | [ 64 | 133.33333333333334, 65 | 646.1538461538461 66 | ], 67 | [ 68 | 262.8205128205128, 69 | 630.7692307692307 70 | ], 71 | [ 72 | 264.1025641025641, 73 | 594.8717948717948 74 | ], 75 | [ 76 | 302.56410256410254, 77 | 592.3076923076923 78 | ], 79 | [ 80 | 424.35897435897436, 81 | 587.1794871794872 82 | ], 83 | [ 84 | 470.5128205128205, 85 | 582.051282051282 86 | ], 87 | [ 88 | 505.12820512820514, 89 | 579.4871794871794 90 | ], 91 | [ 92 | 529.4871794871794, 93 | 574.3589743589744 94 | ], 95 | [ 96 | 537.1794871794872, 97 | 570.5128205128204 98 | ], 99 | [ 100 | 537.1794871794872, 101 | 537.1794871794872 102 | ], 103 | [ 104 | 537.1794871794872, 105 | 443.5897435897436 106 | ], 107 | [ 108 | 547.4358974358975, 109 | 432.05128205128204 110 | ], 111 | [ 112 | 547.4358974358975, 113 | 421.79487179487177 114 | ], 115 | [ 116 | 539.7435897435897, 117 | 415.38461538461536 118 | ], 119 | [ 120 | 507.6923076923077, 121 | 387.1794871794872 122 | ], 123 | [ 124 | 503.8461538461538, 125 | 379.48717948717945 126 | ], 127 | [ 128 | 391.025641025641, 129 | 311.53846153846155 130 | ], 131 | [ 132 | 332.05128205128204, 133 | 275.64102564102564 134 | ], 135 | [ 136 | 276.9230769230769, 137 | 250.0 138 | ], 139 | [ 140 | 282.05128205128204, 141 | 193.5897435897436 142 | ], 143 | [ 144 | 320.5128205128205, 145 | 148.7179487179487 146 | ], 147 | [ 148 | 121.7948717948718, 149 | 0.0 150 | ], 151 | [ 152 | 0.0, 153 | 0.0 154 | ] 155 | ], 156 | "user": "pablo", 157 | "verified": 0 158 | }, 159 | { 160 | "date": "19-Jan-2018 13:05:57", 161 | "deleted": 0, 162 | "draw": true, 163 | "id": 2, 164 | "label": "fence", 165 | "polygon": [ 166 | [ 167 | 1599.0, 168 | 625.6410256410256 169 | ], 170 | [ 171 | 1196.1538461538462, 172 | 598.7179487179487 173 | ], 174 | [ 175 | 765.3846153846154, 176 | 573.0769230769231 177 | ], 178 | [ 179 | 598.7179487179487, 180 | 565.3846153846154 181 | ], 182 | [ 183 | 537.1794871794872, 184 | 564.1025641025641 185 | ], 186 | [ 187 | 537.1794871794872, 188 | 523.0769230769231 189 | ], 190 | [ 191 | 539.7435897435897, 192 | 517.9487179487179 193 | ], 194 | [ 195 | 547.4358974358975, 196 | 523.0769230769231 197 | ], 198 | [ 199 | 584.6153846153846, 200 | 517.9487179487179 201 | ], 202 | [ 203 | 634.6153846153846, 204 | 510.2564102564102 205 | ], 206 | [ 207 | 655.1282051282051, 208 | 510.2564102564102 209 | ], 210 | [ 211 | 739.7435897435897, 212 | 501.28205128205127 213 | ], 214 | [ 215 | 864.1025641025641, 216 | 482.05128205128204 217 | ], 218 | [ 219 | 1024.3589743589744, 220 | 450.0 221 | ], 222 | [ 223 | 1599.0, 224 | 279.4871794871795 225 | ] 226 | ], 227 | "user": "pablo", 228 | "verified": 0 229 | } 230 | ] 231 | } -------------------------------------------------------------------------------- /data/roborace750_mockup/gtFine/val/berlin/berlin_00000_gtFine_labelIds.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/pablopalafox/semantic-depth/c55b4f6822f7ee7cc22b3a7f052de5204e3a037e/data/roborace750_mockup/gtFine/val/berlin/berlin_00000_gtFine_labelIds.png -------------------------------------------------------------------------------- /data/roborace750_mockup/gtFine/val/berlin/berlin_00000_gtFine_polygons.json: -------------------------------------------------------------------------------- 1 | { 2 | "imgHeight": 1200, 3 | "imgWidth": 1600, 4 | "objects": [ 5 | { 6 | "date": "19-Jan-2018 15:50:25", 7 | "deleted": 0, 8 | "draw": true, 9 | "id": 0, 10 | "label": "road", 11 | "polygon": [ 12 | [ 13 | 0.0, 14 | 560.2564102564103 15 | ], 16 | [ 17 | 500.0, 18 | 551.2820512820513 19 | ], 20 | [ 21 | 1006.4102564102564, 22 | 557.6923076923076 23 | ], 24 | [ 25 | 1289.7435897435896, 26 | 560.2564102564103 27 | ], 28 | [ 29 | 1599.0, 30 | 607.6923076923076 31 | ], 32 | [ 33 | 1599.0, 34 | 1094.871794871795 35 | ], 36 | [ 37 | 1516.6666666666665, 38 | 1199.0 39 | ], 40 | [ 41 | 0.0, 42 | 1199.0 43 | ] 44 | ], 45 | "user": "pablo", 46 | "verified": 0 47 | }, 48 | { 49 | "date": "19-Jan-2018 15:51:57", 50 | "deleted": 0, 51 | "draw": true, 52 | "id": 1, 53 | "label": "fence", 54 | "polygon": [ 55 | [ 56 | 0.0, 57 | 591.025641025641 58 | ], 59 | [ 60 | 100.0, 61 | 587.1794871794872 62 | ], 63 | [ 64 | 212.82051282051282, 65 | 583.3333333333333 66 | ], 67 | [ 68 | 300.0, 69 | 580.7692307692307 70 | ], 71 | [ 72 | 373.07692307692304, 73 | 576.9230769230769 74 | ], 75 | [ 76 | 546.1538461538462, 77 | 573.0769230769231 78 | ], 79 | [ 80 | 616.6666666666666, 81 | 573.0769230769231 82 | ], 83 | [ 84 | 641.025641025641, 85 | 571.7948717948718 86 | ], 87 | [ 88 | 696.1538461538461, 89 | 571.7948717948718 90 | ], 91 | [ 92 | 864.1025641025641, 93 | 570.5128205128204 94 | ], 95 | [ 96 | 948.7179487179487, 97 | 569.2307692307692 98 | ], 99 | [ 100 | 989.7435897435897, 101 | 569.2307692307692 102 | ], 103 | [ 104 | 1003.8461538461538, 105 | 566.6666666666666 106 | ], 107 | [ 108 | 1028.2051282051282, 109 | 567.9487179487179 110 | ], 111 | [ 112 | 1038.4615384615383, 113 | 567.9487179487179 114 | ], 115 | [ 116 | 1038.4615384615383, 117 | 579.4871794871794 118 | ], 119 | [ 120 | 1042.3076923076924, 121 | 582.051282051282 122 | ], 123 | [ 124 | 1053.8461538461538, 125 | 583.3333333333333 126 | ], 127 | [ 128 | 1079.4871794871794, 129 | 587.1794871794872 130 | ], 131 | [ 132 | 1101.2820512820513, 133 | 589.7435897435897 134 | ], 135 | [ 136 | 1538.4615384615383, 137 | 628.2051282051282 138 | ], 139 | [ 140 | 1599.0, 141 | 633.3333333333333 142 | ], 143 | [ 144 | 1599.0, 145 | 182.05128205128204 146 | ], 147 | [ 148 | 1343.5897435897436, 149 | 266.6666666666667 150 | ], 151 | [ 152 | 1241.0256410256409, 153 | 330.7692307692308 154 | ], 155 | [ 156 | 1151.2820512820513, 157 | 357.6923076923077 158 | ], 159 | [ 160 | 1119.2307692307693, 161 | 387.1794871794872 162 | ], 163 | [ 164 | 1075.6410256410256, 165 | 403.8461538461538 166 | ], 167 | [ 168 | 1061.5384615384614, 169 | 423.07692307692304 170 | ], 171 | [ 172 | 1051.2820512820513, 173 | 433.3333333333333 174 | ], 175 | [ 176 | 1038.4615384615383, 177 | 435.89743589743586 178 | ], 179 | [ 180 | 1034.6153846153845, 181 | 450.0 182 | ], 183 | [ 184 | 1034.6153846153845, 185 | 460.2564102564102 186 | ], 187 | [ 188 | 1035.8974358974358, 189 | 473.07692307692304 190 | ], 191 | [ 192 | 1044.871794871795, 193 | 479.48717948717945 194 | ], 195 | [ 196 | 
1043.5897435897436, 197 | 494.87179487179486 198 | ], 199 | [ 200 | 1041.0256410256409, 201 | 543.5897435897435 202 | ], 203 | [ 204 | 1035.8974358974358, 205 | 543.5897435897435 206 | ], 207 | [ 208 | 1032.051282051282, 209 | 537.1794871794872 210 | ], 211 | [ 212 | 1008.9743589743589, 213 | 529.4871794871794 214 | ], 215 | [ 216 | 1001.2820512820513, 217 | 533.3333333333334 218 | ], 219 | [ 220 | 978.2051282051282, 221 | 529.4871794871794 222 | ], 223 | [ 224 | 966.6666666666666, 225 | 532.051282051282 226 | ], 227 | [ 228 | 887.1794871794872, 229 | 521.7948717948718 230 | ], 231 | [ 232 | 889.7435897435897, 233 | 516.6666666666666 234 | ], 235 | [ 236 | 702.5641025641025, 237 | 498.7179487179487 238 | ], 239 | [ 240 | 648.7179487179487, 241 | 492.30769230769226 242 | ], 243 | [ 244 | 615.3846153846154, 245 | 492.30769230769226 246 | ], 247 | [ 248 | 434.6153846153846, 249 | 480.7692307692308 250 | ], 251 | [ 252 | 253.84615384615384, 253 | 475.64102564102564 254 | ], 255 | [ 256 | 244.87179487179486, 257 | 470.5128205128205 258 | ], 259 | [ 260 | 248.7179487179487, 261 | 462.8205128205128 262 | ], 263 | [ 264 | 256.4102564102564, 265 | 452.56410256410254 266 | ], 267 | [ 268 | 223.07692307692307, 269 | 446.15384615384613 270 | ], 271 | [ 272 | 198.7179487179487, 273 | 425.64102564102564 274 | ], 275 | [ 276 | 153.84615384615384, 277 | 423.07692307692304 278 | ], 279 | [ 280 | 116.66666666666666, 281 | 401.28205128205127 282 | ], 283 | [ 284 | 50.0, 285 | 387.1794871794872 286 | ], 287 | [ 288 | 0.0, 289 | 357.6923076923077 290 | ] 291 | ], 292 | "user": "pablo", 293 | "verified": 0 294 | } 295 | ] 296 | } -------------------------------------------------------------------------------- /data/roborace750_mockup/gtFine/val/berlin/berlin_00001_gtFine_labelIds.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pablopalafox/semantic-depth/c55b4f6822f7ee7cc22b3a7f052de5204e3a037e/data/roborace750_mockup/gtFine/val/berlin/berlin_00001_gtFine_labelIds.png -------------------------------------------------------------------------------- /data/roborace750_mockup/gtFine/val/berlin/berlin_00001_gtFine_polygons.json: -------------------------------------------------------------------------------- 1 | { 2 | "imgHeight": 1200, 3 | "imgWidth": 1600, 4 | "objects": [ 5 | { 6 | "date": "19-Jan-2018 15:52:14", 7 | "deleted": 0, 8 | "draw": true, 9 | "id": 0, 10 | "label": "road", 11 | "polygon": [ 12 | [ 13 | 0.0, 14 | 615.3846153846154 15 | ], 16 | [ 17 | 232.05128205128204, 18 | 551.2820512820513 19 | ], 20 | [ 21 | 388.46153846153845, 22 | 537.1794871794872 23 | ], 24 | [ 25 | 1599.0, 26 | 552.5641025641025 27 | ], 28 | [ 29 | 1599.0, 30 | 1078.2051282051282 31 | ], 32 | [ 33 | 1525.6410256410256, 34 | 1199.0 35 | ], 36 | [ 37 | 0.0, 38 | 1199.0 39 | ] 40 | ], 41 | "user": "pablo", 42 | "verified": 0 43 | }, 44 | { 45 | "date": "19-Jan-2018 15:54:07", 46 | "deleted": 0, 47 | "draw": true, 48 | "id": 1, 49 | "label": "fence", 50 | "polygon": [ 51 | [ 52 | 0.0, 53 | 767.9487179487179 54 | ], 55 | [ 56 | 107.6923076923077, 57 | 739.7435897435897 58 | ], 59 | [ 60 | 34.61538461538461, 61 | 720.5128205128204 62 | ], 63 | [ 64 | 17.94871794871795, 65 | 715.3846153846154 66 | ], 67 | [ 68 | 3.846153846153846, 69 | 711.5384615384615 70 | ], 71 | [ 72 | 32.05128205128205, 73 | 703.8461538461538 74 | ], 75 | [ 76 | 117.94871794871794, 77 | 706.4102564102564 78 | ], 79 | [ 80 | 189.74358974358972, 81 | 703.8461538461538 82 | ], 83 | [ 84 | 
241.02564102564102, 85 | 697.4358974358975 86 | ], 87 | [ 88 | 280.7692307692308, 89 | 687.1794871794872 90 | ], 91 | [ 92 | 258.97435897435895, 93 | 684.6153846153846 94 | ], 95 | [ 96 | 157.69230769230768, 97 | 670.5128205128204 98 | ], 99 | [ 100 | 191.02564102564102, 101 | 670.5128205128204 102 | ], 103 | [ 104 | 275.64102564102564, 105 | 667.9487179487179 106 | ], 107 | [ 108 | 338.46153846153845, 109 | 664.1025641025641 110 | ], 111 | [ 112 | 347.4358974358974, 113 | 658.974358974359 114 | ], 115 | [ 116 | 321.79487179487177, 117 | 653.8461538461538 118 | ], 119 | [ 120 | 274.35897435897436, 121 | 648.7179487179487 122 | ], 123 | [ 124 | 370.5128205128205, 125 | 644.8717948717948 126 | ], 127 | [ 128 | 380.7692307692308, 129 | 638.4615384615385 130 | ], 131 | [ 132 | 334.6153846153846, 133 | 633.3333333333333 134 | ], 135 | [ 136 | 382.05128205128204, 137 | 632.051282051282 138 | ], 139 | [ 140 | 393.5897435897436, 141 | 628.2051282051282 142 | ], 143 | [ 144 | 373.07692307692304, 145 | 621.7948717948718 146 | ], 147 | [ 148 | 397.4358974358974, 149 | 621.7948717948718 150 | ], 151 | [ 152 | 394.87179487179486, 153 | 617.9487179487179 154 | ], 155 | [ 156 | 388.46153846153845, 157 | 615.3846153846154 158 | ], 159 | [ 160 | 403.8461538461538, 161 | 614.1025641025641 162 | ], 163 | [ 164 | 407.6923076923077, 165 | 611.5384615384615 166 | ], 167 | [ 168 | 396.15384615384613, 169 | 607.6923076923076 170 | ], 171 | [ 172 | 407.6923076923077, 173 | 606.4102564102564 174 | ], 175 | [ 176 | 379.48717948717945, 177 | 600.0 178 | ], 179 | [ 180 | 364.1025641025641, 181 | 596.1538461538462 182 | ], 183 | [ 184 | 350.0, 185 | 593.5897435897435 186 | ], 187 | [ 188 | 339.7435897435897, 189 | 591.025641025641 190 | ], 191 | [ 192 | 330.7692307692308, 193 | 587.1794871794872 194 | ], 195 | [ 196 | 302.56410256410254, 197 | 584.6153846153846 198 | ], 199 | [ 200 | 287.1794871794872, 201 | 583.3333333333333 202 | ], 203 | [ 204 | 266.6666666666667, 205 | 580.7692307692307 206 | ], 207 | [ 208 | 243.5897435897436, 209 | 576.9230769230769 210 | ], 211 | [ 212 | 244.87179487179486, 213 | 570.5128205128204 214 | ], 215 | [ 216 | 260.2564102564102, 217 | 569.2307692307692 218 | ], 219 | [ 220 | 296.15384615384613, 221 | 570.5128205128204 222 | ], 223 | [ 224 | 323.07692307692304, 225 | 570.5128205128204 226 | ], 227 | [ 228 | 387.1794871794872, 229 | 570.5128205128204 230 | ], 231 | [ 232 | 864.1025641025641, 233 | 575.6410256410256 234 | ], 235 | [ 236 | 1241.0256410256409, 237 | 582.051282051282 238 | ], 239 | [ 240 | 1599.0, 241 | 592.3076923076923 242 | ], 243 | [ 244 | 1599.0, 245 | 401.28205128205127 246 | ], 247 | [ 248 | 814.1025641025641, 249 | 474.35897435897436 250 | ], 251 | [ 252 | 456.4102564102564, 253 | 488.46153846153845 254 | ], 255 | [ 256 | 461.53846153846155, 257 | 469.2307692307692 258 | ], 259 | [ 260 | 284.6153846153846, 261 | 483.3333333333333 262 | ], 263 | [ 264 | 283.3333333333333, 265 | 506.4102564102564 266 | ], 267 | [ 268 | 280.7692307692308, 269 | 515.3846153846154 270 | ], 271 | [ 272 | 264.1025641025641, 273 | 520.5128205128204 274 | ], 275 | [ 276 | 221.7948717948718, 277 | 520.5128205128204 278 | ], 279 | [ 280 | 130.76923076923077, 281 | 521.7948717948718 282 | ], 283 | [ 284 | 53.84615384615385, 285 | 511.53846153846155 286 | ], 287 | [ 288 | 0.0, 289 | 507.6923076923077 290 | ] 291 | ], 292 | "user": "pablo", 293 | "verified": 0 294 | } 295 | ] 296 | } -------------------------------------------------------------------------------- 
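The two `*_gtFine_polygons.json` files above show the annotation format this mockup dataset uses: each object carries a `label` (`road` or `fence`) and a `polygon` given as `[x, y]` vertices on an `imgWidth` x `imgHeight` canvas. As a minimal sketch (not the project's actual annotation tooling), such a file could be rasterized into the matching `*_gtFine_labelIds.png` with OpenCV, using the label ids that `fcn8s/helper.py` later consumes (`road` = 7, `fence` = 13); the function name below is illustrative:

```python
import json
import numpy as np
import cv2

# Label ids expected by prepare_ground_truth() in fcn8s/helper.py
LABEL_IDS = {"road": 7, "fence": 13}

def polygons_to_label_ids(json_path, png_path):
    with open(json_path) as f:
        ann = json.load(f)
    # Single-channel id image; unlabeled pixels stay 0
    mask = np.zeros((ann["imgHeight"], ann["imgWidth"]), dtype=np.uint8)
    for obj in ann["objects"]:
        if obj["deleted"] or obj["label"] not in LABEL_IDS:
            continue
        pts = np.round(np.array(obj["polygon"])).astype(np.int32)
        cv2.fillPoly(mask, [pts], LABEL_IDS[obj["label"]])
    cv2.imwrite(png_path, mask)

polygons_to_label_ids("berlin_00000_gtFine_polygons.json",
                      "berlin_00000_gtFine_labelIds.png")
```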
/data/roborace750_mockup/leftImg8bit/test/berlin/berlin_00125_leftImg8bit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pablopalafox/semantic-depth/c55b4f6822f7ee7cc22b3a7f052de5204e3a037e/data/roborace750_mockup/leftImg8bit/test/berlin/berlin_00125_leftImg8bit.png -------------------------------------------------------------------------------- /data/roborace750_mockup/leftImg8bit/test/berlin/berlin_00126_leftImg8bit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pablopalafox/semantic-depth/c55b4f6822f7ee7cc22b3a7f052de5204e3a037e/data/roborace750_mockup/leftImg8bit/test/berlin/berlin_00126_leftImg8bit.png -------------------------------------------------------------------------------- /data/roborace750_mockup/leftImg8bit/train/montreal/montreal_00000_leftImg8bit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pablopalafox/semantic-depth/c55b4f6822f7ee7cc22b3a7f052de5204e3a037e/data/roborace750_mockup/leftImg8bit/train/montreal/montreal_00000_leftImg8bit.png -------------------------------------------------------------------------------- /data/roborace750_mockup/leftImg8bit/train/montreal/montreal_00001_leftImg8bit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pablopalafox/semantic-depth/c55b4f6822f7ee7cc22b3a7f052de5204e3a037e/data/roborace750_mockup/leftImg8bit/train/montreal/montreal_00001_leftImg8bit.png -------------------------------------------------------------------------------- /data/roborace750_mockup/leftImg8bit/train/montreal/montreal_00002_leftImg8bit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pablopalafox/semantic-depth/c55b4f6822f7ee7cc22b3a7f052de5204e3a037e/data/roborace750_mockup/leftImg8bit/train/montreal/montreal_00002_leftImg8bit.png -------------------------------------------------------------------------------- /data/roborace750_mockup/leftImg8bit/train/newyork/newyork_00000_leftImg8bit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pablopalafox/semantic-depth/c55b4f6822f7ee7cc22b3a7f052de5204e3a037e/data/roborace750_mockup/leftImg8bit/train/newyork/newyork_00000_leftImg8bit.png -------------------------------------------------------------------------------- /data/roborace750_mockup/leftImg8bit/train/newyork/newyork_00001_leftImg8bit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pablopalafox/semantic-depth/c55b4f6822f7ee7cc22b3a7f052de5204e3a037e/data/roborace750_mockup/leftImg8bit/train/newyork/newyork_00001_leftImg8bit.png -------------------------------------------------------------------------------- /data/roborace750_mockup/leftImg8bit/train/newyork/newyork_00002_leftImg8bit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pablopalafox/semantic-depth/c55b4f6822f7ee7cc22b3a7f052de5204e3a037e/data/roborace750_mockup/leftImg8bit/train/newyork/newyork_00002_leftImg8bit.png -------------------------------------------------------------------------------- /data/roborace750_mockup/leftImg8bit/val/berlin/berlin_00000_leftImg8bit.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/pablopalafox/semantic-depth/c55b4f6822f7ee7cc22b3a7f052de5204e3a037e/data/roborace750_mockup/leftImg8bit/val/berlin/berlin_00000_leftImg8bit.png -------------------------------------------------------------------------------- /data/roborace750_mockup/leftImg8bit/val/berlin/berlin_00001_leftImg8bit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pablopalafox/semantic-depth/c55b4f6822f7ee7cc22b3a7f052de5204e3a037e/data/roborace750_mockup/leftImg8bit/val/berlin/berlin_00001_leftImg8bit.png -------------------------------------------------------------------------------- /data/test_images_munich/test_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pablopalafox/semantic-depth/c55b4f6822f7ee7cc22b3a7f052de5204e3a037e/data/test_images_munich/test_1.png -------------------------------------------------------------------------------- /data/test_images_munich/test_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pablopalafox/semantic-depth/c55b4f6822f7ee7cc22b3a7f052de5204e3a037e/data/test_images_munich/test_2.png -------------------------------------------------------------------------------- /data/test_images_munich/test_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pablopalafox/semantic-depth/c55b4f6822f7ee7cc22b3a7f052de5204e3a037e/data/test_images_munich/test_3.png -------------------------------------------------------------------------------- /data/test_images_munich/test_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pablopalafox/semantic-depth/c55b4f6822f7ee7cc22b3a7f052de5204e3a037e/data/test_images_munich/test_4.png -------------------------------------------------------------------------------- /data/test_images_munich/test_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pablopalafox/semantic-depth/c55b4f6822f7ee7cc22b3a7f052de5204e3a037e/data/test_images_munich/test_5.png -------------------------------------------------------------------------------- /fcn8s/fcn.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | # 3 | # Copyright (c) 2017-2018 Udacity, Inc 4 | # Copyright (c) Modifications 2018, 2019 Pablo R. Palafox (pablo.rodriguez-palafox [at] tum.de) 5 | # 6 | # Permission is hereby granted, free of charge, to any person obtaining a copy 7 | # of this software and associated documentation files (the "Software"), to deal 8 | # in the Software without restriction, including without limitation the rights 9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 10 | # of the Software, and to permit persons to whom the Software is furnished to do 11 | # so, subject to the following conditions: 12 | 13 | # The above copyright notice and this permission notice shall be included in all 14 | # copies or substantial portions of the Software. 
15 | 16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 20 | # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 21 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 22 | 23 | 24 | ''' 25 | FCN-8s that trains on the Cityscapes dataset (or a dataset with a similar structure) 26 | ''' 27 | 28 | import matplotlib 29 | matplotlib.use('Agg') 30 | import math 31 | import time 32 | import os.path 33 | import tensorflow as tf 34 | import helper 35 | from tqdm import tqdm 36 | import shutil 37 | import matplotlib.pyplot as plt 38 | from matplotlib.ticker import MaxNLocator 39 | from glob import glob 40 | import re 41 | import numpy as np 42 | import scipy.misc 43 | from datetime import datetime 44 | import csv 45 | import argparse 46 | 47 | 48 | class FCN(object): 49 | 50 | ''' 51 | Constructor for setting params 52 | ''' 53 | def __init__(self, params): 54 | for p in params: 55 | setattr(self, p, params[p]) 56 | 57 | # Check compatibility and conditionally download the pretrained VGG16 model 58 | helper.check_compatibility() 59 | helper.maybe_download_pretrained_vgg(self.data_dir) 60 | 61 | # Define static project constants 62 | self.vgg_path = os.path.join(self.data_dir, 'vgg') 63 | 64 | self.train_gt_dir = os.path.join(self.data_dir, self.dataset, self.train_gt_subdir) 65 | self.train_imgs_dir = os.path.join(self.data_dir, self.dataset, self.train_imgs_subdir) 66 | 67 | self.val_gt_dir = os.path.join(self.data_dir, self.dataset, self.val_gt_subdir) 68 | self.val_imgs_dir = os.path.join(self.data_dir, self.dataset, self.val_imgs_subdir) 69 | 70 | self.test_gt_dir = os.path.join(self.data_dir, self.dataset, self.test_gt_subdir) 71 | self.test_imgs_dir = os.path.join(self.data_dir, self.dataset, self.test_imgs_subdir) 72 | 73 | # Define the batching function 74 | self.get_batches_fn = helper.gen_batch_function(self.train_gt_dir, self.train_imgs_dir, 75 | self.val_gt_dir, self.val_imgs_dir, 76 | self.test_gt_dir, self.test_imgs_dir, 77 | self.image_shape, self.dataset) 78 | 79 | ''' 80 | Load the VGG16 model 81 | ''' 82 | def load_vgg(self, sess): 83 | 84 | # Load the saved model 85 | tf.saved_model.loader.load(sess, ['vgg16'], self.vgg_path) 86 | 87 | # Get the relevant layers for constructing the skip-layers out of the graph 88 | graph = tf.get_default_graph() 89 | image_input = graph.get_tensor_by_name('image_input:0') 90 | keep_prob = graph.get_tensor_by_name('keep_prob:0') 91 | l3 = graph.get_tensor_by_name('layer3_out:0') 92 | l4 = graph.get_tensor_by_name('layer4_out:0') 93 | l7 = graph.get_tensor_by_name('layer7_out:0') 94 | 95 | return image_input, keep_prob, l3, l4, l7 96 | 97 | ''' 98 | Restore model and retrieve pertinent tensors 99 | ''' 100 | def restore_model(self, sess): 101 | 102 | print("Restoring saved model...") 103 | model_var_dir = '{}/{}/variables'.format(self.model_dir, self.model) 104 | 105 | model_meta_file = model_var_dir + '/saved_model.meta' 106 | new_saver = tf.train.import_meta_graph(model_meta_file) 107 | new_saver.restore(sess, tf.train.latest_checkpoint(model_var_dir)) 108 | 109 | all_vars = tf.get_collection('vars') 110 | for v in all_vars: 111 | v_ = sess.run(v) 112 | print(v_) 113 | 114 | graph = tf.get_default_graph() 115 | keep_prob = 
graph.get_tensor_by_name('keep_prob:0') 116 | image_input = graph.get_tensor_by_name('image_input:0') 117 | logits = graph.get_tensor_by_name('logits:0') 118 | 119 | # For computing IoU metric 120 | correct_label = tf.placeholder(dtype = tf.float32, shape = (None, None, None, self.num_classes)) 121 | predictions_argmax = graph.get_tensor_by_name('predictions_argmax:0') 122 | 123 | # Define IoU metric operation 124 | labels_argmax = tf.argmax(correct_label, axis=-1, output_type=tf.int64) 125 | iou, iou_op = tf.metrics.mean_iou(labels_argmax, 126 | predictions_argmax, 127 | self.num_classes) 128 | sess.run(tf.local_variables_initializer()) 129 | print("Model successfully restored") 130 | 131 | return iou, iou_op, image_input, correct_label, keep_prob, logits 132 | 133 | ''' 134 | Save the model 135 | ''' 136 | def save_model(self, sess): 137 | 138 | # Create model dir if it doesn't exist 139 | model_var_dir = os.path.join(self.model_dir, 140 | self.model, 141 | 'variables') 142 | if os.path.exists(model_var_dir): 143 | shutil.rmtree(model_var_dir) 144 | os.makedirs(model_var_dir) 145 | 146 | # Create a Saver object 147 | saver = tf.train.Saver() 148 | 149 | print("Saving model to: {}".format(model_var_dir)) 150 | saver.save(sess, model_var_dir + '/saved_model') 151 | tf.train.write_graph(sess.graph_def, 152 | os.path.join(self.model_dir, 153 | self.model), 154 | 'saved_model.pb', False) 155 | 156 | ''' 157 | Define the layers 158 | ''' 159 | def layers(self, vgg_layer3_out, vgg_layer4_out, vgg_layer7_out, num_classes): 160 | 161 | kernel_initializer = tf.truncated_normal_initializer(stddev = 0.01) 162 | weights_regularized_l2 = 1e-3 163 | 164 | # We generate the 1x1 convolutions of layers 3, 4 and 7 of the VGG model 165 | conv_1x1_of_7 = tf.layers.conv2d(inputs=vgg_layer7_out, 166 | filters=num_classes, 167 | kernel_size=(1,1), strides=(1,1), 168 | kernel_initializer=kernel_initializer, 169 | kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-3)) 170 | 171 | conv_1x1_of_4 = tf.layers.conv2d(inputs=vgg_layer4_out, 172 | filters=num_classes, 173 | kernel_size=(1,1), strides=(1,1), 174 | kernel_initializer=kernel_initializer, 175 | kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-3)) 176 | 177 | conv_1x1_of_3 = tf.layers.conv2d(inputs=vgg_layer3_out, 178 | filters=num_classes, 179 | kernel_size=(1,1), 180 | strides=(1,1), 181 | kernel_initializer=kernel_initializer, 182 | kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-3)) 183 | 184 | # Decoder, with upsampling and skip connections 185 | # Upsampling conv_1x1_of_7 186 | deconv1 = tf.layers.conv2d_transpose(inputs=conv_1x1_of_7, 187 | filters=num_classes, 188 | kernel_size=(4,4), 189 | strides=(2,2), padding='same', 190 | kernel_initializer=kernel_initializer, 191 | kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-3)) 192 | # Skip connections from VGG16 layer 4 193 | first_skip = tf.add(deconv1, conv_1x1_of_4) 194 | 195 | # Upsampling first_skip 196 | deconv2 = tf.layers.conv2d_transpose(inputs=first_skip, 197 | filters=num_classes, 198 | kernel_size=(4,4), 199 | strides=(2,2), 200 | padding='same', 201 | kernel_initializer=kernel_initializer, 202 | kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-3)) 203 | # Skip connections from VGG16 layer 3 204 | second_skip = tf.add(deconv2, conv_1x1_of_3) 205 | 206 | # Upsampling second_skip 207 | deconv3 = tf.layers.conv2d_transpose(inputs=second_skip, 208 | filters=num_classes, 209 | kernel_size=(16,16), 210 | strides=(8,8), 211 | padding='same', 212 | 
kernel_initializer=kernel_initializer, 213 | kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-3)) 214 | 215 | return deconv3 216 | 217 | 218 | def build_predictor(self, nn_last_layer): 219 | softmax_output = tf.nn.softmax(nn_last_layer) 220 | predictions_argmax = tf.argmax(softmax_output, 221 | axis=-1, 222 | output_type=tf.int64, 223 | name='predictions_argmax') 224 | return predictions_argmax 225 | 226 | 227 | def build_metrics(self, correct_label, predictions_argmax, num_classes): 228 | labels_argmax = tf.argmax(correct_label, 229 | axis=-1, 230 | output_type=tf.int64, 231 | name='labels_argmax') 232 | iou, iou_op = tf.metrics.mean_iou(labels_argmax, predictions_argmax, num_classes) 233 | return iou, iou_op 234 | 235 | ''' 236 | Optimizer based on cross entropy 237 | ''' 238 | def optimize_cross_entropy(self, nn_last_layer, correct_label, learning_rate, num_classes): 239 | 240 | # Reshape logits and label for computing cross entropy 241 | logits = tf.reshape(nn_last_layer, (-1, num_classes), name='logits') 242 | correct_label = tf.reshape(correct_label, (-1, num_classes), name='correct_label') 243 | 244 | # For computing accuracy on test set 245 | #acc, acc_op = tf.metrics.accuracy(labels=correct_label, predictions=logits) 246 | 247 | # Compute cross entropy and loss 248 | cross_entropy_logits = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=correct_label) 249 | cross_entropy_loss = tf.reduce_mean(cross_entropy_logits) 250 | 251 | # Define a training operation using the Adam optimizer (which allows a variable learning rate) 252 | train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cross_entropy_loss) 253 | 254 | return logits, train_op, cross_entropy_loss 255 | 256 | 257 | ''' 258 | Define training op 259 | ''' 260 | def train_nn(self, sess, train_op, cross_entropy_loss, image_input, correct_label, keep_prob, learning_rate, iou, iou_op): 261 | 262 | print("\nStarting training with a learning_rate of: {}".format(self.learning_rate)) 263 | 264 | train_mean_losses_list = [] 265 | train_mean_ious_list = [] 266 | val_mean_losses_list = [] 267 | val_mean_ious_list = [] 268 | 269 | # Iterate over epochs 270 | for epoch in range(1, self.epochs+1): 271 | 272 | print("\nEpoch: {}/{}".format(epoch, self.epochs)) 273 | 274 | ########################################################################## 275 | print("\n## TRAINING of epoch {} ##".format(epoch)) 276 | 277 | ## TRAINING DATA ## 278 | # Iterate over batches of training data using the batch generation function 279 | train_losses = [] 280 | train_ious = [] 281 | train_batch = self.get_batches_fn(self.batch_size, mode='train') 282 | total_num_imgs = helper.get_num_imgs_in_folder(self.train_imgs_dir) 283 | train_size = math.ceil(total_num_imgs / self.batch_size) 284 | 285 | for i, d in tqdm(enumerate(train_batch), desc="Epoch {}: Train Batch".format(epoch), total=train_size): 286 | 287 | image, label = d 288 | 289 | # Create the feed dictionary 290 | feed_dict_train = { 291 | image_input : image, 292 | correct_label : label, 293 | keep_prob : self.dropout, 294 | learning_rate : self.learning_rate 295 | } 296 | 297 | # Create the feed dictionary for the IoU computation (no dropout) 298 | feed_dict_train_iou = { 299 | image_input : image, 300 | correct_label : label, 301 | keep_prob : 1.0, 302 | } 303 | 304 | # Train and compute the loss of the current train BATCH 305 | #_, train_loss, _ = sess.run([train_op, cross_entropy_loss, iou_op], feed_dict=feed_dict_train) 306 | _, train_loss = sess.run([train_op, cross_entropy_loss], 
feed_dict=feed_dict_train) 307 | _ = sess.run([iou_op], feed_dict=feed_dict_train_iou) 308 | train_iou = sess.run(iou) 309 | 310 | print(' loss: {}'.format(train_loss)) 311 | print(' iou: {}'.format(train_iou)) 312 | train_losses.append(train_loss) 313 | train_ious.append(train_iou) 314 | 315 | ### LOSS ### 316 | # Compute the mean loss of the current EPOCH based on the losses from each batch 317 | train_mean_loss = sum(train_losses) / len(train_losses) 318 | print("TRAIN: mean loss of current epoch: {}".format(train_mean_loss)) 319 | # Append the mean loss of the current epoch to a list of mean losses of the whole training 320 | train_mean_losses_list.append(train_mean_loss) 321 | 322 | ### IOU ### 323 | # Compute the mean IoU of the current EPOCH based on the IoUs from each batch 324 | train_mean_iou = sum(train_ious) / len(train_ious) 325 | print("TRAIN: mean iou of current epoch: {}".format(train_mean_iou)) 326 | # Append the mean IoU of the current epoch to a list of mean IoUs of the whole training 327 | train_mean_ious_list.append(train_mean_iou) 328 | 329 | ########################################################################## 330 | print("\n## VALIDATION of epoch {} ##".format(epoch)) 331 | ## VALIDATION DATA ## 332 | # Iterate over batches of validation data using the batch generation function 333 | val_losses = [] 334 | val_ious = [] 335 | val_batch = self.get_batches_fn(self.batch_size, mode='val') 336 | total_num_imgs = helper.get_num_imgs_in_folder(self.val_imgs_dir) 337 | val_size = math.ceil(total_num_imgs / self.batch_size) 338 | 339 | for i, d in tqdm(enumerate(val_batch), desc="Epoch {}: Val Batch".format(epoch), total=val_size): 340 | 341 | image, label = d 342 | 343 | # Create the feed dictionary 344 | feed_dict_val = { 345 | image_input : image, 346 | correct_label : label, 347 | keep_prob : 1.0, 348 | } 349 | 350 | # Compute the loss of the current val BATCH 351 | val_loss, _ = sess.run([cross_entropy_loss, iou_op], feed_dict=feed_dict_val) 352 | val_iou = sess.run(iou) 353 | 354 | print(' loss: {}'.format(val_loss)) 355 | print(' iou: {}'.format(val_iou)) 356 | val_losses.append(val_loss) 357 | val_ious.append(val_iou) 358 | 359 | ### LOSS ### 360 | # Compute the mean loss of the current EPOCH based on the losses from each batch 361 | val_mean_loss = sum(val_losses) / len(val_losses) 362 | print("VAL: mean loss of current epoch: {}".format(val_mean_loss)) 363 | # Append the mean loss of the current epoch to a list of mean losses of the whole training 364 | val_mean_losses_list.append(val_mean_loss) 365 | 366 | ### IOU ### 367 | # Compute the mean IoU of the current EPOCH based on the IoUs from each batch 368 | val_mean_iou = sum(val_ious) / len(val_ious) 369 | print("VAL: mean iou of current epoch: {}".format(val_mean_iou)) 370 | # Append the mean IoU of the current epoch to a list of mean IoUs of the whole training 371 | val_mean_ious_list.append(val_mean_iou) 372 | 373 | print("\nTraining completed") 374 | 375 | # Save logging info into images 376 | epochs_list = list(range(1, self.epochs+1)) 377 | self.logging('loss', train_mean_losses_list, val_mean_losses_list, epochs_list) 378 | self.logging('iou', train_mean_ious_list, val_mean_ious_list, epochs_list) 379 | 380 | 381 | ''' 382 | Apply inference over test set and obtain model accuracy 383 | ''' 384 | def inference(self, sess, iou, iou_op, image_input, correct_label, keep_prob, logits): 385 | 386 | # Make folder for current run if it doesn't exist 387 | time_str = datetime.now() 388 | time_str = 
"{}_{}_{} {}-{}".format(time_str.year, time_str.month, time_str.day, time_str.hour, time_str.minute) 389 | output_dir = os.path.join(self.runs_dir, 390 | self.model, 391 | time_str) 392 | if os.path.exists(output_dir): 393 | shutil.rmtree(output_dir) 394 | os.makedirs(output_dir) 395 | 396 | # Get ground truth and image file names 397 | gt_paths, imgs_paths = helper.get_files_paths(self.test_gt_dir, self.test_imgs_dir) 398 | both = zip(gt_paths, imgs_paths) 399 | test_size = len(gt_paths) 400 | 401 | times = [] 402 | test_ious = [] 403 | for gt_file, image_file in tqdm(both, desc="Test Batch", total=test_size): 404 | 405 | t_init = time.time() 406 | 407 | # Read Ground Truth and prepare it as a depth 3 image 408 | gt = scipy.misc.imresize(scipy.misc.imread(gt_file), self.image_shape) 409 | gt_image = helper.prepare_ground_truth(self.dataset, gt, self.num_classes, 'test') 410 | 411 | # Read the input image 412 | image = scipy.misc.imresize(scipy.misc.imread(image_file), self.image_shape) 413 | street_im = scipy.misc.toimage(image) 414 | 415 | ######################################### 416 | ## 1. Compute IoU of test set 417 | 418 | # Convert ground truth and image to (1, 256, 512, 3) format size 419 | gt_image = np.expand_dims(gt_image, axis=0) 420 | image = np.expand_dims(image, axis=0) 421 | 422 | # Create the feed dictionary 423 | feed_dict_test = { 424 | image_input : image, 425 | correct_label : gt_image, 426 | keep_prob : 1.0, 427 | } 428 | 429 | sess.run([iou_op], feed_dict=feed_dict_test) 430 | test_iou = sess.run(iou) 431 | test_ious.append(test_iou) 432 | 433 | ####################################### 434 | ## 2. Apply inference on test set 435 | # im_softmax is a matrix of heigh*width rows and 'num_classes' columns 436 | # which codifies the probability that a pixel belongs to a class 437 | im_softmax = sess.run( 438 | [tf.nn.softmax(logits)], 439 | {keep_prob: 1.0, image_input: image}) 440 | 441 | t1 = time.time() - t_init 442 | 443 | ### Road 444 | # 1. For the first class, we only select the values corresponding to the first column 445 | # We also convert the vector of pixels into a matrix 446 | im_softmax_r = im_softmax[0][:, 0].reshape(self.image_shape[0], self.image_shape[1]) 447 | # 2. 
We create a matrix of height*width*depth with boolean values (True or False) depending 448 | # on wheter the probability of belonging to class 'road' is higher than 0.5 449 | segmentation_r = (im_softmax_r > 0.5).reshape(self.image_shape[0], self.image_shape[1], 1) 450 | mask = np.dot(segmentation_r, np.array([[128, 64, 128, 64]])) 451 | mask = scipy.misc.toimage(mask, mode="RGBA") 452 | street_im.paste(mask, box=None, mask=mask) 453 | 454 | ### Fence 455 | im_softmax_r = im_softmax[0][:, 1].reshape(self.image_shape[0], self.image_shape[1]) 456 | segmentation_r = (im_softmax_r > 0.5).reshape(self.image_shape[0], self.image_shape[1], 1) 457 | mask = np.dot(segmentation_r, np.array([[190, 153, 153, 64]])) 458 | mask = scipy.misc.toimage(mask, mode="RGBA") 459 | street_im.paste(mask, box=None, mask=mask) 460 | 461 | t2 = time.time() - t_init 462 | 463 | t_both = "{} {}\n".format(t1, t2) 464 | times.append(t_both) 465 | 466 | # Save output image 467 | image_file = os.path.basename(image_file) 468 | output_path = os.path.join(output_dir, image_file) 469 | image = np.array(street_im) 470 | scipy.misc.imsave(output_path, image) 471 | 472 | ### Inference time log ### 473 | with open("times.txt", "w") as file: 474 | for pair in times: 475 | file.write(pair) 476 | 477 | ### IoU ### 478 | # Compute the mean IoU of the whole test set 479 | test_mean_iou = sum(test_ious) / len(test_ious) 480 | print("TEST: mean iou of test set: {}".format(test_mean_iou)) 481 | 482 | # Create txt file in which to store IoU of testing set 483 | metric_type = 'iou' 484 | metric_path = os.path.join(self.logging_dir, self.model, metric_type) 485 | if not os.path.exists(metric_path): 486 | print("Creating '{}' directory for storing {} info of Testing set".format(metric_path, metric_type)) 487 | os.makedirs(metric_path) 488 | metric_file_path = os.path.join(metric_path, "test_set_iou_{}.txt".format(time_str)) 489 | with open(metric_file_path, "w") as text_file: 490 | for iou in test_ious: 491 | text_file.write("{}\n".format(iou)) 492 | text_file.write("IoU metric of Testing set: {}".format(test_mean_iou)) 493 | 494 | ''' 495 | Plot loss vs epochs, iou vs epochs 496 | ''' 497 | def logging(self, metric_type, train_mean_metric_list, val_mean_metric_list, epochs_list): 498 | 499 | print("Plotting '{} vs epochs' and saving as image".format(metric_type)) 500 | 501 | # Create logging dir for metric if it doesn't exist already 502 | metric_path = os.path.join(self.logging_dir, self.model, metric_type) 503 | if not os.path.exists(metric_path): 504 | print("Creating '{}' directory for storing {} info of Training and Validation sets".format(metric_path, metric_type)) 505 | os.makedirs(metric_path) 506 | 507 | # Get time 508 | time = datetime.now() 509 | time = "{}_{}_{} {}-{}".format(time.year, time.month, time.day, time.hour, time.minute) 510 | 511 | # Save metric log into csv file 512 | csv_file = '{}_vs_epochs_{}.csv'.format(metric_type, time) 513 | csv_file_path = os.path.join(metric_path, csv_file) 514 | 515 | print("Saving '{} vs epochs' as a csv file into {} directory\n".format(metric_type, metric_path)) 516 | with open(csv_file_path, 'w', newline='') as csvfile: 517 | writer = csv.writer(csvfile, delimiter='\t', quotechar='|', quoting=csv.QUOTE_MINIMAL) 518 | writer.writerow(['Epoch'] + ['TRAIN_{}'.format(metric_type)] + ['VAL_{}'.format(metric_type)]) 519 | writer.writerows(zip(epochs_list, train_mean_metric_list, val_mean_metric_list)) 520 | 521 | # Plot metric and save into image 522 | ax = plt.figure().gca() 523 | 
ax.xaxis.set_major_locator(MaxNLocator(integer=True)) 524 | ax.plot(epochs_list, train_mean_metric_list, label='train', linestyle='--') 525 | ax.plot(epochs_list, val_mean_metric_list, label='val', linestyle='--') 526 | 527 | ax.legend() 528 | plt.xlabel('epochs') 529 | plt.ylabel(metric_type) 530 | 531 | log_file = '{}_vs_epochs_{}.png'.format(metric_type, time) 532 | log_file_path = os.path.join(metric_path, log_file) 533 | 534 | print("Plotting '{} vs epochs' and saving as image into {} directory\n".format(metric_type, metric_path)) 535 | plt.savefig(log_file_path) 536 | 537 | 538 | def train(self): 539 | ## CONFIGURATION FOR USING GPU ## 540 | os.environ["CUDA_VISIBLE_DEVICES"]="0" 541 | config = tf.ConfigProto() 542 | config.gpu_options.allow_growth = True 543 | config.gpu_options.visible_device_list = "0" 544 | 545 | 546 | # TensorFlow session 547 | with tf.Session(config=config) as sess: 548 | 549 | tf.logging.set_verbosity(tf.logging.INFO) 550 | 551 | # Placeholders 552 | learning_rate = tf.placeholder(dtype = tf.float32) 553 | correct_label = tf.placeholder(dtype = tf.float32, shape = (None, None, None, self.num_classes)) 554 | 555 | # Define network and training operations 556 | image_input, keep_prob, l3, l4, l7 = self.load_vgg(sess) 557 | layer_output = self.layers(l3, l4, l7, self.num_classes) 558 | logits, train_op, cross_entropy_loss = self.optimize_cross_entropy(layer_output, correct_label, learning_rate, self.num_classes) 559 | 560 | predictions_argmax = self.build_predictor(layer_output) 561 | iou, iou_op = self.build_metrics(correct_label, 562 | predictions_argmax, 563 | self.num_classes) 564 | 565 | # Initialize variables 566 | sess.run(tf.global_variables_initializer()) 567 | sess.run(tf.local_variables_initializer()) 568 | 569 | # Train the model 570 | self.train_nn(sess, train_op, cross_entropy_loss, 571 | image_input, correct_label, keep_prob, learning_rate, 572 | iou, iou_op) 573 | 574 | # Do inference to compute IoU on test set and save the output images 575 | if self.inference_flag: 576 | self.inference(sess, iou, iou_op, image_input, 577 | correct_label, keep_prob, logits) 578 | 579 | # Save the model 580 | self.save_model(sess) 581 | 582 | 583 | def test(self): 584 | 585 | ## CONFIGURATION FOR USING GPU ## 586 | os.environ["CUDA_VISIBLE_DEVICES"]="0" 587 | config = tf.ConfigProto() 588 | config.gpu_options.allow_growth = True 589 | config.gpu_options.visible_device_list = "0" 590 | 591 | # TensorFlow session 592 | with tf.Session(config=config) as sess: 593 | iou, iou_op, image_input, correct_label, keep_prob, logits = self.restore_model(sess) 594 | self.inference(sess, iou, iou_op, image_input, 595 | correct_label, keep_prob, logits) 596 | 597 | 598 | ''' 599 | Entry point 600 | ''' 601 | if __name__ == '__main__': 602 | parser = argparse.ArgumentParser(description="FCN-8s implementation.") 603 | 604 | parser.add_argument("--mode", type=str, help="Train or test mode.", 605 | default="train") 606 | 607 | parser.add_argument("--epochs", type=int, help="Number of epochs we want " 608 | "to train the network for.") 609 | 610 | parser.add_argument('--dataset', type=str, help='Name of dataset we want to ' 611 | 'train on, or where the test set we want to apply inference on resides.', 612 | required=True) 613 | 614 | parser.add_argument('--inference_flag', help='When set to true, applies inference ' 615 | 'on the test set of the dataset on which we have just trained.', 616 | action='store_true') 617 | 618 | parser.add_argument("--learning_rate", type=float, 
help="Learning rate.", 619 | default=0.00001) 620 | 621 | parser.add_argument("--dropout", type=float, help="Dropout rate.", 622 | default=0.5) 623 | 624 | parser.add_argument('--batch_size', type=str, help='Batch size', default=1) 625 | 626 | parser.add_argument('--num_classes', type=int, help='Number of target classes', 627 | default=3) 628 | 629 | parser.add_argument('--image_shape', help='Image shape (width, height)', 630 | default=(256, 512)) 631 | 632 | parser.add_argument('--runs_dir', type=str, help='Directory in which to save ' 633 | 'inference output.', 634 | default='runs') 635 | 636 | parser.add_argument('--data_dir', type=str, help='Directory where our datasets ' 637 | 'reside.', 638 | default='../data') 639 | 640 | parser.add_argument('--train_gt_subdir', type=str, default='gtFine/train') 641 | 642 | parser.add_argument('--train_imgs_subdir', type=str, default='leftImg8bit/train') 643 | 644 | parser.add_argument('--val_gt_subdir', type=str, default='gtFine/val') 645 | 646 | parser.add_argument('--val_imgs_subdir', type=str, default='leftImg8bit/val') 647 | 648 | parser.add_argument('--test_gt_subdir', type=str, default='gtFine/test') 649 | 650 | parser.add_argument('--test_imgs_subdir', type=str, default='leftImg8bit/test') 651 | 652 | parser.add_argument('--model_dir', type=str, default='../models/sem_seg') 653 | 654 | parser.add_argument('--logging_dir', type=str, default='log') 655 | 656 | args = parser.parse_args() 657 | 658 | # Get the name of the model, either the model to be created or the model 659 | # to be used for inference 660 | if args.mode == 'train': 661 | if args.epochs is None: 662 | parser.error("train mode requires --epochs.") 663 | model = '{}-Epochs-{}'.format(args.epochs, args.dataset) 664 | elif args.mode == 'test': 665 | model = '' 666 | while len(model) is 0: 667 | model = input("Enter the name of the model you want to use " 668 | "in the format '-Epochs-' \n--> ") 669 | args.model = model 670 | 671 | # Convert the arguments into a dictionary for later usage within the class init function 672 | args_dict = vars(args) 673 | 674 | # Create an FCN object 675 | fcn = FCN(args_dict) 676 | 677 | if fcn.mode == 'train': 678 | fcn.train() 679 | elif fcn.mode == 'test': 680 | fcn.test() 681 | 682 | -------------------------------------------------------------------------------- /fcn8s/helper.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | # 3 | # Copyright (c) 2017-2018 Udacity, Inc 4 | # Copyright (c) Modifications 2018, 2019 Pablo R. Palafox (pablo.rodriguez-palafox [at] tum.de) 5 | # 6 | # Permission is hereby granted, free of charge, to any person obtaining a copy 7 | # of this software and associated documentation files (the "Software"), to deal 8 | # in the Software without restriction, including without limitation the rights 9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 10 | # of the Software, and to permit persons to whom the Software is furnished to do 11 | # so, subject to the following conditions: 12 | 13 | # The above copyright notice and this permission notice shall be included in all 14 | # copies or substantial portions of the Software. 15 | 16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 19 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 20 | # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 21 | # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 22 | 23 | import re 24 | import random 25 | import numpy as np 26 | import os.path 27 | import scipy.misc 28 | import shutil 29 | import zipfile 30 | import time 31 | import tensorflow as tf 32 | import cv2 33 | from glob import glob 34 | from urllib.request import urlretrieve 35 | from distutils.version import LooseVersion 36 | from tqdm import tqdm 37 | import matplotlib.pyplot as plt 38 | from matplotlib.ticker import MaxNLocator 39 | from datetime import datetime 40 | 41 | 42 | class DLProgress(tqdm): 43 | last_block = 0 44 | def hook(self, block_num=1, block_size=1, total_size=None): 45 | self.total = total_size 46 | self.update((block_num - self.last_block) * block_size) 47 | self.last_block = block_num 48 | 49 | 50 | def check_compatibility(): 51 | assert LooseVersion(tf.__version__) >= LooseVersion('1.0'), 'Please use TensorFlow version 1.0 or newer. You are using {}'.format(tf.__version__) 52 | print('TensorFlow Version: {}'.format(tf.__version__)) 53 | 54 | if not tf.test.gpu_device_name(): 55 | print('No GPU found. Please use a GPU to train your neural network.') 56 | else: 57 | print('Default GPU Device: {}'.format(tf.test.gpu_device_name())) 58 | 59 | 60 | def maybe_download_pretrained_vgg(data_dir): 61 | """ 62 | Download and extract pretrained vgg model if it doesn't exist 63 | :param data_dir: Directory to download the model to 64 | """ 65 | vgg_filename = 'vgg.zip' 66 | vgg_path = os.path.join(data_dir, 'vgg') 67 | vgg_files = [ 68 | os.path.join(vgg_path, 'variables/variables.data-00000-of-00001'), 69 | os.path.join(vgg_path, 'variables/variables.index'), 70 | os.path.join(vgg_path, 'saved_model.pb')] 71 | 72 | missing_vgg_files = [vgg_file for vgg_file in vgg_files if not os.path.exists(vgg_file)] 73 | if missing_vgg_files: 74 | # Clean vgg dir 75 | if os.path.exists(vgg_path): 76 | shutil.rmtree(vgg_path) 77 | os.makedirs(vgg_path) 78 | 79 | # Download vgg 80 | print('Downloading pre-trained vgg model...') 81 | with DLProgress(unit='B', unit_scale=True, miniters=1) as pbar: 82 | urlretrieve( 83 | 'https://s3-us-west-1.amazonaws.com/udacity-selfdrivingcar/vgg.zip', 84 | os.path.join(vgg_path, vgg_filename), 85 | pbar.hook) 86 | 87 | # Extract vgg 88 | print('Extracting model...') 89 | zip_ref = zipfile.ZipFile(os.path.join(vgg_path, vgg_filename), 'r') 90 | zip_ref.extractall(data_dir) 91 | zip_ref.close() 92 | 93 | # Remove zip file to save space 94 | os.remove(os.path.join(vgg_path, vgg_filename)) 95 | 96 | 97 | def img_size(img): 98 | return (img.shape[0], img.shape[1]) 99 | 100 | 101 | def random_crop(img, gt): 102 | h,w = img_size(img) 103 | nw = random.randint(768, w-2) # Random crop size 104 | nh = int(nw / 2) # Keep original aspect ratio 105 | x1 = random.randint(0, w - nw) # Random position of crop 106 | y1 = random.randint(0, h - nh) 107 | return img[y1:(y1+nh), x1:(x1+nw), :], gt[y1:(y1+nh), x1:(x1+nw)] 108 | 109 | 110 | def bc_img(img, s = 1.0, m = 0.0): 111 | img = img.astype(np.int) 112 | img = img * s + m 113 | img[img > 255] = 255 114 | img[img < 0] = 0 115 | img = img.astype(np.uint8) 116 | return img 117 | 118 | 119 | def get_files_paths(gt_dir, imgs_dir): 120 | """ 121 | Get ground truth and image file paths 122 | """ 123 | cities = os.listdir(imgs_dir) 124 | gt = [] 125 | imgs = [] 126 
| for city in cities: 127 | new_gt_path = os.path.join(gt_dir, city) 128 | new_imgs_path = os.path.join(imgs_dir, city) 129 | gt += glob(os.path.join(new_gt_path, "*_gtFine_labelIds.png")) 130 | imgs += glob(os.path.join(new_imgs_path, "*.png")) 131 | gt.sort() 132 | imgs.sort() 133 | return gt, imgs 134 | 135 | 136 | def get_num_imgs_in_folder(imgs_dir): 137 | """ 138 | Sum the number of images contained in each city 139 | """ 140 | cities = os.listdir(imgs_dir) 141 | num_imgs = 0 142 | for city in cities: 143 | city_path = os.path.join(imgs_dir, city) 144 | num_imgs += len(os.listdir(city_path)) 145 | 146 | return num_imgs 147 | 148 | 149 | def prepare_ground_truth(dataset, img, num_classes, mode='train'): 150 | """ 151 | Prepare ground truth for cityscape data 152 | """ 153 | new_image = np.zeros((img.shape[0], img.shape[1], num_classes)) 154 | 155 | # road 156 | road_mask = img == 7 157 | 158 | # Depending on the dataset, the ``fence_mask`` will be generated differently 159 | if dataset[0:4] == 'city': 160 | if mode == 'train': 161 | # construction[building, wall, fence, guard_rail, bridge, tunnel] 162 | fence_mask = np.logical_or.reduce((img == 11, img == 12, img == 13, 163 | img == 14, img == 15, img == 16)) 164 | elif mode == 'test': 165 | fence_mask = img == 13 166 | 167 | elif dataset[0:4] == 'robo': 168 | fence_mask = img == 13 169 | 170 | # everything else 171 | else_mask = np.logical_not(np.logical_or.reduce((road_mask, fence_mask))) 172 | 173 | new_image[:,:,0] = road_mask 174 | new_image[:,:,1] = fence_mask 175 | new_image[:,:,2] = else_mask 176 | 177 | return new_image.astype(np.float32) 178 | 179 | 180 | def gen_batch_function(train_gt_dir, train_imgs_dir, 181 | val_gt_dir, val_imgs_dir, 182 | test_gt_dir, test_imgs_dir, 183 | image_shape, dataset): 184 | """ 185 | Generate function to create batches of training data 186 | """ 187 | def get_batches_fn(batch_size=1, mode='train', num_classes=3, print_flag=False): 188 | """ 189 | Create batches of training data 190 | :param batch_size: Batch Size 191 | :return: Batches of training data 192 | """ 193 | if mode == 'train': 194 | 195 | # Get only the path of the imgs. 
Ground truth images' paths will be obtained later 196 | _, imgs_paths = get_files_paths(train_gt_dir, train_imgs_dir) 197 | 198 | #background_color = np.array([255, 0, 0]) 199 | #road_color = np.array([128, 64, 128, 255]) 200 | #car_color = np.array([0, 0, 142, 255]) 201 | 202 | random.shuffle(imgs_paths) 203 | 204 | for batch_i in range(0, len(imgs_paths), batch_size): 205 | 206 | images = [] 207 | gt_images = [] 208 | 209 | for image_file in imgs_paths[batch_i:batch_i+batch_size]: 210 | 211 | # Get gt_image_file by first finding the city name and then renaming the basename of image_file 212 | city = os.path.basename(image_file).partition("_")[0] 213 | gt_type = 'gtFine_labelIds.png' 214 | gt_image_file = os.path.join(train_gt_dir, city, os.path.basename(image_file)[:-15]+gt_type) 215 | 216 | # Read images and groundtruth images 217 | image = scipy.misc.imread(image_file) 218 | gt_image = scipy.misc.imread(gt_image_file) 219 | 220 | # Show images and gt_images as they are 221 | if print_flag: 222 | plt.figure(figsize=(16, 8)) 223 | plt.subplot(2,2,1) 224 | plt.imshow(image) 225 | plt.subplot(2,2,2) 226 | plt.imshow(gt_image) 227 | 228 | ##################################################### 229 | # AUGMENTATION # 230 | #Random crop augmentation 231 | image, gt_image = random_crop(image, gt_image) 232 | image = scipy.misc.imresize(image, image_shape) 233 | gt_image = scipy.misc.imresize(gt_image, image_shape) 234 | 235 | # Contrast augmentation 236 | contr = random.uniform(0.85, 1.15) 237 | # Brightness augmentation 238 | bright = random.randint(-40, 30) 239 | image = bc_img(image, contr, bright) 240 | ##################################################### 241 | 242 | ##################################################### 243 | # PREPARE GROUND TRUTH 244 | gt_image = prepare_ground_truth(dataset, gt_image, num_classes) 245 | ##################################################### 246 | 247 | images.append(image) 248 | gt_images.append(gt_image) 249 | 250 | if print_flag: 251 | plt.subplot(2,2,3) 252 | plt.imshow(image) 253 | plt.subplot(2,2,4) 254 | gt_image = scipy.misc.imresize(gt_image, image_shape) 255 | plt.imshow(gt_image) 256 | plt.show() 257 | 258 | 259 | yield np.array(images), np.array(gt_images) 260 | 261 | 262 | elif mode == 'val': 263 | 264 | _, imgs_paths = get_files_paths(val_gt_dir, val_imgs_dir) 265 | 266 | #background_color = np.array([255, 0, 0]) 267 | #road_color = np.array([128, 64, 128, 255]) 268 | #car_color = np.array([0, 0, 142, 255]) 269 | 270 | random.shuffle(imgs_paths) 271 | 272 | for batch_i in range(0, len(imgs_paths), batch_size): 273 | 274 | images = [] 275 | gt_images = [] 276 | 277 | for image_file in imgs_paths[batch_i:batch_i+batch_size]: 278 | 279 | # Get gt_image_file by first finding the city name and then renaming the basename of image_file 280 | city = os.path.basename(image_file).partition("_")[0] 281 | gt_type = 'gtFine_labelIds.png' 282 | gt_image_file = os.path.join(val_gt_dir, city, os.path.basename(image_file)[:-15]+gt_type) 283 | 284 | # Read images and groundtruth images 285 | image = scipy.misc.imresize(scipy.misc.imread(image_file), image_shape) 286 | gt_image = scipy.misc.imresize(scipy.misc.imread(gt_image_file), image_shape) 287 | 288 | # Show images and gt_images as they are 289 | if print_flag: 290 | plt.figure(figsize=(16, 8)) 291 | plt.subplot(2,2,1) 292 | plt.imshow(image) 293 | plt.subplot(2,2,2) 294 | plt.imshow(gt_image) 295 | 296 | ##################################################### 297 | # PREPARE GROUND TRUTH 298 | gt_image = 
prepare_ground_truth(dataset, gt_image, num_classes) 299 | ##################################################### 300 | 301 | images.append(image) 302 | gt_images.append(gt_image) 303 | 304 | if print_flag: 305 | plt.subplot(2,2,3) 306 | plt.imshow(image) 307 | plt.subplot(2,2,4) 308 | gt_image = scipy.misc.imresize(gt_image, image_shape) 309 | plt.imshow(gt_image) 310 | plt.show() 311 | 312 | yield np.array(images), np.array(gt_images) 313 | 314 | return get_batches_fn -------------------------------------------------------------------------------- /fcn8s/segment_video_robo.py: -------------------------------------------------------------------------------- 1 | # This file is licensed under a GPLv3 License. 2 | # 3 | # GPLv3 License 4 | # Copyright (C) 2018-2019 Pablo R. Palafox (pablo.rodriguez-palafox@tum.de) 5 | # 6 | # This program is free software: you can redistribute it and/or modify 7 | # it under the terms of the GNU General Public License as published by 8 | # the Free Software Foundation, either version 3 of the License, or 9 | # (at your option) any later version. 10 | # 11 | # This program is distributed in the hope that it will be useful, 12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | # GNU General Public License for more details. 15 | # 16 | # You should have received a copy of the GNU General Public License 17 | # along with this program. If not, see <http://www.gnu.org/licenses/>. 18 | 19 | import numpy as np 20 | import scipy.misc 21 | import tensorflow as tf 22 | from tqdm import tqdm 23 | from moviepy.editor import * 24 | import os.path 25 | from glob import glob 26 | import sys 27 | import time 28 | 29 | from load_graph import load_graph 30 | 31 | ''' 32 | city = input("Enter the name of the CITY (lowercase string) whose video you want to segment (e.g. 
montreal): ") 33 | epochs = int(input("Enter the number of EPOCHS (integer) on which the model you want to use was trained: ")) 34 | dataset = input("Enter the DATASET on which the model you want to use was trained: (e.g roborace350) ") 35 | seconds = int(input("Enter the number of SECONDS (integer) you want to segment from the video: ")) 36 | ''' 37 | 38 | 39 | city = "montreal" 40 | epochs = "100" 41 | dataset = "roborace350" 42 | seconds = 10 43 | 44 | 45 | class SegmentVideo(object): 46 | 47 | ''' 48 | Constructor with param setting 49 | ''' 50 | def __init__(self, params): 51 | for p in params: 52 | setattr(self, p, params[p]) 53 | 54 | 55 | ''' 56 | Segments the image 57 | ''' 58 | def segment_frame(self, frame): 59 | 60 | start_time = time.time() 61 | 62 | frame = scipy.misc.imresize(frame, self.image_shape) 63 | street_im = scipy.misc.toimage(frame) 64 | 65 | 66 | 67 | #config = tf.ConfigProto() 68 | #jit_level = tf.OptimizerOptions.ON_1 69 | #config.graph_options.optimizer_options.global_jit_level = jit_level 70 | with tf.Session(graph=self.graph) as sess: 71 | 72 | feed_dict = { 73 | self.keep_prob: 1.0, 74 | self.input_image: [frame] 75 | } 76 | 77 | im_softmax = sess.run( 78 | [tf.nn.softmax(self.logits)], 79 | feed_dict=feed_dict) 80 | 81 | 82 | ''' 83 | feed_dict = { 84 | self.keep_prob: 1.0, 85 | self.input_image: [frame] 86 | } 87 | im_softmax = self.sess.run( 88 | [tf.nn.softmax(self.logits)], 89 | feed_dict=feed_dict) 90 | ''' 91 | 92 | 93 | # Road 94 | im_softmax_r = im_softmax[0][:, 0].reshape(self.image_shape[0], self.image_shape[1]) 95 | segmentation_r = (im_softmax_r > 0.5).reshape(self.image_shape[0], self.image_shape[1], 1) 96 | mask = np.dot(segmentation_r, np.array([[50, 200, 50, 64]])) 97 | mask = scipy.misc.toimage(mask, mode="RGBA") 98 | street_im.paste(mask, box=None, mask=mask) 99 | 100 | # Fence 101 | im_softmax_r = im_softmax[0][:, 1].reshape(self.image_shape[0], self.image_shape[1]) 102 | segmentation_r = (im_softmax_r > 0.5).reshape(self.image_shape[0], self.image_shape[1], 1) 103 | mask = np.dot(segmentation_r, np.array([[255, 0, 0, 64]])) 104 | mask = scipy.misc.toimage(mask, mode="RGBA") 105 | street_im.paste(mask, box=None, mask=mask) 106 | 107 | print(time.time() - start_time) 108 | 109 | return np.array(street_im) 110 | 111 | 112 | ''' 113 | Main processing loop 114 | ''' 115 | def process_video(self): 116 | print("Applying inference to input video") 117 | 118 | # new_frames = [] 119 | # video = VideoFileClip(self.input_video) 120 | # for frame in video.iter_frames(): 121 | # new_frame = self.segment_image(frame) 122 | # new_frames.append(new_frame) 123 | # print(len(new_frames)) 124 | # new_video = ImageSequenceClip(new_frames, fps=video.fps) 125 | # new_video.write_videofile(self.output_video, audio=False) 126 | 127 | if not os.path.exists(self.output_path): 128 | print("Creating directory for storing video") 129 | os.makedirs(self.output_path) 130 | self.output_video = os.path.join(self.output_path, self.output_video) 131 | 132 | clip = VideoFileClip(self.input_video).subclip(0,seconds) 133 | new_clip = clip.fl_image(self.segment_frame) 134 | 135 | new_clip.write_videofile(self.output_video, audio=False) 136 | 137 | 138 | 139 | ''' 140 | Restore model and retrieve pertinent tensors 141 | ''' 142 | def restore_model(self): 143 | print("Restoring saved model...") 144 | 145 | ''' 146 | # 1 147 | self.sess = tf.Session() 148 | 149 | model_meta_file = self.model_var_dir + '/saved_model.meta' 150 | 151 | new_saver = 
tf.train.import_meta_graph(model_meta_file) 152 | new_saver.restore(self.sess, tf.train.latest_checkpoint(self.model_var_dir)) 153 | 154 | all_vars = tf.get_collection('vars') 155 | for v in all_vars: 156 | v_ = sess.run(v) 157 | print(v_) 158 | 159 | graph = tf.get_default_graph() 160 | self.keep_prob = graph.get_tensor_by_name('keep_prob:0') 161 | self.input_image = graph.get_tensor_by_name('image_input:0') 162 | self.logits = graph.get_tensor_by_name('logits:0') 163 | ''' 164 | 165 | 166 | # 2 167 | graph_filename = "models/100-Epochs-roborace350/optimized_graph.pb" 168 | graph, ops = load_graph(graph_filename, True) 169 | self.keep_prob = graph.get_tensor_by_name('keep_prob:0') 170 | self.input_image = graph.get_tensor_by_name('image_input:0') 171 | self.logits = graph.get_tensor_by_name('logits:0') 172 | self.graph = graph 173 | 174 | 175 | 176 | print("Model successfully restored") 177 | 178 | 179 | ''' 180 | Run the segmentation 181 | ''' 182 | def run(self): 183 | self.restore_model() 184 | self.process_video() 185 | 186 | 187 | 188 | ''' 189 | Entry point 190 | ''' 191 | if __name__=='__main__': 192 | 193 | params = { 194 | 'input_video': 'videos/complete_1_{}.mp4'.format(city), 195 | 'output_path': 'videos/results/{}-Epochs-{}'.format(epochs, dataset), 196 | 'output_video': 'segmented_{}seconds_{}.mp4'.format(seconds, city), 197 | 'model_var_dir': 'models/{}-Epochs-{}/variables'.format(epochs, dataset), 198 | 'image_shape': (256, 512) 199 | } 200 | 201 | sv = SegmentVideo(params) 202 | sv.run() 203 | 204 | -------------------------------------------------------------------------------- /models/get_monodepth_model.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | model_name=$1 4 | output_location=$2 5 | 6 | if [ ! -d $output_location ]; then 7 | echo "output_location does not exist. Creating it..." 8 | mkdir -p $output_location 9 | fi 10 | 11 | filename=$model_name.zip 12 | 13 | url=http://visual.cs.ucl.ac.uk/pubs/monoDepth/models/$filename 14 | 15 | output_file=$output_location/$filename 16 | 17 | echo "Downloading $model_name" 18 | wget -nc $url -O $output_file 19 | unzip $output_file -d $output_location 20 | rm $output_file -------------------------------------------------------------------------------- /models/get_sem_seg_models.md: -------------------------------------------------------------------------------- 1 | You can find our pretrained models [here](https://drive.google.com/drive/folders/1Ahs_sQMG7KWhFZMcKECbSymM999WzHd3?usp=sharing) 2 | 3 | Create a folder named `sem_seg` within this `models` folder and put them inside. 4 | -------------------------------------------------------------------------------- /models/stuttgart_video/README.md: -------------------------------------------------------------------------------- 1 | ## Put the Stuttgart image sequence from the Cityscapes dataset in this folder 2 | 3 | https://www.cityscapes-dataset.com/login/ 4 | 5 | You'll need to create an account. 
6 | -------------------------------------------------------------------------------- /monodepth_lib/README.md: -------------------------------------------------------------------------------- 1 | ## We will need some scripts from the [MonoDepth repository](https://github.com/mrharicot/monodepth), namely: 2 | 3 | * **average_gradients.py** 4 | * **bilinear_sampler.py** 5 | * **monodepth_dataloader.py** 6 | * **monodepth_model.py** 7 | 8 | Get those and put a copy inside the folder [monodepth_lib](monodepth_lib), that is, in the folder where this README.md file is now. -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==0.7.0 2 | astor==0.7.1 3 | certifi==2018.11.29 4 | chardet==3.0.4 5 | cycler==0.10.0 6 | decorator==4.3.2 7 | gast==0.2.2 8 | grpcio==1.19.0 9 | h5py==2.9.0 10 | idna==2.8 11 | imageio==2.4.1 12 | imageio-ffmpeg==0.2.0 13 | Keras-Applications==1.0.7 14 | Keras-Preprocessing==1.0.9 15 | kiwisolver==1.0.1 16 | Markdown==3.0.1 17 | matplotlib==3.0.3 18 | mock==2.0.0 19 | moviepy==1.0.0 20 | numpy==1.16.2 21 | opencv-contrib-python==4.0.0.21 22 | pbr==5.1.3 23 | Pillow>=7.1.0 24 | pkg-resources==0.0.0 25 | proglog==0.1.9 26 | protobuf==3.7.0 27 | pyparsing==2.3.1 28 | python-dateutil==2.8.0 29 | requests==2.21.0 30 | scipy==1.2.1 31 | six==1.12.0 32 | tensorboard==1.13.0 33 | tensorflow==1.15.2 34 | tensorflow-estimator==1.13.0 35 | tensorflow-gpu==1.15.2 36 | termcolor==1.1.0 37 | tqdm==4.31.1 38 | urllib3==1.24.2 39 | Werkzeug==0.15.3 40 | -------------------------------------------------------------------------------- /semantic_depth_cityscapes_sequence.py: -------------------------------------------------------------------------------- 1 | # This file is licensed under a GPLv3 License. 2 | # 3 | # GPLv3 License 4 | # Copyright (C) 2018-2019 Pablo R. Palafox (pablo.palafox@tum.de) 5 | # 6 | # This program is free software: you can redistribute it and/or modify 7 | # it under the terms of the GNU General Public License as published by 8 | # the Free Software Foundation, either version 3 of the License, or 9 | # (at your option) any later version. 10 | # 11 | # This program is distributed in the hope that it will be useful, 12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | # GNU General Public License for more details. 15 | # 16 | # You should have received a copy of the GNU General Public License 17 | # along with this program. If not, see <http://www.gnu.org/licenses/>. 18 | 19 | 20 | ''' 21 | Roborace Vision Pipeline 22 | 23 | 1. Read frame from cam 24 | 2. Segment frame and generate: 25 | -> FENCE mask 26 | -> ROAD mask 27 | 3. Produce disparity map by using monodepth network 28 | 4. Generate 3D Point Cloud from disparity map 29 | 5. Apply masks to 3D Point Cloud and obtain: 30 | -> road3D Point Cloud 31 | -> fence3D Point Cloud 32 | 6. Compute: 33 | a) 1. width of road at every depth 34 | b) 1. Fit plane to road 35 | 2. Fit planes to fences (there can be 1, 2 or 3 fence objects visible) 36 | 3. Intersect planes -> obtain lane borders 37 | 4. 
39 | 
40 | from __future__ import absolute_import, division, print_function
41 | 
42 | # only keep warnings and errors (level '1' filters out INFO messages)
43 | import os
44 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'
45 | 
46 | import imageio
47 | 
48 | imageio.plugins.ffmpeg.download()
49 | 
50 | 
51 | import numpy as np
52 | import argparse
53 | import re
54 | import time
55 | import tensorflow as tf
56 | import tensorflow.contrib.slim as slim
57 | import scipy.misc
58 | import matplotlib.pyplot as plt
59 | from moviepy.editor import *
60 | import cv2
61 | import glob
62 | 
63 | #----
64 | from monodepth_lib.monodepth_model import *
65 | from monodepth_lib.monodepth_dataloader import *
66 | from monodepth_lib.average_gradients import *
67 | #----
68 | 
69 | from semantic_depth_lib.point_cloud_2_ply import PointCloud2Ply
70 | import semantic_depth_lib.pcl as pcl
71 | 
72 | from open3d import *
73 | 
74 | def display_inlier_outlier(cloud, ind):
75 |     inlier_cloud = select_down_sample(cloud, ind)
76 |     outlier_cloud = select_down_sample(cloud, ind, invert=True)
77 | 
78 |     print("Showing outliers (red) and inliers (gray): ")
79 |     outlier_cloud.paint_uniform_color([1, 0, 0])
80 |     inlier_cloud.paint_uniform_color([0.8, 0.8, 0.8])
81 |     draw_geometries([inlier_cloud, outlier_cloud])
82 | 
83 | def render_plys(pcd, png_file):
84 | 
85 |     vis = Visualizer()
86 |     vis.create_window()
87 |     ctr = vis.get_view_control()
88 | 
89 |     param = read_pinhole_camera_parameters("intrinsics_rendering.json")
90 | 
91 |     vis.add_geometry(pcd)
92 |     ctr.convert_from_pinhole_camera_parameters(param)
93 | 
94 | 
95 |     image = vis.capture_screen_float_buffer(True)
96 |     plt.imsave(png_file, np.asarray(image), dpi = 1)
97 | 
98 |     vis.destroy_window()
99 | 
100 | '''
101 | Class for processing frames
102 | '''
103 | class FrameProcessor():
104 | 
105 |     disp_multiplier = 3800  # PARTICULAR TO CITYSCAPES (empirical disparity scaling; cf. the 2048 px full-resolution image width)
106 | 
107 |     def __init__(self, frame_segmenter, frame_depther, input_shape, approach, depth,
108 |                  verbose):
109 | 
110 |         self.frame_segmenter = frame_segmenter
111 |         self.frame_depther = frame_depther
112 |         self.input_shape = input_shape
113 |         self.approach = approach
114 |         self.depth = depth
115 |         self.verbose = verbose
116 | 
117 |     def process_frame(self, input_frame, output_name,
118 |                       result_images_dir, result_ply_dir, rendered_ply_dir):
119 | 
120 |         print("\n\nPROCESSING NEW FRAME! \n")
121 | 
122 |         # Read frame from its path and store its shape
123 |         original_frame = cv2.imread(input_frame)
124 |         original_shape = original_frame.shape
125 |         h = original_shape[0]
126 |         w = original_shape[1]
127 |         # Resize the frame to the shape the monodepth network requires
128 |         #frame = scipy.misc.imresize(original_frame, self.input_shape, interp='lanczos')
129 |         frame = cv2.resize(original_frame, (self.input_shape[1], self.input_shape[0]),
130 |                            interpolation = cv2.INTER_CUBIC)
131 |         ##########################################################################
132 |         # 1. SEGMENTATION and MASKS
133 |         print("\nSegmenting frame...")
134 |         road_mask, fence_mask, segmented_frame = self.frame_segmenter.segment_frame(frame)
135 |         road_mask = road_mask.squeeze()   # Remove 3rd dimension
136 |         fence_mask = fence_mask.squeeze() # Remove 3rd dimension
137 |         # segmented_frame = cv2.cvtColor(segmented_frame, cv2.COLOR_BGR2RGB)
138 | 
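        # (Added note: ``road_mask`` and ``fence_mask`` are boolean H x W
        # arrays aligned with the resized ``frame``; further below they are
        # used to index the H x W x 3 matrix of reprojected 3D points, e.g.
        # an input resized to (256, 512) yields masks of shape (256, 512).)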
139 |         ##########################################################################
140 |         # 2. DISPARITY MAP
141 |         print("\nComputing frame's disparity map...")
142 |         disparity = self.frame_depther.compute_disparity(frame)
143 |         # Disparities in monodepth are normalized, so we need to scale them back up;
144 |         # instead of the 2048 px full-resolution Cityscapes width we use the
145 |         # empirically tuned ``disp_multiplier`` class constant
146 |         #disparity = disparity * original_shape[1]
147 |         disparity = disparity * self.disp_multiplier
148 | 
149 |         ##########################################################################
150 |         # 3. 3D POINTS: Get 3D points from disparity map and create the corresponding
151 |         # colors array
152 |         print("\nConverting disparity map to 3D Point Cloud...")
153 |         points3D = self.frame_depther.compute_3D_points(disparity)
154 |         colors = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
155 | 
156 |         # point_cloud = PointCloud2Ply(points3D, colors, '{}_raw'.format(output_name))
157 |         # point_cloud.prepare_and_save_point_cloud()
158 | 
159 | 
160 |         ##########################################################################
161 |         # 4. MASKED IMAGES: Convert RGB image to GRAY and apply masks to obtain
162 |         # grayscale images with either a road or a fence on them
163 |         # gray_frame = cv2.cvtColor(colors, cv2.COLOR_RGB2GRAY)
164 |         # road_image = np.multiply(gray_frame, road_mask)
165 |         # fence_image = np.multiply(gray_frame, fence_mask)
166 | 
167 |         ##########################################################################
168 |         # 5. Apply masks to the whole 3D points matrix (to colors as well)
169 |         # to only get road or fence 3D points
170 |         #: ROAD
171 |         road3D = points3D[road_mask]
172 |         road_colors = colors[road_mask]
173 |         #: FENCE
174 |         fence3D = points3D[fence_mask]
175 |         fence_colors = colors[fence_mask]
176 | 
177 |         ##########################################################################
178 |         # 6. Remove noise and fit planes
179 |         # Remove noise from road 3D point cloud:
180 |         # Discard road points within the first 7 m of depth along the 'z' axis
181 |         road3D, road_colors = pcl.remove_from_to(road3D, road_colors, 2, 0.0, 7.0)
182 | 
183 |         # Compute Median Absolute Deviation along 'y' axis in the ROAD Point Cloud
184 |         road3D, road_colors = pcl.remove_noise_by_mad(road3D, road_colors, 1, 15.0)
185 | 
186 |         # Compute Median Absolute Deviation along 'x' axis in the ROAD Point Cloud
187 |         road3D, road_colors = pcl.remove_noise_by_mad(road3D, road_colors, 0, 2.0)
188 | 
189 |         # Find best fitting plane and remove all points too far away from this plane
190 |         (road3D, road_colors, road_plane3D, road_colors_plane,
191 |          road_plane_coeff) = pcl.remove_noise_by_fitting_plane(road3D, road_colors,
192 |                                                                axis=1,
193 |                                                                threshold=5.0,
194 |                                                                plane_color=[200, 200, 200])
195 | 
196 |         # read into open3d
197 |         road3D_pcd = PointCloud()
198 |         road3D_pcd.points = Vector3dVector(road3D)
199 |         road3D_pcd.colors = Vector3dVector(road_colors)
200 |         # write_point_cloud("test_road.ply", road3D_pcd)
201 | 
202 |         # remove some more outliers
203 |         print("Statistical outlier removal")
204 |         cl,ind = statistical_outlier_removal(road3D_pcd,
205 |                                              nb_neighbors=10, std_ratio=0.5)
206 |         inlier_cloud = select_down_sample(road3D_pcd, ind)
207 | 
208 |         #inlier_cloud.paint_uniform_color([0.8, 0.8, 0.8])
209 |         #draw_geometries([inlier_cloud])
210 | 
211 |         print("Radius outlier removal")
212 |         cl,ind = radius_outlier_removal(inlier_cloud,
213 |                                         nb_points=80, radius=0.5)
214 | 
215 |         inlier_cloud = select_down_sample(inlier_cloud, ind)
216 | 
217 |         #inlier_cloud.paint_uniform_color([0.8, 0.8, 0.8])
218 |         #draw_geometries([inlier_cloud])
219 | 
220 |         #display_inlier_outlier(inlier_cloud, ind)
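        # (For reference: statistical_outlier_removal drops points whose mean
        # distance to their nb_neighbors nearest neighbours deviates by more
        # than std_ratio standard deviations from the global mean, while
        # radius_outlier_removal drops points with fewer than nb_points
        # neighbours inside the given radius; the values above appear to be
        # tuned empirically for this road cloud.)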
221 |         # go back to numpy array
222 |         road3D = np.asarray(inlier_cloud.points)
223 |         road_colors = np.asarray(inlier_cloud.colors)
224 | 
225 | 
226 |         #################################################################################
227 |         ####################### rw APPROACH ##########################################
228 |         # Get 3D points that define a horizontal line at a certain depth
229 |         left_pt_rw, right_pt_rw = pcl.get_end_points_of_road(road3D,
230 |                                                              self.depth-0.02)
231 | 
232 |         line_found = False
233 |         if left_pt_rw is not None and right_pt_rw is not None:
234 |             line_found = True
235 |             # np.savez('{}_nai.npz'.format(self.output_name),
236 |             #          left_pt_rw=left_pt_rw, right_pt_rw=right_pt_rw)
237 |             #dist_rw = pcl.compute_distance_in_3D(left_pt_rw, right_pt_rw)
238 |             dist_rw = abs(left_pt_rw[0][0] - right_pt_rw[0][0])
239 |             if self.verbose:
240 |                 print("Road width", dist_rw)
241 |             line_rw, colors_line_rw = pcl.create_3Dline_from_3Dpoints(left_pt_rw,
242 |                                                                       right_pt_rw,
243 |                                                                       [250,0,0])
244 | 
245 |         if self.approach == 'both':
246 |             tic_fences = time.time()
247 |             ##########################################################################
248 |             # 6.B Remove noise from fence 3D point cloud:
249 |             # 0. Separate into LEFT and RIGHT fence
250 |             # 0.1 But before, remove outliers that go to infinity upwards
251 |             fence3D, fence_colors = pcl.remove_noise_by_mad(fence3D, fence_colors,
252 |                                                             1, 5.0)
253 |             # 0.2 Then, remove all points whose 'z' (2) value is greater than
254 |             #     a certain value (we set it to 35.0)
255 |             fence3D, fence_colors = pcl.threshold_complete(fence3D, fence_colors,
256 |                                                            2, 35.0)
257 |             # 0.3 Separate into LEFT and RIGHT fences
258 |             (fence3D_left, fence_left_colors,
259 |              fence3D_right, fence_right_colors) = pcl.extract_pcls(fence3D, fence_colors)
260 | 
261 |             #### -- LEFT FENCE
262 |             # 1. Compute Median Absolute Deviation along 'x' axis in the LEFT FENCE Point Cloud
263 |             fence3D_left, fence_left_colors = pcl.remove_noise_by_mad(fence3D_left, fence_left_colors, 0, 5.0)
264 |             # 2. Find best fitting plane and remove all points too far away from this plane
265 | 
266 |             (fence3D_left, fence_left_colors, fence_left_plane3D, fence_left_colors_plane,
267 |              fence_left_plane_coeff) = pcl.remove_noise_by_fitting_plane(fence3D_left, fence_left_colors,
268 |                                                                          axis=0,
269 |                                                                          threshold=1.0,
270 |                                                                          plane_color=[40, 70, 40])
271 | 
272 |             #### -- RIGHT FENCE
273 |             # 1. Compute Median Absolute Deviation along 'x' axis in the RIGHT FENCE Point Cloud
274 |             fence3D_right, fence_right_colors = pcl.remove_noise_by_mad(fence3D_right, fence_right_colors, 0, 1.0)
275 |             # 2. Find best fitting plane and remove all points too far away from this plane
276 | 
277 |             (fence3D_right, fence_right_colors, fence_right_plane3D, fence_right_colors_plane,
278 |              fence_right_plane_coeff) = pcl.remove_noise_by_fitting_plane(fence3D_right, fence_right_colors,
279 |                                                                           axis=0,
280 |                                                                           threshold=1.0,
281 |                                                                           plane_color=[40, 70, 40])
282 | 
283 |             ####################################################################################
284 |             ############################ f2f APPROACH ##########################################
285 |             ######## ROAD-LEFT_FENCE intersection at a certain depth ###########################
286 |             left_pt_f2f = pcl.planes_intersection_at_certain_depth(road_plane_coeff,
287 |                                                                    fence_left_plane_coeff,
288 |                                                                    z=self.depth)
289 | 
290 |             right_pt_f2f = pcl.planes_intersection_at_certain_depth(road_plane_coeff,
291 |                                                                     fence_right_plane_coeff,
292 |                                                                     z=self.depth)
293 |             dist_f2f = pcl.compute_distance_in_3D(left_pt_f2f, right_pt_f2f)
294 |             if self.verbose:
295 |                 print("Distance from fence to fence:", dist_f2f)
296 |             line_f2f, colors_line_f2f = pcl.create_3Dline_from_3Dpoints(left_pt_f2f,
297 |                                                                         right_pt_f2f,
298 |                                                                         [0,255,0])
299 |             time_f2f = time.time() - tic_fences  # elapsed time of the whole f2f branch
300 |             if self.verbose:
301 |                 print("\nf2f time: ", time_f2f)
302 | 
303 |         ##########################################################################
304 |         # 9. Draw letters in the image
305 |         self.segmented_frame = cv2.resize(segmented_frame, (w, h), interpolation = cv2.INTER_CUBIC)
306 | 
307 |         thickness = 2
308 |         fontScale = 2
309 | 
310 |         if line_found:
311 |             cv2.rectangle(self.segmented_frame,(0,0),(w, int(0.25*h)),(156, 157, 159), -1)
312 |             cv2.putText(self.segmented_frame, 'At {:.2f} m depth:'.format(self.depth),
313 |                         (int(0.36*w), int(0.05*h)),
314 |                         fontFace = 16, fontScale = fontScale+0.2, color=(255,255,255), thickness = thickness)
315 | 
316 |             cv2.putText(self.segmented_frame, '{:.2f}m to road\'s left end'.format(-left_pt_rw[0][0]),
317 |                         (int(0.05*w), int(0.13*h)),
318 |                         fontFace = 16, fontScale = fontScale, color=(255,255,255), thickness = thickness)
319 |             cv2.putText(self.segmented_frame, '{:.2f}m to road\'s right end'.format(right_pt_rw[0][0]),
320 |                         (int(0.5*w), int(0.13*h)),
321 |                         fontFace = 16, fontScale = fontScale, color=(255,255,255), thickness = thickness)
322 |             cv2.putText(self.segmented_frame, 'Road\'s width: {:.2f} m'.format(dist_rw),
323 |                         (int(0.35*w), int(0.22*h)),
324 |                         fontFace = 16, fontScale = fontScale, color=(255,255,255), thickness = thickness)
325 |         else:
326 |             cv2.putText(self.segmented_frame, 'Cannot compute width of road at {:.2f} m depth:'.format(self.depth),
327 |                         (int(0.28*w), int(0.035*h)),
328 |                         fontFace = 16, fontScale = fontScale+0.2, color=(0,255,0), thickness = thickness)
329 | 
330 |         ##########################################################################
331 |         # 8.A Project the 3D points that define the line to the image plane
332 |         #self.print_line_on_image(left_pt_rw, right_pt_rw, (0,0,255))
333 |         #self.print_line_on_image(left_pt_f2f, right_pt_f2f, (0,255,0))
334 |         ##########################################################################
335 |         # 10. Save image
336 |         cv2.imwrite('{}/{}.png'.format(result_images_dir, output_name), self.segmented_frame)
337 | 
338 |         ######################################################
339 |         # 98. Save Point Cloud to ply file to check results
340 |         # For ROAD
341 |         #point_cloud = PointCloud2Ply(road3D, road_colors, self.output_name)
342 |         #point_cloud.add_extra_point_cloud(road_plane3D, road_colors_plane)
343 | 
344 |         """
345 |         # For FENCEs and ROAD (f2f approach + rw approach)
346 |         point_cloud = PointCloud2Ply(fence3D_left, fence_left_colors, '{}/{}_rw'.format(result_ply_dir, output_name))
347 |         point_cloud.add_extra_point_cloud(fence_left_plane3D, fence_left_colors_plane)
348 |         point_cloud.add_extra_point_cloud(fence3D_right, fence_right_colors)
349 |         point_cloud.add_extra_point_cloud(fence_right_plane3D, fence_right_colors_plane)
350 |         point_cloud.add_extra_point_cloud(road3D, road_colors)
351 |         point_cloud.add_extra_point_cloud(road_plane3D, road_colors_plane)
352 |         point_cloud.add_extra_point_cloud(line_f2f, colors_line_f2f)
353 |         point_cloud.add_extra_point_cloud(line_rw, colors_line_rw)
354 |         point_cloud.prepare_and_save_point_cloud()
355 |         """
356 | 
357 |         # For FENCEs and ROAD (rw approach)
358 |         point_cloud = PointCloud2Ply(road3D, road_colors, '{}/{}_rw'.format(result_ply_dir, output_name))
359 |         if line_found:
360 |             point_cloud.add_extra_point_cloud(line_rw, colors_line_rw)
361 |         point_cloud.prepare_and_save_point_cloud()
362 | 
363 | 
364 |         # render pointcloud using open3d
365 |         # road3D_pcd = PointCloud()
366 |         # road3D_pcd.points = Vector3dVector(point_cloud.points3D)
367 |         # road3D_pcd.colors = Vector3dVector(point_cloud.colors)
368 |         # draw_geometries([road3D_pcd])
369 |         # render_plys(road3D_pcd, '{}/{}_rw.png'.format(rendered_ply_dir, output_name))
370 | 
371 |         """
372 |         # For ALL with rw Approach
373 |         point_cloud = PointCloud2Ply(points3D, colors, '{}/{}_rw'.format(result_ply_dir, output_name))
374 |         point_cloud.add_extra_point_cloud(line_rw, colors_line_rw)
375 |         point_cloud.prepare_and_save_point_cloud()
376 |         """
377 | 
378 | class SegmentFrame():
379 |     def __init__(self, input_shape, model_var_dir, use_frozen, use_xla, CUDA_DEVICE_NUMBER):
380 |         self.input_shape = input_shape
381 |         self.model_var_dir = model_var_dir
382 |         self.CUDA_DEVICE_NUMBER = CUDA_DEVICE_NUMBER
383 | 
384 |         self.restore_model(use_frozen, use_xla)
385 | 
386 | 
387 |     def load_graph(self, graph_file, use_xla):
388 | 
389 |         jit_level = 0
390 |         config = tf.ConfigProto()
391 | 
392 |         if use_xla:
393 |             jit_level = tf.OptimizerOptions.ON_1
394 |             config.graph_options.optimizer_options.global_jit_level = jit_level
395 | 
396 |         # NOTE: create the session without a ``with`` block; ``with`` would
397 |         #       close the session on exit, before the caller could use it
398 |         sess = tf.Session(graph=tf.Graph(), config=config)
399 |         with sess.graph.as_default():
400 |             gd = tf.GraphDef()
401 |             with tf.gfile.Open(graph_file, 'rb') as f:
402 |                 data = f.read()
403 |                 gd.ParseFromString(data)
404 | 
405 |             tf.import_graph_def(gd, name='')
406 | 
407 |         ops = sess.graph.get_operations()
408 | 
409 |         return sess, ops
410 | 
411 | 
412 | 
413 |     def restore_model(self, use_frozen=True, use_xla=False):
414 | 
415 |         if use_frozen:
416 |             print("\n\nRestoring (frozen) segmentation model...")
417 | 
418 |             graph_file = '{}/optimized_graph.pb'.format(self.model_var_dir)
419 |             sess, _ = self.load_graph(graph_file, use_xla)
420 | 
421 |             self.sess = sess
422 |             graph = self.sess.graph
423 | 
424 |             self.keep_prob = graph.get_tensor_by_name('keep_prob:0')
425 |             self.input_image = graph.get_tensor_by_name('image_input:0')
426 |             self.logits = graph.get_tensor_by_name('logits:0')
427 | 
428 |             print("Segmentation model successfully restored!")
429 | 
430 |         else:
431 |             print("\n\nRestoring segmentation model...")
432 | 
433 |             os.environ["CUDA_VISIBLE_DEVICES"] = self.CUDA_DEVICE_NUMBER
434 |             config = tf.ConfigProto()
435 |             config.gpu_options.allow_growth = True
436 |             config.gpu_options.visible_device_list = "0"
437 | 
438 |             #config = tf.ConfigProto(allow_soft_placement=True)
439 | 
440 |             self.sess = tf.Session(config=config)
441 | 
442 |             model_meta_file = "{}/variables/saved_model.meta".format(self.model_var_dir)
443 |             #print(model_meta_file)
444 | 
445 |             new_saver = tf.train.import_meta_graph(model_meta_file)
446 |             new_saver.restore(self.sess, tf.train.latest_checkpoint(self.model_var_dir+"/variables"))
447 | 
448 |             graph = tf.get_default_graph()
449 | 
450 |             self.keep_prob = graph.get_tensor_by_name('keep_prob:0')
451 |             self.input_image = graph.get_tensor_by_name('image_input:0')
452 |             self.logits = graph.get_tensor_by_name('logits:0')
453 | 
454 |             self.sess.run(tf.local_variables_initializer())
455 | 
456 |             print("Segmentation model successfully restored!")
457 | 
458 | 
459 |     def segment_frame(self, frame):
460 | 
461 |         # Note that the frame has already been resized by this time
462 |         # to the ``input_shape`` dimensions
463 |         street_im = scipy.misc.toimage(frame)
464 | 
465 |         im_softmax = self.sess.run(
466 |             [tf.nn.softmax(self.logits)],
467 |             {self.keep_prob: 1.0, self.input_image: [frame]})
468 | 
469 |         # Road
470 |         im_softmax_road = im_softmax[0][:, 0].reshape(self.input_shape[0], self.input_shape[1])
471 |         segmentation_road = (im_softmax_road > 0.5).reshape(self.input_shape[0], self.input_shape[1], 1)
472 |         road_mask = np.dot(segmentation_road, np.array([[128, 64, 128, 64]]))
473 |         road_mask = scipy.misc.toimage(road_mask, mode="RGBA")
474 |         #scipy.misc.imsave('road.png', road_mask)
475 |         street_im.paste(road_mask, box=None, mask=road_mask)
476 | 
477 |         # Fence
478 |         im_softmax_fence = im_softmax[0][:, 1].reshape(self.input_shape[0], self.input_shape[1])
479 |         segmentation_fence = (im_softmax_fence > 0.5).reshape(self.input_shape[0], self.input_shape[1], 1)
480 |         fence_mask = np.dot(segmentation_fence, np.array([[190, 153, 153, 64]]))
481 |         fence_mask = scipy.misc.toimage(fence_mask, mode="RGBA")
482 |         #scipy.misc.imsave('fence.png', fence_mask)
483 |         street_im.paste(fence_mask, box=None, mask=fence_mask)
484 | 
485 |         return segmentation_road, segmentation_fence, np.array(street_im)
486 | 
487 | 
488 | class DepthFrame():
489 |     def __init__(self, encoder='vgg', input_height=256, input_width=512,
490 |                  checkpoint_path='models/monodepth/model_cityscapes/model_cityscapes'):
491 | 
492 | 
493 |         self.encoder = encoder
494 |         self.input_height = input_height
495 |         self.input_width = input_width
496 |         self.checkpoint_path = checkpoint_path
497 | 
498 |         # CITYSCAPES INTRINSIC PARAMS (camera center scaled down from the
499 |         # full resolution to the 512 x 256 network input) #
500 |         cx = 1048.64 / 4
501 |         cy = 519.277 / 4
502 |         b = 1.0 # found empirically
503 |         f = 500 # found empirically
504 | 
505 |         self.Q = np.float32([[1, 0, 0, -cx],
506 |                              [0,-1, 0,  cy], # turn points 180 deg around x-axis,
507 |                              [0, 0, 0,  -f], # so that y-axis looks up
508 |                              [0, 0, 1/b, 0]])
509 | 
510 |         self.params = monodepth_parameters(
511 |             encoder=self.encoder,
512 |             height=self.input_height,
513 |             width=self.input_width,
514 |             batch_size=2,
515 |             num_threads=1,
516 |             num_epochs=1,
517 |             do_stereo=False,
518 |             wrap_mode="border",
519 |             use_deconv=False,
520 |             alpha_image_loss=0,
521 |             disp_gradient_loss_weight=0,
522 |             lr_loss_weight=0,
523 |             full_summary=False)
524 | 
525 |         self.restore_model()
526 | 
527 | 
528 |     def restore_model(self):
529 |         print("\n\nRestoring monodepth model...")
530 | 
531 |         self.graph_depth = tf.Graph()
532 |         with self.graph_depth.as_default():
533 | 
534 |             self.left = tf.placeholder(tf.float32, [2, self.input_height, self.input_width, 3])
535 |             self.model = MonodepthModel(self.params, "test", self.left, None)
536 | 
537 |             # SESSION
538 |             config = tf.ConfigProto(allow_soft_placement=True)
539 |             self.sess = tf.Session(config=config)
540 | 
541 |             # SAVER
542 |             train_saver = tf.train.Saver()
543 | 
544 |             # INIT
545 |             self.sess.run(tf.global_variables_initializer())
546 |             self.sess.run(tf.local_variables_initializer())
547 |             coordinator = tf.train.Coordinator()
548 |             threads = tf.train.start_queue_runners(sess=self.sess, coord=coordinator)
549 | 
550 |             # RESTORE
551 |             restore_path = self.checkpoint_path
552 |             train_saver.restore(self.sess, restore_path)
553 | 
554 |             print("Monodepth model successfully restored!")
555 | 
556 | 
557 |     def post_processing(self, disp):
558 |         _, h, w = disp.shape
559 |         l_disp = disp[0,:,:]
560 |         r_disp = np.fliplr(disp[1,:,:])
561 |         m_disp = 0.5 * (l_disp + r_disp)
562 |         l, _ = np.meshgrid(np.linspace(0, 1, w), np.linspace(0, 1, h))
563 |         l_mask = 1.0 - np.clip(20 * (l - 0.05), 0, 1)
564 |         r_mask = np.fliplr(l_mask)
565 |         return r_mask * l_disp + l_mask * r_disp + (1.0 - l_mask - r_mask) * m_disp
566 | 
567 | 
568 |     def compute_disparity(self, frame):
569 | 
570 |         # Note that the frame has already been resized by this time
571 |         # to the ``input_shape`` dimensions
572 |         frame = frame.astype(np.float32) / 255
573 |         input_frames = np.stack((frame, np.fliplr(frame)), 0)
574 | 
575 |         with self.graph_depth.as_default():
576 |             disp = self.sess.run(self.model.disp_left_est[0], feed_dict={self.left: input_frames})
577 |             disp_pp = self.post_processing(disp.squeeze()).astype(np.float32)
578 | 
579 |         return disp_pp
580 | 
581 | 
582 |     def disp_to_image(self, disp_pp, output_name, original_height, original_width):
583 |         disp_to_img = scipy.misc.imresize(disp_pp.squeeze(), [original_height, original_width])
584 |         plt.imsave("{}_disp.png".format(output_name), disp_to_img, cmap='gray') # cmap='plasma'
585 | 
586 | 
587 |     def compute_3D_points(self, disp):
588 |         points3D = cv2.reprojectImageTo3D(disp, self.Q)
589 |         return points3D
590 | 
591 | 
592 | def main():
593 | 
594 |     parser = argparse.ArgumentParser(description="Read frame and "
595 |                                      "compute the distance from the center "
596 |                                      "of the car to the fences.")
597 | 
598 |     parser.add_argument("--input_folder", help="Path to folder where the input images are.",
599 |                         default="data/stuttgart_video_test/*.png")
600 | 
601 |     parser.add_argument("--semantic_model", help="Path to semantic segmentation model.",
602 |                         default="models/sem_seg/30-Epochs-cityscapes")
603 | 
604 |     parser.add_argument("--monodepth_checkpoint", help="Path to monodepth checkpoint.",
605 |                         default="models/monodepth/model_cityscapes/model_cityscapes")
606 | 
607 |     parser.add_argument('--monodepth_encoder', type=str,
608 |                         help='type of encoder, vgg or resnet50', default='vgg')
609 | 
610 |     parser.add_argument('--input_height', type=int,
611 |                         help='input height',
612 |                         default=256)
613 | 
614 |     parser.add_argument('--input_width', type=int,
615 |                         help='input width',
616 |                         default=512)
617 | 
618 |     parser.add_argument('--approach', type=str,
619 |                         help='approach for measuring road width',
620 |                         default='rw')
621 | 
622 |     parser.add_argument('--use_frozen',
623 |                         help='If set, uses frozen model',
624 |                         action='store_true')
625 | 
626 |     parser.add_argument('--use_xla',
627 |                         help='If set, uses xla',
628 |                         action='store_true')
629 | 
630 |     parser.add_argument('--CUDA_DEVICE_NUMBER',
631 |                         help='Number of GPU device to use (e.g., 0, 1, 2, ...)',
632 |                         default="0")
633 | 
634 |     parser.add_argument('--depth', type=float,
635 |                         help='depth at which to compute road\'s width',
636 |                         default=10)
637 | 
638 |     parser.add_argument('--verbose',
639 |                         help='If set, prints info',
640 |                         action='store_true')
641 | 
642 |     args = parser.parse_args()
643 | 
644 |     # Input size
645 |     input_shape = (args.input_height, args.input_width)
646 | 
647 |     # Create a DepthFrame object
648 |     frame_depther = DepthFrame(args.monodepth_encoder,
649 |                                args.input_height,
650 |                                args.input_width,
651 |                                args.monodepth_checkpoint)
652 | 
653 |     # Create a SegmentFrame object
654 |     frame_segmenter = SegmentFrame(input_shape, args.semantic_model,
655 |                                    args.use_frozen, args.use_xla,
656 |                                    args.CUDA_DEVICE_NUMBER)
657 | 
658 |     # Create a FrameProcessor object
659 |     frame_processor = FrameProcessor(frame_segmenter,
660 |                                      frame_depther,
661 |                                      input_shape,
662 |                                      args.approach,
663 |                                      args.depth,
664 |                                      args.verbose)
665 | 
666 | 
667 | 
668 |     # Create output frame paths
669 |     output_directory = "results/stuttgart_video"
670 |     result_images_dir = os.path.join(output_directory,
671 |                                      'result_sequence_imgs')
672 |     result_ply_dir = os.path.join(output_directory, 'result_sequence_ply')
673 |     rendered_ply_dir = os.path.join(output_directory, 'rendered_sequence')
674 | 
675 |     if not os.path.exists(result_images_dir):
676 |         print("Creating directory for storing result frames")
677 |         os.makedirs(result_images_dir)
678 | 
679 |     if not os.path.exists(result_ply_dir):
680 |         print("Creating directory for storing result ply")
681 |         os.makedirs(result_ply_dir)
682 | 
683 |     if not os.path.exists(rendered_ply_dir):
684 |         print("Creating directory for storing rendered ply")
685 |         os.makedirs(rendered_ply_dir)
686 | 
687 | 
688 |     # Process input frames
689 |     for input_frame in sorted(glob.glob(args.input_folder)):
690 | 
691 | 
692 |         # if input_frame != "data/stuttgart_video_test/stuttgart_02_000000_005176_leftImg8bit.png":
693 |         #     continue
694 | 
695 |         print("Processing", input_frame)
696 | 
697 | 
698 |         output_name = os.path.basename(input_frame)
699 |         output_name = os.path.splitext(output_name)[0]
700 |         frame_processor.process_frame(input_frame, output_name,
701 |                                       result_images_dir, result_ply_dir, rendered_ply_dir)
702 | 
703 | 
704 | if __name__ == "__main__":
705 |     main()
--------------------------------------------------------------------------------
/semantic_depth_lib/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pablopalafox/semantic-depth/c55b4f6822f7ee7cc22b3a7f052de5204e3a037e/semantic_depth_lib/__init__.py
--------------------------------------------------------------------------------
/semantic_depth_lib/pcl.py:
--------------------------------------------------------------------------------
1 | # This file is licensed under a GPLv3 License.
2 | #
3 | # GPLv3 License
4 | # Copyright (C) 2018-2019 Pablo R. Palafox (pablo.rodriguez-palafox@tum.de)
5 | #
6 | # This program is free software: you can redistribute it and/or modify
7 | # it under the terms of the GNU General Public License as published by
8 | # the Free Software Foundation, either version 3 of the License, or
9 | # (at your option) any later version.
10 | #
11 | # This program is distributed in the hope that it will be useful,
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | # GNU General Public License for more details.
15 | #
16 | # You should have received a copy of the GNU General Public License
17 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
18 | 
19 | 
20 | '''
21 | Hand-made Point Cloud Library to deal with the most basic operations
22 | one could want to apply to a 3D Point Cloud
23 | '''
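# A tiny worked example of the MAD-based filtering used below (added for
# illustration): for coordinates [1.0, 2.0, 2.5, 9.0] the median is 2.25,
# the absolute deviations are [1.25, 0.25, 0.25, 6.75] and their median
# (the MAD) is 0.75, so the penalty 0.6745 * |x - median| / MAD scores
# 9.0 at ~6.07 while no inlier scores above ~1.13; even a threshold of 2.0
# would therefore drop only the 9.0.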
24 | 
25 | from __future__ import absolute_import, division, print_function
26 | import numpy as np
27 | import scipy.linalg
28 | 
29 | 
30 | def remove_from_to(points3D, colors, axis, from_meter, to_meter):
31 |     '''
32 |     Keeps only the points whose ``axis`` coordinate is below ``-to_meter``;
33 |     for the camera-frame 'z' axis (which grows negative with depth) this
34 |     discards everything closer than ``to_meter`` metres.
35 |     NOTE: ``from_meter`` is currently unused.
36 |     '''
37 |     inliers_indices = np.where(points3D[:, axis] < -to_meter)
38 | 
39 |     points3D = points3D[inliers_indices]
40 |     colors = colors[inliers_indices]
41 | 
42 |     return points3D, colors
43 | 
44 | 
45 | 
46 | def remove_noise_by_mad(points3D, colors, axis, threshold=15.0):
47 |     '''
48 |     The criterion is to apply the Median Absolute Deviation in one of the
49 |     dimensions of the point cloud
50 |     ``axis``: x (0), y (1), z (2)
51 |     '''
52 | 
53 |     # print("\nRemoving noise from the axis '{}' by applying MAD...".format(axis))
54 | 
55 |     # First, we'll get the ``axis`` coordinates of the 3D Point Cloud we want to denoise
56 |     points3D_axis = points3D[:, axis]
57 |     #print(points3D_axis)
58 |     # We compute the Median Absolute Deviation of the coordinates along ``axis``
59 |     abs_diffs, mad_axis = mad(points3D_axis)
60 |     #print(abs_diffs)
61 |     #print(mad_axis)
62 |     # We compute the penalty (modified z-score) of each element
63 |     penalty = 0.6745 * abs_diffs / mad_axis
64 |     #print(penalty)
65 |     # Now, we get the indices of the points whose coordinate has a penalty lower than
66 |     # ``threshold``, that is, the indices of the inliers
67 |     inliers_indices = np.where(penalty < threshold)
68 |     #print("Indices of inliers", inliers_indices)
69 |     # Finally, remove noisy points from the original 3D Point Cloud (``points3D``)
70 |     # and remove also corresponding data from the colors array
71 |     points3D = points3D[inliers_indices]
72 |     colors = colors[inliers_indices]
73 |     return points3D, colors
74 | 
75 | 
76 | def mad(points1D):
77 |     ''' Computes the Median Absolute Deviation '''
78 |     median = np.median(points1D)
79 |     abs_diffs = abs(points1D - median)
80 |     mad = np.median(abs_diffs)
81 |     return abs_diffs, mad
82 | 
83 | 
84 | def remove_noise_by_fitting_plane(points3D, colors, axis=0, threshold=1.0, plane_color=[255,255,255]):
85 |     '''
86 |     Removes noise from a 3D Point Cloud by fitting a plane to it
87 |     and then removing all points that are not close to this plane.
88 | 
89 |     The plane will be roughly perpendicular to ``axis``
90 | 
91 |     Returns the corresponding denoised 3D Point Cloud.
92 | 
93 |     The criterion is to remove any point which is not situated near the
94 |     fitted plane, that is, the points whose ``axis`` value differs
95 |     significantly from the plane's prediction.
96 | 
97 |     threshold = 1.0 is good
98 |     '''
99 | 
100 |     grid_size = 0.05
101 | 
102 |     #print("\nRemoving noise from 3D Point Cloud by fitting a plane...")
103 | 
104 |     if axis == 0: # Plane perpendicular to 'x' axis, which points RIGHT in our world
105 | 
106 |         # For visualization #
107 |         y_min = np.amin(points3D[:,1])
108 |         y_max = np.amax(points3D[:,1])
109 |         z_min = np.amin(points3D[:,2])
110 |         z_max = np.amax(points3D[:,2])
111 |         Y, Z = np.meshgrid(np.arange(y_min, y_max, grid_size), np.arange(z_min, z_max, grid_size))
112 |         YY = Y.flatten()
113 |         ZZ = Z.flatten()
114 | 
115 |         # 1. We start by fitting a plane to the points and obtaining its coefficients
116 |         # The plane's equation is: --> C[0]*Y + C[1]*Z - X + C[2] = 0 <--
117 |         # So we solve by least-squares the equation Ax=b
118 |         A = np.c_[ points3D[:,1], points3D[:,2], np.ones(points3D.shape[0]) ]
119 |         b = points3D[:,0]
120 |         C,_,_,_ = scipy.linalg.lstsq(A, b)    # coefficients
121 | 
122 |         # For visualization #
123 |         X = C[0]*Y + C[1]*Z + C[2]
124 |         XX = X.flatten()
125 |         plane3D = np.c_[XX, YY, ZZ]
126 |         colors_plane = np.ones(plane3D.shape)*plane_color
127 | 
128 | 
129 |         # 2. Denoise - For every point in ``points3D``, compute if it belongs to the plane
130 |         a = C[0]*points3D[:, 1] + C[1]*points3D[:, 2] - points3D[:, 0] + C[2]
131 |         inliers_indices = np.where(abs(a) < threshold)    # (a threshold of ~2.7 also works well)
132 | 
133 |         # Re-order the coefficients in such a way that the plane equation is
134 |         # C0 * x + C1 * y + C2 * z + C3 = 0
135 |         coefficients = {'Cx': -1.0, 'Cy': C[0], 'Cz': C[1], 'C': C[2]}
136 | 
137 | 
138 |     elif axis == 1: # Plane perpendicular to 'y' axis, which points UP in our world
139 | 
140 |         # For visualization #
141 |         x_min = np.amin(points3D[:,0])
142 |         x_max = np.amax(points3D[:,0])
143 |         z_min = np.amin(points3D[:,2])
144 |         z_max = np.amax(points3D[:,2])
145 |         X, Z = np.meshgrid(np.arange(x_min, x_max, grid_size), np.arange(z_min, z_max, grid_size))
146 |         XX = X.flatten()
147 |         ZZ = Z.flatten()
148 | 
149 |         # 1. We start by fitting a plane to the points and obtaining its coefficients
150 |         # The plane's equation is: --> C[0]*X + C[1]*Z - Y + C[2] = 0 <--
151 |         # So we solve by least-squares the equation Ax=b
152 |         A = np.c_[ points3D[:,0], points3D[:,2], np.ones(points3D.shape[0]) ]
153 |         b = points3D[:,1]
154 |         C,_,_,_ = scipy.linalg.lstsq(A, b)    # coefficients
155 | 
156 |         # For visualization #
157 |         Y = C[0]*X + C[1]*Z + C[2]
158 |         YY = Y.flatten()
159 |         plane3D = np.c_[XX, YY, ZZ]
160 |         colors_plane = np.ones(plane3D.shape)*plane_color
161 | 
162 |         # 2. Denoise - For every point in ``points3D``, compute if it belongs to the plane
163 |         a = C[0]*points3D[:, 0] + C[1]*points3D[:, 2] - points3D[:, 1] + C[2]
164 |         inliers_indices = np.where(abs(a) < threshold)    # (a threshold of ~2.7 also works well)
165 | 
166 |         # Re-order the coefficients in such a way that the plane equation is
167 |         # C0 * x + C1 * y + C2 * z + C3 = 0
168 |         coefficients = {'Cx': C[0], 'Cy': -1.0, 'Cz': C[1], 'C': C[2]}
169 | 
170 |     elif axis == 2: # Plane perpendicular to 'z' axis, which points INTO THE SCREEN in our world
171 | 
172 |         # For visualization #
173 |         x_min = np.amin(points3D[:,0])
174 |         x_max = np.amax(points3D[:,0])
175 |         y_min = np.amin(points3D[:,1])
176 |         y_max = np.amax(points3D[:,1])
177 |         X, Y = np.meshgrid(np.arange(x_min, x_max, grid_size), np.arange(y_min, y_max, grid_size))
178 |         XX = X.flatten()
179 |         YY = Y.flatten()
180 | 
181 |         # 1. We start by fitting a plane to the points and obtaining its coefficients
182 |         # The plane's equation is: --> C[0]*X + C[1]*Y - Z + C[2] = 0 <--
183 |         # So we solve by least-squares the equation Ax=b
184 |         A = np.c_[ points3D[:,0], points3D[:,1], np.ones(points3D.shape[0]) ]
185 |         b = points3D[:,2]
186 |         C,_,_,_ = scipy.linalg.lstsq(A, b)    # coefficients
187 | 
188 |         # For visualization #
189 |         Z = C[0]*X + C[1]*Y + C[2]
190 |         ZZ = Z.flatten()
191 |         plane3D = np.c_[XX, YY, ZZ]
192 |         colors_plane = np.ones(plane3D.shape)*plane_color
193 | 
194 | 
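        # (Worked micro-example of the residual test used in every branch,
        # added for illustration: with C = [0.0, 0.0, 5.0] -- the plane
        # z = 5 -- the point (1, 1, 5.4) gives a residual
        # a = 0*1 + 0*1 - 5.4 + 5.0 = -0.4, so with |a| = 0.4 it survives
        # threshold=1.0, while (1, 1, 7.0) with |a| = 2.0 is discarded.)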
195 |         # 2. Denoise - For every point in ``points3D``, compute if it belongs to the plane
196 |         a = C[0]*points3D[:, 0] + C[1]*points3D[:, 1] - points3D[:, 2] + C[2]
197 |         inliers_indices = np.where(abs(a) < threshold)    # (a threshold of ~2.7 also works well)
198 | 
199 |         # Re-order the coefficients in such a way that the plane equation is
200 |         # C0 * x + C1 * y + C2 * z + C3 = 0
201 |         coefficients = {'Cx': C[0], 'Cy': C[1], 'Cz': -1.0, 'C': C[2]}
202 | 
203 | 
204 |     # Finally, remove noisy points from the original 3D Point Cloud (``points3D``)
205 |     # and remove also corresponding data from the colors array
206 |     points3D = points3D[inliers_indices]
207 |     colors = colors[inliers_indices]
208 | 
209 |     return points3D, colors, plane3D, colors_plane, coefficients
210 | 
211 | 
212 | def planes_intersection_at_certain_depth(C_p1, C_p2, z):
213 | 
214 |     # The depth is provided in absolute value. However, in our world,
215 |     # the 'z' axis points into the screen. This means that increasingly negative z values
216 |     # represent increasing depth values
217 |     z = - z
218 | 
219 |     # Now we solve a system of two equations in two variables,
220 |     # since 'z' is known:
221 |     # Cx*x + Cy*y = - (Cz*z + C)      (plane 1)
222 |     # Kx*x + Ky*y = - (Kz*z + K)      (plane 2)
223 | 
224 |     #print("Looking for intersection of two planes...")
225 | 
226 |     A = np.matrix([ [C_p1['Cx'], C_p1['Cy'] ],
227 |                     [C_p2['Cx'], C_p2['Cy'] ]])
228 | 
229 |     B = np.matrix([ [ - (C_p1['Cz']*z + C_p1['C']) ],
230 |                     [ - (C_p2['Cz']*z + C_p2['C']) ]])
231 | 
232 |     A_inverse = np.linalg.inv(A)
233 |     X = A_inverse * B
234 | 
235 |     point = np.array( [[X[0], X[1], [z]]], np.float64)
236 |     point = np.squeeze(point, axis=2)
237 |     return point
238 | 
239 | 
240 | def threshold_complete(points3D, colors, axis, threshold=15.0):
241 | 
242 |     #print("\nMaintain only points whose ``axis`` coordinate is"
243 |     #      "smaller than ``threshold``")
244 | 
245 |     points3D_axis = points3D[:, axis]
246 |     #print(points3D_axis)
247 |     inliers_indices = np.where(abs(points3D_axis) < threshold)
248 |     points3D = points3D[inliers_indices]
249 |     colors = colors[inliers_indices]
250 |     return points3D, colors
251 | 
252 | 
253 | def extract_pcls(points3D, colors, axis=0):
254 | 
255 |     #print("\nExtract 2 smaller Point Clouds from ``points3D``")
256 | 
257 |     points3D_axis = points3D[:, axis]
258 |     mean = np.mean(points3D_axis)
259 | 
260 |     left_indices = np.where(points3D_axis < mean)
261 |     left = points3D[left_indices]
262 |     left_colors = colors[left_indices]
263 | 
264 |     right_indices = np.where(points3D_axis > mean)
265 |     right = points3D[right_indices]
266 |     right_colors = colors[right_indices]
267 | 
268 |     return left, left_colors, right, right_colors
269 | 
270 | 
271 | def get_end_points_of_road(points3D, depth):
272 |     '''
273 |     Returns the left and right ends of a 3D segment which is perpendicular
274 |     to the Z axis and situated at a 'z' value of ``depth``,
275 |     which must be a POSITIVE number.
276 |     * ``depth``: depth at which the segment must be found
277 |     '''
278 | 
279 |     # Get a numpy array with only the Z coordinates of the input 3D Point Cloud
280 |     points3D_Z = points3D[:, 2]
281 |     # Find the indices of the ``points3D_Z`` whose values are within a
282 |     # range of the input variable ``depth``
283 |     indices = np.where(( (points3D_Z < -(depth-0.05)) & (points3D_Z > -(depth+0.05)) ))
284 |     # Generate a 3D segment by getting the 3D points of the original
285 |     # input Point Cloud whose 'z' components are situated at a depth of ``depth``
286 |     points3D_Z_segment = points3D[indices]
287 |     # Find the end points of the segment
288 |     left_pt_naive, right_pt_naive = get_end_points_of_segment(points3D_Z_segment)
289 | 
290 |     return left_pt_naive, right_pt_naive
291 | 
292 | 
293 | def get_end_points_of_segment(segment):
294 |     ''' Finds the end points of a 3D segment '''
295 | 
296 |     #print('\nComputing end points of segment...')
297 | 
298 |     # First, we must find the end points of the segment, taking into account that the segment
299 |     # has a fixed 'y' and 'z' value, only varying in the 'x' dimension.
300 |     # Consequently, we only take the 'x' components of every 3D point that forms the segment
301 |     segment_X = segment[:, 0]
302 | 
303 |     if segment_X.size == 0:
304 |         return None, None
305 | 
306 |     # Find the points whose 'x' coordinates are min and max, respectively
307 |     left_end_index = np.where(segment_X == np.amin(segment_X))
308 |     right_end_index = np.where(segment_X == np.amax(segment_X))
309 |     # Get those points
310 |     left_end_pt = segment[left_end_index]
311 |     right_end_pt = segment[right_end_index]
312 | 
313 |     return left_end_pt, right_end_pt
314 | 
315 | 
316 | def compute_distance_in_3D(pt3D_A, pt3D_B):
317 |     ''' Computes the Euclidean distance between two 3D points '''
318 |     return np.linalg.norm(pt3D_A-pt3D_B)
319 | 
320 | 
321 | def create_3Dline_from_3Dpoints(left_pt, right_pt, color):
322 |     left_pt[0][1] += 0.01
323 |     right_pt[0][1] += 0.01
324 |     v = right_pt - left_pt
325 |     t_values = np.arange(0.0, 1.0, 0.001)
326 |     line = left_pt
327 |     for t in t_values:
328 |         line = np.append(line, left_pt + (t * v), axis=0)
329 |     colors_line = np.ones(line.shape) * color
330 | 
331 |     return line, colors_line
--------------------------------------------------------------------------------
/semantic_depth_lib/point_cloud_2_ply.py:
--------------------------------------------------------------------------------
1 | # This file is licensed under a GPLv3 License.
2 | #
3 | # GPLv3 License
4 | # Copyright (C) 2018-2019 Pablo R. Palafox (pablo.rodriguez-palafox@tum.de)
5 | #
6 | # This program is free software: you can redistribute it and/or modify
7 | # it under the terms of the GNU General Public License as published by
8 | # the Free Software Foundation, either version 3 of the License, or
9 | # (at your option) any later version.
10 | #
11 | # This program is distributed in the hope that it will be useful,
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 | # GNU General Public License for more details.
15 | #
16 | # You should have received a copy of the GNU General Public License
17 | # along with this program. If not, see <https://www.gnu.org/licenses/>.
18 | 
19 | 
20 | '''
21 | Point cloud class generated from two numpy arrays, one being
22 | the actual points and one the points' colors, which then saves these
23 | points into a ply file
24 | 
25 | Classes:
26 | 
27 | * ``PointCloud2Ply`` - Point cloud with RGB colors
28 | 
29 | '''
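# Typical usage, as a minimal sketch (``points3D``/``colors`` stand for any
# N x 3 float positions and N x 3 RGB values; the names are illustrative):
#
#     pc = PointCloud2Ply(points3D, colors, "road_scene")
#     pc.add_extra_point_cloud(line_points, line_colors)  # optional overlays
#     pc.prepare_and_save_point_cloud()                   # writes road_scene.ply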
30 | 
31 | import numpy as np
32 | 
33 | class PointCloud2Ply():
34 | 
35 |     """3D point cloud tools."""
36 | 
37 |     #: Header for exporting point cloud to PLY
38 |     ply_header = (
39 | '''ply
40 | format ascii 1.0
41 | element vertex {vertex_count}
42 | property float x
43 | property float y
44 | property float z
45 | property uchar red
46 | property uchar green
47 | property uchar blue
48 | end_header
49 | ''')
50 | 
51 |     def __init__(self, points3D, colors, output_name):
52 |         """
53 |         Initialize the point cloud with given coordinates and associated colors.
54 | 
55 |         ``points3D`` and ``colors`` should be numpy arrays of the same
56 |         length, in which ``points3D`` is made of three-dimensional point
57 |         positions (X, Y, Z) and ``colors`` is made of three-dimensional spectral
58 |         data, e.g. (R, G, B).
59 |         """
60 |         self.points3D = points3D.reshape(-1, 3)
61 |         self.colors = colors.reshape(-1, 3)
62 |         self.output_name = output_name
63 | 
64 |     def write_ply(self, output_file):
65 |         """Export the point cloud to a PLY file for viewing in MeshLab."""
66 |         points = np.hstack([self.points3D, self.colors])
67 |         with open(output_file, 'w') as f:
68 |             f.write(self.ply_header.format(vertex_count=len(points)))
69 |             np.savetxt(f, points, '%f %f %f %d %d %d')
70 |         print("Point Cloud file generated!")
71 | 
72 |     def add_extra_point_cloud(self, points3D_extra, colors_extra):
73 |         """
74 |         Append the ``points3D_extra`` vector to the already existing
75 |         ``points3D`` (and do the same with the colors)
76 |         """
77 |         self.points3D = np.append(self.points3D, points3D_extra, axis=0)
78 |         self.colors = np.append(self.colors, colors_extra, axis=0)
79 | 
80 |     def prepare_and_save_point_cloud(self):
81 |         """
82 |         Apply an infinity filter and, finally, save the points into a PLY file.
83 |         """
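        # (Background note, an assumption based on how cv2.reprojectImageTo3D
        # behaves with this project's Q matrix: near-zero disparities map to
        # the extreme minimum 'z' value, i.e. "at infinity", so keeping only
        # points strictly above that minimum discards those unreliable points.)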
84 |         # Apply a mask to remove points with an infinite depth
85 |         infinity_mask = self.points3D[:, 2] > self.points3D[:, 2].min()
86 |         self.points3D = self.points3D[infinity_mask]
87 |         self.colors = self.colors[infinity_mask]
88 | 
89 |         output_ply = '{}.ply'.format(self.output_name)
90 |         self.write_ply(output_ply)
91 | 
--------------------------------------------------------------------------------
/utils/create_video_from_frames.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import glob
3 | 
4 | # input_paths = ["../results/stuttgart_video/result_sequence_imgs/*.png",
5 | #                "../results/stuttgart_video/rendered_sequence_top/*.png",
6 | #                "../results/stuttgart_video/rendered_sequence_good_frontal/*.png"]
7 | 
8 | # output_paths = ["../results/stuttgart_video/result_imgs.mp4",
9 | #                 "../results/stuttgart_video/result_top_render.mp4",
10 | #                 "../results/stuttgart_video/result_frontal_render.mp4"]
11 | 
12 | 
13 | 
14 | input_paths = ["../results/stuttgart_video/result_sequence_imgs/*.png"]
15 | 
16 | output_paths = ["../results/stuttgart_video/result_imgs.mp4"]
17 | 
18 | for i in range(len(input_paths)):
19 |     print("Reading from", input_paths[i])
20 | 
21 |     # One known frame fixes the video size; adjust this path if your sequence differs
22 |     test_frame = cv2.imread("../results/stuttgart_video/result_sequence_imgs/stuttgart_02_000000_005100_leftImg8bit.png")
23 |     height, width = test_frame.shape[0], test_frame.shape[1]
24 |     print(height, width)
25 | 
26 |     video = cv2.VideoWriter(output_paths[i], cv2.VideoWriter_fourcc(*"mp4v"), 30, (width, height))
27 | 
28 |     for frame_path in sorted(glob.glob(input_paths[i])):
29 |         frame = cv2.imread(frame_path)
30 |         video.write(frame)
31 | 
32 |     # Finalize the file; without release() the video may be left truncated
33 |     video.release()
34 |     print("Done", input_paths[i])
--------------------------------------------------------------------------------
/utils/outlier_removal.py:
--------------------------------------------------------------------------------
1 | from open3d import *
2 | import time
3 | 
4 | def display_inlier_outlier(cloud, ind):
5 |     inlier_cloud = select_down_sample(cloud, ind)
6 |     outlier_cloud = select_down_sample(cloud, ind, invert=True)
7 | 
8 |     print("Showing outliers (red) and inliers (gray): ")
9 |     outlier_cloud.paint_uniform_color([1, 0, 0])
10 |     inlier_cloud.paint_uniform_color([0.8, 0.8, 0.8])
11 |     draw_geometries([inlier_cloud, outlier_cloud])
12 | 
13 | 
14 | if __name__ == "__main__":
15 | 
16 |     print("Load a ply point cloud, print it, and render it")
17 |     start = time.time()
18 |     # pcd = read_point_cloud("results/stuttgart_video/result_sequence_ply/"
19 |     #                        "stuttgart_02_000000_005256_leftImg8bit_naive.ply")
20 |     pcd = read_point_cloud("test_road.ply")
21 |     end = time.time()
22 |     print(end - start)
23 | 
24 |     # print("Downsample the point cloud with a voxel of 0.02")
25 |     # start = time.time()
26 |     # voxel_down_pcd = voxel_down_sample(pcd, voxel_size = 0.02)
27 |     # end = time.time()
28 |     # print(end - start)
29 |     # #draw_geometries([voxel_down_pcd])
30 |     # print()
31 | 
32 |     print("Statistical outlier removal")
33 |     start = time.time()
34 |     cl,ind = statistical_outlier_removal(pcd,
35 |                                          nb_neighbors=20, std_ratio=0.5)
36 |     end = time.time()
37 |     print(end - start)
38 |     display_inlier_outlier(pcd, ind)
39 |     print()
40 | 
41 |     pcd = select_down_sample(pcd, ind)
42 | 
43 |     print("Radius outlier removal")
44 |     start = time.time()
45 |     cl,ind = radius_outlier_removal(pcd,
46 |                                     nb_points=80, radius=0.5)
47 |     end = time.time()
48 |     print(end - start)
49 |     display_inlier_outlier(pcd, ind)
50 | 
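    # (The two passes are complementary: the statistical filter removes
    # sparse measurement noise, and the radius filter then removes small
    # clusters that are locally dense but detached from the main surface.
    # Chaining them in this order mirrors what FrameProcessor.process_frame
    # does in semantic_depth_cityscapes_sequence.py.)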
51 |     # pcd = select_down_sample(pcd, ind)
52 |     # pcd.paint_uniform_color([0, 1, 0])
53 |     # draw_geometries([pcd])
54 | 
--------------------------------------------------------------------------------
/utils/render_ply.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from open3d import *
3 | import glob
4 | import os
5 | import matplotlib.pyplot as plt
6 | 
7 | def render_plys(ply_file, ply_name):
8 |     pcd = read_point_cloud(ply_file)
9 | 
10 |     vis = Visualizer()
11 |     vis.create_window()
12 |     ctr = vis.get_view_control()
13 | 
14 |     param = read_pinhole_camera_parameters("top.json")
15 | 
16 |     vis.add_geometry(pcd)
17 |     ctr.convert_from_pinhole_camera_parameters(param)
18 | 
19 |     ##########################################
20 |     ## UNCOMMENT TO SAVE INTRINSICS AS JSON
21 |     # vis.run()
22 |     # param = vis.get_view_control().convert_to_pinhole_camera_parameters()
23 |     # write_pinhole_camera_parameters("frontal.json", param)
24 |     # exit()
25 |     ##########################################
26 | 
27 | 
28 |     image = vis.capture_screen_float_buffer(True)
29 |     plt.imsave(ply_name, np.asarray(image), dpi = 1)
30 | 
31 |     vis.destroy_window()
32 | 
33 | 
34 | 
35 | if __name__ == "__main__":
36 |     base_folder = "../results/stuttgart_video/"
37 | 
38 |     ply_files = glob.glob(base_folder + "result_sequence_ply/*")
39 |     output_folder = base_folder + "rendered_sequence/"
40 | 
41 |     if not os.path.exists(output_folder):
42 |         os.makedirs(output_folder)
43 | 
44 | 
45 | 
46 |     for ply_file in ply_files:
47 | 
48 |         print(ply_file)
49 | 
50 |         ply_name = os.path.basename(ply_file)
51 |         ply_name = os.path.splitext(ply_name)[0] + '.png'
52 |         ply_name = output_folder + ply_name
53 | 
54 |         render_plys(ply_file, ply_name)
55 | 
56 | 
57 | 
58 | 
59 | 
--------------------------------------------------------------------------------