├── .gitignore
├── LICENSE
├── README.md
├── assets
│   ├── architecture.png
│   ├── banner.gif
│   ├── example
│   │   ├── 000018_09.png
│   │   ├── 000018_10.png
│   │   ├── 000018_11.png
│   │   └── README.md
│   ├── poster.pdf
│   └── spotlight.mp4
├── dataloaders
│   ├── __init__.py
│   ├── dataloader_one_frame.py
│   ├── dataloader_three_frames.py
│   ├── factory.py
│   └── general_dataloader.py
├── evaluators
│   ├── __init__.py
│   ├── depth.py
│   ├── flow.py
│   ├── mask.py
│   └── semantic.py
├── filenames
│   ├── eigen_test.txt
│   ├── kitti_2015_test.txt
│   └── kitti_2015_test_semantic.txt
├── helpers
│   ├── __init__.py
│   ├── bilinear_sampler.py
│   ├── depth_utils.py
│   ├── flow_tool
│   │   ├── LICENSE
│   │   ├── README.md
│   │   ├── __init__.py
│   │   └── flowlib.py
│   └── utilities.py
├── networks
│   ├── __init__.py
│   ├── baseline.py
│   ├── complete_network.py
│   ├── general_network.py
│   ├── network_components.py
│   ├── ops.py
│   └── selflow
│       ├── LICENSE
│       ├── __init__.py
│       ├── selflow_network.py
│       └── warp.py
├── requirements.txt
├── single_inference.py
├── test.py
└── testers
    ├── __init__.py
    ├── error_tester.py
    ├── factory.py
    ├── general_tester.py
    ├── kitti_depth.py
    ├── kitti_flow.py
    ├── kitti_mask.py
    └── kitti_semantic.py
/.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | results* 3 | __pycache__ 4 | tf* 5 | artifacts* 6 | *.o -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship.
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright Fabio Tosi, Filippo Aleotti, Pierluigi Zama Ramirez, Matteo Poggi, 190 | Samuele Salti, Luigi Di Stefano, Stefano Mattoccia 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Distilled semantics for comprehensive scene understanding from videos 2 | Demo code of "Distilled semantics for comprehensive scene understanding from videos", published at [CVPR 2020](http://cvpr2020.thecvf.com/) 3 | 4 | [[Paper]](https://arxiv.org/pdf/2003.14030.pdf) - [[Video]](assets/spotlight.mp4) - [[Poster]](assets/poster.pdf) 5 | 6 | ### Authors 7 | [Fabio Tosi †](https://vision.disi.unibo.it/~ftosi/) - [Filippo Aleotti †](https://filippoaleotti.github.io/website/) - [Pierluigi Zama Ramirez †](https://pierlui92.github.io/) - [Matteo Poggi](https://mattpoggi.github.io/) - [Samuele Salti](https://vision.deis.unibo.it/ssalti/) - [Luigi Di Stefano](https://www.unibo.it/sitoweb/luigi.distefano/) - [Stefano Mattoccia](http://vision.deis.unibo.it/~smatt/) 8 | 9 | † *joint first authorship* 10 | 11 | ![](assets/banner.gif) 12 | 13 | **At the moment, we do not plan to release the training code.** 14 | ## Abstract 15 | Whole understanding of the surroundings is paramount to autonomous systems. Recent works have shown that deep neural networks can learn geometry (depth) and motion (optical flow) from a monocular video without any explicit supervision from ground truth annotations, particularly hard to source for these two tasks. In this paper, we take an additional step toward holistic scene understanding with monocular cameras by learning depth and motion alongside semantics, with supervision for the latter provided by a pre-trained network distilling proxy ground truth images. 16 | We address the three tasks jointly by a) a novel training protocol based on knowledge distillation and self-supervision and b) a compact network architecture which enables efficient scene understanding on both power-hungry GPUs and low-power embedded platforms. 17 | We thoroughly assess the performance of our framework and show that it yields state-of-the-art results for monocular depth estimation, optical flow and motion segmentation. 18 | 19 | ## Architecture 20 | 21 | ![](assets/architecture.png) 22 | 23 | At training time, our final network is an ensemble of several sub-networks (depicted in the figure above), each in charge of a specific task: 24 | * Camera Network (CameraNet): network in charge of intrinsics and pose estimation 25 | * Depth Semantic Network (DSNet): network that infers both depth and semantics for a given scene 26 | * Optical Flow Network (OFNet): teacher optical flow network 27 | * Self-Distilled Optical Flow Network (SD-OFNet): student optical flow network, used at testing time 28 | 29 | At testing time, we rely on DSNet, CameraNet and Self-Distilled OFNet, depending on the task. 30 | 31 | ## Requirements 32 | This project requires TensorFlow 1.8 and Python `2.x` or `3.x`.
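Note that the code relies on the TensorFlow 1.x graph API (e.g., `tf.variable_scope`, `tf.train.string_input_producer`), so it will not run under TensorFlow 2.x. A minimal environment check is sketched below; it is a convenience addition, not part of the original instructions.

```python
# Minimal environment check (a sketch, not part of the original repository):
# verifies that a TensorFlow 1.x build is installed before running the demos.
import tensorflow as tf

print("TensorFlow version:", tf.__version__)
assert tf.__version__.startswith("1."), "This code base targets TensorFlow 1.8, not 2.x"
```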
33 | 34 | You can install all the requirements by running: 35 | ```bash 36 | pip install -r requirements.txt 37 | ``` 38 | 39 | ## Pretrained Models 40 | Pretrained models are available for download: 41 | | Training | Network | Resolution | zip | 42 | |:-:|:--:|:--:|:--:| 43 | | KITTI | OmegaNet | 640x192 | [weights](https://drive.google.com/file/d/15MfMcAIJeg7TV8lqxa69qXF-COLGtTZe/view?usp=sharing) | 44 | | CS + KITTI (EIGEN) | DSNet | 1024x320 | [weights](https://drive.google.com/file/d/1OUcq-ueT5i8-mkVRfpWOcxfH6ss8DY_F/view?usp=sharing) | 45 | | CS | DSNet | 1024x320 | [weights](https://drive.google.com/file/d/1YdteQxo4MZukVlb8gIBZM4Tj_4bLC5F2/view?usp=sharing) | 46 | 47 | ## How To 48 | 49 | ### Run a Single Inference 50 | 51 | You can run OmegaNet on a single image using the following command: 52 | 53 | ``` 54 | python single_inference.py --tgt $tgt_path [--ckpt $ckpt --tasks $tasks --dest $dest --src1 $src1 --src2 $src2] 55 | ``` 56 | where: 57 | * `tgt`: path to the target image (i.e., the image at time t0). Required 58 | * `src1`: path to the src1 image (i.e., the image at time t-1). Required only if `flow` or `mask` is in the tasks list 59 | * `src2`: path to the src2 image (i.e., the image at time t+1). Required only if `flow` or `mask` is in the tasks list 60 | * `ckpt`: path to checkpoint. Required 61 | * `tasks`: space-separated list of tasks to perform. Default [`inverse_depth`, `semantic`, `flow`] 62 | * `dest`: destination folder. Default `results` 63 | 64 | For instance, the following command runs OmegaNet on an example batch from the KITTI 2015 test set: 65 | 66 | ``` 67 | python single_inference.py --src1 assets/example/000018_09.png \ 68 | --tgt assets/example/000018_10.png \ 69 | --src2 assets/example/000018_11.png \ 70 | --ckpt models/omeganet 71 | ``` 72 | 73 | ## Test 74 | To test the network, first generate the artifacts for a specific task, then evaluate them. 75 | 76 | ### Generate Artifacts 77 | You can generate the artifacts for a specific `task` by running the following command: 78 | 79 | ``` 80 | python test.py --task $task --ckpt $ckpt \ 81 | [--cpu --load_only_baseline --filenames_file $filenames ] \ 82 | [--height $height --width $width --dest $dest] 83 | ``` 84 | 85 | where: 86 | 87 | * `task`: task to perform. Can be one of [`depth`, `semantic`, `flow`, `mask`]. Default `depth` 88 | * `filenames`: path to a `.txt` file listing all the images to load. Default `filenames/eigen_test.txt` 89 | * `ckpt`: path to checkpoint. **Required** 90 | * `load_only_baseline`: if set, only the Baseline (CameraNet+DSNet) is loaded; otherwise, the full OmegaNet is loaded. For instance, 91 | when testing a Baseline model the SD-OFNet weights are not available, so they should not be loaded. 92 | * `height`: height of the resized image. Default `192` 93 | * `width`: width of the resized image. Default `640` 94 | * `dest`: where to save artifacts.
Default `artifacts` 95 | * `cpu`: run the test on CPU 96 | 97 | #### Depth Artifacts 98 | You can generate depth artifacts using the following script: 99 | ``` 100 | export datapath="/path/to/full_kitti/" 101 | python test.py --task depth \ 102 | --datapath $datapath \ 103 | --filenames_file filenames/eigen_test.txt \ 104 | --ckpt models/omeganet \ 105 | --load_only_baseline 106 | ``` 107 | where: 108 | * `datapath`: path to your full KITTI dataset 109 | 110 | #### Flow Artifacts 111 | Artifacts for KITTI can be produced with the following command: 112 | 113 | ``` 114 | export datapath="/path/to/3-frames-KITTI/" 115 | python test.py --task flow \ 116 | --datapath $datapath \ 117 | --filenames_file filenames/kitti_2015_test.txt \ 118 | --ckpt models/omeganet 119 | ``` 120 | 121 | where: 122 | * `datapath`: path to your 3-frames extended KITTI dataset 123 | 124 | #### Semantic Artifacts 125 | Artifacts for KITTI can be produced with the following command: 126 | 127 | ``` 128 | export datapath="/path_to_kitti/data_semantics/training/image_2" 129 | python test.py --task semantic \ 130 | --datapath $datapath \ 131 | --filenames_file filenames/kitti_2015_test_semantic.txt \ 132 | --ckpt path_to_ckpts/dsnet \ 133 | --load_only_baseline 134 | ``` 135 | where: 136 | * `datapath`: path to the `image_2` folder of the KITTI 2015 semantic dataset (`data_semantics`) 137 | 138 | #### Motion Mask Artifacts 139 | Artifacts for KITTI can be produced with the following command: 140 | 141 | ``` 142 | export datapath="/path/to/kitti/2015/" 143 | python test.py --task mask \ 144 | --ckpt path_to_ckpts/omeganet \ 145 | --datapath $datapath \ 146 | --filenames_file filenames/kitti_2015_test.txt 147 | ``` 148 | where: 149 | * `datapath`: path to your 3-frames extended KITTI dataset 150 | 151 | ### Run tests 152 | 153 | #### Depth 154 | You can evaluate the depth maps by running: 155 | 156 | ``` 157 | cd evaluators 158 | python depth.py --datapath $datapath \ 159 | --prediction_folder $prediction_folder 160 | ``` 161 | 162 | where: 163 | * `datapath`: path to the full KITTI dataset 164 | * `prediction_folder`: path to the folder with `.npy` files, e.g. `../artifacts/depth/` (a quick sketch for inspecting these files is given at the end of this README) 165 | 166 | #### Flow 167 | To test optical flow artifacts, run the command: 168 | 169 | ``` 170 | cd evaluators 171 | python flow.py --datapath $datapath \ 172 | --prediction_folder $prediction_folder 173 | ``` 174 | 175 | where: 176 | * `datapath`: path to KITTI/2015 177 | * `prediction_folder`: path to flow predictions, e.g. `../artifacts/flow/` 178 | 179 | #### Semantic 180 | To test semantic segmentation, run the following command: 181 | 182 | ``` 183 | cd evaluators 184 | python semantic.py --datapath $datapath \ 185 | --prediction_folder $prediction_folder 186 | ``` 187 | 188 | where: 189 | * `datapath`: path to KITTI/2015/data_semantics 190 | * `prediction_folder`: path to semantic predictions, e.g. `../artifacts/semantic/` 191 | 192 | #### Motion Mask 193 | When motion mask artifacts are ready, you can test them on KITTI: 194 | 195 | ``` 196 | cd evaluators 197 | python mask.py --datapath $datapath \ 198 | --prediction_folder $prediction_folder 199 | ``` 200 | 201 | where: 202 | * `datapath`: path to the KITTI/2015 folder 203 | * `prediction_folder`: path to the predicted motion masks, e.g.
`../artifacts/mask` 204 | 205 | ## Citation 206 | If you find this code useful in your research, please cite: 207 | 208 | ``` 209 | @inproceedings{tosi2020distilled, 210 | title={Distilled semantics for comprehensive scene understanding from videos}, 211 | author={Tosi, Fabio and Aleotti, Filippo and Ramirez, Pierluigi Zama and Poggi, Matteo and Salti, Samuele and Di Stefano, Luigi and Mattoccia, Stefano}, 212 | booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition}, 213 | year={2020} 214 | } 215 | ``` 216 | 217 | ## License 218 | The code is licensed under the Apache 2.0 License. More information is in the `LICENSE` file. 219 | 220 | ## Acknowledgements 221 | 222 | Portions of our code are from other repositories: 223 | 224 | * `Depth evaluation` is from [monodepth](https://github.com/mrharicot/monodepth), for "Unsupervised Monocular Depth Estimation with Left-Right Consistency, by C. Godard, O. Mac Aodha, G. Brostow, CVPR 2017". 225 | * `Flow Tools` are from https://github.com/liruoteng/OpticalFlowToolkit, licensed under the MIT License. 226 | * `Rigid flow estimation` is from [SfMLearner](https://github.com/tinghuiz/SfMLearner/blob/master), for "Unsupervised Learning of Depth and Ego-Motion from Video, by T. Zhou, M. Brown, N. Snavely, D. G. Lowe, CVPR 2017". Code is licensed under the MIT License. 227 | * The `SelFlow` network and utilities are from [SelFlow](https://github.com/ppliuboy/SelFlow), for "SelFlow: Self-Supervised Learning of Optical Flow, by P. Liu, M. Lyu, 228 | I. King, J. Xu, CVPR 2019". Code is licensed under the MIT License. 229 | * The `Teacher semantic network` is [DPC](https://github.com/tensorflow/models/tree/master/research/deeplab), for "Searching for Efficient Multi-Scale Architectures for Dense Image Prediction, by L. C. Chen, M. D. Collins, Y. Zhu, G. Papandreou, B. Zoph, F. Schroff, H. Adam, J. Shlens, Advances in Neural Information Processing Systems 2018". Code is licensed under the Apache v2 License. We used this network to generate proxy semantic maps. 230 | 231 | 232 | We would like to thank all these authors for making their code publicly available and, where applicable, for sharing their pretrained models.
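## Inspecting Depth Artifacts

As a quick sanity check on the depth artifacts generated by `test.py`, the sketch below loads one of the predicted `.npy` disparity maps and saves a color-mapped preview. This is only an illustrative snippet, not part of the original pipeline: the `artifacts/depth/0.npy` path assumes the default `dest` folder and the `<index>.npy` naming read by `evaluators/depth.py`, and `matplotlib` is assumed to be installed.

```python
# Illustrative sketch (not part of the original pipeline): inspect one of the
# depth artifacts produced by `python test.py --task depth ...`.
import numpy as np
import matplotlib.pyplot as plt

# Assumed location: default `dest` folder and the `<index>.npy` naming
# expected by evaluators/depth.py.
disparity = np.load("artifacts/depth/0.npy").squeeze()

print("shape:", disparity.shape, "range:", disparity.min(), "-", disparity.max())
plt.imsave("disparity_0.png", disparity, cmap="magma")  # save a color-mapped preview
```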
233 | -------------------------------------------------------------------------------- /assets/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVLAB-Unibo/omeganet/7e23372923ee53745ba6bbb0c7921d7bb4eea01a/assets/architecture.png -------------------------------------------------------------------------------- /assets/banner.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVLAB-Unibo/omeganet/7e23372923ee53745ba6bbb0c7921d7bb4eea01a/assets/banner.gif -------------------------------------------------------------------------------- /assets/example/000018_09.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVLAB-Unibo/omeganet/7e23372923ee53745ba6bbb0c7921d7bb4eea01a/assets/example/000018_09.png -------------------------------------------------------------------------------- /assets/example/000018_10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVLAB-Unibo/omeganet/7e23372923ee53745ba6bbb0c7921d7bb4eea01a/assets/example/000018_10.png -------------------------------------------------------------------------------- /assets/example/000018_11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVLAB-Unibo/omeganet/7e23372923ee53745ba6bbb0c7921d7bb4eea01a/assets/example/000018_11.png -------------------------------------------------------------------------------- /assets/example/README.md: -------------------------------------------------------------------------------- 1 | Images in this folder belong to the [KITTI](http://www.cvlibs.net/datasets/kitti/) 2015 dataset, and are used for demonstration purposes only.
2 | 3 | Citations: 4 | 5 | ``` 6 | @ARTICLE{Menze2018JPRS, 7 | author = {Moritz Menze and Christian Heipke and Andreas Geiger}, 8 | title = {Object Scene Flow}, 9 | journal = {ISPRS Journal of Photogrammetry and Remote Sensing (JPRS)}, 10 | year = {2018} 11 | } 12 | @INPROCEEDINGS{Menze2015ISA, 13 | author = {Moritz Menze and Christian Heipke and Andreas Geiger}, 14 | title = {Joint 3D Estimation of Vehicles and Scene Flow}, 15 | booktitle = {ISPRS Workshop on Image Sequence Analysis (ISA)}, 16 | year = {2015} 17 | } 18 | ``` -------------------------------------------------------------------------------- /assets/poster.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVLAB-Unibo/omeganet/7e23372923ee53745ba6bbb0c7921d7bb4eea01a/assets/poster.pdf -------------------------------------------------------------------------------- /assets/spotlight.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVLAB-Unibo/omeganet/7e23372923ee53745ba6bbb0c7921d7bb4eea01a/assets/spotlight.mp4 -------------------------------------------------------------------------------- /dataloaders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVLAB-Unibo/omeganet/7e23372923ee53745ba6bbb0c7921d7bb4eea01a/dataloaders/__init__.py -------------------------------------------------------------------------------- /dataloaders/dataloader_one_frame.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Fabio Tosi, Filippo Aleotti, Pierluigi Zama Ramirez, Matteo Poggi, 2 | # Samuele Salti, Luigi Di Stefano, Stefano Mattoccia 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | """ 17 | Dataloader suited for 1 frame tasks 18 | """ 19 | import tensorflow as tf 20 | import numpy as np 21 | from dataloaders.general_dataloader import GeneralDataloader 22 | 23 | 24 | class TestDataloader(GeneralDataloader): 25 | def build(self): 26 | input_queue = tf.train.string_input_producer( 27 | [self.filenames_file], shuffle=False 28 | ) 29 | line_reader = tf.TextLineReader() 30 | _, line = line_reader.read(input_queue) 31 | split_line = tf.string_split([line]).values 32 | 33 | with tf.variable_scope("tester_dataloader_one_frame"): 34 | tgt_img_path = tf.string_join([self.datapath, split_line[0]]) 35 | tgt_img_o = self.read_image(tgt_img_path) 36 | self.tgt_img_batch = tf.stack([tgt_img_o], 0) 37 | self.tgt_img_batch.set_shape([1, None, None, 3]) 38 | 39 | def get_next_batch(self): 40 | with tf.variable_scope("get_next_batch"): 41 | batch = { 42 | "src_img_1": self.tgt_img_batch, 43 | "tgt_img": self.tgt_img_batch, 44 | "src_img_2": self.tgt_img_batch, 45 | } 46 | return batch 47 | -------------------------------------------------------------------------------- /dataloaders/dataloader_three_frames.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Fabio Tosi, Filippo Aleotti, Pierluigi Zama Ramirez, Matteo Poggi, 2 | # Samuele Salti, Luigi Di Stefano, Stefano Mattoccia 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | """ 17 | Dataloader suited for 3 frames tasks 18 | """ 19 | import tensorflow as tf 20 | import numpy as np 21 | from dataloaders.general_dataloader import GeneralDataloader 22 | 23 | 24 | class TestDataloader(GeneralDataloader): 25 | def build(self): 26 | input_queue = tf.train.string_input_producer( 27 | [self.filenames_file], shuffle=False 28 | ) 29 | line_reader = tf.TextLineReader() 30 | _, line = line_reader.read(input_queue) 31 | split_line = tf.string_split([line]).values 32 | 33 | with tf.variable_scope("tester_dataloader_three_frames"): 34 | with tf.variable_scope("image_reader"): 35 | src_img_1_path = tf.string_join([self.datapath, split_line[0]]) 36 | tgt_img_path = tf.string_join([self.datapath, split_line[1]]) 37 | src_img_2_path = tf.string_join([self.datapath, split_line[2]]) 38 | 39 | src_img_1_o = self.read_image(src_img_1_path) 40 | tgt_img_o = self.read_image(tgt_img_path) 41 | src_img_2_o = self.read_image(src_img_2_path) 42 | 43 | with tf.variable_scope("batch_creator"): 44 | self.src_img_1_batch = tf.stack([src_img_1_o], 0) 45 | self.tgt_img_batch = tf.stack([tgt_img_o], 0) 46 | self.src_img_2_batch = tf.stack([src_img_2_o], 0) 47 | 48 | with tf.variable_scope("shape_setter"): 49 | self.src_img_1_batch.set_shape([1, None, None, 3]) 50 | self.tgt_img_batch.set_shape([1, None, None, 3]) 51 | self.src_img_2_batch.set_shape([1, None, None, 3]) 52 | 53 | def get_next_batch(self): 54 | with tf.variable_scope("get_next_batch"): 55 | batch = { 56 | "src_img_1": self.src_img_1_batch, 57 | "tgt_img": self.tgt_img_batch, 58 | "src_img_2": self.src_img_2_batch, 59 | } 60 | return batch 61 | -------------------------------------------------------------------------------- /dataloaders/factory.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Fabio Tosi, Filippo Aleotti, Pierluigi Zama Ramirez, Matteo Poggi, 2 | # Samuele Salti, Luigi Di Stefano, Stefano Mattoccia 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """ 17 | Factory for dataloaders 18 | """ 19 | 20 | import tensorflow as tf 21 | import numpy as np 22 | from dataloaders import dataloader_one_frame, dataloader_three_frames 23 | 24 | 25 | TESTER_DATALOADERS_FACTORY = { 26 | "semantic": dataloader_one_frame.TestDataloader, 27 | "depth": dataloader_one_frame.TestDataloader, 28 | "flow": dataloader_three_frames.TestDataloader, 29 | "mask": dataloader_three_frames.TestDataloader, 30 | } 31 | 32 | 33 | def get_dataloader(task): 34 | """Return the desired dataloader. 
35 | :param task: task to perform 36 | :return dataloader: dataloader suited for the task 37 | """ 38 | assert task in TESTER_DATALOADERS_FACTORY.keys() 39 | return TESTER_DATALOADERS_FACTORY[task] 40 | -------------------------------------------------------------------------------- /dataloaders/general_dataloader.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Fabio Tosi, Filippo Aleotti, Pierluigi Zama Ramirez, Matteo Poggi, 2 | # Samuele Salti, Luigi Di Stefano, Stefano Mattoccia 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """ 17 | Dataloader for Test 18 | """ 19 | import tensorflow as tf 20 | import numpy as np 21 | from collections import namedtuple 22 | 23 | 24 | dataloader_parameters = namedtuple("dataloader_parameters", "height, width, task") 25 | 26 | 27 | def string_length_tf(t): 28 | return tf.py_func(len, [t], [tf.int64]) 29 | 30 | 31 | class GeneralDataloader(object): 32 | def __init__( 33 | self, datapath, filenames_file, params, 34 | ): 35 | if not datapath.endswith("/"): 36 | datapath = datapath + "/" 37 | self.datapath = datapath 38 | self.params = params 39 | self.filenames_file = filenames_file 40 | self.src_img_1_batch = None 41 | self.src_img_2_batch = None 42 | self.tgt_img_batch = None 43 | self.build() 44 | 45 | def build(self): 46 | pass 47 | 48 | def get_next_batch(self): 49 | pass 50 | 51 | def read_image(self, image_path): 52 | """Read an image from the file system 53 | :params image_path: string, path to image 54 | """ 55 | with tf.variable_scope("read_image"): 56 | path_length = string_length_tf(image_path)[0] 57 | file_extension = tf.substr(image_path, path_length - 3, 3) 58 | file_cond = tf.equal(file_extension, "jpg") 59 | 60 | image = tf.cond( 61 | file_cond, 62 | lambda: tf.image.decode_jpeg(tf.read_file(image_path)), 63 | lambda: tf.image.decode_png(tf.read_file(image_path)), 64 | ) 65 | 66 | self.image_w = tf.shape(image)[1] 67 | self.image_h = tf.shape(image)[0] 68 | 69 | image = tf.image.convert_image_dtype(image, tf.float32) 70 | image = tf.image.resize_images( 71 | image, 72 | [self.params.height, self.params.width], 73 | tf.image.ResizeMethod.AREA, 74 | ) 75 | return image 76 | -------------------------------------------------------------------------------- /evaluators/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Fabio Tosi, Filippo Aleotti, Pierluigi Zama Ramirez, Matteo Poggi, 2 | # Samuele Salti, Luigi Di Stefano, Stefano Mattoccia 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | -------------------------------------------------------------------------------- /evaluators/depth.py: -------------------------------------------------------------------------------- 1 | """ 2 | Depth evaluation for KITTI Eigen split 3 | This code is based on https://github.com/mrharicot/monodepth/blob/master/utils/evaluate_kitti.py 4 | We would like to thank C. Godard and other authors for sharing their code 5 | """ 6 | from __future__ import division 7 | import os 8 | import argparse 9 | import numpy as np 10 | from tqdm import tqdm 11 | import sys 12 | 13 | sys.path.insert(0, os.path.abspath("..")) 14 | from helpers import depth_utils 15 | 16 | 17 | parser = argparse.ArgumentParser(description="Evaluation on the KITTI dataset") 18 | parser.add_argument( 19 | "--prediction_folder", type=str, help="path to estimated disparities", required=True 20 | ) 21 | parser.add_argument( 22 | "--datapath", type=str, help="path to ground truth disparities", required=True 23 | ) 24 | parser.add_argument( 25 | "--min_depth", type=float, help="minimum depth for evaluation", default=1e-3 26 | ) 27 | parser.add_argument( 28 | "--max_depth", type=float, help="maximum depth for evaluation", default=80 29 | ) 30 | parser.add_argument( 31 | "--filename_file", 32 | type=str, 33 | help="path to filename file", 34 | default="../filenames/eigen_test.txt", 35 | ) 36 | args = parser.parse_args() 37 | 38 | if __name__ == "__main__": 39 | print("Depth evaluation is started: loading ground-truths and predictions") 40 | pred_disparities = [] 41 | num_samples = 697 42 | 43 | for t_id in range(num_samples): 44 | pred_disparities.append( 45 | np.load(os.path.join(args.prediction_folder, str(t_id) + ".npy")) 46 | ) 47 | datapath = args.datapath 48 | if not datapath.endswith("/"): 49 | datapath += "/" 50 | test_files = depth_utils.read_text_lines(args.filename_file) 51 | gt_files, gt_calib, im_sizes, im_files, cams = depth_utils.read_file_data( 52 | test_files, datapath 53 | ) 54 | 55 | num_test = len(im_files) 56 | gt_depths = [] 57 | pred_depths = [] 58 | for t_id in range(num_samples): 59 | camera_id = cams[t_id] # 2 is left, 3 is right 60 | depth = depth_utils.generate_depth_map( 61 | gt_calib[t_id], gt_files[t_id], im_sizes[t_id], camera_id, False, True 62 | ) 63 | gt_depths.append(depth.astype(np.float32)) 64 | 65 | disp_pred = pred_disparities[t_id].squeeze() 66 | 67 | # need to convert from disparity to depth 68 | focal_length, baseline = depth_utils.get_focal_length_baseline( 69 | gt_calib[t_id], camera_id 70 | ) 71 | depth_pred = (baseline * focal_length) / disp_pred 72 | depth_pred[np.isinf(depth_pred)] = 0 73 | 74 | pred_depths.append(depth_pred) 75 | 76 | rms = np.zeros(num_samples, np.float32) 77 | log_rms = np.zeros(num_samples, np.float32) 78 | abs_rel = np.zeros(num_samples, np.float32) 79 | sq_rel = np.zeros(num_samples, np.float32) 80 | d1_all = np.zeros(num_samples, np.float32) 81 | a1 = np.zeros(num_samples, np.float32) 82 | a2 = np.zeros(num_samples, np.float32) 83 | a3 = np.zeros(num_samples, np.float32) 84 | 85 | with tqdm(total=num_samples) as pbar: 86 | 
for i in range(num_samples): 87 | 88 | gt_depth = gt_depths[i] 89 | pred_depth = pred_depths[i] 90 | mask = np.logical_and(gt_depth > args.min_depth, gt_depth < args.max_depth) 91 | 92 | gt_height, gt_width = gt_depth.shape 93 | crop = np.array( 94 | [ 95 | 0.40810811 * gt_height, 96 | 0.99189189 * gt_height, 97 | 0.03594771 * gt_width, 98 | 0.96405229 * gt_width, 99 | ] 100 | ).astype(np.int32) 101 | 102 | crop_mask = np.zeros(mask.shape) 103 | crop_mask[crop[0] : crop[1], crop[2] : crop[3]] = 1 104 | mask = np.logical_and(mask, crop_mask) 105 | 106 | # Scale matching 107 | scalor = np.median(gt_depth[mask]) / np.median(pred_depth[mask]) 108 | pred_depth[mask] *= scalor 109 | 110 | pred_depth[pred_depth < args.min_depth] = args.min_depth 111 | pred_depth[pred_depth > args.max_depth] = args.max_depth 112 | 113 | ( 114 | abs_rel[i], 115 | sq_rel[i], 116 | rms[i], 117 | log_rms[i], 118 | a1[i], 119 | a2[i], 120 | a3[i], 121 | ) = depth_utils.compute_errors(gt_depth[mask], pred_depth[mask]) 122 | pbar.update(1) 123 | print( 124 | "{:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}".format( 125 | "abs_rel", "sq_rel", "rms", "log_rms", "d1_all", "a1", "a2", "a3" 126 | ) 127 | ) 128 | print( 129 | "{:10.4f}, {:10.4f}, {:10.3f}, {:10.3f}, {:10.3f}, {:10.3f}, {:10.3f}, {:10.3f}".format( 130 | abs_rel.mean(), 131 | sq_rel.mean(), 132 | rms.mean(), 133 | log_rms.mean(), 134 | d1_all.mean(), 135 | a1.mean(), 136 | a2.mean(), 137 | a3.mean(), 138 | ) 139 | ) 140 | -------------------------------------------------------------------------------- /evaluators/flow.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import cv2 3 | import os 4 | import numpy as np 5 | import argparse 6 | import sys 7 | 8 | sys.path.insert(0, os.path.abspath("..")) 9 | import helpers.flow_tool.flowlib as fl 10 | 11 | parser = argparse.ArgumentParser() 12 | parser.add_argument("--datapath", type=str, help="Path to kitti stereo dataset") 13 | parser.add_argument("--prediction_folder", type=str, help="Path to the flow prediction") 14 | args = parser.parse_args() 15 | 16 | 17 | def main(): 18 | img_num = 200 19 | noc_epe = np.zeros(img_num, dtype=np.float) 20 | noc_acc = np.zeros(img_num, dtype=np.float) 21 | occ_epe = np.zeros(img_num, dtype=np.float) 22 | occ_acc = np.zeros(img_num, dtype=np.float) 23 | 24 | eval_log = os.path.join(args.prediction_folder, "flow_result.txt") 25 | with open(eval_log, "w") as el: 26 | for idx in range(img_num): 27 | # read groundtruth flow 28 | gt_noc_fn = args.datapath + "training/flow_noc/%.6d_10.png" % idx 29 | gt_occ_fn = args.datapath + "training/flow_occ/%.6d_10.png" % idx 30 | gt_noc_flow = fl.read_flow(gt_noc_fn) 31 | gt_occ_flow = fl.read_flow(gt_occ_fn) 32 | 33 | # read predicted flow (in png format) 34 | pred_flow_fn = args.prediction_folder + "%.6d_10.png" % idx 35 | pred_flow = fl.read_flow(pred_flow_fn) 36 | 37 | # resize pred_flow to the same size as gt_flow 38 | dst_h = gt_noc_flow.shape[0] 39 | dst_w = gt_noc_flow.shape[1] 40 | 41 | # evaluation 42 | (single_noc_epe, single_noc_acc) = fl.evaluate_kitti_flow( 43 | gt_noc_flow, pred_flow, None 44 | ) 45 | (single_occ_epe, single_occ_acc) = fl.evaluate_kitti_flow( 46 | gt_occ_flow, pred_flow, None 47 | ) 48 | noc_epe[idx] = single_noc_epe 49 | noc_acc[idx] = single_noc_acc 50 | occ_epe[idx] = single_occ_epe 51 | occ_acc[idx] = single_occ_acc 52 | output_line = ( 53 | "Flow %.6d Noc EPE = %.4f" 54 | + " Noc ACC = %.4f" 55 | + " Occ EPE = %.4f" 56 | + 
" Occ ACC = %.4f\n" 57 | ) 58 | el.write( 59 | output_line 60 | % (idx, noc_epe[idx], noc_acc[idx], occ_epe[idx], occ_acc[idx]) 61 | ) 62 | 63 | noc_mean_epe = np.mean(noc_epe) 64 | noc_mean_acc = (1 - np.mean(noc_acc)) * 100.0 65 | occ_mean_epe = np.mean(occ_epe) 66 | occ_mean_acc = (1 - np.mean(occ_acc)) * 100.0 67 | 68 | print("Mean Noc EPE = %.2f " % noc_mean_epe) 69 | print("F1 Noc = %.2f " % noc_mean_acc) 70 | print("Mean Occ EPE = %.2f " % occ_mean_epe) 71 | print("F1 Occ = %.2f " % occ_mean_acc) 72 | 73 | 74 | main() 75 | -------------------------------------------------------------------------------- /evaluators/mask.py: -------------------------------------------------------------------------------- 1 | """ 2 | Adopted from https://github.com/martinkersner/py_img_seg_eval 3 | """ 4 | import os 5 | import numpy as np 6 | import scipy.misc as sm 7 | import cv2 8 | import matplotlib.pyplot as plt 9 | import multiprocessing 10 | import argparse 11 | from PIL import Image 12 | 13 | from tensorflow.python.platform import flags 14 | 15 | parser = argparse.ArgumentParser(description="Argument parser") 16 | parser.add_argument("--datapath", type=str, help="path to KITTI folder") 17 | parser.add_argument("--prediction_folder", type=str, help="path to predicted masks") 18 | parser.add_argument("--rescale", action="store_true", help="upsample motion mask") 19 | 20 | args = parser.parse_args() 21 | 22 | 23 | class EvalSegErr(Exception): 24 | def __init__(self, value): 25 | self.value = value 26 | 27 | def __str__(self): 28 | return repr(self.value) 29 | 30 | 31 | def pixel_accuracy(eval_segm, gt_segm): 32 | """ 33 | sum_i(n_ii) / sum_i(t_i) 34 | """ 35 | 36 | check_size(eval_segm, gt_segm) 37 | 38 | cl, n_cl = extract_classes(gt_segm) 39 | eval_mask, gt_mask = extract_both_masks(eval_segm, gt_segm, cl, n_cl) 40 | 41 | sum_n_ii = 0 42 | sum_t_i = 0 43 | 44 | for i, c in enumerate(cl): 45 | curr_eval_mask = eval_mask[i, :, :] 46 | curr_gt_mask = gt_mask[i, :, :] 47 | 48 | sum_n_ii += np.sum(np.logical_and(curr_eval_mask, curr_gt_mask)) 49 | sum_t_i += np.sum(curr_gt_mask) 50 | 51 | if sum_t_i == 0: 52 | pixel_accuracy_ = 0 53 | else: 54 | pixel_accuracy_ = sum_n_ii / sum_t_i 55 | 56 | return pixel_accuracy_ 57 | 58 | 59 | def mean_accuracy(eval_segm, gt_segm): 60 | """ 61 | (1/n_cl) sum_i(n_ii/t_i) 62 | """ 63 | 64 | check_size(eval_segm, gt_segm) 65 | 66 | cl, n_cl = extract_classes(gt_segm) 67 | eval_mask, gt_mask = extract_both_masks(eval_segm, gt_segm, cl, n_cl) 68 | 69 | accuracy = list([0]) * n_cl 70 | 71 | for i, c in enumerate(cl): 72 | curr_eval_mask = eval_mask[i, :, :] 73 | curr_gt_mask = gt_mask[i, :, :] 74 | 75 | n_ii = np.sum(np.logical_and(curr_eval_mask, curr_gt_mask)) 76 | t_i = np.sum(curr_gt_mask) 77 | 78 | if t_i != 0: 79 | accuracy[i] = n_ii / t_i 80 | 81 | mean_accuracy_ = np.mean(accuracy) 82 | return mean_accuracy_ 83 | 84 | 85 | def mean_IU(eval_segm, gt_segm): 86 | """ 87 | (1/n_cl) * sum_i(n_ii / (t_i + sum_j(n_ji) - n_ii)) 88 | """ 89 | 90 | check_size(eval_segm, gt_segm) 91 | 92 | cl, n_cl = union_classes(eval_segm, gt_segm) 93 | _, n_cl_gt = extract_classes(gt_segm) 94 | eval_mask, gt_mask = extract_both_masks(eval_segm, gt_segm, cl, n_cl) 95 | 96 | IU = list([0]) * n_cl 97 | 98 | for i, c in enumerate(cl): 99 | curr_eval_mask = eval_mask[i, :, :] 100 | curr_gt_mask = gt_mask[i, :, :] 101 | 102 | if (np.sum(curr_eval_mask) == 0) or (np.sum(curr_gt_mask) == 0): 103 | continue 104 | 105 | n_ii = np.sum(np.logical_and(curr_eval_mask, curr_gt_mask)) 106 | t_i = 
np.sum(curr_gt_mask) 107 | n_ij = np.sum(curr_eval_mask) 108 | 109 | IU[i] = n_ii / (t_i + n_ij - n_ii) 110 | 111 | mean_IU_ = np.sum(IU) / n_cl_gt 112 | return mean_IU_, np.array(IU) 113 | 114 | 115 | def frequency_weighted_IU(eval_segm, gt_segm): 116 | """ 117 | sum_k(t_k)^(-1) * sum_i((t_i*n_ii)/(t_i + sum_j(n_ji) - n_ii)) 118 | """ 119 | 120 | check_size(eval_segm, gt_segm) 121 | 122 | cl, n_cl = union_classes(eval_segm, gt_segm) 123 | eval_mask, gt_mask = extract_both_masks(eval_segm, gt_segm, cl, n_cl) 124 | 125 | frequency_weighted_IU_ = list([0]) * n_cl 126 | 127 | for i, c in enumerate(cl): 128 | curr_eval_mask = eval_mask[i, :, :] 129 | curr_gt_mask = gt_mask[i, :, :] 130 | 131 | if (np.sum(curr_eval_mask) == 0) or (np.sum(curr_gt_mask) == 0): 132 | continue 133 | 134 | n_ii = np.sum(np.logical_and(curr_eval_mask, curr_gt_mask)) 135 | t_i = np.sum(curr_gt_mask) 136 | n_ij = np.sum(curr_eval_mask) 137 | 138 | frequency_weighted_IU_[i] = (t_i * n_ii) / (t_i + n_ij - n_ii) 139 | 140 | sum_k_t_k = get_pixel_area(eval_segm) 141 | 142 | frequency_weighted_IU_ = np.sum(frequency_weighted_IU_) / sum_k_t_k 143 | return frequency_weighted_IU_ 144 | 145 | 146 | """ 147 | Auxiliary functions used during evaluation. 148 | """ 149 | 150 | 151 | def get_pixel_area(segm): 152 | return segm.shape[0] * segm.shape[1] 153 | 154 | 155 | def extract_both_masks(eval_segm, gt_segm, cl, n_cl): 156 | eval_mask = extract_masks(eval_segm, cl, n_cl) 157 | gt_mask = extract_masks(gt_segm, cl, n_cl) 158 | 159 | return eval_mask, gt_mask 160 | 161 | 162 | def extract_classes(segm): 163 | cl = np.unique(segm) 164 | n_cl = len(cl) 165 | 166 | return cl, n_cl 167 | 168 | 169 | def union_classes(eval_segm, gt_segm): 170 | eval_cl, _ = extract_classes(eval_segm) 171 | gt_cl, _ = extract_classes(gt_segm) 172 | 173 | cl = np.union1d(eval_cl, gt_cl) 174 | n_cl = len(cl) 175 | 176 | return cl, n_cl 177 | 178 | 179 | def extract_masks(segm, cl, n_cl): 180 | h, w = segm_size(segm) 181 | masks = np.zeros((n_cl, h, w)) 182 | 183 | for i, c in enumerate(cl): 184 | masks[i, :, :] = segm == c 185 | 186 | return masks 187 | 188 | 189 | def segm_size(segm): 190 | try: 191 | height = segm.shape[0] 192 | width = segm.shape[1] 193 | except IndexError: 194 | raise 195 | 196 | return height, width 197 | 198 | 199 | def check_size(eval_segm, gt_segm): 200 | h_e, w_e = segm_size(eval_segm) 201 | h_g, w_g = segm_size(gt_segm) 202 | 203 | if (h_e != h_g) or (w_e != w_g): 204 | raise EvalSegErr("DiffDim: Different dimensions of matrices!") 205 | 206 | 207 | def read_mask_gt_worker(i): 208 | path = os.path.join(args.datapath, "training/obj_map", str(i).zfill(6) + "_10.png") 209 | return sm.imread(path, -1) 210 | 211 | 212 | def load_gt_mask(): 213 | results = [read_mask_gt_worker(i) for i in range(200)] 214 | gt_masks = [] 215 | for m in results: 216 | m[m > 0.0] = 1.0 217 | gt_masks.append(m) 218 | 219 | return gt_masks 220 | 221 | 222 | def eval_mask(pred_masks, gt_masks): 223 | grey_cmap = plt.get_cmap("Greys") 224 | 225 | pa_res, ma_res, mIU_res, fwIU_res = 0.0, 0.0, 0.0, 0.0 226 | IU_res = np.array([0.0, 0.0]) 227 | 228 | for i in range(200): 229 | gt_mask = gt_masks[i] 230 | pred_mask = pred_masks[i] 231 | 232 | if args.rescale: 233 | H, W = gt_mask.shape[0:2] 234 | pred_mask = cv2.resize(pred_mask, (W, H), interpolation=cv2.INTER_NEAREST) 235 | 236 | th = 0.5 237 | 238 | pred_mask[pred_mask > th] = 1.0 239 | pred_mask[pred_mask <= th] = 0.0 240 | # pred_mask = 1.0 - pred_mask 241 | 242 | pa_res += pixel_accuracy(pred_mask, gt_mask) 
243 | ma_res += mean_accuracy(pred_mask, gt_mask) 244 | 245 | mIU, IU = mean_IU(pred_mask, gt_mask) 246 | mIU_res += mIU 247 | IU_res += IU 248 | 249 | fwIU_res += frequency_weighted_IU(pred_mask, gt_mask) 250 | 251 | return ( 252 | pa_res / 200.0, 253 | ma_res / 200.0, 254 | mIU_res / 200.0, 255 | fwIU_res / 200.0, 256 | IU_res / 200.0, 257 | ) 258 | 259 | 260 | def read_mask_pred_worker(i): 261 | img = ( 262 | cv2.imread(args.prediction_folder + "/" + str(i).zfill(6) + "_10.png", -1) / 255.0 263 | ) 264 | return img 265 | 266 | 267 | def load_pred_mask(): 268 | results = [read_mask_pred_worker(i) for i in range(200)] 269 | 270 | pred_masks = [] 271 | for m in results: 272 | pred_masks.append(m) 273 | 274 | return pred_masks 275 | 276 | 277 | def evaluate(): 278 | gt_masks = load_gt_mask() 279 | predicted_masks = load_pred_mask() 280 | 281 | pa, ma, miu, fwiu, iu = eval_mask(predicted_masks, gt_masks) 282 | print( 283 | "PA:{:3.2f} MA:{:3.2f} mIU:{:3.2f} fwIU:{:3.2f} IU:[{:3.2f}, {:3.2f}]".format( 284 | pa, ma, miu, fwiu, iu[0], iu[1] 285 | ) 286 | ) 287 | 288 | 289 | if __name__ == "__main__": 290 | evaluate() 291 | -------------------------------------------------------------------------------- /evaluators/semantic.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Fabio Tosi, Filippo Aleotti, Pierluigi Zama Ramirez, Matteo Poggi, 2 | # Samuele Salti, Luigi Di Stefano, Stefano Mattoccia 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | 17 | from __future__ import print_function 18 | 19 | import tensorflow as tf 20 | import cv2 21 | import numpy as np 22 | import argparse 23 | import os 24 | 25 | id2trainId = { 26 | 0 : 255, 27 | 1 : 255, 28 | 2 : 255, 29 | 3 : 255, 30 | 4 : 255, 31 | 5 : 255, 32 | 6 : 255, 33 | 7 : 0, 34 | 8 : 1, 35 | 9 : 255, 36 | 10 : 255, 37 | 11 : 2, 38 | 12 : 3, 39 | 13 : 4, 40 | 14 : 255, 41 | 15 : 255, 42 | 16 : 255, 43 | 17 : 5, 44 | 18 : 255, 45 | 19 : 6, 46 | 20 : 7, 47 | 21 : 8, 48 | 22 : 9, 49 | 23 : 10, 50 | 24 : 11, 51 | 25 : 12, 52 | 26 : 13, 53 | 27 : 14, 54 | 28 : 15, 55 | 29 : 255, 56 | 30 : 255, 57 | 31 : 16, 58 | 32 : 17, 59 | 33 : 18 60 | } 61 | 62 | 63 | trainId2cat = { 64 | 0 : 0, 65 | 1 : 0, 66 | 2 : 1, 67 | 3 : 1, 68 | 4 : 1, 69 | 5 : 2, 70 | 6 : 2, 71 | 7 : 2, 72 | 8 : 3, 73 | 9 : 3, 74 | 10 : 4, 75 | 11 : 5, 76 | 12 : 5, 77 | 13 : 6, 78 | 14 : 6, 79 | 15 : 6, 80 | 16 : 6, 81 | 17 : 6, 82 | 18 : 6, 83 | } 84 | 85 | trainId2name = { 86 | 0 : "road", 87 | 1 : "sidewalk", 88 | 2 : "building", 89 | 3 : "wall", 90 | 4 : "fence", 91 | 5 : "pole", 92 | 6 : "traffic_light", 93 | 7 : "traffic_sign", 94 | 8 : "vegetation", 95 | 9 : "terrain", 96 | 10 : "sky", 97 | 11 : "person", 98 | 12 : "rider", 99 | 13 : "car", 100 | 14 : "truck", 101 | 15 : "bus", 102 | 16 : "train", 103 | 17 : "motorcycle", 104 | 18 : "bicycle" 105 | } 106 | 107 | num_train_classes = 19 108 | num_categories = 7 109 | num_total_classes = 34 110 | 111 | parser = argparse.ArgumentParser(description="Evaluation Semantic") 112 | ### PATHS 113 | parser.add_argument( 114 | "--dataset", 115 | dest="dataset", 116 | choices=["kitti", "cityscapes"], 117 | default="kitti", 118 | help="kitti, cityscapes", 119 | ) 120 | parser.add_argument( 121 | "--datapath", 122 | type=str, 123 | help="Path to dataset (e.g. 
data_semantics Kitti 2015)" 124 | ) 125 | parser.add_argument( 126 | "--prediction_folder", 127 | type=str, 128 | help="Path to predictions" 129 | ) 130 | parser.add_argument( 131 | "--filename_file", 132 | default="../filenames/kitti_2015_test_semantic.txt", 133 | help="Path to txt input list" 134 | ) 135 | ### PARAMS 136 | parser.add_argument( 137 | "--ignore_label", 138 | type=int, 139 | default=255, 140 | help="label to ignore in evaluation", 141 | ) 142 | parser.add_argument( 143 | "--format_pred", 144 | type=str, 145 | choices=["id", "trainId"], 146 | default="trainId", 147 | help="encoding of predictions, trainId or id", 148 | ) 149 | parser.add_argument( 150 | "--format_gt", 151 | type=str, 152 | choices=["id", "trainId"], 153 | default="id", 154 | help="encoding of gt, trainId or id", 155 | ) 156 | args = parser.parse_args() 157 | 158 | 159 | def convert_labels(sem, mapping): 160 | p = tf.cast(sem, tf.uint8) 161 | m = tf.ones_like(p) * 255 162 | for i in range(0, len(mapping)): 163 | mi = tf.multiply(tf.ones_like(p), mapping[i]) 164 | m = tf.where(tf.equal(p, i), mi, m) 165 | return m 166 | 167 | 168 | prediction_placeholder = tf.placeholder(tf.int32) 169 | prediction_placeholder.set_shape([None, None, 1]) 170 | gt_placeholder = tf.placeholder(tf.int32) 171 | 172 | gt = gt_placeholder 173 | prediction = prediction_placeholder 174 | 175 | if args.format_pred == "id": 176 | prediction = convert_labels(prediction, id2trainId) 177 | if args.format_gt == "id": 178 | gt = convert_labels(gt, id2trainId) 179 | 180 | 181 | pred_cat = convert_labels(prediction, trainId2cat) 182 | gt_cat = convert_labels(gt, trainId2cat) 183 | 184 | ### INIT WEIGHTS MIOU 185 | weightsValue = tf.to_float(tf.not_equal(gt, args.ignore_label)) 186 | ### IGNORE LABELS TO 0, WE HAVE ALREADY MASKED THOSE PIXELS WITH WEIGHTS 0### 187 | gt = tf.where(tf.equal(gt, args.ignore_label), tf.zeros_like(gt), gt) 188 | prediction = tf.where( 189 | tf.equal(prediction, args.ignore_label), tf.zeros_like(prediction), prediction 190 | ) 191 | ### ACCURACY ### 192 | acc, update_op_acc = tf.metrics.accuracy(gt, prediction, weights=weightsValue) 193 | ### MIOU ### 194 | miou, update_op = tf.metrics.mean_iou( 195 | labels=tf.reshape(gt, [-1]), 196 | predictions=tf.reshape(prediction, [-1]), 197 | num_classes=num_train_classes, 198 | weights=tf.reshape(weightsValue, [-1]), 199 | ) 200 | 201 | # CATEGORIES 202 | ### INIT WEIGHTS MIOU 203 | weightsValue_cat = tf.to_float(tf.not_equal(gt_cat, args.ignore_label)) 204 | ### IGNORE LABELS TO 0, WE HAVE ALREADY MASKED THOSE PIXELS WITH WEIGHTS 0### 205 | gt_cat = tf.where(tf.equal(gt_cat, args.ignore_label), tf.zeros_like(gt_cat), gt_cat) 206 | pred_cat = tf.where( 207 | tf.equal(pred_cat, args.ignore_label), tf.zeros_like(pred_cat), pred_cat 208 | ) 209 | ### MIOU ### 210 | miou_cat, update_op_cat = tf.metrics.mean_iou( 211 | labels=tf.reshape(gt_cat, [-1]), 212 | predictions=tf.reshape(pred_cat, [-1]), 213 | num_classes=num_categories, 214 | weights=tf.reshape(weightsValue_cat, [-1]), 215 | name="mean_iou_cat" 216 | ) 217 | 218 | init_op = [tf.global_variables_initializer(), tf.local_variables_initializer()] 219 | 220 | miou_value = 0 221 | with tf.Session() as sess: 222 | sess.run(init_op) 223 | lines = open(args.filename_file).readlines() 224 | lenght = len(lines) 225 | 226 | for idx, line in enumerate(lines): 227 | base_path = line.strip() 228 | prediction_folder = os.path.join(args.prediction_folder, base_path) 229 | datapath = os.path.join(args.datapath, "training/semantic", 
base_path) 230 | print("GT: ", datapath, " Pred: ", prediction_folder, idx, "/", lenght, end="\r") 231 | 232 | gt_value = cv2.imread(datapath, cv2.IMREAD_GRAYSCALE) 233 | pred_value = cv2.imread(prediction_folder, cv2.IMREAD_GRAYSCALE) 234 | 235 | image_w = gt_value.shape[1] 236 | image_h = gt_value.shape[0] 237 | 238 | if args.dataset == "cityscapes": 239 | crop_height = (image_h * 4) // 5 240 | gt_value = gt_value[:crop_height, :] 241 | gt_value = cv2.resize( 242 | gt_value, (image_w, image_h), interpolation=cv2.INTER_NEAREST 243 | ) 244 | 245 | _, _, _ = sess.run( 246 | [update_op_acc, update_op, update_op_cat], 247 | feed_dict={ 248 | prediction_placeholder: np.expand_dims(pred_value, axis=-1), 249 | gt_placeholder: np.expand_dims(gt_value, axis=-1), 250 | }, 251 | ) 252 | acc_value, miou_value, miou_cat_value = sess.run( 253 | [acc, miou, miou_cat], 254 | feed_dict={ 255 | prediction_placeholder: np.expand_dims(pred_value, axis=-1), 256 | gt_placeholder: np.expand_dims(gt_value, axis=-1), 257 | }, 258 | ) 259 | 260 | confusion_matrix = ( 261 | tf.get_default_graph() 262 | .get_tensor_by_name("mean_iou/total_confusion_matrix:0") 263 | .eval() 264 | ) 265 | print("") 266 | for cl in range(confusion_matrix.shape[0]): 267 | tp_fn = np.sum(confusion_matrix[cl, :]) 268 | tp_fp = np.sum(confusion_matrix[:, cl]) 269 | tp = confusion_matrix[cl, cl] 270 | if tp == 0 and (tp_fn + tp_fp - tp) == 0: 271 | IoU_cl = float("nan") 272 | else: 273 | IoU_cl = tp / (tp_fn + tp_fp - tp) 274 | print(trainId2name[cl] + ": {:.8f}".format(IoU_cl)) 275 | print("mIoU: " + str(miou_value)) 276 | print("mIoU Categories: " + str(miou_cat_value)) 277 | print("Pix. Acc.: " + str(acc_value)) 278 | -------------------------------------------------------------------------------- /filenames/kitti_2015_test.txt: -------------------------------------------------------------------------------- 1 | 000000_09.png 000000_10.png 000000_11.png 2 | 000001_09.png 000001_10.png 000001_11.png 3 | 000002_09.png 000002_10.png 000002_11.png 4 | 000003_09.png 000003_10.png 000003_11.png 5 | 000004_09.png 000004_10.png 000004_11.png 6 | 000005_09.png 000005_10.png 000005_11.png 7 | 000006_09.png 000006_10.png 000006_11.png 8 | 000007_09.png 000007_10.png 000007_11.png 9 | 000008_09.png 000008_10.png 000008_11.png 10 | 000009_09.png 000009_10.png 000009_11.png 11 | 000010_09.png 000010_10.png 000010_11.png 12 | 000011_09.png 000011_10.png 000011_11.png 13 | 000012_09.png 000012_10.png 000012_11.png 14 | 000013_09.png 000013_10.png 000013_11.png 15 | 000014_09.png 000014_10.png 000014_11.png 16 | 000015_09.png 000015_10.png 000015_11.png 17 | 000016_09.png 000016_10.png 000016_11.png 18 | 000017_09.png 000017_10.png 000017_11.png 19 | 000018_09.png 000018_10.png 000018_11.png 20 | 000019_09.png 000019_10.png 000019_11.png 21 | 000020_09.png 000020_10.png 000020_11.png 22 | 000021_09.png 000021_10.png 000021_11.png 23 | 000022_09.png 000022_10.png 000022_11.png 24 | 000023_09.png 000023_10.png 000023_11.png 25 | 000024_09.png 000024_10.png 000024_11.png 26 | 000025_09.png 000025_10.png 000025_11.png 27 | 000026_09.png 000026_10.png 000026_11.png 28 | 000027_09.png 000027_10.png 000027_11.png 29 | 000028_09.png 000028_10.png 000028_11.png 30 | 000029_09.png 000029_10.png 000029_11.png 31 | 000030_09.png 000030_10.png 000030_11.png 32 | 000031_09.png 000031_10.png 000031_11.png 33 | 000032_09.png 000032_10.png 000032_11.png 34 | 000033_09.png 000033_10.png 000033_11.png 35 | 000034_09.png 000034_10.png 000034_11.png 36 | 000035_09.png 
000035_10.png 000035_11.png 37 | 000036_09.png 000036_10.png 000036_11.png 38 | 000037_09.png 000037_10.png 000037_11.png 39 | 000038_09.png 000038_10.png 000038_11.png 40 | 000039_09.png 000039_10.png 000039_11.png 41 | 000040_09.png 000040_10.png 000040_11.png 42 | 000041_09.png 000041_10.png 000041_11.png 43 | 000042_09.png 000042_10.png 000042_11.png 44 | 000043_09.png 000043_10.png 000043_11.png 45 | 000044_09.png 000044_10.png 000044_11.png 46 | 000045_09.png 000045_10.png 000045_11.png 47 | 000046_09.png 000046_10.png 000046_11.png 48 | 000047_09.png 000047_10.png 000047_11.png 49 | 000048_09.png 000048_10.png 000048_11.png 50 | 000049_09.png 000049_10.png 000049_11.png 51 | 000050_09.png 000050_10.png 000050_11.png 52 | 000051_09.png 000051_10.png 000051_11.png 53 | 000052_09.png 000052_10.png 000052_11.png 54 | 000053_09.png 000053_10.png 000053_11.png 55 | 000054_09.png 000054_10.png 000054_11.png 56 | 000055_09.png 000055_10.png 000055_11.png 57 | 000056_09.png 000056_10.png 000056_11.png 58 | 000057_09.png 000057_10.png 000057_11.png 59 | 000058_09.png 000058_10.png 000058_11.png 60 | 000059_09.png 000059_10.png 000059_11.png 61 | 000060_09.png 000060_10.png 000060_11.png 62 | 000061_09.png 000061_10.png 000061_11.png 63 | 000062_09.png 000062_10.png 000062_11.png 64 | 000063_09.png 000063_10.png 000063_11.png 65 | 000064_09.png 000064_10.png 000064_11.png 66 | 000065_09.png 000065_10.png 000065_11.png 67 | 000066_09.png 000066_10.png 000066_11.png 68 | 000067_09.png 000067_10.png 000067_11.png 69 | 000068_09.png 000068_10.png 000068_11.png 70 | 000069_09.png 000069_10.png 000069_11.png 71 | 000070_09.png 000070_10.png 000070_11.png 72 | 000071_09.png 000071_10.png 000071_11.png 73 | 000072_09.png 000072_10.png 000072_11.png 74 | 000073_09.png 000073_10.png 000073_11.png 75 | 000074_09.png 000074_10.png 000074_11.png 76 | 000075_09.png 000075_10.png 000075_11.png 77 | 000076_09.png 000076_10.png 000076_11.png 78 | 000077_09.png 000077_10.png 000077_11.png 79 | 000078_09.png 000078_10.png 000078_11.png 80 | 000079_09.png 000079_10.png 000079_11.png 81 | 000080_09.png 000080_10.png 000080_11.png 82 | 000081_09.png 000081_10.png 000081_11.png 83 | 000082_09.png 000082_10.png 000082_11.png 84 | 000083_09.png 000083_10.png 000083_11.png 85 | 000084_09.png 000084_10.png 000084_11.png 86 | 000085_09.png 000085_10.png 000085_11.png 87 | 000086_09.png 000086_10.png 000086_11.png 88 | 000087_09.png 000087_10.png 000087_11.png 89 | 000088_09.png 000088_10.png 000088_11.png 90 | 000089_09.png 000089_10.png 000089_11.png 91 | 000090_09.png 000090_10.png 000090_11.png 92 | 000091_09.png 000091_10.png 000091_11.png 93 | 000092_09.png 000092_10.png 000092_11.png 94 | 000093_09.png 000093_10.png 000093_11.png 95 | 000094_09.png 000094_10.png 000094_11.png 96 | 000095_09.png 000095_10.png 000095_11.png 97 | 000096_09.png 000096_10.png 000096_11.png 98 | 000097_09.png 000097_10.png 000097_11.png 99 | 000098_09.png 000098_10.png 000098_11.png 100 | 000099_09.png 000099_10.png 000099_11.png 101 | 000100_09.png 000100_10.png 000100_11.png 102 | 000101_09.png 000101_10.png 000101_11.png 103 | 000102_09.png 000102_10.png 000102_11.png 104 | 000103_09.png 000103_10.png 000103_11.png 105 | 000104_09.png 000104_10.png 000104_11.png 106 | 000105_09.png 000105_10.png 000105_11.png 107 | 000106_09.png 000106_10.png 000106_11.png 108 | 000107_09.png 000107_10.png 000107_11.png 109 | 000108_09.png 000108_10.png 000108_11.png 110 | 000109_09.png 000109_10.png 000109_11.png 111 | 000110_09.png 000110_10.png 
000110_11.png 112 | 000111_09.png 000111_10.png 000111_11.png 113 | 000112_09.png 000112_10.png 000112_11.png 114 | 000113_09.png 000113_10.png 000113_11.png 115 | 000114_09.png 000114_10.png 000114_11.png 116 | 000115_09.png 000115_10.png 000115_11.png 117 | 000116_09.png 000116_10.png 000116_11.png 118 | 000117_09.png 000117_10.png 000117_11.png 119 | 000118_09.png 000118_10.png 000118_11.png 120 | 000119_09.png 000119_10.png 000119_11.png 121 | 000120_09.png 000120_10.png 000120_11.png 122 | 000121_09.png 000121_10.png 000121_11.png 123 | 000122_09.png 000122_10.png 000122_11.png 124 | 000123_09.png 000123_10.png 000123_11.png 125 | 000124_09.png 000124_10.png 000124_11.png 126 | 000125_09.png 000125_10.png 000125_11.png 127 | 000126_09.png 000126_10.png 000126_11.png 128 | 000127_09.png 000127_10.png 000127_11.png 129 | 000128_09.png 000128_10.png 000128_11.png 130 | 000129_09.png 000129_10.png 000129_11.png 131 | 000130_09.png 000130_10.png 000130_11.png 132 | 000131_09.png 000131_10.png 000131_11.png 133 | 000132_09.png 000132_10.png 000132_11.png 134 | 000133_09.png 000133_10.png 000133_11.png 135 | 000134_09.png 000134_10.png 000134_11.png 136 | 000135_09.png 000135_10.png 000135_11.png 137 | 000136_09.png 000136_10.png 000136_11.png 138 | 000137_09.png 000137_10.png 000137_11.png 139 | 000138_09.png 000138_10.png 000138_11.png 140 | 000139_09.png 000139_10.png 000139_11.png 141 | 000140_09.png 000140_10.png 000140_11.png 142 | 000141_09.png 000141_10.png 000141_11.png 143 | 000142_09.png 000142_10.png 000142_11.png 144 | 000143_09.png 000143_10.png 000143_11.png 145 | 000144_09.png 000144_10.png 000144_11.png 146 | 000145_09.png 000145_10.png 000145_11.png 147 | 000146_09.png 000146_10.png 000146_11.png 148 | 000147_09.png 000147_10.png 000147_11.png 149 | 000148_09.png 000148_10.png 000148_11.png 150 | 000149_09.png 000149_10.png 000149_11.png 151 | 000150_09.png 000150_10.png 000150_11.png 152 | 000151_09.png 000151_10.png 000151_11.png 153 | 000152_09.png 000152_10.png 000152_11.png 154 | 000153_09.png 000153_10.png 000153_11.png 155 | 000154_09.png 000154_10.png 000154_11.png 156 | 000155_09.png 000155_10.png 000155_11.png 157 | 000156_09.png 000156_10.png 000156_11.png 158 | 000157_09.png 000157_10.png 000157_11.png 159 | 000158_09.png 000158_10.png 000158_11.png 160 | 000159_09.png 000159_10.png 000159_11.png 161 | 000160_09.png 000160_10.png 000160_11.png 162 | 000161_09.png 000161_10.png 000161_11.png 163 | 000162_09.png 000162_10.png 000162_11.png 164 | 000163_09.png 000163_10.png 000163_11.png 165 | 000164_09.png 000164_10.png 000164_11.png 166 | 000165_09.png 000165_10.png 000165_11.png 167 | 000166_09.png 000166_10.png 000166_11.png 168 | 000167_09.png 000167_10.png 000167_11.png 169 | 000168_09.png 000168_10.png 000168_11.png 170 | 000169_09.png 000169_10.png 000169_11.png 171 | 000170_09.png 000170_10.png 000170_11.png 172 | 000171_09.png 000171_10.png 000171_11.png 173 | 000172_09.png 000172_10.png 000172_11.png 174 | 000173_09.png 000173_10.png 000173_11.png 175 | 000174_09.png 000174_10.png 000174_11.png 176 | 000175_09.png 000175_10.png 000175_11.png 177 | 000176_09.png 000176_10.png 000176_11.png 178 | 000177_09.png 000177_10.png 000177_11.png 179 | 000178_09.png 000178_10.png 000178_11.png 180 | 000179_09.png 000179_10.png 000179_11.png 181 | 000180_09.png 000180_10.png 000180_11.png 182 | 000181_09.png 000181_10.png 000181_11.png 183 | 000182_09.png 000182_10.png 000182_11.png 184 | 000183_09.png 000183_10.png 000183_11.png 185 | 000184_09.png 000184_10.png 
000184_11.png 186 | 000185_09.png 000185_10.png 000185_11.png 187 | 000186_09.png 000186_10.png 000186_11.png 188 | 000187_09.png 000187_10.png 000187_11.png 189 | 000188_09.png 000188_10.png 000188_11.png 190 | 000189_09.png 000189_10.png 000189_11.png 191 | 000190_09.png 000190_10.png 000190_11.png 192 | 000191_09.png 000191_10.png 000191_11.png 193 | 000192_09.png 000192_10.png 000192_11.png 194 | 000193_09.png 000193_10.png 000193_11.png 195 | 000194_09.png 000194_10.png 000194_11.png 196 | 000195_09.png 000195_10.png 000195_11.png 197 | 000196_09.png 000196_10.png 000196_11.png 198 | 000197_09.png 000197_10.png 000197_11.png 199 | 000198_09.png 000198_10.png 000198_11.png 200 | 000199_09.png 000199_10.png 000199_11.png 201 | -------------------------------------------------------------------------------- /filenames/kitti_2015_test_semantic.txt: -------------------------------------------------------------------------------- 1 | 000000_10.png 2 | 000001_10.png 3 | 000002_10.png 4 | 000003_10.png 5 | 000004_10.png 6 | 000005_10.png 7 | 000006_10.png 8 | 000007_10.png 9 | 000008_10.png 10 | 000009_10.png 11 | 000010_10.png 12 | 000011_10.png 13 | 000012_10.png 14 | 000013_10.png 15 | 000014_10.png 16 | 000015_10.png 17 | 000016_10.png 18 | 000017_10.png 19 | 000018_10.png 20 | 000019_10.png 21 | 000020_10.png 22 | 000021_10.png 23 | 000022_10.png 24 | 000023_10.png 25 | 000024_10.png 26 | 000025_10.png 27 | 000026_10.png 28 | 000027_10.png 29 | 000028_10.png 30 | 000029_10.png 31 | 000030_10.png 32 | 000031_10.png 33 | 000032_10.png 34 | 000033_10.png 35 | 000034_10.png 36 | 000035_10.png 37 | 000036_10.png 38 | 000037_10.png 39 | 000038_10.png 40 | 000039_10.png 41 | 000040_10.png 42 | 000041_10.png 43 | 000042_10.png 44 | 000043_10.png 45 | 000044_10.png 46 | 000045_10.png 47 | 000046_10.png 48 | 000047_10.png 49 | 000048_10.png 50 | 000049_10.png 51 | 000050_10.png 52 | 000051_10.png 53 | 000052_10.png 54 | 000053_10.png 55 | 000054_10.png 56 | 000055_10.png 57 | 000056_10.png 58 | 000057_10.png 59 | 000058_10.png 60 | 000059_10.png 61 | 000060_10.png 62 | 000061_10.png 63 | 000062_10.png 64 | 000063_10.png 65 | 000064_10.png 66 | 000065_10.png 67 | 000066_10.png 68 | 000067_10.png 69 | 000068_10.png 70 | 000069_10.png 71 | 000070_10.png 72 | 000071_10.png 73 | 000072_10.png 74 | 000073_10.png 75 | 000074_10.png 76 | 000075_10.png 77 | 000076_10.png 78 | 000077_10.png 79 | 000078_10.png 80 | 000079_10.png 81 | 000080_10.png 82 | 000081_10.png 83 | 000082_10.png 84 | 000083_10.png 85 | 000084_10.png 86 | 000085_10.png 87 | 000086_10.png 88 | 000087_10.png 89 | 000088_10.png 90 | 000089_10.png 91 | 000090_10.png 92 | 000091_10.png 93 | 000092_10.png 94 | 000093_10.png 95 | 000094_10.png 96 | 000095_10.png 97 | 000096_10.png 98 | 000097_10.png 99 | 000098_10.png 100 | 000099_10.png 101 | 000100_10.png 102 | 000101_10.png 103 | 000102_10.png 104 | 000103_10.png 105 | 000104_10.png 106 | 000105_10.png 107 | 000106_10.png 108 | 000107_10.png 109 | 000108_10.png 110 | 000109_10.png 111 | 000110_10.png 112 | 000111_10.png 113 | 000112_10.png 114 | 000113_10.png 115 | 000114_10.png 116 | 000115_10.png 117 | 000116_10.png 118 | 000117_10.png 119 | 000118_10.png 120 | 000119_10.png 121 | 000120_10.png 122 | 000121_10.png 123 | 000122_10.png 124 | 000123_10.png 125 | 000124_10.png 126 | 000125_10.png 127 | 000126_10.png 128 | 000127_10.png 129 | 000128_10.png 130 | 000129_10.png 131 | 000130_10.png 132 | 000131_10.png 133 | 000132_10.png 134 | 000133_10.png 135 | 000134_10.png 136 | 000135_10.png 
137 | 000136_10.png 138 | 000137_10.png 139 | 000138_10.png 140 | 000139_10.png 141 | 000140_10.png 142 | 000141_10.png 143 | 000142_10.png 144 | 000143_10.png 145 | 000144_10.png 146 | 000145_10.png 147 | 000146_10.png 148 | 000147_10.png 149 | 000148_10.png 150 | 000149_10.png 151 | 000150_10.png 152 | 000151_10.png 153 | 000152_10.png 154 | 000153_10.png 155 | 000154_10.png 156 | 000155_10.png 157 | 000156_10.png 158 | 000157_10.png 159 | 000158_10.png 160 | 000159_10.png 161 | 000160_10.png 162 | 000161_10.png 163 | 000162_10.png 164 | 000163_10.png 165 | 000164_10.png 166 | 000165_10.png 167 | 000166_10.png 168 | 000167_10.png 169 | 000168_10.png 170 | 000169_10.png 171 | 000170_10.png 172 | 000171_10.png 173 | 000172_10.png 174 | 000173_10.png 175 | 000174_10.png 176 | 000175_10.png 177 | 000176_10.png 178 | 000177_10.png 179 | 000178_10.png 180 | 000179_10.png 181 | 000180_10.png 182 | 000181_10.png 183 | 000182_10.png 184 | 000183_10.png 185 | 000184_10.png 186 | 000185_10.png 187 | 000186_10.png 188 | 000187_10.png 189 | 000188_10.png 190 | 000189_10.png 191 | 000190_10.png 192 | 000191_10.png 193 | 000192_10.png 194 | 000193_10.png 195 | 000194_10.png 196 | 000195_10.png 197 | 000196_10.png 198 | 000197_10.png 199 | 000198_10.png 200 | 000199_10.png 201 | -------------------------------------------------------------------------------- /helpers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVLAB-Unibo/omeganet/7e23372923ee53745ba6bbb0c7921d7bb4eea01a/helpers/__init__.py -------------------------------------------------------------------------------- /helpers/bilinear_sampler.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility functions 3 | 4 | Part of this code is based on https://github.com/tinghuiz/SfMLearner/blob/master/utils.py, 5 | published under MIT License. 6 | We would like to thank T. 
Zhou and other authors for sharing their code 7 | 8 | """ 9 | from __future__ import division 10 | import numpy as np 11 | import tensorflow as tf 12 | 13 | 14 | def euler2mat(z, y, x): 15 | """Converts euler angles to rotation matrix 16 | TODO: remove the dimension for 'N' (deprecated for converting all source 17 | poses altogether) 18 | Reference: https://github.com/pulkitag/pycaffe-utils/blob/master/rot_utils.py#L174 19 | Args: 20 | z: rotation angle along z axis (in radians) -- size = [B, N] 21 | y: rotation angle along y axis (in radians) -- size = [B, N] 22 | x: rotation angle along x axis (in radians) -- size = [B, N] 23 | Returns: 24 | Rotation matrix corresponding to the euler angles -- size = [B, N, 3, 3] 25 | """ 26 | with tf.variable_scope("euler2mat"): 27 | B = tf.shape(z)[0] 28 | N = 1 29 | z = tf.clip_by_value(z, -np.pi, np.pi) 30 | y = tf.clip_by_value(y, -np.pi, np.pi) 31 | x = tf.clip_by_value(x, -np.pi, np.pi) 32 | 33 | # Expand to B x N x 1 x 1 34 | z = tf.expand_dims(tf.expand_dims(z, -1), -1) 35 | y = tf.expand_dims(tf.expand_dims(y, -1), -1) 36 | x = tf.expand_dims(tf.expand_dims(x, -1), -1) 37 | 38 | zeros = tf.zeros([B, N, 1, 1]) 39 | ones = tf.ones([B, N, 1, 1]) 40 | 41 | cosz = tf.cos(z) 42 | sinz = tf.sin(z) 43 | rotz_1 = tf.concat([cosz, -sinz, zeros], axis=3) 44 | rotz_2 = tf.concat([sinz, cosz, zeros], axis=3) 45 | rotz_3 = tf.concat([zeros, zeros, ones], axis=3) 46 | zmat = tf.concat([rotz_1, rotz_2, rotz_3], axis=2) 47 | 48 | cosy = tf.cos(y) 49 | siny = tf.sin(y) 50 | roty_1 = tf.concat([cosy, zeros, siny], axis=3) 51 | roty_2 = tf.concat([zeros, ones, zeros], axis=3) 52 | roty_3 = tf.concat([-siny, zeros, cosy], axis=3) 53 | ymat = tf.concat([roty_1, roty_2, roty_3], axis=2) 54 | 55 | cosx = tf.cos(x) 56 | sinx = tf.sin(x) 57 | rotx_1 = tf.concat([ones, zeros, zeros], axis=3) 58 | rotx_2 = tf.concat([zeros, cosx, -sinx], axis=3) 59 | rotx_3 = tf.concat([zeros, sinx, cosx], axis=3) 60 | xmat = tf.concat([rotx_1, rotx_2, rotx_3], axis=2) 61 | 62 | rotMat = tf.matmul(tf.matmul(xmat, ymat), zmat) 63 | return rotMat 64 | 65 | 66 | def pose_vec2mat(vec): 67 | """Converts 6DoF parameters to transformation matrix 68 | Args: 69 | vec: 6DoF parameters in the order of tx, ty, tz, rx, ry, rz -- [B, 6] 70 | Returns: 71 | A transformation matrix -- [B, 4, 4] 72 | """ 73 | with tf.variable_scope("vec2mat"): 74 | batch_size, _ = vec.get_shape().as_list() 75 | translation = tf.slice(vec, [0, 0], [-1, 3]) 76 | translation = tf.expand_dims(translation, -1) 77 | rx = tf.slice(vec, [0, 3], [-1, 1]) 78 | ry = tf.slice(vec, [0, 4], [-1, 1]) 79 | rz = tf.slice(vec, [0, 5], [-1, 1]) 80 | rot_mat = euler2mat(rz, ry, rx) 81 | rot_mat = tf.squeeze(rot_mat, axis=[1]) 82 | filler = tf.constant([0.0, 0.0, 0.0, 1.0], shape=[1, 1, 4]) 83 | filler = tf.tile(filler, [batch_size, 1, 1]) 84 | transform_mat = tf.concat([rot_mat, translation], axis=2) 85 | transform_mat = tf.concat([transform_mat, filler], axis=1) 86 | return transform_mat 87 | 88 | 89 | def pixel2cam(depth, pixel_coords, intrinsics, is_homogeneous=True): 90 | """Transforms coordinates in the pixel frame to the camera frame. 
91 | Args: 92 | depth: [batch, height, width] 93 | pixel_coords: homogeneous pixel coordinates [batch, 3, height, width] 94 | intrinsics: camera intrinsics [batch, 3, 3] 95 | is_homogeneous: return in homogeneous coordinates 96 | Returns: 97 | Coords in the camera frame [batch, 3 (4 if homogeneous), height, width] 98 | """ 99 | with tf.variable_scope("pixel2cam"): 100 | batch, height, width = depth.get_shape().as_list() 101 | depth = tf.reshape(depth, [batch, 1, -1]) 102 | pixel_coords = tf.reshape(pixel_coords, [batch, 3, -1]) 103 | cam_coords = tf.matmul(tf.matrix_inverse(intrinsics), pixel_coords) * depth 104 | if is_homogeneous: 105 | ones = tf.ones([batch, 1, height * width]) 106 | cam_coords = tf.concat([cam_coords, ones], axis=1) 107 | cam_coords = tf.reshape(cam_coords, [batch, -1, height, width]) 108 | return cam_coords 109 | 110 | 111 | def cam2pixel(cam_coords, proj): 112 | """Transforms coordinates in a camera frame to the pixel frame. 113 | Args: 114 | cam_coords: [batch, 4, height, width] 115 | proj: [batch, 4, 4] 116 | Returns: 117 | Pixel coordinates projected from the camera frame [batch, height, width, 2] 118 | """ 119 | with tf.variable_scope("cam2pixel"): 120 | batch, _, height, width = cam_coords.get_shape().as_list() 121 | cam_coords = tf.reshape(cam_coords, [batch, 4, -1]) 122 | unnormalized_pixel_coords = tf.matmul(proj, cam_coords) 123 | x_u = tf.slice(unnormalized_pixel_coords, [0, 0, 0], [-1, 1, -1]) 124 | y_u = tf.slice(unnormalized_pixel_coords, [0, 1, 0], [-1, 1, -1]) 125 | z_u = tf.slice(unnormalized_pixel_coords, [0, 2, 0], [-1, 1, -1]) 126 | x_n = x_u / (z_u + 1e-10) 127 | y_n = y_u / (z_u + 1e-10) 128 | pixel_coords = tf.concat([x_n, y_n], axis=1) 129 | pixel_coords = tf.reshape(pixel_coords, [batch, 2, height, width]) 130 | return tf.transpose(pixel_coords, perm=[0, 2, 3, 1]) 131 | 132 | 133 | def meshgrid(batch, height, width, is_homogeneous=True): 134 | """Construct a 2D meshgrid. 
135 | Args: 136 | batch: batch size 137 | height: height of the grid 138 | width: width of the grid 139 | is_homogeneous: whether to return in homogeneous coordinates 140 | Returns: 141 | x,y grid coordinates [batch, 2 (3 if homogeneous), height, width] 142 | """ 143 | x_t = tf.matmul( 144 | tf.ones(shape=tf.stack([height, 1])), 145 | tf.transpose(tf.expand_dims(tf.linspace(-1.0, 1.0, width), 1), [1, 0]), 146 | ) 147 | y_t = tf.matmul( 148 | tf.expand_dims(tf.linspace(-1.0, 1.0, height), 1), 149 | tf.ones(shape=tf.stack([1, width])), 150 | ) 151 | x_t = (x_t + 1.0) * 0.5 * tf.cast(width - 1, tf.float32) 152 | y_t = (y_t + 1.0) * 0.5 * tf.cast(height - 1, tf.float32) 153 | if is_homogeneous: 154 | ones = tf.ones_like(x_t) 155 | coords = tf.stack([x_t, y_t, ones], axis=0) 156 | else: 157 | coords = tf.stack([x_t, y_t], axis=0) 158 | coords = tf.tile(tf.expand_dims(coords, 0), [batch, 1, 1, 1]) 159 | return coords 160 | 161 | 162 | def flow_warp(src_img, flow): 163 | """ inverse warp a source image to the target image plane based on flow field 164 | Args: 165 | src_img: the source image [batch, height_s, width_s, 3] 166 | flow: target image to source image flow [batch, height_t, width_t, 2] 167 | Returns: 168 | Source image inverse warped to the target image plane [batch, height_t, width_t, 3] 169 | """ 170 | with tf.variable_scope("flow_warp"): 171 | batch, height, width, _ = src_img.get_shape().as_list() 172 | tgt_pixel_coords = tf.transpose( 173 | meshgrid(batch, height, width, False), [0, 2, 3, 1] 174 | ) 175 | src_pixel_coords = tgt_pixel_coords + flow 176 | output_img = bilinear_sampler(src_img, src_pixel_coords) 177 | return output_img 178 | 179 | 180 | def compute_rigid_flow(depth, pose, intrinsics, reverse_pose=False): 181 | """Compute the rigid flow from target image plane to source image 182 | Args: 183 | depth: depth map of the target image [batch, height_t, width_t] 184 | pose: target to source (or source to target if reverse_pose=True) 185 | camera transformation matrix [batch, 6], in the order of 186 | tx, ty, tz, rx, ry, rz; 187 | intrinsics: camera intrinsics [batch, 3, 3] 188 | Returns: 189 | Rigid flow from target image to source image [batch, height_t, width_t, 2] 190 | """ 191 | with tf.variable_scope("compute_rigid_flow"): 192 | batch, height, width = depth.get_shape().as_list() 193 | # Convert pose vector to matrix 194 | pose = pose_vec2mat(pose) 195 | if reverse_pose: 196 | pose = tf.matrix_inverse(pose) 197 | # Construct pixel grid coordinates 198 | pixel_coords = meshgrid(batch, height, width) 199 | tgt_pixel_coords = tf.transpose(pixel_coords[:, :2, :, :], [0, 2, 3, 1]) 200 | # Convert pixel coordinates to the camera frame 201 | cam_coords = pixel2cam(depth, pixel_coords, intrinsics) 202 | # Construct a 4x4 intrinsic matrix 203 | filler = tf.constant([0.0, 0.0, 0.0, 1.0], shape=[1, 1, 4]) 204 | filler = tf.tile(filler, [batch, 1, 1]) 205 | intrinsics = tf.concat([intrinsics, tf.zeros([batch, 3, 1])], axis=2) 206 | intrinsics = tf.concat([intrinsics, filler], axis=1) 207 | # Get a 4x4 transformation matrix from 'target' camera frame to 'source' 208 | # pixel frame. 209 | proj_tgt_cam_to_src_pixel = tf.matmul(intrinsics, pose) 210 | src_pixel_coords = cam2pixel(cam_coords, proj_tgt_cam_to_src_pixel) 211 | rigid_flow = src_pixel_coords - tgt_pixel_coords 212 | return rigid_flow 213 | 214 | 215 | def bilinear_sampler(imgs, coords): 216 | """Construct a new image by bilinear sampling from the input image. 
217 | Points falling outside the source image boundary have value 0. 218 | Args: 219 | imgs: source image to be sampled from [batch, height_s, width_s, channels] 220 | coords: coordinates of source pixels to sample from [batch, height_t, 221 | width_t, 2]. height_t/width_t correspond to the dimensions of the output 222 | image (don't need to be the same as height_s/width_s). The two channels 223 | correspond to x and y coordinates respectively. 224 | Returns: 225 | A new sampled image [batch, height_t, width_t, channels] 226 | """ 227 | 228 | def _repeat(x, n_repeats): 229 | rep = tf.transpose( 230 | tf.expand_dims(tf.ones(shape=tf.stack([n_repeats,])), 1), [1, 0] 231 | ) 232 | rep = tf.cast(rep, "float32") 233 | x = tf.matmul(tf.reshape(x, (-1, 1)), rep) 234 | return tf.reshape(x, [-1]) 235 | 236 | with tf.name_scope("image_sampling"): 237 | coords_x, coords_y = tf.split(coords, [1, 1], axis=3) 238 | inp_size = imgs.get_shape() 239 | coord_size = coords.get_shape() 240 | out_size = coords.get_shape().as_list() 241 | out_size[3] = imgs.get_shape().as_list()[3] 242 | 243 | coords_x = tf.cast(coords_x, "float32") 244 | coords_y = tf.cast(coords_y, "float32") 245 | 246 | x0 = tf.floor(coords_x) 247 | x1 = x0 + 1 248 | y0 = tf.floor(coords_y) 249 | y1 = y0 + 1 250 | 251 | y_max = tf.cast(tf.shape(imgs)[1] - 1, "float32") 252 | x_max = tf.cast(tf.shape(imgs)[2] - 1, "float32") 253 | # zero = tf.zeros([1], dtype='float32') 254 | zero = tf.constant(0, dtype=tf.float32) 255 | 256 | x0_safe = tf.clip_by_value(x0, zero, x_max) 257 | y0_safe = tf.clip_by_value(y0, zero, y_max) 258 | x1_safe = tf.clip_by_value(x1, zero, x_max) 259 | y1_safe = tf.clip_by_value(y1, zero, y_max) 260 | 261 | wt_x0 = x1_safe - coords_x 262 | wt_x1 = coords_x - x0_safe 263 | wt_y0 = y1_safe - coords_y 264 | wt_y1 = coords_y - y0_safe 265 | 266 | ## indices in the flat image to sample from 267 | dim2 = tf.cast(inp_size[2], "float32") 268 | dim1 = tf.cast(inp_size[2] * inp_size[1], "float32") 269 | base = tf.reshape( 270 | _repeat( 271 | tf.cast(tf.range(coord_size[0]), "float32") * dim1, 272 | coord_size[1] * coord_size[2], 273 | ), 274 | [out_size[0], out_size[1], out_size[2], 1], 275 | ) 276 | 277 | base_y0 = base + y0_safe * dim2 278 | base_y1 = base + y1_safe * dim2 279 | idx00 = tf.reshape(x0_safe + base_y0, [-1]) 280 | idx01 = x0_safe + base_y1 281 | idx10 = x1_safe + base_y0 282 | idx11 = x1_safe + base_y1 283 | 284 | ## sample from imgs 285 | imgs_flat = tf.reshape(imgs, tf.stack([-1, inp_size[3]])) 286 | imgs_flat = tf.cast(imgs_flat, "float32") 287 | im00 = tf.reshape(tf.gather(imgs_flat, tf.cast(idx00, "int32")), out_size) 288 | im01 = tf.reshape(tf.gather(imgs_flat, tf.cast(idx01, "int32")), out_size) 289 | im10 = tf.reshape(tf.gather(imgs_flat, tf.cast(idx10, "int32")), out_size) 290 | im11 = tf.reshape(tf.gather(imgs_flat, tf.cast(idx11, "int32")), out_size) 291 | 292 | w00 = wt_x0 * wt_y0 293 | w01 = wt_x0 * wt_y1 294 | w10 = wt_x1 * wt_y0 295 | w11 = wt_x1 * wt_y1 296 | 297 | output = tf.add_n([w00 * im00, w01 * im01, w10 * im10, w11 * im11]) 298 | return output 299 | -------------------------------------------------------------------------------- /helpers/depth_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Evaluation utils. 3 | This code is based on https://github.com/mrharicot/monodepth/blob/master/utils/evaluation_utils.py 4 | We would like to thank C. 
Godard and other authors for sharing their code 5 | """ 6 | 7 | import os 8 | import numpy as np 9 | import pandas as pd 10 | import cv2 11 | from collections import Counter 12 | import pickle 13 | 14 | 15 | def compute_errors(gt, pred): 16 | thresh = np.maximum((gt / pred), (pred / gt)) 17 | a1 = (thresh < 1.25).mean() 18 | a2 = (thresh < 1.25 ** 2).mean() 19 | a3 = (thresh < 1.25 ** 3).mean() 20 | 21 | rmse = (gt - pred) ** 2 22 | rmse = np.sqrt(rmse.mean()) 23 | 24 | rmse_log = (np.log(gt) - np.log(pred)) ** 2 25 | rmse_log = np.sqrt(rmse_log.mean()) 26 | 27 | abs_rel = np.mean(np.abs(gt - pred) / gt) 28 | 29 | sq_rel = np.mean(((gt - pred) ** 2) / gt) 30 | 31 | return abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3 32 | 33 | 34 | ############################################################################### 35 | ####################### KITTI 36 | 37 | width_to_focal = dict() 38 | width_to_focal[1242] = 721.5377 39 | width_to_focal[1241] = 718.856 40 | width_to_focal[1224] = 707.0493 41 | width_to_focal[1238] = 718.3351 42 | 43 | 44 | def load_gt_disp_kitti(path): 45 | gt_disparities = [] 46 | for i in range(200): 47 | disp = cv2.imread( 48 | path + "/training/disp_noc_0/" + str(i).zfill(6) + "_10.png", -1 49 | ) 50 | disp = disp.astype(np.float32) / 256 51 | gt_disparities.append(disp) 52 | return gt_disparities 53 | 54 | 55 | def convert_disps_to_depths_kitti(gt_disparities, pred_disparities): 56 | gt_depths = [] 57 | pred_depths = [] 58 | pred_disparities_resized = [] 59 | 60 | for i in range(len(gt_disparities)): 61 | gt_disp = gt_disparities[i] 62 | height, width = gt_disp.shape 63 | 64 | pred_disp = pred_disparities[i] 65 | pred_disparities_resized.append(pred_disp) 66 | 67 | mask = gt_disp > 0 68 | 69 | gt_depth = width_to_focal[width] * 0.54 / (gt_disp + (1.0 - mask)) 70 | pred_depth = width_to_focal[width] * 0.54 / pred_disp 71 | 72 | gt_depths.append(gt_depth) 73 | pred_depths.append(pred_depth) 74 | return gt_depths, pred_depths, pred_disparities_resized 75 | 76 | 77 | ############################################################################### 78 | ####################### EIGEN 79 | 80 | 81 | def read_text_lines(file_path): 82 | f = open(file_path, "r") 83 | lines = f.readlines() 84 | f.close() 85 | lines = [l.rstrip() for l in lines] 86 | return lines 87 | 88 | 89 | def read_file_data(files, data_root): 90 | gt_files = [] 91 | gt_calib = [] 92 | im_sizes = [] 93 | im_files = [] 94 | cams = [] 95 | num_probs = 0 96 | for filename in files: 97 | filename = filename.split()[0] 98 | splits = filename.split("/") 99 | camera_id = np.int32(splits[2][-1:]) # 2 is left, 3 is right 100 | date = splits[0] 101 | im_id = splits[4][:10] 102 | file_root = "{}/{}" 103 | 104 | im = filename 105 | vel = "{}/{}/velodyne_points/data/{}.bin".format(splits[0], splits[1], im_id) 106 | 107 | if os.path.isfile(data_root + im): 108 | gt_files.append(data_root + vel) 109 | gt_calib.append(data_root + date + "/") 110 | im_sizes.append(cv2.imread(data_root + im).shape[:2]) 111 | im_files.append(data_root + im) 112 | cams.append(2) 113 | else: 114 | num_probs += 1 115 | print("{} missing".format(data_root + im)) 116 | 117 | return gt_files, gt_calib, im_sizes, im_files, cams 118 | 119 | 120 | def load_velodyne_points(file_name): 121 | # adapted from https://github.com/hunse/kitti 122 | points = np.fromfile(file_name, dtype=np.float32).reshape(-1, 4) 123 | points[:, 3] = 1.0 # homogeneous 124 | return points 125 | 126 | 127 | def lin_interp(shape, xyd): 128 | # taken from https://github.com/hunse/kitti 
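# NOTE (added comment, not in the upstream file): lin_interp below calls
# LinearNDInterpolator, which is never imported in this module. Presumably a
# `from scipy.interpolate import LinearNDInterpolator` is expected near the top
# of the file, as in the monodepth/kitti evaluation utilities credited above;
# without it, generate_depth_map(..., interp=True) raises a NameError.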
129 | m, n = shape 130 | ij, d = xyd[:, 1::-1], xyd[:, 2] 131 | f = LinearNDInterpolator(ij, d, fill_value=0) 132 | J, I = np.meshgrid(np.arange(n), np.arange(m)) 133 | IJ = np.vstack([I.flatten(), J.flatten()]).T 134 | disparity = f(IJ).reshape(shape) 135 | return disparity 136 | 137 | 138 | def read_calib_file(path): 139 | # taken from https://github.com/hunse/kitti 140 | float_chars = set("0123456789.e+- ") 141 | data = {} 142 | with open(path, "r") as f: 143 | for line in f.readlines(): 144 | key, value = line.split(":", 1) 145 | value = value.strip() 146 | data[key] = value 147 | if float_chars.issuperset(value): 148 | # try to cast to float array 149 | try: 150 | # NOTE: as reported in ISSUE #224 of Monodepth 151 | # https://github.com/mrharicot/monodepth/issues/224 152 | 153 | # data[key] = np.array(map(float, value.split(" "))) 154 | data[key] = np.array(list(map(float, value.split(" ")))) 155 | 156 | except ValueError: 157 | # casting error: data[key] already eq. value, so pass 158 | pass 159 | 160 | return data 161 | 162 | 163 | def get_focal_length_baseline(calib_dir, cam): 164 | cam2cam = read_calib_file(calib_dir + "calib_cam_to_cam.txt") 165 | P2_rect = cam2cam["P_rect_02"].reshape(3, 4) 166 | P3_rect = cam2cam["P_rect_03"].reshape(3, 4) 167 | 168 | # cam 2 is left of camera 0 -6cm 169 | # cam 3 is to the right +54cm 170 | b2 = P2_rect[0, 3] / -P2_rect[0, 0] 171 | b3 = P3_rect[0, 3] / -P3_rect[0, 0] 172 | baseline = b3 - b2 173 | 174 | if cam == 2: 175 | focal_length = P2_rect[0, 0] 176 | elif cam == 3: 177 | focal_length = P3_rect[0, 0] 178 | 179 | return focal_length, baseline 180 | 181 | 182 | def sub2ind(matrixSize, rowSub, colSub): 183 | m, n = matrixSize 184 | return rowSub * (n - 1) + colSub - 1 185 | 186 | 187 | def generate_depth_map( 188 | calib_dir, velo_file_name, im_shape, cam=2, interp=False, vel_depth=False 189 | ): 190 | # load calibration files 191 | cam2cam = read_calib_file(calib_dir + "calib_cam_to_cam.txt") 192 | velo2cam = read_calib_file(calib_dir + "calib_velo_to_cam.txt") 193 | velo2cam = np.hstack((velo2cam["R"].reshape(3, 3), velo2cam["T"][..., np.newaxis])) 194 | velo2cam = np.vstack((velo2cam, np.array([0, 0, 0, 1.0]))) 195 | 196 | # compute projection matrix velodyne->image plane 197 | R_cam2rect = np.eye(4) 198 | R_cam2rect[:3, :3] = cam2cam["R_rect_00"].reshape(3, 3) 199 | P_rect = cam2cam["P_rect_0" + str(cam)].reshape(3, 4) 200 | P_velo2im = np.dot(np.dot(P_rect, R_cam2rect), velo2cam) 201 | 202 | # load velodyne points and remove all behind image plane (approximation) 203 | # each row of the velodyne data is forward, left, up, reflectance 204 | velo = load_velodyne_points(velo_file_name) 205 | velo = velo[velo[:, 0] >= 0, :] 206 | 207 | # project the points to the camera 208 | velo_pts_im = np.dot(P_velo2im, velo.T).T 209 | velo_pts_im[:, :2] = velo_pts_im[:, :2] / velo_pts_im[:, 2][..., np.newaxis] 210 | 211 | if vel_depth: 212 | velo_pts_im[:, 2] = velo[:, 0] 213 | 214 | # check if in bounds 215 | # use minus 1 to get the exact same value as KITTI matlab code 216 | velo_pts_im[:, 0] = np.round(velo_pts_im[:, 0]) - 1 217 | velo_pts_im[:, 1] = np.round(velo_pts_im[:, 1]) - 1 218 | val_inds = (velo_pts_im[:, 0] >= 0) & (velo_pts_im[:, 1] >= 0) 219 | val_inds = ( 220 | val_inds & (velo_pts_im[:, 0] < im_shape[1]) & (velo_pts_im[:, 1] < im_shape[0]) 221 | ) 222 | velo_pts_im = velo_pts_im[val_inds, :] 223 | 224 | # project to image 225 | depth = np.zeros((im_shape)) 226 | depth[ 227 | velo_pts_im[:, 1].astype(np.int), velo_pts_im[:, 
0].astype(np.int) 228 | ] = velo_pts_im[:, 2] 229 | 230 | # find the duplicate points and choose the closest depth 231 | inds = sub2ind(depth.shape, velo_pts_im[:, 1], velo_pts_im[:, 0]) 232 | dupe_inds = [item for item, count in Counter(inds).items() if count > 1] 233 | for dd in dupe_inds: 234 | pts = np.where(inds == dd)[0] 235 | x_loc = int(velo_pts_im[pts[0], 0]) 236 | y_loc = int(velo_pts_im[pts[0], 1]) 237 | depth[y_loc, x_loc] = velo_pts_im[pts, 2].min() 238 | depth[depth < 0] = 0 239 | 240 | if interp: 241 | # interpolate the depth map to fill in holes 242 | depth_interp = lin_interp(im_shape, velo_pts_im) 243 | return depth, depth_interp 244 | else: 245 | return depth 246 | 247 | 248 | def load_priors(path, num_samples, split): 249 | """ load semantic priors """ 250 | priors = [] 251 | for t_id in range(num_samples): 252 | name = ( 253 | str(t_id).zfill(6) + "_10.png" if split == "kitti" else str(t_id) + ".png" 254 | ) 255 | prior = cv2.imread(os.path.join(path, name), cv2.IMREAD_GRAYSCALE) 256 | assert prior is not None, "{} not found".format(os.path.join(path, name)) 257 | priors.append(prior) 258 | return priors 259 | 260 | 261 | def load_objects_mask(path, num_samples): 262 | """ Load object mask from kitti dataset """ 263 | priors = [] 264 | for t_id in range(num_samples): 265 | name = str(t_id).zfill(6) + "_10.png" 266 | full_name = os.path.join(path, "training", "obj_map", name) 267 | prior = cv2.imread(full_name, cv2.IMREAD_GRAYSCALE) 268 | assert prior is not None, "{} not found".format(full_name) 269 | priors.append(prior) 270 | return priors 271 | -------------------------------------------------------------------------------- /helpers/flow_tool/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2019 LI RUOTENG 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 
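The depth helpers shown earlier (compute_errors, load_gt_disp_kitti and convert_disps_to_depths_kitti in helpers/depth_utils.py) are meant to be chained when scoring disparity predictions on KITTI 2015. Below is a minimal sketch of that chain; it is not part of the repository, and the prediction list, the 80 m depth cap and the validity masking are assumptions mirroring common KITTI evaluation practice rather than this project's test.py.

import numpy as np
from helpers import depth_utils

def evaluate_kitti_depth(kitti_path, pred_disps, max_depth=80.0):
    """pred_disps: 200 predicted disparity maps (in pixels), already resized to GT size."""
    gt_disps = depth_utils.load_gt_disp_kitti(kitti_path)
    gt_depths, pred_depths, _ = depth_utils.convert_disps_to_depths_kitti(
        gt_disps, pred_disps
    )
    errors = []
    for i in range(len(gt_depths)):
        valid = gt_disps[i] > 0                      # score valid GT pixels only
        gt = np.clip(gt_depths[i][valid], 1e-3, max_depth)
        pred = np.clip(pred_depths[i][valid], 1e-3, max_depth)
        errors.append(depth_utils.compute_errors(gt, pred))
    # abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3 averaged over the 200 frames
    return np.mean(np.array(errors), axis=0)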
-------------------------------------------------------------------------------- /helpers/flow_tool/README.md: -------------------------------------------------------------------------------- 1 | Code from https://github.com/liruoteng/OpticalFlowToolkit, licensed with MIT license -------------------------------------------------------------------------------- /helpers/flow_tool/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVLAB-Unibo/omeganet/7e23372923ee53745ba6bbb0c7921d7bb4eea01a/helpers/flow_tool/__init__.py -------------------------------------------------------------------------------- /helpers/utilities.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Fabio Tosi, Filippo Aleotti, Pierluigi Zama Ramirez, Matteo Poggi, 2 | # Samuele Salti, Luigi Di Stefano, Stefano Mattoccia 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | """ Utility functions 18 | """ 19 | from collections import namedtuple 20 | import numpy as np 21 | import tensorflow as tf 22 | import cv2 23 | import os 24 | import matplotlib 25 | import matplotlib.cm 26 | 27 | Label = namedtuple( 28 | "Label", 29 | [ 30 | "name", # The identifier of this label, e.g. 'car', 'person', ... . 31 | # We use them to uniquely name a class 32 | "id", # An integer ID that is associated with this label. 33 | # The IDs are used to represent the label in ground truth images 34 | # An ID of -1 means that this label does not have an ID and thus 35 | # is ignored when creating ground truth images (e.g. license plate). 36 | "trainId", # An integer ID that overwrites the ID above, when creating ground truth 37 | # images for training. 38 | # For training, multiple labels might have the same ID. Then, these labels 39 | # are mapped to the same class in the ground truth images. For the inverse 40 | # mapping, we use the label that is defined first in the list below. 41 | # For example, mapping all void-type classes to the same ID in training, 42 | # might make sense for some approaches. 43 | "category", # The name of the category that this label belongs to 44 | "categoryId", # The ID of this category. Used to create ground truth images 45 | # on category level. 
46 | "hasInstances", # Whether this label distinguishes between single instances or not 47 | "ignoreInEval", # Whether pixels having this class as ground truth label are ignored 48 | # during evaluations or not 49 | "color", # The color of this label 50 | ], 51 | ) 52 | 53 | labels_all = [ 54 | # name id trainId category catId hasInstances ignoreInEval color 55 | Label("unlabeled", 0, 255, "void", 0, False, True, (0, 0, 0)), 56 | Label("ego vehicle", 1, 255, "void", 0, False, True, (0, 0, 0)), 57 | Label("rectification border", 2, 255, "void", 0, False, True, (0, 0, 0)), 58 | Label("out of roi", 3, 255, "void", 0, False, True, (0, 0, 0)), 59 | Label("static", 4, 255, "void", 0, False, True, (0, 0, 0)), 60 | Label("dynamic", 5, 255, "void", 0, False, True, (111, 74, 0)), 61 | Label("ground", 6, 255, "void", 0, False, True, (81, 0, 81)), 62 | Label("road", 7, 0, "flat", 1, False, False, (128, 64, 128)), 63 | Label("sidewalk", 8, 1, "flat", 1, False, False, (244, 35, 232)), 64 | Label("parking", 9, 255, "flat", 1, False, True, (250, 170, 160)), 65 | Label("rail track", 10, 255, "flat", 1, False, True, (230, 150, 140)), 66 | Label("building", 11, 2, "construction", 2, False, False, (70, 70, 70)), 67 | Label("wall", 12, 3, "construction", 2, False, False, (102, 102, 156)), 68 | Label("fence", 13, 4, "construction", 2, False, False, (190, 153, 153)), 69 | Label("guard rail", 14, 255, "construction", 2, False, True, (180, 165, 180)), 70 | Label("bridge", 15, 255, "construction", 2, False, True, (150, 100, 100)), 71 | Label("tunnel", 16, 255, "construction", 2, False, True, (150, 120, 90)), 72 | Label("pole", 17, 5, "object", 3, False, False, (153, 153, 153)), 73 | Label("polegroup", 18, 255, "object", 3, False, True, (153, 153, 153)), 74 | Label("traffic light", 19, 6, "object", 3, False, False, (250, 170, 30)), 75 | Label("traffic sign", 20, 7, "object", 3, False, False, (220, 220, 0)), 76 | Label("vegetation", 21, 8, "nature", 4, False, False, (107, 142, 35)), 77 | Label("terrain", 22, 9, "nature", 4, False, False, (152, 251, 152)), 78 | Label("sky", 23, 10, "sky", 5, False, False, (70, 130, 180)), 79 | Label("person", 24, 11, "human", 6, True, False, (220, 20, 60)), 80 | Label("rider", 25, 12, "human", 6, True, False, (255, 0, 0)), 81 | Label("car", 26, 13, "vehicle", 7, True, False, (0, 0, 142)), 82 | Label("truck", 27, 14, "vehicle", 7, True, False, (0, 0, 70)), 83 | Label("bus", 28, 15, "vehicle", 7, True, False, (0, 60, 100)), 84 | Label("caravan", 29, 255, "vehicle", 7, True, True, (0, 0, 90)), 85 | Label("trailer", 30, 255, "vehicle", 7, True, True, (0, 0, 110)), 86 | Label("train", 31, 16, "vehicle", 7, True, False, (0, 80, 100)), 87 | Label("motorcycle", 32, 17, "vehicle", 7, True, False, (0, 0, 230)), 88 | Label("bicycle", 33, 18, "vehicle", 7, True, False, (119, 11, 32)), 89 | ] 90 | 91 | labels_train = [ 92 | # name id trainId category catId hasInstances ignoreInEval color 93 | Label("road", 0, 0, "flat", 1, False, False, (128, 64, 128)), 94 | Label("sidewalk", 1, 1, "flat", 1, False, False, (244, 35, 232)), 95 | Label("building", 2, 2, "construction", 2, False, False, (70, 70, 70)), 96 | Label("wall", 3, 3, "construction", 2, False, False, (102, 102, 156)), 97 | Label("fence", 4, 4, "construction", 2, False, False, (190, 153, 153)), 98 | Label("pole", 5, 5, "object", 3, False, False, (153, 153, 153)), 99 | Label("traffic light", 6, 6, "object", 3, False, False, (250, 170, 30)), 100 | Label("traffic sign", 7, 7, "object", 3, False, False, (220, 220, 0)), 101 | Label("vegetation", 8, 
8, "nature", 4, False, False, (107, 142, 35)), 102 | Label("terrain", 9, 9, "nature", 4, False, False, (152, 251, 152)), 103 | Label("sky", 10, 10, "sky", 5, False, False, (70, 130, 180)), 104 | Label("person", 11, 11, "human", 6, True, False, (220, 20, 60)), 105 | Label("rider", 12, 12, "human", 6, True, False, (255, 0, 0)), 106 | Label("car", 13, 13, "vehicle", 7, True, False, (0, 0, 142)), 107 | Label("truck", 14, 14, "vehicle", 7, True, False, (0, 0, 70)), 108 | Label("bus", 15, 15, "vehicle", 7, True, False, (0, 60, 100)), 109 | Label("train", 16, 16, "vehicle", 7, True, False, (0, 80, 100)), 110 | Label("motorcycle", 17, 17, "vehicle", 7, True, False, (0, 0, 230)), 111 | Label("bicycle", 18, 18, "vehicle", 7, True, False, (119, 11, 32)), 112 | ] 113 | 114 | 115 | labels_static_dynamic_trainIds = [ 116 | # name id trainId category catId hasInstances ignoreInEval color 117 | Label("road", 0, 0, "flat", 1, False, False, (128, 64, 128)), 118 | Label("sidewalk", 1, 0, "flat", 1, False, False, (244, 35, 232)), 119 | Label("building", 2, 0, "construction", 2, False, False, (70, 70, 70)), 120 | Label("wall", 3, 0, "construction", 2, False, False, (102, 102, 156)), 121 | Label("fence", 4, 0, "construction", 2, False, False, (190, 153, 153)), 122 | Label("pole", 5, 0, "object", 3, False, False, (153, 153, 153)), 123 | Label("traffic light", 6, 0, "object", 3, False, False, (250, 170, 30)), 124 | Label("traffic sign", 7, 0, "object", 3, False, False, (220, 220, 0)), 125 | Label("vegetation", 8, 0, "nature", 4, False, False, (107, 142, 35)), 126 | Label("terrain", 9, 0, "nature", 4, False, False, (152, 251, 152)), 127 | Label("sky", 10, 0, "sky", 5, False, False, (70, 130, 180)), 128 | Label("person", 11, 1, "human", 6, True, False, (220, 20, 60)), 129 | Label("rider", 12, 1, "human", 6, True, False, (255, 0, 0)), 130 | Label("car", 13, 1, "vehicle", 7, True, False, (0, 0, 142)), 131 | Label("truck", 14, 1, "vehicle", 7, True, False, (0, 0, 70)), 132 | Label("bus", 15, 1, "vehicle", 7, True, False, (0, 60, 100)), 133 | Label("train", 16, 1, "vehicle", 7, True, False, (0, 80, 100)), 134 | Label("motorcycle", 17, 1, "vehicle", 7, True, False, (0, 0, 230)), 135 | Label("bicycle", 18, 1, "vehicle", 7, True, False, (119, 11, 32)), 136 | ] 137 | 138 | labels = labels_train 139 | id2Color = {label.id: label.color for label in labels} 140 | id2trainId = {label.id: label.trainId for label in labels_all} 141 | id2name = {label.id: label.name for label in labels} 142 | 143 | labels2priors = np.array( 144 | [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] 145 | ) # labels_static_dynamic_trainIds.trainId 146 | 147 | 148 | def extract_semantic_priors(predictions): 149 | """ Extract priors from a semantic map 150 | Return a new map, with the same shape of the input, with 1 for possibly moving 151 | objects and 0 otherwise. 
152 | Params: 153 | predictions: BxHxWx1 154 | Returns: 155 | priors: BxHxWx1 156 | """ 157 | priors = [] 158 | b, h, w, _ = predictions.shape 159 | for i in range(b): 160 | p = tf.py_func(label_to_priors, [predictions[i]], tf.uint8) 161 | p = tf.cast(p, tf.float32) 162 | priors.append(p) 163 | priors = tf.stack(priors, axis=0) 164 | priors.set_shape(predictions.get_shape()) 165 | return priors 166 | 167 | 168 | def label_to_priors(predictions): 169 | predictions = predictions.astype(np.uint8) 170 | predictions = predictions.squeeze() 171 | priors = labels2priors[predictions] 172 | priors = np.expand_dims(priors, -1) 173 | return priors.astype(np.uint8) 174 | 175 | 176 | def colormap_semantic(pred_sem, dict_id2color=id2Color): 177 | p = tf.squeeze(tf.cast(pred_sem, tf.uint8), axis=-1) 178 | p = tf.stack([p, p, p], axis=-1) 179 | m = tf.zeros_like(p) 180 | for i in range(0, len(dict_id2color)): 181 | mi = tf.multiply(tf.ones_like(p), dict_id2color[i]) 182 | m = tf.where(tf.equal(p, i), mi, m) 183 | return m 184 | 185 | 186 | def get_num_classes(): 187 | return len(labels) 188 | 189 | 190 | def colorize(value, vmin=None, vmax=None, cmap=None): 191 | """ 192 | A utility function for TensorFlow that maps a grayscale image to a matplotlib 193 | colormap for use with TensorBoard image summaries. 194 | By default it will normalize the input value to the range 0..1 before mapping 195 | to a grayscale colormap. 196 | Arguments: 197 | - value: 2D Tensor of shape [height, width] or 3D Tensor of shape 198 | [height, width, 1]. 199 | - vmin: the minimum value of the range used for normalization. 200 | (Default: value minimum) 201 | - vmax: the maximum value of the range used for normalization. 202 | (Default: value maximum) 203 | - cmap: a valid cmap named for use with matplotlib's `get_cmap`. 204 | (Default: 'gray') 205 | Example usage: 206 | ``` 207 | output = tf.random_uniform(shape=[256, 256, 1]) 208 | output_color = colorize(output, vmin=0.0, vmax=1.0, cmap='viridis') 209 | tf.summary.image('output', output_color) 210 | ``` 211 | 212 | Returns a 3D tensor of shape [height, width, 3]. 213 | """ 214 | 215 | # normalize 216 | vmin = tf.reduce_min(value) if vmin is None else vmin 217 | vmax = tf.reduce_max(value) if vmax is None else vmax 218 | value = (value - vmin) / (vmax - vmin) # vmin..vmax 219 | 220 | # squeeze last dim if it exists 221 | value = tf.squeeze(value) 222 | 223 | # quantize 224 | indices = tf.to_int32(tf.round(value * 255)) 225 | 226 | # gather 227 | cm = matplotlib.cm.get_cmap(cmap if cmap is not None else "gray") 228 | colors = tf.constant(cm.colors, dtype=tf.float32) 229 | value = tf.gather(colors, indices) 230 | 231 | return value 232 | 233 | 234 | def count_text_lines(file_path): 235 | f = open(file_path, "r") 236 | lines = f.readlines() 237 | f.close() 238 | return len(lines) 239 | 240 | 241 | def flow_to_color(flow, mask=None, max_flow=None): 242 | """ 243 | From Unflow by Meister et al 244 | https://arxiv.org/pdf/1711.07837.pdf 245 | https://github.com/simonmeister/UnFlow 246 | 247 | Converts flow to 3-channel color image. 248 | Args: 249 | flow: tensor of shape [num_batch, height, width, 2]. 250 | mask: flow validity mask of shape [num_batch, height, width, 1]. 
251 | """ 252 | n = 8 253 | num_batch, height, width, _ = tf.unstack(tf.shape(flow)) 254 | mask = tf.ones([num_batch, height, width, 1]) if mask is None else mask 255 | flow_u, flow_v = tf.unstack(flow, axis=3) 256 | if max_flow is not None: 257 | max_flow = tf.maximum(max_flow, 1) 258 | else: 259 | max_flow = tf.reduce_max(tf.abs(flow * mask)) 260 | mag = tf.sqrt(tf.reduce_sum(tf.square(flow), 3)) 261 | angle = tf.atan2(flow_v, flow_u) 262 | 263 | im_h = tf.mod(angle / (2 * np.pi) + 1.0, 1.0) 264 | im_s = tf.clip_by_value(mag * n / max_flow, 0, 1) 265 | im_v = tf.clip_by_value(n - im_s, 0, 1) 266 | im_hsv = tf.stack([im_h, im_s, im_v], 3) 267 | im = tf.image.hsv_to_rgb(im_hsv) 268 | return im * mask 269 | 270 | 271 | def tf_color_prior(prior): 272 | mapping = {0: (0, 0, 255), 1: (0, 255, 0)} 273 | return colormap_semantic(prior, mapping) 274 | 275 | 276 | def get_height_width(img): 277 | s = tf.shape(img) 278 | h = tf.to_int32(s[1]) 279 | w = tf.to_int32(s[2]) 280 | return h, w 281 | 282 | 283 | def get_priors_or_default(priors, img, params, mode): 284 | return ( 285 | priors 286 | if (params.use_priors and mode == "semantic") 287 | else tf.zeros_like(img[:, :, :, 0:1]) 288 | ) 289 | 290 | 291 | def create_dir(dirname): 292 | """Create a directory if not exists 293 | :param dirname: path of the directory to create 294 | """ 295 | if not os.path.exists(dirname): 296 | os.makedirs(dirname) 297 | 298 | 299 | def mask(img, mask, active): 300 | with tf.variable_scope("mask"): 301 | if active: 302 | return img * mask 303 | return img 304 | 305 | 306 | def flow_resize(flow, out_size, is_scale=True, method=0): 307 | """ 308 | method: 0 mean bilinear, 1 means nearest 309 | """ 310 | flow_size = tf.to_float(tf.shape(flow)[-3:-1]) 311 | b, _, _, c = flow.get_shape().as_list() 312 | flow = tf.image.resize_images(flow, out_size, method=method, align_corners=True) 313 | if is_scale: 314 | scale = tf.to_float(out_size) / flow_size 315 | scale = tf.stack([scale[1], scale[0]]) 316 | flow = tf.multiply(flow, scale) 317 | return flow 318 | 319 | 320 | def color_semantic(semantic_map, mapping=None): 321 | """Color a semantic map in numpy 322 | :param x: input semantic map 323 | :param mapping: optional color scheme. If not set, a default 324 | color scheme will be applied 325 | :return colored: colored semantic map 326 | """ 327 | if mapping is None: 328 | mapping = [ 329 | (128, 64, 128), 330 | (244, 35, 232), 331 | (70, 70, 70), 332 | (102, 102, 156), 333 | (190, 153, 153), 334 | (153, 153, 153), 335 | (250, 170, 30), 336 | (220, 220, 0), 337 | (107, 142, 35), 338 | (152, 251, 152), 339 | (70, 130, 180), 340 | (220, 20, 60), 341 | (255, 0, 0), 342 | (0, 0, 142), 343 | (0, 0, 70), 344 | (0, 60, 100), 345 | (0, 80, 100), 346 | (0, 0, 230), 347 | (119, 11, 32), 348 | ] 349 | h, w = semantic_map.shape[:2] 350 | colored = np.ones([h, w, 3], np.uint8) 351 | for x in range(len(mapping)): 352 | 353 | color = np.ones_like(colored) * mapping[x] 354 | current_sem = np.stack((semantic_map, semantic_map, semantic_map), axis=-1) 355 | index = np.ones_like(current_sem) * x 356 | colored = np.where(current_sem == index, color, colored) 357 | return colored 358 | 359 | 360 | def check_model_exists(ckpt): 361 | """Check if model exists 362 | :param ckpt: path to checkpoint 363 | :return exist: flag. 
True if model exists 364 | """ 365 | expected_data = ckpt + ".data-00000-of-00001" 366 | return os.path.exists(expected_data) 367 | 368 | 369 | def write_kitti_png_flow(dest, flow_data, mask_data=None): 370 | """Save optical flow in KITTI format, ie 16 bit png image" 371 | :param dest: where image will be saved 372 | :param flow_data: optical flow field. Array with shape (H,W,2) 373 | :param mask_data: optional mask 374 | """ 375 | flow_img = np.zeros((flow_data.shape[0], flow_data.shape[1], 3), dtype=np.uint16) 376 | flow_img[:, :, 2] = flow_data[:, :, 0] * 64.0 + 2 ** 15 377 | flow_img[:, :, 1] = flow_data[:, :, 1] * 64.0 + 2 ** 15 378 | if mask_data is None: 379 | mask_data = np.ones_like(flow_img[:, :, 2]) 380 | flow_img[:, :, 0] = mask_data[:, :] 381 | cv2.imwrite(dest, flow_img) 382 | 383 | 384 | def color_motion_mask(mask, color=None): 385 | """Apply a color scheme to a motion mask 386 | :param mask: input motion mask 387 | :param color: RGB tuple, color applied to moving objects. Default (220, 20, 60) 388 | :return final_mask: colored mask, as np.uint8 389 | """ 390 | if color is None: 391 | color = (220, 20, 60) 392 | h, w = mask.shape 393 | ext_mask = np.stack([mask, mask, mask], -1).astype(np.uint8) 394 | color = np.ones_like(ext_mask) * color 395 | index = np.ones_like(ext_mask) * 1.0 396 | final_mask = np.where(ext_mask == index, color, ext_mask).astype(np.uint8) 397 | return final_mask 398 | -------------------------------------------------------------------------------- /networks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVLAB-Unibo/omeganet/7e23372923ee53745ba6bbb0c7921d7bb4eea01a/networks/__init__.py -------------------------------------------------------------------------------- /networks/baseline.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Fabio Tosi, Filippo Aleotti, Pierluigi Zama Ramirez, Matteo Poggi, 2 | # Samuele Salti, Luigi Di Stefano, Stefano Mattoccia 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | """ 18 | Baseline network 19 | We learn to predict depth, pose of the camera, intrinsics and the semantic 20 | """ 21 | 22 | import tensorflow as tf 23 | import os 24 | from networks.ops import * 25 | from networks.general_network import GeneralNetwork 26 | from networks.network_components import * 27 | 28 | 29 | class BaselineNet(GeneralNetwork): 30 | """Baseline network, w/o OFNet and SD-OFNet. 31 | It contains DSNet and CameraNet 32 | """ 33 | 34 | def __init__(self, batch, is_training, params): 35 | """BaselineNet constructor: 36 | :param batch: input of the network. Dictionary 37 | :param is_training: training flag. 
For batchnorm 38 | :params: network settings 39 | """ 40 | super(BaselineNet, self).__init__(batch, is_training, params) 41 | self.name = "MultiViewNetwork" 42 | self.depth_tgt = None 43 | self.disp_tgt = None 44 | self.semantic_tgt = None 45 | 46 | def get_features(self, src_img_1, tgt_img, src_img_2, is_training, scope): 47 | """Extract features from images 48 | :param src_img_1: tensor with src1 image, (B,H,W,3) 49 | :param tgt_img: tensor with tgt image, (B,H,W,3) 50 | :param src_img_2: tensor with src1 image, (B,H,W,3) 51 | :param is_training: training flag. For batchnorm 52 | :param scope: name used in the feature extractor 53 | :return features: list of extracted features 54 | """ 55 | return feature_extractor(src_img_1, tgt_img, src_img_2, is_training, scope) 56 | 57 | def get_DSNet(self, features, classes, is_training): 58 | """Build DSNet, in charge of depth and semantic estimation 59 | :return DSNet: DSNet network 60 | """ 61 | return DSNet(features, classes, is_training) 62 | 63 | def get_CameraNet(self, src_img_1, tgt_img, src_img_2, is_training, scope="pose"): 64 | """Build CameraNet, in charge of pose and intrinsic estimation 65 | :return CameraNet: CameraNet network 66 | """ 67 | features = self.get_features(src_img_1, tgt_img, src_img_2, is_training, scope) 68 | return CameraNet(features, self.is_training) 69 | 70 | def disp_normalize(self, disp): 71 | """Apply spatial normalizer defined in 72 | :param disp: disparity (inverse depth) 73 | :return normalized_disp: tensor with same shape of disp 74 | """ 75 | with tf.variable_scope("disp_normalize"): 76 | return spatial_normalize(disp) 77 | 78 | def disp2depth(self, disp): 79 | """Turn disparity into depth 80 | :param disp: disparity (inverse depth) 81 | :return depth: tensor with same shape of disp 82 | """ 83 | with tf.variable_scope("disp2depth"): 84 | return 1.0 / disp 85 | 86 | def get_rigid_flow(self, depth, pose, intrinsics, pose_index, reversed_pose): 87 | """ 88 | Get rigid flow using depth and pose projection 89 | :param depth: depth estimated by DSNet. Tensor with shape (B,H,W) 90 | :param pose: pose estimated by CameraNet. Tensor with shape (1,2,6) 91 | :param pose_index: index of pose to use 92 | :param reversed_pose: if True, use reversed pose 93 | :return rigid flow: BxHxWx2 rigid optical flow 94 | :raise ValueError: if pose_index is not in [0,1] 95 | """ 96 | with tf.variable_scope("get_rigid_flow"): 97 | if pose_index not in [0, 1]: 98 | raise ValueError("pose index must be in [0,1]") 99 | rigid_flow = compute_rigid_flow( 100 | depth, pose[:, pose_index, :], intrinsics[:, 0, :, :], reversed_pose 101 | ) 102 | return rigid_flow 103 | 104 | def prepare_depth(self, disp): 105 | """ 106 | Turn disp into depth 107 | :param disp: tensor with disparity estimations 108 | """ 109 | with tf.variable_scope("prepare_depth"): 110 | normalized = tf.image.resize_bilinear( 111 | self.disp_normalize(disp), [self.h, self.w] 112 | ) 113 | depth = self.disp2depth(normalized) 114 | depth.set_shape([None, self.params.height, self.params.width, 1]) 115 | depth = tf.squeeze(depth, axis=3) 116 | return depth 117 | 118 | def prepare_disp(self, disp): 119 | """ First, normalization is applied to disp, then the result is 120 | upsampled to (self.params.height, self.params.width). 
121 | :param disp: tensor with shape (B,H,W) 122 | :return upsampled_normalized_disp: tensor with shape (B, self.params.height, self.params.width) 123 | """ 124 | with tf.variable_scope("prepare_disp"): 125 | disp = tf.image.resize_bilinear(self.disp_normalize(disp), [self.h, self.w]) 126 | disp.set_shape([None, self.params.height, self.params.width, 1]) 127 | return disp 128 | 129 | def upsample_semantic(self, semantic): 130 | """Upsample semantic to [self.params.height,self.params.width] 131 | :param semantic: tensor with logits or semantic labels 132 | """ 133 | with tf.variable_scope("upsample_semantic"): 134 | semantic = tf.image.resize_images( 135 | semantic, [self.params.height, self.params.width] 136 | ) 137 | return semantic 138 | 139 | def build_network(self): 140 | """Build baseline network, 141 | composed of DSNet and CameraNet 142 | """ 143 | with tf.variable_scope(self.name): 144 | 145 | self.features = self.get_features( 146 | self.src_img_1, 147 | self.tgt_img, 148 | self.src_img_2, 149 | self.is_training, 150 | scope=None, 151 | ) 152 | self.pred_disp_tgt, self.pred_semantic_logits_tgt = self.get_DSNet( 153 | self.features[1], self.classes, self.is_training 154 | ) 155 | print(" [*] Building DSNet: SUCCESS") 156 | 157 | self.pose, self.intrinsics = self.get_CameraNet( 158 | self.src_img_1, self.tgt_img, self.src_img_2, self.is_training 159 | ) 160 | print(" [*] Building CameraNet: SUCCESS") 161 | 162 | def build_outputs(self): 163 | """ Output generated by the network. 164 | Attributes semantic_tgt, depth_tgt and disp_tgt are updated 165 | """ 166 | with tf.variable_scope("build_baseline_outputs"): 167 | self.semantic_tgt = self.upsample_semantic(self.pred_semantic_logits_tgt) 168 | self.depth_tgt = self.prepare_depth(self.pred_disp_tgt[0]) 169 | self.disp_tgt = self.prepare_disp(self.pred_disp_tgt[0]) 170 | 171 | def get_network_params(self): 172 | """Get network variables to load. 173 | This function is valid only in the case test, since 174 | no Adam state is loaded and training from scratch 175 | is not supported. 176 | Note that also Batchnorm params are loaded 177 | """ 178 | with tf.variable_scope("get_network_params"): 179 | var = [x for x in tf.trainable_variables() if self.name in x.name] 180 | batch_norm_variables = [ 181 | x 182 | for x in tf.all_variables() 183 | if "moving_mean" in x.name or "moving_variance" in x.name 184 | ] 185 | var += batch_norm_variables 186 | return var 187 | -------------------------------------------------------------------------------- /networks/complete_network.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Fabio Tosi, Filippo Aleotti, Pierluigi Zama Ramirez, Matteo Poggi, 2 | # Samuele Salti, Luigi Di Stefano, Stefano Mattoccia 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
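# --- Editor's usage sketch (not part of the original sources) -----------------
# A minimal, hypothetical way to drive the BaselineNet defined above on its own,
# using TF 1.x placeholders shaped like the ones built later in single_inference.py.
# The sizes, feed names and parameter values below are illustrative assumptions.
#
# import tensorflow as tf
# from networks.baseline import BaselineNet
# from networks.general_network import network_parameters
#
# h, w = 192, 640
# params = network_parameters(height=h, width=w, load_only_baseline=True, tau=0.5)
# is_training = tf.placeholder(tf.bool)
# batch = {
#     "src_img_1": tf.placeholder(tf.float32, (1, h, w, 3)),
#     "tgt_img": tf.placeholder(tf.float32, (1, h, w, 3)),
#     "src_img_2": tf.placeholder(tf.float32, (1, h, w, 3)),
# }
# net = BaselineNet(batch, is_training, params)
# net.build()  # builds DSNet + CameraNet, then exposes depth_tgt, disp_tgt, semantic_tgt
# -------------------------------------------------------------------------------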
15 | 16 | 17 | """ 18 | Complete OmegaNet 19 | """ 20 | import tensorflow as tf 21 | import os 22 | 23 | from networks.general_network import GeneralNetwork 24 | from networks.baseline import BaselineNet 25 | from helpers import bilinear_sampler 26 | from networks.selflow.selflow_network import flownet 27 | from helpers.utilities import extract_semantic_priors 28 | 29 | 30 | class OmegaNet(GeneralNetwork): 31 | """OmegaNet. It contains DSNet, CameraNet and SD-OFNet 32 | """ 33 | 34 | def __init__(self, batch, is_training, params): 35 | """OmegaNet constructor: 36 | :param batch: input of the network. Dictionary 37 | :param is_training: training flag. For batchnorm 38 | :params: network settings 39 | """ 40 | super(OmegaNet, self).__init__(batch, is_training, params) 41 | self.name = "OmegaNet" 42 | self.disp = None 43 | self.optical_flow = None 44 | self.semantic_logits = None 45 | self.motion_mask = None 46 | 47 | def build_network(self): 48 | """Build OmegaNet: first, DSNet and CameraNet are instantiated, 49 | then SD-OFNet 50 | """ 51 | self.baselineNet = BaselineNet(self.batch, self.is_training, self.params) 52 | self.baselineNet.build_network() 53 | self.baselineNet.build_outputs() 54 | 55 | # prepare semantic stuff 56 | self.semantic_logits = self.baselineNet.pred_semantic_logits_tgt 57 | self.__semantic = self.prepare_semantic(self.semantic_logits) 58 | self.__priors = extract_semantic_priors(self.__semantic) 59 | self.__dynamic_tgt_mask, self.__static_tgt_mask = self.build_semantic_masks() 60 | 61 | # get rigid flow using depth and pose 62 | self.__sflow_src2_tgt = self.baselineNet.get_rigid_flow( 63 | self.baselineNet.depth_tgt, 64 | self.baselineNet.pose, 65 | self.baselineNet.intrinsics, 66 | pose_index=1, 67 | reversed_pose=False, 68 | ) 69 | 70 | # self-distilled optical flow network 71 | load_flow = not self.params.load_only_baseline 72 | self.__optical_flow_src2_tgt, _ = flownet( 73 | self.tgt_img.shape, 74 | self.src_img_1, 75 | self.tgt_img, 76 | self.src_img_2, 77 | train=False, 78 | trainable=load_flow, 79 | reuse=tf.AUTO_REUSE, 80 | regularizer=None, 81 | is_scale=True, 82 | scope="superflow", 83 | ) 84 | 85 | def prepare_final_motion_mask(self): 86 | """ 87 | :return final_motion_mask: motion binary mask. 1 if pixel is moving 88 | """ 89 | moving_src2_tgt = self.build_moving_probability_mask( 90 | self.__optical_flow_src2_tgt, self.__sflow_src2_tgt 91 | ) 92 | final_motion_mask = self.__dynamic_tgt_mask * tf.where( 93 | moving_src2_tgt > self.params.tau, 94 | tf.ones_like(moving_src2_tgt), 95 | tf.zeros_like(moving_src2_tgt), 96 | ) 97 | return final_motion_mask 98 | 99 | def prepare_semantic(self, logits, height=None, width=None): 100 | """Extract semantic map from logits. 101 | :param logits: semantic logits 102 | :param height: height of image. Optional (default is params.height) 103 | :param width: width of image. 
Optional (default is params.width) 104 | """ 105 | with tf.variable_scope("prepare_semantic"): 106 | if height is None: 107 | height = self.params.height 108 | if width is None: 109 | width = self.params.width 110 | logits = tf.image.resize_images(logits, [height, width]) 111 | semantic = tf.argmax(logits, axis=-1) 112 | semantic = tf.expand_dims(semantic, -1) 113 | semantic = tf.cast(semantic, tf.float32) 114 | return semantic 115 | 116 | def build_outputs(self): 117 | """Build outputs of the network 118 | """ 119 | with tf.variable_scope("build_outputs"): 120 | 121 | self.optical_flow = self.__optical_flow_src2_tgt 122 | self.disp = self.baselineNet.disp_tgt 123 | self.semantic = self.__semantic 124 | self.motion_mask = self.prepare_final_motion_mask() 125 | 126 | def tf_cosine_distance(self, a, b): 127 | """Measure cosine distance between a and b 128 | :param a: tensor 129 | :param b: tensor 130 | :return cosine similarity 131 | """ 132 | normalize_a = tf.nn.l2_normalize(a, -1) 133 | normalize_b = tf.nn.l2_normalize(b, -1) 134 | cos_similarity = tf.reduce_sum( 135 | tf.multiply(normalize_a, normalize_b), axis=-1, keep_dims=True 136 | ) 137 | return (1.0 - cos_similarity) / 2.0 138 | 139 | def get_occlusion_mask_from_rigid_flow(self, rigid_flow): 140 | """Prepare occlusion mask due to rigid motion 141 | :param rigid_flow: Tensor with rigid flow 142 | :return mask: mask of occlusions due to rigid camera motion 143 | """ 144 | with tf.variable_scope("get_occlusion_mask_from_rigid_flow"): 145 | b, h, w, _ = rigid_flow.shape 146 | rigid_flow = tf.stop_gradient(rigid_flow) 147 | mask = bilinear_sampler.flow_warp( 148 | tf.ones([b, h, w, 1], dtype=tf.float32), rigid_flow 149 | ) 150 | mask = tf.clip_by_value(mask, 0.0, 1.0) 151 | return mask 152 | 153 | def build_moving_probability_mask(self, optical_flow, rigid_flow): 154 | """ 155 | Masks of moving objects 156 | If the object is moving, this value should be low. 157 | """ 158 | with tf.variable_scope("build_moving_probability_mask"): 159 | epsylon = 1e-7 160 | optical_flow = tf.stop_gradient(optical_flow) 161 | rigid_flow = tf.stop_gradient(rigid_flow) 162 | normalized_optical_flow = tf.norm( 163 | optical_flow, axis=-1, keep_dims=True, name="optical_flow_norm" 164 | ) 165 | normalized_rigid_flow = tf.norm( 166 | rigid_flow, axis=-1, keep_dims=True, name="rigid_flow_norm" 167 | ) 168 | cosine_distance = self.tf_cosine_distance(optical_flow, rigid_flow) 169 | ratio = ( 170 | epsylon + tf.minimum(normalized_optical_flow, normalized_rigid_flow) 171 | ) / (epsylon + tf.maximum(normalized_optical_flow, normalized_rigid_flow)) 172 | ratio_distance = 1.0 - ratio 173 | moving_probability = tf.maximum(cosine_distance, ratio_distance) 174 | return moving_probability 175 | 176 | def get_network_params(self): 177 | """Load network params. 
178 | In particular, OmegaNet relies on DSNet, Camnet and self-distilled OFNet 179 | """ 180 | with tf.variable_scope("get_network_params"): 181 | baseline_vars = self.baselineNet.get_network_params() 182 | reflownet_vars = [ 183 | x for x in tf.trainable_variables() if "superflow" in x.name 184 | ] 185 | return baseline_vars + reflownet_vars 186 | 187 | def build_semantic_masks(self): 188 | """ 189 | Prepare masks based on semantic priors 190 | :return dynamic_tgt_mask: mask of potentially dinamyc objects 191 | :return static_tgt_mask: mask of potentially static objects 192 | """ 193 | with tf.variable_scope("build_semantic_masks"): 194 | dynamic_tgt_mask = self.__priors 195 | static_tgt_mask = 1.0 - dynamic_tgt_mask 196 | return dynamic_tgt_mask, static_tgt_mask 197 | -------------------------------------------------------------------------------- /networks/general_network.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Fabio Tosi, Filippo Aleotti, Pierluigi Zama Ramirez, Matteo Poggi, 2 | # Samuele Salti, Luigi Di Stefano, Stefano Mattoccia 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """ 17 | General network, superclass for other networks 18 | """ 19 | 20 | from abc import ABCMeta, abstractmethod 21 | import tensorflow as tf 22 | from helpers.utilities import get_num_classes, get_height_width, colormap_semantic 23 | from collections import namedtuple 24 | 25 | network_parameters = namedtuple( 26 | "network_parameters", "height, width, load_only_baseline, tau", 27 | ) 28 | 29 | 30 | class GeneralNetwork(object): 31 | """Template for other networks 32 | """ 33 | 34 | __metaclass__ = ABCMeta 35 | 36 | def __init__(self, batch, is_training, params): 37 | """ Prepare the network and create the graph""" 38 | self.is_training = is_training 39 | self.classes = get_num_classes() 40 | self.params = params 41 | self.src_img_1 = batch["src_img_1"] 42 | self.tgt_img = batch["tgt_img"] 43 | self.src_img_2 = batch["src_img_2"] 44 | self.h, self.w = get_height_width(self.tgt_img) 45 | self.batch = batch 46 | 47 | def build(self): 48 | """ Build the model and the outputs """ 49 | self.build_network() 50 | self.build_outputs() 51 | 52 | @abstractmethod 53 | def build_network(self): 54 | """ Network specification""" 55 | 56 | @abstractmethod 57 | def build_outputs(self): 58 | """ Output generated by the network. """ 59 | 60 | def build_masks(self): 61 | """ Build masks used in the stage """ 62 | -------------------------------------------------------------------------------- /networks/network_components.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Fabio Tosi, Filippo Aleotti, Pierluigi Zama Ramirez, Matteo Poggi, 2 | # Samuele Salti, Luigi Di Stefano, Stefano Mattoccia 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
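# --- Editor's numeric sketch (not part of the original sources) ---------------
# Per-pixel moving probability as computed by OmegaNet.build_moving_probability_mask
# in networks/complete_network.py above: the maximum of the cosine distance between
# the optical and rigid flow vectors and one minus their magnitude ratio. The flow
# vectors below are made-up values.
#
# import numpy as np
# EPS = 1e-7
# def moving_probability(of, rf):
#     of, rf = np.asarray(of, np.float64), np.asarray(rf, np.float64)
#     cos_dist = (1.0 - np.dot(of, rf) / (np.linalg.norm(of) * np.linalg.norm(rf))) / 2.0
#     n_of, n_rf = np.linalg.norm(of), np.linalg.norm(rf)
#     ratio_dist = 1.0 - (EPS + min(n_of, n_rf)) / (EPS + max(n_of, n_rf))
#     return max(cos_dist, ratio_dist)
#
# moving_probability([5.0, 0.0], [5.0, 0.0])   # ~0.0: flows agree, pixel follows camera motion
# moving_probability([5.0, 0.0], [1.0, 0.0])   # 0.8 : magnitudes disagree, likely moving
# moving_probability([5.0, 0.0], [-5.0, 0.0])  # 1.0 : directions disagree, likely moving
# -------------------------------------------------------------------------------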
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from networks.ops import * 17 | from helpers.bilinear_sampler import * 18 | 19 | NUM_FEATURES = 16 20 | FLOW_SCALING = 0.1 21 | DISP_SCALING = 10.0 22 | MIN_DISP = 0.01 23 | POSE_SCALING = 0.01 24 | 25 | 26 | def feature_extractor(src_img_1, tgt_img, src_img_2, is_training, name=None): 27 | """Features extractor 28 | :param src_img_1: image at time t-1. Tensor with shape [1,H,W,3], dtype=tf.float32 29 | :param tgt_img: image at time t. Tensor with shape [1,H,W,3], dtype=tf.float32 30 | :param src_img_2: image at time t+1. Tensor with shape [1,H,W,3], dtype=tf.float32 31 | :param is_training: training flag. For batchnorm 32 | :param name: name of the extractor. If name is not None, the name will be feature_extractor_NAME 33 | """ 34 | batch_norm_params = {"is_training": is_training} 35 | final_name = "feature_extractor" 36 | if name is not None: 37 | final_name = "{}_{}".format(final_name, name) 38 | with tf.variable_scope(final_name): 39 | pyramid_src_img_1 = build_pyramid( 40 | src_img_1, normalizer_params=batch_norm_params 41 | ) 42 | pyramid_tgt_img = build_pyramid(tgt_img, normalizer_params=batch_norm_params) 43 | pyramid_src_img_2 = build_pyramid( 44 | src_img_2, normalizer_params=batch_norm_params 45 | ) 46 | return pyramid_src_img_1, pyramid_tgt_img, pyramid_src_img_2 47 | 48 | 49 | def CameraNet(features, is_training): 50 | """CameraNet 51 | It estimates both the pose and camera intrinsics. 52 | :param features: list of features from [src1, tgt, src2] 53 | :param is_training: training flag. 
For batchnorm 54 | 55 | :return pose_final: tensor with shape (1, 2, 6) 56 | :return intrinsics_mat: tensor with shape (1, 1, 3, 3) 57 | """ 58 | with tf.variable_scope("pose_net"): 59 | batch_norm_params = {"is_training": is_training} 60 | 61 | pyramid_src_img_1 = features[0] 62 | pyramid_tgt_img = features[1] 63 | pyramid_src_img_2 = features[2] 64 | input_batch = tf.concat( 65 | [pyramid_src_img_1[4], pyramid_tgt_img[4], pyramid_src_img_2[4]], axis=3 66 | ) 67 | 68 | with tf.variable_scope("conv1_a"): 69 | conv1_a = conv2d( 70 | input_batch, 71 | NUM_FEATURES * 8, 72 | 3, 73 | 1, 74 | normalizer_params=batch_norm_params, 75 | activation_fn=tf.nn.relu, 76 | ) 77 | with tf.variable_scope("conv1_b"): 78 | conv1_b = conv2d( 79 | conv1_a, 80 | NUM_FEATURES * 8, 81 | 3, 82 | 2, 83 | normalizer_params=batch_norm_params, 84 | activation_fn=tf.nn.relu, 85 | ) 86 | with tf.variable_scope("conv2_a"): 87 | conv2_a = conv2d( 88 | conv1_b, 89 | NUM_FEATURES * 16, 90 | 3, 91 | 1, 92 | normalizer_params=batch_norm_params, 93 | activation_fn=tf.nn.relu, 94 | ) 95 | with tf.variable_scope("conv2_b"): 96 | conv2_b = conv2d( 97 | conv2_a, 98 | NUM_FEATURES * 16, 99 | 3, 100 | 2, 101 | normalizer_params=batch_norm_params, 102 | activation_fn=tf.nn.relu, 103 | ) 104 | 105 | # POSE ESTIMATOR 106 | with tf.variable_scope("pred"): 107 | pose_pred = conv2d( 108 | conv2_b, 12, 1, 1, normalizer_fn=None, activation_fn=None 109 | ) 110 | pose_avg = tf.reduce_mean(pose_pred, [1, 2]) 111 | pose_final = POSE_SCALING * tf.reshape(pose_avg, [-1, 2, 6]) 112 | 113 | # INTRINSIC ESTIMATOR 114 | s = tf.shape(pyramid_tgt_img[0]) 115 | h = tf.to_float(s[1]) 116 | w = tf.to_float(s[2]) 117 | intrinsics_mat = _estimate_intrinsics(conv2_b, w, h) 118 | 119 | return pose_final, intrinsics_mat 120 | 121 | 122 | def _estimate_intrinsics(bottleneck, image_width, image_height): 123 | """Estimate intrinsic 124 | :param bottleneck: feature bottleneck tensor 125 | :param image_width: width of the resized image 126 | :param image_height: height of the resized image 127 | 128 | :return intrinsic_mat: tensor with shape (1, 1, 3, 3) 129 | """ 130 | with tf.variable_scope("intrinsics"): 131 | bottleneck = tf.reduce_mean(bottleneck, axis=[1, 2], keepdims=True) 132 | focal_lengths = tf.squeeze( 133 | tf.contrib.layers.conv2d( 134 | bottleneck, 135 | 2, 136 | [1, 1], 137 | stride=1, 138 | activation_fn=tf.nn.softplus, 139 | weights_regularizer=None, 140 | scope="foci", 141 | ), 142 | axis=(1, 2), 143 | ) * tf.to_float(tf.convert_to_tensor([[image_width, image_height]])) 144 | 145 | offsets = ( 146 | tf.squeeze( 147 | tf.contrib.layers.conv2d( 148 | bottleneck, 149 | 2, 150 | [1, 1], 151 | stride=1, 152 | activation_fn=None, 153 | weights_regularizer=None, 154 | biases_initializer=None, 155 | scope="offsets", 156 | ), 157 | axis=(1, 2), 158 | ) 159 | + 0.5 160 | ) * tf.to_float(tf.convert_to_tensor([[image_width, image_height]])) 161 | 162 | foci = tf.linalg.diag(focal_lengths) 163 | intrinsic_mat = tf.concat([foci, tf.expand_dims(offsets, -1)], axis=2) 164 | batch_size = tf.shape(bottleneck)[0] 165 | last_row = tf.tile([[[0.0, 0.0, 1.0]]], [batch_size, 1, 1]) 166 | intrinsic_mat = tf.concat([intrinsic_mat, last_row], axis=1) 167 | intrinsic_mat = tf.expand_dims(intrinsic_mat, axis=1) 168 | return intrinsic_mat 169 | 170 | 171 | def DSNet(pyramid_tgt_img, classes, is_training): 172 | """DSNet 173 | """ 174 | with tf.variable_scope("monocular_depthnet", reuse=tf.AUTO_REUSE): 175 | 176 | batch_norm_params = {"is_training": is_training} 177 | 178 | # 
SCALE 5 179 | with tf.variable_scope("L5"): 180 | with tf.variable_scope("estimator"): 181 | conv5 = build_estimator( 182 | pyramid_tgt_img[5], normalizer_params=batch_norm_params 183 | ) 184 | with tf.variable_scope("disparity"): 185 | disp5 = get_disp(conv5, normalizer_params=batch_norm_params) 186 | updisp5 = depth_upsampling(disp5, 1) 187 | with tf.variable_scope("upsampler"): 188 | upconv5 = bilinear_upsampling_by_convolution( 189 | conv5, 2, normalizer_params=batch_norm_params 190 | ) 191 | # SCALE 4 192 | with tf.variable_scope("L4"): 193 | with tf.variable_scope("estimator"): 194 | conv4 = build_estimator( 195 | pyramid_tgt_img[4], upconv5, normalizer_params=batch_norm_params 196 | ) 197 | with tf.variable_scope("disparity"): 198 | disp4 = ( 199 | get_disp(conv4, normalizer_params=batch_norm_params) + updisp5[0] 200 | ) 201 | updisp4 = depth_upsampling(disp4, 1) 202 | with tf.variable_scope("upsampler"): 203 | upconv4 = bilinear_upsampling_by_convolution( 204 | conv4, 2, normalizer_params=batch_norm_params 205 | ) 206 | # SCALE 3 207 | with tf.variable_scope("L3"): 208 | with tf.variable_scope("estimator"): 209 | conv3 = build_estimator( 210 | pyramid_tgt_img[3], upconv4, normalizer_params=batch_norm_params 211 | ) 212 | with tf.variable_scope("disparity"): 213 | disp3 = ( 214 | get_disp(conv3, normalizer_params=batch_norm_params) + updisp4[0] 215 | ) 216 | updisp3 = depth_upsampling(disp3, 1) 217 | with tf.variable_scope("upsampler"): 218 | upconv3 = bilinear_upsampling_by_convolution( 219 | conv3, 2, normalizer_params=batch_norm_params 220 | ) 221 | # SCALE 2 222 | with tf.variable_scope("L2"): 223 | with tf.variable_scope("estimator"): 224 | conv2 = build_estimator( 225 | pyramid_tgt_img[2], upconv3, normalizer_params=batch_norm_params 226 | ) 227 | with tf.variable_scope("disparity"): 228 | disp2 = ( 229 | get_disp(conv2, normalizer_params=batch_norm_params) + updisp3[0] 230 | ) 231 | updisp2 = depth_upsampling(disp2, 1) 232 | with tf.variable_scope("upsampler"): 233 | upconv2 = bilinear_upsampling_by_convolution( 234 | conv2, 2, normalizer_params=batch_norm_params 235 | ) 236 | # SCALE 1 237 | with tf.variable_scope("L1"): 238 | with tf.variable_scope("estimator"): 239 | conv1 = build_estimator( 240 | pyramid_tgt_img[1], upconv2, normalizer_params=batch_norm_params 241 | ) 242 | with tf.variable_scope("disparity"): 243 | disp1 = ( 244 | get_disp(conv1, normalizer_params=batch_norm_params) + updisp2[0] 245 | ) 246 | 247 | with tf.variable_scope("semantic"): 248 | sem1 = get_semantic(conv1, classes, normalizer_params=batch_norm_params) 249 | 250 | return [disp1, disp2, disp3, disp4, disp5], sem1 251 | 252 | 253 | def build_pyramid(input_batch, normalizer_params=None, scope="img_pyramid"): 254 | """Pyramidal feature extractor 255 | """ 256 | with tf.variable_scope(scope, reuse=tf.AUTO_REUSE): 257 | features = [] 258 | features.append(input_batch) 259 | 260 | with tf.variable_scope("conv1a"): 261 | conv1a = conv2d( 262 | input_batch, NUM_FEATURES, 3, 2, normalizer_params=normalizer_params 263 | ) 264 | with tf.variable_scope("conv1b"): 265 | conv1b = conv2d( 266 | conv1a, NUM_FEATURES, 3, 1, normalizer_params=normalizer_params 267 | ) 268 | features.append(conv1b) 269 | with tf.variable_scope("conv2a"): 270 | conv2a = conv2d( 271 | conv1b, NUM_FEATURES * 2, 3, 2, normalizer_params=normalizer_params 272 | ) 273 | with tf.variable_scope("conv2b"): 274 | conv2b = conv2d( 275 | conv2a, NUM_FEATURES * 2, 3, 1, normalizer_params=normalizer_params 276 | ) 277 | features.append(conv2b) 
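        # Each conv{N}a below halves the spatial resolution again, so `features` ends up
        # holding [input, 1/2, 1/4, 1/8, 1/16, 1/32] resolution maps, the downsampled ones
        # with NUM_FEATURES * 2**k channels (k = 0..4).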
278 | with tf.variable_scope("conv3a"): 279 | conv3a = conv2d( 280 | conv2b, NUM_FEATURES * 4, 3, 2, normalizer_params=normalizer_params 281 | ) 282 | with tf.variable_scope("conv3b"): 283 | conv3b = conv2d( 284 | conv3a, NUM_FEATURES * 4, 3, 1, normalizer_params=normalizer_params 285 | ) 286 | features.append(conv3b) 287 | with tf.variable_scope("conv4a"): 288 | conv4a = conv2d( 289 | conv3b, NUM_FEATURES * 8, 3, 2, normalizer_params=normalizer_params 290 | ) 291 | with tf.variable_scope("conv4b"): 292 | conv4b = conv2d( 293 | conv4a, NUM_FEATURES * 8, 3, 1, normalizer_params=normalizer_params 294 | ) 295 | features.append(conv4b) 296 | with tf.variable_scope("conv5a"): 297 | conv5a = conv2d( 298 | conv4b, NUM_FEATURES * 16, 3, 2, normalizer_params=normalizer_params 299 | ) 300 | with tf.variable_scope("conv5b"): 301 | conv5b = conv2d( 302 | conv5a, NUM_FEATURES * 16, 3, 1, normalizer_params=normalizer_params 303 | ) 304 | features.append(conv5b) 305 | return features 306 | 307 | 308 | def build_estimator(features, upsampled_disp=None, normalizer_params=None): 309 | """Single scale estimator 310 | """ 311 | with tf.variable_scope("build_estimator"): 312 | if upsampled_disp is not None: 313 | disp2 = tf.concat([features, upsampled_disp], -1) 314 | else: 315 | disp2 = features 316 | with tf.variable_scope("disp-3"): 317 | disp3 = conv2d( 318 | disp2, NUM_FEATURES * 4, 3, 1, normalizer_params=normalizer_params 319 | ) 320 | with tf.variable_scope("disp-4"): 321 | disp4 = conv2d( 322 | disp3, NUM_FEATURES * 3, 3, 1, normalizer_params=normalizer_params 323 | ) 324 | with tf.variable_scope("disp-5"): 325 | disp5 = conv2d( 326 | disp4, NUM_FEATURES * 2, 3, 1, normalizer_params=normalizer_params 327 | ) 328 | with tf.variable_scope("disp-6"): 329 | disp6 = conv2d( 330 | disp5, NUM_FEATURES, 3, 1, normalizer_params=normalizer_params 331 | ) 332 | return disp6 333 | 334 | 335 | def get_disp(x, normalizer_params=None, rates=[1, 1]): 336 | """Disparity prediction layer 337 | """ 338 | with tf.variable_scope("disparity_estimator"): 339 | with tf.variable_scope("conv1"): 340 | conv1 = conv2d( 341 | x, NUM_FEATURES * 4, 3, 1, normalizer_params=normalizer_params 342 | ) 343 | with tf.variable_scope("conv2"): 344 | conv2 = conv2d( 345 | conv1, 346 | NUM_FEATURES * 2, 347 | 3, 348 | 1, 349 | normalizer_params=normalizer_params, 350 | rate=rates[0], 351 | ) 352 | with tf.variable_scope("conv3"): 353 | conv3 = conv2d( 354 | conv2, 355 | NUM_FEATURES, 356 | 3, 357 | 1, 358 | normalizer_params=normalizer_params, 359 | rate=rates[1], 360 | ) 361 | with tf.variable_scope("disparity"): 362 | disparity = ( 363 | DISP_SCALING 364 | * conv2d( 365 | conv3, 1, 3, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None 366 | ) 367 | + MIN_DISP 368 | ) 369 | return disparity 370 | 371 | 372 | def get_semantic(x, classes, normalizer_params=None, rates=[1, 1]): 373 | """Semantic estimator layer 374 | """ 375 | with tf.variable_scope("semantic_estimator"): 376 | with tf.variable_scope("conv1"): 377 | conv1 = conv2d( 378 | x, NUM_FEATURES * 4, 3, 1, normalizer_params=normalizer_params 379 | ) 380 | with tf.variable_scope("conv2"): 381 | conv2 = conv2d( 382 | conv1, 383 | NUM_FEATURES * 2, 384 | 3, 385 | 1, 386 | normalizer_params=normalizer_params, 387 | rate=rates[0], 388 | ) 389 | with tf.variable_scope("conv3"): 390 | conv3 = conv2d( 391 | conv2, 392 | NUM_FEATURES, 393 | 3, 394 | 1, 395 | normalizer_params=normalizer_params, 396 | rate=rates[1], 397 | ) 398 | with tf.variable_scope("disparity"): 399 | sem = conv2d(conv3, 
classes, 3, 1, normalizer_params=normalizer_params) 400 | return sem 401 | -------------------------------------------------------------------------------- /networks/ops.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Fabio Tosi, Filippo Aleotti, Pierluigi Zama Ramirez, Matteo Poggi, 2 | # Samuele Salti, Luigi Di Stefano, Stefano Mattoccia 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | import tensorflow.contrib.slim as slim 20 | 21 | import tensorflow as tf 22 | 23 | 24 | def upsample_nn(x, ratio): 25 | s = x.get_shape().as_list() 26 | h = s[1] 27 | w = s[2] 28 | return tf.image.resize_nearest_neighbor(x, [h * ratio, w * ratio]) 29 | 30 | 31 | def conv2d( 32 | inputs, 33 | num_outputs, 34 | kernel_size, 35 | stride, 36 | normalizer_fn=slim.batch_norm, 37 | activation_fn=tf.nn.relu, 38 | weights_regularizer=slim.l2_regularizer(0.0001), 39 | normalizer_params=True, 40 | padding=(1, 1), 41 | reflect=True, 42 | rate=1, 43 | ): 44 | 45 | if rate > 1: 46 | w_pad, h_pad = (rate, rate) 47 | else: 48 | w_pad, h_pad = tuple(padding) 49 | 50 | if reflect: 51 | inputs = tf.pad( 52 | inputs, [[0, 0], [h_pad, h_pad], [w_pad, w_pad], [0, 0]], "REFLECT" 53 | ) 54 | 55 | return tf.contrib.layers.conv2d( 56 | inputs, 57 | num_outputs, 58 | kernel_size, 59 | stride, 60 | padding="VALID", 61 | normalizer_fn=normalizer_fn, 62 | activation_fn=activation_fn, 63 | weights_regularizer=weights_regularizer, 64 | normalizer_params=normalizer_params, 65 | rate=rate, 66 | ) 67 | 68 | 69 | def upconv( 70 | inputs, 71 | num_outputs, 72 | kernel_size, 73 | stride, 74 | normalizer_fn=slim.batch_norm, 75 | activation_fn=tf.nn.relu, 76 | weights_regularizer=slim.l2_regularizer(0.0001), 77 | normalizer_params=True, 78 | padding=(1, 1), 79 | ): 80 | upsample = upsample_nn(inputs, stride) 81 | return conv2d( 82 | upsample, 83 | num_outputs, 84 | kernel_size, 85 | 1, 86 | padding=padding, 87 | normalizer_fn=normalizer_fn, 88 | activation_fn=activation_fn, 89 | weights_regularizer=weights_regularizer, 90 | normalizer_params=normalizer_params, 91 | ) 92 | 93 | 94 | def gradient_x(img): 95 | gx = img[:, :, :-1, :] - img[:, :, 1:, :] 96 | return gx 97 | 98 | 99 | def gradient_y(img): 100 | gy = img[:, :-1, :, :] - img[:, 1:, :, :] 101 | return gy 102 | 103 | 104 | def L2_norm(x, axis=3, keepdims=True): 105 | curr_offset = 1e-10 106 | l2_norm = tf.norm(tf.abs(x) + curr_offset, axis=axis, keepdims=keepdims) 107 | return l2_norm 108 | 109 | 110 | def spatial_normalize(disp): 111 | with tf.variable_scope("spatial_normalizer"): 112 | _, curr_h, curr_w, curr_c = disp.get_shape().as_list() 113 | disp_mean = tf.reduce_mean(disp, axis=[1, 2, 3], keepdims=True) 114 | disp_mean = tf.tile(disp_mean, [1, curr_h, curr_w, curr_c]) 115 | return disp / disp_mean 116 | 117 | 118 | def post_process_disparity(disp): 119 | _, 
h, w = disp.shape 120 | l_disp = disp[0, :, :] 121 | r_disp = np.fliplr(disp[1, :, :]) 122 | m_disp = 0.5 * (l_disp + r_disp) 123 | l, _ = np.meshgrid(np.linspace(0, 1, w), np.linspace(0, 1, h)) 124 | l_mask = 1.0 - np.clip(20 * (l - 0.05), 0, 1) 125 | r_mask = np.fliplr(l_mask) 126 | return r_mask * l_disp + l_mask * r_disp + (1.0 - l_mask - r_mask) * m_disp 127 | 128 | 129 | def reduce_mean_masked(tensor, mask): 130 | with tf.variable_scope("reduce_mean_masked"): 131 | valid_points = tf.maximum(tf.reduce_sum(mask), 1) 132 | loss = tf.reduce_sum(tensor * mask) / valid_points 133 | return loss 134 | 135 | 136 | def reduce_mean_probability_masked(tensor, mask, probability): 137 | with tf.variable_scope("reduce_mean_masked"): 138 | valid_points = tf.maximum(tf.reduce_sum(mask), 1) 139 | loss = tf.reduce_sum(tensor * mask * probability) / valid_points 140 | return loss 141 | 142 | 143 | # Upsampling layer 144 | def bilinear_upsampling_by_convolution(x, stride, normalizer_params=None): 145 | with tf.variable_scope("bilinear_upsampling_by_convolution"): 146 | f = x.get_shape().as_list()[-1] 147 | return upconv(x, f, 3, stride, normalizer_params=normalizer_params) 148 | 149 | 150 | def depth_upsampling(x, scales): 151 | with tf.variable_scope("depth_upsampling"): 152 | features = [] 153 | for i in range(1, scales + 1): 154 | with tf.variable_scope("upsampler_pred_" + str(i)): 155 | up = tf.image.resize_bilinear( 156 | x, 157 | [ 158 | x.get_shape().as_list()[1] * (2 ** i), 159 | x.get_shape().as_list()[2] * (2 ** i), 160 | ], 161 | ) 162 | features.append(up) 163 | return features 164 | 165 | 166 | def stop_features_gradient(features): 167 | with tf.variable_scope("stop_features_gradient"): 168 | new_features = [] 169 | for img_x_features in features: 170 | new_img_x_features = [] 171 | for feat in img_x_features: 172 | new_img_x_features.append(tf.stop_gradient(feat)) 173 | new_features.append(new_img_x_features) 174 | return new_features 175 | 176 | 177 | def couple_imgs_features(features): 178 | with tf.variable_scope("couple_imgs_features"): 179 | coupled_features = [] 180 | for tgt_feat, src2_feat in zip(features[1], features[2]): 181 | couple_feat = tf.concat([tgt_feat, src2_feat], axis=-1) 182 | coupled_features.append(couple_feat) 183 | return coupled_features 184 | -------------------------------------------------------------------------------- /networks/selflow/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Pengpeng Liu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /networks/selflow/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVLAB-Unibo/omeganet/7e23372923ee53745ba6bbb0c7921d7bb4eea01a/networks/selflow/__init__.py -------------------------------------------------------------------------------- /networks/selflow/selflow_network.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.contrib import slim 3 | from helpers.utilities import flow_resize 4 | from networks.selflow.warp import tf_warp 5 | 6 | 7 | def lrelu(x, leak=0.2, name="leaky_relu"): 8 | return tf.maximum(x, leak * x) 9 | 10 | 11 | def feature_extractor( 12 | x, 13 | train=True, 14 | trainable=True, 15 | reuse=None, 16 | regularizer=None, 17 | name="feature_extractor", 18 | ): 19 | with tf.variable_scope(name, reuse=reuse, regularizer=regularizer): 20 | with slim.arg_scope( 21 | [slim.conv2d], 22 | activation_fn=lrelu, 23 | kernel_size=3, 24 | padding="SAME", 25 | trainable=trainable, 26 | ): 27 | net = {} 28 | net["conv1_1"] = slim.conv2d(x, 16, stride=2, scope="conv1_1") 29 | net["conv1_2"] = slim.conv2d(net["conv1_1"], 16, stride=1, scope="conv1_2") 30 | 31 | net["conv2_1"] = slim.conv2d(net["conv1_2"], 32, stride=2, scope="conv2_1") 32 | net["conv2_2"] = slim.conv2d(net["conv2_1"], 32, stride=1, scope="conv2_2") 33 | 34 | net["conv3_1"] = slim.conv2d(net["conv2_2"], 64, stride=2, scope="conv3_1") 35 | net["conv3_2"] = slim.conv2d(net["conv3_1"], 64, stride=1, scope="conv3_2") 36 | 37 | net["conv4_1"] = slim.conv2d(net["conv3_2"], 96, stride=2, scope="conv4_1") 38 | net["conv4_2"] = slim.conv2d(net["conv4_1"], 96, stride=1, scope="conv4_2") 39 | 40 | net["conv5_1"] = slim.conv2d(net["conv4_2"], 128, stride=2, scope="conv5_1") 41 | net["conv5_2"] = slim.conv2d(net["conv5_1"], 128, stride=1, scope="conv5_2") 42 | 43 | net["conv6_1"] = slim.conv2d(net["conv5_2"], 192, stride=2, scope="conv6_1") 44 | net["conv6_2"] = slim.conv2d(net["conv6_1"], 192, stride=1, scope="conv6_2") 45 | 46 | return net 47 | 48 | 49 | def context_network( 50 | x, 51 | flow, 52 | train=True, 53 | trainable=True, 54 | reuse=None, 55 | regularizer=None, 56 | name="context_network", 57 | ): 58 | x_input = tf.concat([x, flow], axis=-1) 59 | with tf.variable_scope(name, reuse=reuse, regularizer=regularizer): 60 | with slim.arg_scope( 61 | [slim.conv2d], 62 | activation_fn=lrelu, 63 | kernel_size=3, 64 | padding="SAME", 65 | trainable=trainable, 66 | ): 67 | net = {} 68 | net["dilated_conv1"] = slim.conv2d( 69 | x_input, 128, rate=1, scope="dilated_conv1" 70 | ) 71 | net["dilated_conv2"] = slim.conv2d( 72 | net["dilated_conv1"], 128, rate=2, scope="dilated_conv2" 73 | ) 74 | net["dilated_conv3"] = slim.conv2d( 75 | net["dilated_conv2"], 128, rate=4, scope="dilated_conv3" 76 | ) 77 | net["dilated_conv4"] = slim.conv2d( 78 | net["dilated_conv3"], 96, rate=8, scope="dilated_conv4" 79 | ) 80 | net["dilated_conv5"] = slim.conv2d( 81 | net["dilated_conv4"], 64, rate=16, scope="dilated_conv5" 82 | ) 83 | net["dilated_conv6"] = slim.conv2d( 84 | net["dilated_conv5"], 32, rate=1, scope="dilated_conv6" 85 | ) 86 | 
net["dilated_conv7"] = slim.conv2d( 87 | net["dilated_conv6"], 88 | 2, 89 | rate=1, 90 | activation_fn=None, 91 | scope="dilated_conv7", 92 | ) 93 | 94 | refined_flow = net["dilated_conv7"] 95 | return refined_flow 96 | 97 | 98 | def estimator_network( 99 | x1, 100 | cost_volume, 101 | flow, 102 | train=True, 103 | trainable=True, 104 | reuse=None, 105 | regularizer=None, 106 | name="estimator", 107 | ): 108 | net_input = tf.concat([cost_volume, x1, flow], axis=-1) 109 | with tf.variable_scope(name, reuse=reuse, regularizer=regularizer): 110 | with slim.arg_scope( 111 | [slim.conv2d], 112 | activation_fn=lrelu, 113 | kernel_size=3, 114 | padding="SAME", 115 | trainable=trainable, 116 | ): 117 | net = {} 118 | net["conv1"] = slim.conv2d(net_input, 128, scope="conv1") 119 | net["conv2"] = slim.conv2d(net["conv1"], 128, scope="conv2") 120 | net["conv3"] = slim.conv2d(net["conv2"], 96, scope="conv3") 121 | net["conv4"] = slim.conv2d(net["conv3"], 64, scope="conv4") 122 | net["conv5"] = slim.conv2d(net["conv4"], 32, scope="conv5") 123 | net["conv6"] = slim.conv2d( 124 | net["conv5"], 2, activation_fn=None, scope="conv6" 125 | ) 126 | 127 | return net 128 | 129 | 130 | def compute_cost_volume(x1, x2, H, W, channel, d=9): 131 | x1 = tf.nn.l2_normalize(x1, axis=3) 132 | x2 = tf.nn.l2_normalize(x2, axis=3) 133 | 134 | x2_patches = tf.extract_image_patches( 135 | x2, [1, d, d, 1], strides=[1, 1, 1, 1], rates=[1, 1, 1, 1], padding="SAME" 136 | ) 137 | x2_patches = tf.reshape(x2_patches, [-1, H, W, d, d, channel]) 138 | x1_reshape = tf.reshape(x1, [-1, H, W, 1, 1, channel]) 139 | x1_dot_x2 = tf.multiply(x1_reshape, x2_patches) 140 | 141 | cost_volume = tf.reduce_sum(x1_dot_x2, axis=-1) 142 | # cost_volume = tf.reduce_mean(x1_dot_x2, axis=-1) 143 | cost_volume = tf.reshape(cost_volume, [-1, H, W, d * d]) 144 | return cost_volume 145 | 146 | 147 | def estimator( 148 | x0, 149 | x1, 150 | x2, 151 | flow_fw, 152 | flow_bw, 153 | train=True, 154 | trainable=True, 155 | reuse=None, 156 | regularizer=None, 157 | name="estimator", 158 | ): 159 | # warp x2 according to flow 160 | if train: 161 | x_shape = x1.get_shape().as_list() 162 | else: 163 | x_shape = tf.shape(x1) 164 | H = x_shape[1] 165 | W = x_shape[2] 166 | channel = x_shape[3] 167 | x2_warp = tf_warp(x2, flow_fw, H, W) 168 | x0_warp = tf_warp(x0, flow_bw, H, W) 169 | 170 | # ---------------cost volume----------------- 171 | 172 | cost_volume_fw = compute_cost_volume(x1, x2_warp, H, W, channel, d=9) 173 | cost_volume_bw = compute_cost_volume(x1, x0_warp, H, W, channel, d=9) 174 | 175 | cv_concat_fw = tf.concat([cost_volume_fw, cost_volume_bw], -1) 176 | cv_concat_bw = tf.concat([cost_volume_bw, cost_volume_fw], -1) 177 | 178 | flow_concat_fw = tf.concat([flow_fw, -flow_bw], -1) 179 | flow_concat_bw = tf.concat([flow_bw, -flow_fw], -1) 180 | 181 | net_fw = estimator_network( 182 | x1, 183 | cv_concat_fw, 184 | flow_concat_fw, 185 | train=train, 186 | trainable=trainable, 187 | reuse=reuse, 188 | regularizer=regularizer, 189 | name=name, 190 | ) 191 | net_bw = estimator_network( 192 | x1, 193 | cv_concat_bw, 194 | flow_concat_bw, 195 | train=train, 196 | trainable=trainable, 197 | reuse=True, 198 | regularizer=regularizer, 199 | name=name, 200 | ) 201 | 202 | return net_fw, net_bw 203 | 204 | 205 | def pyramid_processing_three_frame( 206 | shape, 207 | src1_features, 208 | tgt_features, 209 | src2_features, 210 | train=True, 211 | trainable=True, 212 | reuse=None, 213 | regularizer=None, 214 | is_scale=True, 215 | ): 216 | x_shape = 
tf.shape(tgt_features["conv6_2"]) 217 | initial_flow_fw = tf.zeros( 218 | [x_shape[0], x_shape[1], x_shape[2], 2], 219 | dtype=tf.float32, 220 | name="initial_flow_fw", 221 | ) 222 | initial_flow_bw = tf.zeros( 223 | [x_shape[0], x_shape[1], x_shape[2], 2], 224 | dtype=tf.float32, 225 | name="initial_flow_bw", 226 | ) 227 | flow_fw = {} 228 | flow_bw = {} 229 | net_fw, net_bw = estimator( 230 | src1_features["conv6_2"], 231 | tgt_features["conv6_2"], 232 | src2_features["conv6_2"], 233 | initial_flow_fw, 234 | initial_flow_bw, 235 | train=train, 236 | trainable=trainable, 237 | reuse=reuse, 238 | regularizer=regularizer, 239 | name="estimator_level_6", 240 | ) 241 | flow_fw["level_6"] = net_fw["conv6"] 242 | flow_bw["level_6"] = net_bw["conv6"] 243 | 244 | for i in range(4): 245 | feature_name = "conv%d_2" % (5 - i) 246 | level = "level_%d" % (5 - i) 247 | feature_size = tf.shape(tgt_features[feature_name])[1:3] 248 | 249 | initial_flow_fw = flow_resize( 250 | flow_fw["level_%d" % (6 - i)], feature_size, is_scale=is_scale 251 | ) 252 | initial_flow_bw = flow_resize( 253 | flow_bw["level_%d" % (6 - i)], feature_size, is_scale=is_scale 254 | ) 255 | 256 | net_fw, net_bw = estimator( 257 | src1_features[feature_name], 258 | tgt_features[feature_name], 259 | src2_features[feature_name], 260 | initial_flow_fw, 261 | initial_flow_bw, 262 | train=train, 263 | trainable=trainable, 264 | reuse=reuse, 265 | regularizer=regularizer, 266 | name="estimator_level_%d" % (5 - i), 267 | ) 268 | flow_fw[level] = net_fw["conv6"] 269 | flow_bw[level] = net_bw["conv6"] 270 | 271 | flow_concat_fw = tf.concat([flow_fw["level_2"], -flow_bw["level_2"]], -1) 272 | flow_concat_bw = tf.concat([flow_bw["level_2"], -flow_fw["level_2"]], -1) 273 | 274 | x_feature = tf.concat([net_fw["conv5"], net_bw["conv5"]], axis=-1) 275 | flow_fw["refined"] = context_network( 276 | x_feature, 277 | flow_concat_fw, 278 | train=train, 279 | trainable=trainable, 280 | reuse=reuse, 281 | regularizer=regularizer, 282 | name="context_network", 283 | ) 284 | flow_size = shape[1:3] 285 | flow_fw["full_res"] = flow_resize(flow_fw["refined"], flow_size, is_scale=is_scale) 286 | 287 | x_feature = tf.concat([net_bw["conv5"], net_fw["conv5"]], axis=-1) 288 | flow_bw["refined"] = context_network( 289 | x_feature, 290 | flow_concat_bw, 291 | train=train, 292 | trainable=trainable, 293 | reuse=True, 294 | regularizer=regularizer, 295 | name="context_network", 296 | ) 297 | flow_bw["full_res"] = flow_resize(flow_bw["refined"], flow_size, is_scale=is_scale) 298 | 299 | return flow_fw, flow_bw 300 | 301 | 302 | def flownet( 303 | shape, 304 | src1, 305 | tgt, 306 | src2, 307 | train=True, 308 | trainable=True, 309 | reuse=None, 310 | regularizer=None, 311 | is_scale=True, 312 | scope="flownet", 313 | ): 314 | """ Get the flow 315 | Returns: 316 | forward flow between tgt and src2, backward flow between tgt and src1 317 | Both flows are tgt aligned 318 | """ 319 | with tf.variable_scope(scope, reuse=reuse): 320 | src1_features = feature_extractor( 321 | src1, 322 | train=train, 323 | trainable=trainable, 324 | reuse=reuse, 325 | regularizer=regularizer, 326 | name="feature_extractor", 327 | ) 328 | tgt_features = feature_extractor( 329 | tgt, 330 | train=train, 331 | trainable=trainable, 332 | reuse=True, 333 | regularizer=regularizer, 334 | name="feature_extractor", 335 | ) 336 | src2_features = feature_extractor( 337 | src2, 338 | train=train, 339 | trainable=trainable, 340 | reuse=True, 341 | regularizer=regularizer, 342 | name="feature_extractor", 
343 | ) 344 | 345 | flow_src2_tgt, flow_src1_tgt = pyramid_processing_three_frame( 346 | shape, 347 | src1_features, 348 | tgt_features, 349 | src2_features, 350 | train=train, 351 | trainable=trainable, 352 | reuse=reuse, 353 | regularizer=regularizer, 354 | is_scale=is_scale, 355 | ) 356 | 357 | return flow_src2_tgt["full_res"], flow_src1_tgt["full_res"] 358 | -------------------------------------------------------------------------------- /networks/selflow/warp.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def get_pixel_value(img, x, y): 5 | """ 6 | Utility function to get pixel value for coordinate 7 | vectors x and y from a 4D tensor image. 8 | Input 9 | ----- 10 | - img: tensor of shape (B, H, W, C) 11 | - x: flattened tensor of shape (B*H*W, ) 12 | - y: flattened tensor of shape (B*H*W, ) 13 | Returns 14 | ------- 15 | - output: tensor of shape (B, H, W, C) 16 | """ 17 | shape = tf.shape(x) 18 | batch_size = shape[0] 19 | height = shape[1] 20 | width = shape[2] 21 | 22 | batch_idx = tf.range(0, batch_size) 23 | batch_idx = tf.reshape(batch_idx, (batch_size, 1, 1)) 24 | b = tf.tile(batch_idx, (1, height, width)) 25 | 26 | indices = tf.stack([b, y, x], 3) 27 | 28 | return tf.gather_nd(img, indices) 29 | 30 | 31 | def tf_warp(img, flow, H, W): 32 | # H = 256 33 | # W = 256 34 | x, y = tf.meshgrid(tf.range(W), tf.range(H)) 35 | x = tf.expand_dims(x, 0) 36 | x = tf.expand_dims(x, -1) 37 | 38 | y = tf.expand_dims(y, 0) 39 | y = tf.expand_dims(y, -1) 40 | 41 | x = tf.cast(x, tf.float32) 42 | y = tf.cast(y, tf.float32) 43 | grid = tf.concat([x, y], axis=-1) 44 | # print grid.shape 45 | flows = grid + flow 46 | # print(flows.shape) 47 | max_y = tf.cast(H - 1, tf.int32) 48 | max_x = tf.cast(W - 1, tf.int32) 49 | zero = tf.zeros([], dtype=tf.int32) 50 | 51 | x = flows[:, :, :, 0] 52 | y = flows[:, :, :, 1] 53 | x0 = x 54 | y0 = y 55 | x0 = tf.cast(x0, tf.int32) 56 | x1 = x0 + 1 57 | y0 = tf.cast(y0, tf.int32) 58 | y1 = y0 + 1 59 | 60 | # clip to range [0, H/W] to not violate img boundaries 61 | x0 = tf.clip_by_value(x0, zero, max_x) 62 | x1 = tf.clip_by_value(x1, zero, max_x) 63 | y0 = tf.clip_by_value(y0, zero, max_y) 64 | y1 = tf.clip_by_value(y1, zero, max_y) 65 | 66 | # get pixel value at corner coords 67 | Ia = get_pixel_value(img, x0, y0) 68 | Ib = get_pixel_value(img, x0, y1) 69 | Ic = get_pixel_value(img, x1, y0) 70 | Id = get_pixel_value(img, x1, y1) 71 | 72 | # recast as float for delta calculation 73 | x0 = tf.cast(x0, tf.float32) 74 | x1 = tf.cast(x1, tf.float32) 75 | y0 = tf.cast(y0, tf.float32) 76 | y1 = tf.cast(y1, tf.float32) 77 | 78 | # calculate deltas 79 | wa = (x1 - x) * (y1 - y) 80 | wb = (x1 - x) * (y - y0) 81 | wc = (x - x0) * (y1 - y) 82 | wd = (x - x0) * (y - y0) 83 | 84 | # add dimension for addition 85 | wa = tf.expand_dims(wa, axis=3) 86 | wb = tf.expand_dims(wb, axis=3) 87 | wc = tf.expand_dims(wc, axis=3) 88 | wd = tf.expand_dims(wd, axis=3) 89 | 90 | # compute output 91 | out = tf.add_n([wa * Ia, wb * Ib, wc * Ic, wd * Id]) 92 | return out 93 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pandas==0.24.2 2 | tqdm==4.36.1 3 | opencv-python==4.2.0.34 4 | matplotlib==3.0.3 5 | numpy==1.16.4 6 | tensorflow-gpu==1.8.0 7 | Pillow==6.1.0 8 | pypng==0.0.20 9 | pfm==0.6.0 10 | scipy==1.1.0 
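# --- Editor's usage sketch (not part of the original sources) -----------------
# Minimal example of the bilinear warping helper defined in networks/selflow/warp.py
# above, under the pinned TensorFlow 1.8 environment listed in requirements.txt.
# The image size and the constant flow value are illustrative assumptions.
#
# import numpy as np
# import tensorflow as tf
# from networks.selflow.warp import tf_warp
#
# H, W = 192, 640
# img = tf.placeholder(tf.float32, (1, H, W, 3))
# flow = tf.fill((1, H, W, 2), 2.0)        # displace every pixel by (+2, +2)
# warped = tf_warp(img, flow, H, W)        # bilinear lookup at meshgrid + flow
# with tf.Session() as sess:
#     out = sess.run(warped, {img: np.random.rand(1, H, W, 3).astype(np.float32)})
# -------------------------------------------------------------------------------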
-------------------------------------------------------------------------------- /single_inference.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Fabio Tosi, Filippo Aleotti, Pierluigi Zama Ramirez, Matteo Poggi, 2 | # Samuele Salti, Luigi Di Stefano, Stefano Mattoccia 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """ 17 | Run OmegaNet in a one-shot way: 18 | Given a single tgt image or three images, we run OmegaNet to get the results 19 | for a set of tasks. 20 | At the end, colored images will be saved in the destinatio folder. 21 | """ 22 | from __future__ import division 23 | import tensorflow as tf 24 | import cv2 25 | import numpy as np 26 | import os 27 | import argparse 28 | import matplotlib.pyplot as plt 29 | from helpers import utilities 30 | from helpers.flow_tool import flowlib 31 | from networks import complete_network 32 | from networks import general_network 33 | from tensorflow.python.util import deprecation 34 | 35 | # disable future warnings and info messages for this demo 36 | deprecation._PRINT_DEPRECATION_WARNINGS = False 37 | os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" 38 | 39 | 40 | parser = argparse.ArgumentParser(description="Single shot estimation") 41 | parser.add_argument("--tgt", type=str, help="path to t0 RGB image", required=True) 42 | parser.add_argument( 43 | "--src1", 44 | type=str, 45 | help="path to src_1 RGB image (required in case of optical flow)", 46 | default=None, 47 | ) 48 | parser.add_argument( 49 | "--src2", 50 | type=str, 51 | help="path to src_2 RGB image (required in case of optical flow)", 52 | default=None, 53 | ) 54 | parser.add_argument( 55 | "--tasks", 56 | nargs="+", 57 | type=str, 58 | help="tasks to perform", 59 | default=["inverse_depth", "flow", "semantic", "motion_mask"], 60 | ) 61 | parser.add_argument( 62 | "--ckpt", type=str, help="path to complete omeganet checkpoint", required=True 63 | ) 64 | parser.add_argument("--height", type=int, help="height of resized image", default=192) 65 | parser.add_argument("--width", type=int, help="width of resized image", default=640) 66 | parser.add_argument( 67 | "--tau", 68 | type=float, 69 | help="tau threshold in the paper. For motion segmentation at testing time", 70 | default=0.5, 71 | ) 72 | 73 | parser.add_argument("--dest", type=str, help="where save results", default="./results") 74 | parser.add_argument("--cpu", action="store_true", help="run on cpu") 75 | 76 | opts = parser.parse_args() 77 | 78 | if opts.cpu: 79 | os.environ["CUDA_VISIBLE_DEVICES"] = "-1" 80 | 81 | 82 | def prepare_input(): 83 | """Prepare input for the network 84 | :return src1: src1 image, resized at opts.height x opts.width 85 | :return src1: tgt image, resized at opts.height x opts.width 86 | :return src1: src2 image, resized at opts.height x opts.width 87 | :return original_tgt: original tgt image, not resize. 
For motion mask blending 88 | :return height: height of original image 89 | :return width: width of the original image 90 | In case of single depth or semantic, src1 and src2 are equal to tgt 91 | """ 92 | 93 | expected_more_images = False 94 | 95 | if not os.path.isfile(opts.tgt): 96 | raise ValueError("Cannot find tgt image:{}".format(opts.tgt)) 97 | 98 | if "flow" in opts.tasks or "motion_mask" in opts.tasks: 99 | if opts.src1 is None or opts.src2 is None: 100 | raise ValueError( 101 | "Expected src1 and src2 for optical flow and motion estimation, but are None" 102 | ) 103 | if not os.path.isfile(opts.src1): 104 | raise ValueError("Image src1 not found") 105 | if not os.path.isfile(opts.src2): 106 | raise ValueError("Image src2 not found") 107 | expected_more_images = True 108 | else: 109 | if not os.path.isfile(opts.tgt): 110 | raise ValueError("Cannot find tgt:{}".format(opts.tgt)) 111 | if opts.dest is not None: 112 | utilities.create_dir(opts.dest) 113 | 114 | tgt = cv2.imread(opts.tgt) 115 | tgt = cv2.cvtColor(tgt, cv2.COLOR_BGR2RGB) 116 | original_tgt = None 117 | if "motion_mask" in opts.tasks: 118 | original_tgt = tgt 119 | 120 | tgt = tgt / 255.0 121 | 122 | if expected_more_images: 123 | src1 = cv2.imread(opts.src1) 124 | src1 = cv2.cvtColor(src1, cv2.COLOR_BGR2RGB) 125 | src1 = src1 / 255.0 126 | 127 | if src1.shape != tgt.shape: 128 | raise ValueError("tgt and src1 have different shapes") 129 | 130 | src2 = cv2.imread(opts.src2) 131 | src2 = cv2.cvtColor(src2, cv2.COLOR_BGR2RGB) 132 | src2 = src2 / 255.0 133 | 134 | if src2.shape != tgt.shape: 135 | raise ValueError("tgt and src2 have different shapes") 136 | 137 | else: 138 | # NOTE: in case of src1 and src2 are useless, 139 | # we feed the tensor_src1 and tensor_src2 placeholders 140 | # with tgt one 141 | src1 = tgt 142 | src2 = tgt 143 | 144 | height, width = tgt.shape[0:2] 145 | 146 | src1 = cv2.resize(src1, (opts.width, opts.height)) 147 | tgt = cv2.resize(tgt, (opts.width, opts.height)) 148 | src2 = cv2.resize(src2, (opts.width, opts.height)) 149 | 150 | src1 = np.expand_dims(src1, 0).astype(np.float32) 151 | tgt = np.expand_dims(tgt, 0).astype(np.float32) 152 | src2 = np.expand_dims(src2, 0).astype(np.float32) 153 | return src1, tgt, src2, original_tgt, height, width 154 | 155 | 156 | def main(_): 157 | """Run the inference 158 | """ 159 | model_exists = utilities.check_model_exists(opts.ckpt) 160 | if not model_exists: 161 | raise ValueError("Model not found") 162 | src1, tgt, src2, original_tgt, height, width = prepare_input() 163 | output_tensors = [] 164 | 165 | print(" [*] Session creation: SUCCESS") 166 | config = tf.ConfigProto(allow_soft_placement=True) 167 | sess = tf.Session(config=config) 168 | 169 | training_flag = tf.placeholder(tf.bool) 170 | 171 | tensor_src1 = tf.placeholder( 172 | tf.float32, shape=(1, opts.height, opts.width, 3), name="src1" 173 | ) 174 | tensor_tgt = tf.placeholder( 175 | tf.float32, shape=(1, opts.height, opts.width, 3), name="tgt" 176 | ) 177 | tensor_src2 = tf.placeholder( 178 | tf.float32, shape=(1, opts.height, opts.width, 3), name="src2" 179 | ) 180 | batch = {"src_img_1": tensor_src1, "tgt_img": tensor_tgt, "src_img_2": tensor_src2} 181 | 182 | network_params = general_network.network_parameters( 183 | height=opts.height, width=opts.width, load_only_baseline=False, tau=opts.tau, 184 | ) 185 | network = complete_network.OmegaNet( 186 | batch, is_training=training_flag, params=network_params 187 | ) 188 | network.build() 189 | var_list = network.get_network_params() 190 | 
saver = tf.train.Saver(var_list=var_list) 191 | 192 | init_op = tf.group( 193 | tf.global_variables_initializer(), tf.local_variables_initializer() 194 | ) 195 | sess.run(init_op) 196 | coordinator = tf.train.Coordinator() 197 | threads = tf.train.start_queue_runners(sess=sess, coord=coordinator) 198 | 199 | saver.restore(sess, opts.ckpt) 200 | print(" [*] Load model: SUCCESS") 201 | 202 | index = 0 203 | output_mapping = {} 204 | 205 | if "inverse_depth" in opts.tasks: 206 | inverse_depth = tf.image.resize_images(network.disp, [height, width]) 207 | output_tensors.append(inverse_depth) 208 | output_mapping[index] = "inverse_depth" 209 | index += 1 210 | 211 | if "semantic" in opts.tasks: 212 | semantic = network.prepare_semantic( 213 | network.semantic_logits, height=height, width=width 214 | ) 215 | output_tensors.append(semantic) 216 | output_mapping[index] = "semantic" 217 | index += 1 218 | 219 | if "flow" in opts.tasks: 220 | optical_flow = tf.image.resize_images(network.optical_flow, [height, width]) 221 | output_tensors.append(optical_flow) 222 | output_mapping[index] = "flow" 223 | index += 1 224 | 225 | if "motion_mask" in opts.tasks: 226 | motion_mask = tf.image.resize_images( 227 | network.motion_mask, 228 | [height, width], 229 | method=tf.image.ResizeMethod.NEAREST_NEIGHBOR, 230 | ) 231 | output_tensors.append(motion_mask) 232 | output_mapping[index] = "motion_mask" 233 | index += 1 234 | 235 | results = sess.run( 236 | output_tensors, 237 | feed_dict={ 238 | training_flag: False, 239 | tensor_src1: src1, 240 | tensor_tgt: tgt, 241 | tensor_src2: src2, 242 | }, 243 | ) 244 | 245 | name = os.path.basename(opts.tgt) 246 | extension = name.split(".")[-1] 247 | name = name.replace(extension, "png") 248 | dest = os.path.join(opts.dest, "{}" + name) 249 | 250 | for index, output in enumerate(results): 251 | output = output.squeeze() 252 | task = output_mapping[index] 253 | 254 | if task == "inverse_depth": 255 | plt.imsave( 256 | dest.format("inverse_depth_"), output, cmap="magma", 257 | ) 258 | 259 | if task == "flow": 260 | scaling_w = width / opts.width 261 | scaling_h = height / opts.height 262 | output *= np.tile( 263 | np.array((scaling_w, scaling_h), dtype=np.float32), (height, width, 1) 264 | ) 265 | flow_as_img = flowlib.flow_to_image(output) 266 | flow_as_img = cv2.cvtColor(flow_as_img, cv2.COLOR_RGB2BGR) 267 | cv2.imwrite(dest.format("flow_"), flow_as_img) 268 | 269 | if task == "semantic": 270 | colored_semantic_map = utilities.color_semantic(output) 271 | colored_semantic = cv2.cvtColor( 272 | colored_semantic_map.astype(np.uint8), cv2.COLOR_RGB2BGR 273 | ) 274 | cv2.imwrite(dest.format("semantic_"), colored_semantic) 275 | 276 | if task == "motion_mask": 277 | colored_motion_mask = utilities.color_motion_mask(output) 278 | blended_image = cv2.addWeighted( 279 | colored_motion_mask, 0.9, original_tgt, 0.8, 0.0, 280 | ) 281 | blended_image = cv2.cvtColor( 282 | blended_image.astype(np.uint8), cv2.COLOR_BGR2RGB 283 | ) 284 | cv2.imwrite(dest.format("moving_objects_"), blended_image) 285 | 286 | print("{} outputs have been produced in {} folder".format(index + 1, opts.dest)) 287 | sess.close() 288 | coordinator.request_stop() 289 | coordinator.join(threads) 290 | 291 | 292 | if __name__ == "__main__": 293 | tf.app.run() 294 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Fabio Tosi, Filippo Aleotti, Pierluigi Zama Ramirez, 
Matteo Poggi, 2 | # Samuele Salti, Luigi Di Stefano, Stefano Mattoccia 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """ 17 | Test your network on a specific task 18 | """ 19 | 20 | import argparse 21 | import tensorflow as tf 22 | import numpy as np 23 | import os 24 | from dataloaders import factory as dataloader_factory 25 | from dataloaders.general_dataloader import dataloader_parameters 26 | from testers import factory as tester_factory 27 | from tensorflow.python.util import deprecation 28 | from networks import general_network 29 | from networks import complete_network 30 | from helpers import utilities 31 | 32 | # disable future warnings and info messages for this demo 33 | deprecation._PRINT_DEPRECATION_WARNINGS = False 34 | os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" 35 | 36 | parser = argparse.ArgumentParser(description="Test your network") 37 | 38 | parser.add_argument( 39 | "--task", 40 | type=str, 41 | default="depth", 42 | help="task to test", 43 | choices=["depth", "semantic", "flow", "mask"], 44 | ) 45 | parser.add_argument("--datapath", type=str, help="path to data", required=True) 46 | parser.add_argument("--ckpt", type=str, help="path to checkpoint", required=True) 47 | parser.add_argument( 48 | "--filenames_file", 49 | type=str, 50 | help="path to filenames file", 51 | default="filenames/eigen_test.txt", 52 | ) 53 | parser.add_argument("--height", type=int, help="height of resized image", default=192) 54 | parser.add_argument("--width", type=int, help="width of resized image", default=640) 55 | parser.add_argument( 56 | "--dest", type=str, help="where save artifacts", default="./artifacts" 57 | ) 58 | parser.add_argument( 59 | "--load_only_baseline", 60 | action="store_true", 61 | help="if set, load only Baseline (CameraNet+DSNet). Otherwise, full OmegaNet will be loaded", 62 | ) 63 | parser.add_argument( 64 | "--cpu", help="the network runs on CPU if enabled", action="store_true" 65 | ) 66 | parser.add_argument( 67 | "--tau", 68 | type=float, 69 | help="tau threshold in the paper. 
For motion segmentation at testing time", 70 | default=0.5, 71 | ) 72 | 73 | args = parser.parse_args() 74 | 75 | 76 | if args.cpu: 77 | os.environ["CUDA_VISIBLE_DEVICES"] = "-1" 78 | 79 | 80 | def configure_parameters(): 81 | """Prepare configurations for Network, Dataloader and Tester 82 | :return network_params: configuration for Network 83 | :return dataloader_params: configuration for Dataloader 84 | :return testing_params: configuration for Tester 85 | """ 86 | network_params = general_network.network_parameters( 87 | height=args.height, 88 | width=args.width, 89 | load_only_baseline=args.load_only_baseline, 90 | tau=args.tau, 91 | ) 92 | 93 | dataloader_params = dataloader_parameters( 94 | height=args.height, width=args.width, task=args.task 95 | ) 96 | 97 | testing_params = tester_factory.tester_parameters( 98 | output_path=args.dest, 99 | checkpoint_path=args.ckpt, 100 | width=args.width, 101 | height=args.height, 102 | filenames_file=args.filenames_file, 103 | datapath=args.datapath, 104 | ) 105 | 106 | return network_params, dataloader_params, testing_params 107 | 108 | 109 | def configure_network(network_params, dataloader_params): 110 | """Build the Dataloader, then build the Network. 111 | :param network_params: configuration for Network 112 | :param dataloader_params: configuration for Dataloader 113 | :return network: built Network 114 | :return dataloader: built Dataloader 115 | :return training_flag: bool placeholder. For Batchnorm 116 | 117 | """ 118 | training_flag = tf.placeholder(tf.bool) 119 | dataloader = dataloader_factory.get_dataloader(args.task)( 120 | datapath=args.datapath, 121 | filenames_file=args.filenames_file, 122 | params=dataloader_params, 123 | ) 124 | batch = dataloader.get_next_batch() 125 | network = complete_network.OmegaNet( 126 | batch, is_training=training_flag, params=network_params 127 | ) 128 | 129 | network.build() 130 | return network, dataloader, training_flag 131 | 132 | 133 | def main(_): 134 | """Create the Dataloader, the Network and the Tester. 135 | Then, run the Tester. 136 | :raise ValueError: if model does not exist 137 | """ 138 | model_exists = utilities.check_model_exists(args.ckpt) 139 | if not model_exists: 140 | raise ValueError("Model not found") 141 | network_params, dataloader_params, testing_params = configure_parameters() 142 | network, dataloader, training_flag = configure_network( 143 | network_params, dataloader_params 144 | ) 145 | 146 | tester = tester_factory.get_tester(args.task)(testing_params) 147 | tester.test(network, dataloader, training_flag) 148 | 149 | 150 | if __name__ == "__main__": 151 | tf.app.run() 152 | -------------------------------------------------------------------------------- /testers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVLAB-Unibo/omeganet/7e23372923ee53745ba6bbb0c7921d7bb4eea01a/testers/__init__.py -------------------------------------------------------------------------------- /testers/error_tester.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Fabio Tosi, Filippo Aleotti, Pierluigi Zama Ramirez, Matteo Poggi, 2 | # Samuele Salti, Luigi Di Stefano, Stefano Mattoccia 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from testers import general_tester 17 | 18 | 19 | class Tester(general_tester.GeneralTester): 20 | """Error tester. If selected, it means that 21 | no valid Tester exists for that dataset/task 22 | association. 23 | """ 24 | 25 | def test(self, network, dataloader, training_flag): 26 | """This component has to raise ValueError, because 27 | that dataset/task association is not admitted. 28 | :param network: built Network 29 | :param dataloader: built Dataloader 30 | :raise ValueError: no testing for this task is available for the selected dataset 31 | """ 32 | raise ValueError("No testing for this task is available for the selected dataset") 33 | -------------------------------------------------------------------------------- /testers/factory.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Fabio Tosi, Filippo Aleotti, Pierluigi Zama Ramirez, Matteo Poggi, 2 | # Samuele Salti, Luigi Di Stefano, Stefano Mattoccia 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """ 17 | Factory for testers 18 | """ 19 | 20 | import tensorflow as tf 21 | import numpy as np 22 | from testers import kitti_depth, kitti_flow, kitti_semantic, kitti_mask, error_tester 23 | from collections import namedtuple 24 | 25 | tester_parameters = namedtuple( 26 | "tester_parameters", 27 | "output_path, checkpoint_path, width, height, filenames_file, datapath", 28 | ) 29 | 30 | TESTER_KITTI_FACTORY = { 31 | "depth": kitti_depth.Tester, 32 | "flow": kitti_flow.Tester, 33 | "semantic": kitti_semantic.Tester, 34 | "mask": kitti_mask.Tester, 35 | } 36 | 37 | 38 | def get_tester(task): 39 | """Select the best Tester given a task and a dataset. 40 | If no Tester is available for that task on 41 | the selected Dataset (i.e., depth for CS), then 42 | an ErrorTester is returned. 43 | :param task: task to perform 44 | """ 45 | assert task in TESTER_KITTI_FACTORY 46 | return TESTER_KITTI_FACTORY[task] 47 | -------------------------------------------------------------------------------- /testers/general_tester.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Fabio Tosi, Filippo Aleotti, Pierluigi Zama Ramirez, Matteo Poggi, 2 | # Samuele Salti, Luigi Di Stefano, Stefano Mattoccia 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from abc import ABCMeta, abstractmethod 17 | 18 | 19 | class GeneralTester(object): 20 | """Template class for Testers 21 | """ 22 | 23 | __metaclass__ = ABCMeta 24 | 25 | def __init__(self, params): 26 | self.params = params 27 | with open(params.filenames_file, "r") as f: 28 | self.samples = f.readlines() 29 | self.num_test_samples = len(self.samples) 30 | 31 | @abstractmethod 32 | def test(self, network, dataloader, training_flag): 33 | """Principal method of the class. 34 | Start artifact generation. 35 | :param network: neural network to run 36 | :param dataloader: tf.dataloader that loads images from the file system 37 | :param training_flag: training flag bool. For Batchnorm 38 | """ 39 | pass 40 | -------------------------------------------------------------------------------- /testers/kitti_depth.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Fabio Tosi, Filippo Aleotti, Pierluigi Zama Ramirez, Matteo Poggi, 2 | # Samuele Salti, Luigi Di Stefano, Stefano Mattoccia 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """ Tester for KITTI depth 17 | """ 18 | import os 19 | import tensorflow as tf 20 | import cv2 21 | import numpy as np 22 | from testers.general_tester import GeneralTester 23 | from helpers import utilities 24 | from tqdm import tqdm 25 | 26 | 27 | class Tester(GeneralTester): 28 | """KITTI Depth Tester. 29 | It produces depth artifacts for the KITTI dataset 30 | """ 31 | 32 | def prepare(self): 33 | """Create output folders 34 | """ 35 | dest = os.path.join(self.params.output_path, "depth") 36 | utilities.create_dir(dest) 37 | 38 | def test(self, network, dataloader, training_flag): 39 | """Test KITTI depth 40 | It produces in the params.output_path folder the depth 41 | artifacts. 
42 | :param network: network to test 43 | :param dataloader: dataloader for this test 44 | :param training_flag: training_flag for Batchnorm 45 | 46 | 47 | """ 48 | 49 | config = tf.ConfigProto(allow_soft_placement=True) 50 | sess = tf.Session(config=config) 51 | 52 | self.prepare() 53 | 54 | var_list = network.get_network_params() 55 | saver = tf.train.Saver(var_list=var_list) 56 | 57 | init_op = tf.group( 58 | tf.global_variables_initializer(), tf.local_variables_initializer() 59 | ) 60 | sess.run(init_op) 61 | 62 | coordinator = tf.train.Coordinator() 63 | threads = tf.train.start_queue_runners(sess=sess, coord=coordinator) 64 | 65 | saver.restore(sess, self.params.checkpoint_path) 66 | print(" [*] Load model: SUCCESS") 67 | 68 | prediction_disp = tf.image.resize_images( 69 | network.disp, [dataloader.image_h, dataloader.image_w] 70 | ) 71 | 72 | print(" [*] Start depth artifacts generation") 73 | with tqdm(total=self.num_test_samples) as pbar: 74 | for step in range(self.num_test_samples): 75 | ops = [prediction_disp] 76 | outputs = sess.run(ops, feed_dict={training_flag: False}) 77 | name_disp = self.get_name(step) 78 | inverse_depth = outputs[0].squeeze() 79 | np.save( 80 | os.path.join(self.params.output_path, "depth", name_disp + ".npy"), 81 | np.array(inverse_depth), 82 | ) 83 | pbar.update(1) 84 | 85 | coordinator.request_stop() 86 | coordinator.join(threads) 87 | 88 | def get_name(self, step): 89 | """Get right file name 90 | :param step: current step 91 | :return name: name of artifact, based on step 92 | """ 93 | name = str(step) 94 | return name 95 | -------------------------------------------------------------------------------- /testers/kitti_flow.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Fabio Tosi, Filippo Aleotti, Pierluigi Zama Ramirez, Matteo Poggi, 2 | # Samuele Salti, Luigi Di Stefano, Stefano Mattoccia 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """ Tester for KITTI optical flow 17 | """ 18 | 19 | from __future__ import division 20 | import os 21 | import tensorflow as tf 22 | from tqdm import tqdm 23 | import cv2 24 | import numpy as np 25 | from testers.general_tester import GeneralTester 26 | from helpers import utilities 27 | 28 | 29 | class Tester(GeneralTester): 30 | """Tester for optical flow on KITTI 31 | """ 32 | 33 | def prepare(self): 34 | """Create output folders 35 | """ 36 | dest = os.path.join(self.params.output_path, "flow") 37 | utilities.create_dir(dest) 38 | 39 | def test(self, network, dataloader, training_flag): 40 | """Generate optical 41 | It saves optical flow artifacts in the 42 | self.params.output_path/flow folder. 
43 | :param network: network to test 44 | :param dataloader: dataloader for this test 45 | :param training_flag: training_flag for Batchnorm 46 | """ 47 | config = tf.ConfigProto(allow_soft_placement=True) 48 | sess = tf.Session(config=config) 49 | 50 | self.prepare() 51 | var_list = network.get_network_params() 52 | saver = tf.train.Saver(var_list=var_list) 53 | 54 | init_op = tf.group( 55 | tf.global_variables_initializer(), tf.local_variables_initializer() 56 | ) 57 | sess.run(init_op) 58 | 59 | coordinator = tf.train.Coordinator() 60 | threads = tf.train.start_queue_runners(sess=sess, coord=coordinator) 61 | 62 | saver.restore(sess, self.params.checkpoint_path) 63 | print(" [*] Load model: SUCCESS") 64 | 65 | predicted_flow = tf.image.resize_images( 66 | network.optical_flow, [dataloader.image_h, dataloader.image_w] 67 | ) 68 | 69 | print(" [*] Start optical flow artifacts generation") 70 | with tqdm(total=self.num_test_samples) as pbar: 71 | for step in range(self.num_test_samples): 72 | ops = [ 73 | predicted_flow, 74 | dataloader.image_h, 75 | dataloader.image_w, 76 | ] 77 | 78 | outputs = sess.run(ops, feed_dict={training_flag: False}) 79 | name = self.get_name(step) 80 | flow = outputs[0].squeeze() 81 | image_h = outputs[1] 82 | image_w = outputs[2] 83 | 84 | flow = self.scale_flow(flow, image_h, image_w) 85 | 86 | utilities.write_kitti_png_flow( 87 | os.path.join(self.params.output_path, "flow", name + ".png"), flow 88 | ) 89 | pbar.update(1) 90 | 91 | coordinator.request_stop() 92 | coordinator.join(threads) 93 | 94 | def get_name(self, step): 95 | """Get right file name 96 | :param step: current step 97 | :return name: name of artifact, based on step 98 | """ 99 | name = ( 100 | self.samples[step] 101 | .split(" ")[1] 102 | .replace("/", "_") 103 | .replace(".png", "") 104 | .strip() 105 | ) 106 | return name 107 | 108 | def scale_flow(self, flow, image_h, image_w): 109 | """Apply the scale factor to the resized optical flow 110 | :param flow: optical flow. Array with shape (H,W,2) 111 | :param image_h: height of the original image 112 | :param image_w: width of the original image 113 | :return scaled_flow: optical flow rescaled by the scaling factor 114 | """ 115 | scaling_w = image_w / self.params.width 116 | scaling_h = image_h / self.params.height 117 | flow *= np.tile(np.array((scaling_w, scaling_h), dtype=np.float32), (image_h, image_w, 1)) 118 | return flow 119 | -------------------------------------------------------------------------------- /testers/kitti_mask.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Fabio Tosi, Filippo Aleotti, Pierluigi Zama Ramirez, Matteo Poggi, 2 | # Samuele Salti, Luigi Di Stefano, Stefano Mattoccia 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License.
15 | 16 | from __future__ import division 17 | import tensorflow as tf 18 | import os 19 | import cv2 20 | import numpy as np 21 | from testers.general_tester import GeneralTester 22 | from helpers import utilities 23 | from tqdm import tqdm 24 | 25 | 26 | class Tester(GeneralTester): 27 | def prepare(self): 28 | """Create output folders 29 | """ 30 | dest = os.path.join(self.params.output_path, "mask") 31 | utilities.create_dir(dest) 32 | 33 | def test(self, network, dataloader, is_training): 34 | """ Test motion mask 35 | It saves motion mask artifacts in the self.params.output_path/mask folder. 36 | :param network: network to test 37 | :param dataloader: dataloader for this test 38 | :param is_training: training_flag for Batchnorm 39 | """ 40 | # SESSION 41 | config = tf.ConfigProto(allow_soft_placement=True) 42 | sess = tf.Session(config=config) 43 | 44 | self.prepare() 45 | var_list = network.get_network_params() 46 | saver = tf.train.Saver(var_list=var_list) 47 | 48 | init_op = tf.group( 49 | tf.global_variables_initializer(), tf.local_variables_initializer() 50 | ) 51 | sess.run(init_op) 52 | 53 | coordinator = tf.train.Coordinator() 54 | threads = tf.train.start_queue_runners(sess=sess, coord=coordinator) 55 | 56 | saver.restore(sess, self.params.checkpoint_path) 57 | 58 | print(" [*] Load model: SUCCESS") 59 | 60 | segmented_mask = tf.image.resize_images( 61 | network.motion_mask, 62 | [dataloader.image_h, dataloader.image_w], 63 | method=tf.image.ResizeMethod.NEAREST_NEIGHBOR, 64 | ) 65 | with tqdm(total=self.num_test_samples) as pbar: 66 | for step in range(self.num_test_samples): 67 | ops = [segmented_mask] 68 | outputs = sess.run(ops, feed_dict={is_training: False}) 69 | 70 | name = self.get_name(step) 71 | seg_mask = outputs[0].squeeze() 72 | 73 | cv2.imwrite( 74 | os.path.join(self.params.output_path, "mask", name + ".png"), 75 | (seg_mask * 255.0).astype(np.uint8), 76 | ) 77 | pbar.update(1) 78 | 79 | coordinator.request_stop() 80 | coordinator.join(threads) 81 | 82 | def get_name(self, step): 83 | """Get right file name 84 | :param step: current step 85 | :return name: name of artifact, based on step 86 | """ 87 | name = ( 88 | self.samples[step] 89 | .split(" ")[1] 90 | .replace("/", "_") 91 | .replace(".png", "") 92 | .strip() 93 | ) 94 | return name 95 | -------------------------------------------------------------------------------- /testers/kitti_semantic.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Fabio Tosi, Filippo Aleotti, Pierluigi Zama Ramirez, Matteo Poggi, 2 | # Samuele Salti, Luigi Di Stefano, Stefano Mattoccia 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | """Generate semantic artifacts for KITTI 17 | """ 18 | import os 19 | import tensorflow as tf 20 | import cv2 21 | from tqdm import tqdm 22 | from testers import general_tester 23 | from helpers import utilities 24 | 25 | 26 | class Tester(general_tester.GeneralTester): 27 | def prepare(self): 28 | """Create output folders 29 | """ 30 | dest = os.path.join(self.params.output_path, "semantic") 31 | utilities.create_dir(dest) 32 | 33 | def test(self, network, dataloader, is_training): 34 | """Generate semantic artifacts. 35 | It saves semantic artifacts in the 36 | self.params.output_path/semantic folder. 37 | :param network: network to test 38 | :param dataloader: dataloader for this test 39 | :param is_training: training_flag for Batchnorm 40 | """ 41 | 42 | config = tf.ConfigProto(allow_soft_placement=True) 43 | sess = tf.Session(config=config) 44 | 45 | self.prepare() 46 | var_list = network.get_network_params() 47 | saver = tf.train.Saver(var_list=var_list) 48 | 49 | init_op = tf.group( 50 | tf.global_variables_initializer(), tf.local_variables_initializer() 51 | ) 52 | sess.run(init_op) 53 | 54 | coordinator = tf.train.Coordinator() 55 | threads = tf.train.start_queue_runners(sess=sess, coord=coordinator) 56 | 57 | saver.restore(sess, self.params.checkpoint_path) 58 | 59 | print(" [*] Load model: SUCCESS") 60 | 61 | prediction_semantic = tf.image.resize_images( 62 | network.semantic_logits, [dataloader.image_h, dataloader.image_w] 63 | ) 64 | ops = [tf.argmax(prediction_semantic[0], -1)] 65 | 66 | with tqdm(total=self.num_test_samples) as pbar: 67 | for step in range(self.num_test_samples): 68 | outputs = sess.run(ops, feed_dict={is_training: False}) 69 | name = self.get_file_name(step) 70 | semantic_map = outputs[0].squeeze() 71 | dest = os.path.join(self.params.output_path, "semantic", name + ".png") 72 | cv2.imwrite(dest, semantic_map) 73 | pbar.update(1) 74 | 75 | coordinator.request_stop() 76 | coordinator.join(threads) 77 | 78 | def get_file_name(self, step): 79 | """ Get name of nth line of test file 80 | :param step: current step 81 | :return name: name suited for KITTI (eg 000000_10) 82 | """ 83 | name = str(step).zfill(6) + "_10" 84 | return name 85 | --------------------------------------------------------------------------------
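A minimal invocation sketch for the test script above, based on the arguments declared in test.py; the dataset and checkpoint paths are placeholders for a local KITTI copy and a downloaded OmegaNet checkpoint prefix:

    python test.py --task depth --datapath /path/to/kitti --ckpt /path/to/checkpoints/omeganet --filenames_file filenames/eigen_test.txt --height 192 --width 640 --dest ./artifacts

single_inference.py accepts an analogous set of options (a target image, optional src1/src2 images when flow or motion_mask is requested, a checkpoint and an output folder); refer to its own argument parser for the exact flag names.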