├── .gitignore
├── LICENSE
├── README.md
├── assets
│   ├── architecture.png
│   ├── banner.gif
│   ├── example
│   │   ├── 000018_09.png
│   │   ├── 000018_10.png
│   │   ├── 000018_11.png
│   │   └── README.md
│   ├── poster.pdf
│   └── spotlight.mp4
├── dataloaders
│   ├── __init__.py
│   ├── dataloader_one_frame.py
│   ├── dataloader_three_frames.py
│   ├── factory.py
│   └── general_dataloader.py
├── evaluators
│   ├── __init__.py
│   ├── depth.py
│   ├── flow.py
│   ├── mask.py
│   └── semantic.py
├── filenames
│   ├── eigen_test.txt
│   ├── kitti_2015_test.txt
│   └── kitti_2015_test_semantic.txt
├── helpers
│   ├── __init__.py
│   ├── bilinear_sampler.py
│   ├── depth_utils.py
│   ├── flow_tool
│   │   ├── LICENSE
│   │   ├── README.md
│   │   ├── __init__.py
│   │   └── flowlib.py
│   └── utilities.py
├── networks
│   ├── __init__.py
│   ├── baseline.py
│   ├── complete_network.py
│   ├── general_network.py
│   ├── network_components.py
│   ├── ops.py
│   └── selflow
│       ├── LICENSE
│       ├── __init__.py
│       ├── selflow_network.py
│       └── warp.py
├── requirements.txt
├── single_inference.py
├── test.py
└── testers
    ├── __init__.py
    ├── error_tester.py
    ├── factory.py
    ├── general_tester.py
    ├── kitti_depth.py
    ├── kitti_flow.py
    ├── kitti_mask.py
    └── kitti_semantic.py
/.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | results* 3 | __pycache__ 4 | tf* 5 | artifacts* 6 | *.o -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship.
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright Fabio Tosi, Filippo Aleotti, Pierluigi Zama Ramirez, Matteo Poggi, 190 | Samuele Salti, Luigi Di Stefano, Stefano Mattoccia 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Distilled semantics for comprehensive scene understanding from videos 2 | Demo code of "Distilled semantics for comprehensive scene understanding from videos", published at [CVPR 2020](http://cvpr2020.thecvf.com/) 3 | 4 | [[Paper]](https://arxiv.org/pdf/2003.14030.pdf) - [[Video]](assets/spotlight.mp4) - [[Poster]](assets/poster.pdf) 5 | 6 | ### Authors 7 | [Fabio Tosi †](https://vision.disi.unibo.it/~ftosi/) - [Filippo Aleotti †](https://filippoaleotti.github.io/website/) - [Pierluigi Zama Ramirez †](https://pierlui92.github.io/) - [Matteo Poggi](https://mattpoggi.github.io/) - [Samuele Salti](https://vision.deis.unibo.it/ssalti/) - [Luigi Di Stefano](https://www.unibo.it/sitoweb/luigi.distefano/) - [Stefano Mattoccia](http://vision.deis.unibo.it/~smatt/) 8 | 9 | † *joint first authorship* 10 | 11 | ![](assets/banner.gif) 12 | 13 | **At the moment, we do not plan to release the training code.** 14 | ## Abstract 15 | Whole understanding of the surroundings is paramount to autonomous systems. Recent works have shown that deep neural networks can learn geometry (depth) and motion (optical flow) from a monocular video without any explicit supervision from ground truth annotations, particularly hard to source for these two tasks. In this paper, we take an additional step toward holistic scene understanding with monocular cameras by learning depth and motion alongside semantics, with supervision for the latter provided by a pre-trained network distilling proxy ground truth images. 16 | We address the three tasks jointly by a) a novel training protocol based on knowledge distillation and self-supervision and b) a compact network architecture which enables efficient scene understanding on both power-hungry GPUs and low-power embedded platforms. 17 | We thoroughly assess the performance of our framework and show that it yields state-of-the-art results for monocular depth estimation, optical flow and motion segmentation. 18 | 19 | ## Architecture 20 | 21 | ![](assets/architecture.png) 22 | 23 | At training time, our final network is an ensemble of several sub-networks (depicted in the figure above), each in charge of a specific task: 24 | * Camera Network (CameraNet): network in charge of intrinsics and pose estimation 25 | * Depth Semantic Network (DSNet): network that infers both depth and semantics for a given scene 26 | * Optical Flow Network (OFNet): teacher optical flow network 27 | * Self-Distilled Optical Flow Network (SD-OFNet): student optical flow network, used at testing time 28 | 29 | At testing time, we rely on DSNet, CameraNet and Self-Distilled OFNet, depending on the task. 30 | 31 | ## Requirements 32 | This project requires TensorFlow 1.8 and Python `2.x` or `3.x`.
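Note that the code relies on the TensorFlow 1.x graph API (e.g., `tf.variable_scope`, `tf.train.string_input_producer`), so it will not run under TensorFlow 2.x. A minimal environment check is sketched below; it is a convenience addition, not part of the original instructions.

```python
# Minimal environment check (a sketch, not part of the original repository):
# verifies that a TensorFlow 1.x build is installed before running the demos.
import tensorflow as tf

print("TensorFlow version:", tf.__version__)
assert tf.__version__.startswith("1."), "This code base targets TensorFlow 1.8, not 2.x"
```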
33 | 34 | You can install all the requirements by running: 35 | ```bash 36 | pip install -r requirements.txt 37 | ``` 38 | 39 | ## Pretrained Models 40 | Pretrained models are available for download: 41 | | Training | Network | Resolution | zip | 42 | |:-:|:--:|:--:|:--:| 43 | | KITTI | OmegaNet | 640x192 | [weights](https://drive.google.com/file/d/15MfMcAIJeg7TV8lqxa69qXF-COLGtTZe/view?usp=sharing) | 44 | | CS + KITTI (EIGEN) | DSNet | 1024x320 | [weights](https://drive.google.com/file/d/1OUcq-ueT5i8-mkVRfpWOcxfH6ss8DY_F/view?usp=sharing) | 45 | | CS | DSNet | 1024x320 | [weights](https://drive.google.com/file/d/1YdteQxo4MZukVlb8gIBZM4Tj_4bLC5F2/view?usp=sharing) | 46 | 47 | ## How To 48 | 49 | ### Run a Single Inference 50 | 51 | You can run OmegaNet on a single image using the following command: 52 | 53 | ``` 54 | python single_inference.py --tgt $tgt_path [--ckpt $ckpt --tasks $tasks --dest $dest --src1 $src1 --src2 $src2] 55 | ``` 56 | where: 57 | * `tgt`: path to the target image (i.e., the image at time t0). Required 58 | * `src1`: path to the src1 image (i.e., the image at time t-1). Required only if `flow` or `mask` is in the tasks list 59 | * `src2`: path to the src2 image (i.e., the image at time t+1). Required only if `flow` or `mask` is in the tasks list 60 | * `ckpt`: path to checkpoint. Required 61 | * `tasks`: space-separated list of tasks to perform. Default [`inverse_depth`, `semantic`, `flow`] 62 | * `dest`: destination folder. Default `results` 63 | 64 | For instance, the following command runs OmegaNet on an example batch from the KITTI 2015 test set: 65 | 66 | ``` 67 | python single_inference.py --src1 assets/example/000018_09.png \ 68 | --tgt assets/example/000018_10.png \ 69 | --src2 assets/example/000018_11.png \ 70 | --ckpt models/omeganet 71 | ``` 72 | 73 | ## Test 74 | To test the network, first generate the artifacts for a specific task, then evaluate them. 75 | 76 | ### Generate Artifacts 77 | You can generate the artifacts for a specific `task` by running the following command: 78 | 79 | ``` 80 | python test.py --task $task --ckpt $ckpt \ 81 | [--cpu --load_only_baseline --filenames_file $filenames ] \ 82 | [--height $height --width $width --dest $dest] 83 | ``` 84 | 85 | where: 86 | 87 | * `task`: task to perform. Can be one of [`depth`, `semantic`, `flow`, `mask`]. Default `depth` 88 | * `filenames`: path to a `.txt` file listing all the images to load. Default `filenames/eigen_test.txt` 89 | * `ckpt`: path to checkpoint. **Required** 90 | * `load_only_baseline`: if set, only the Baseline (CameraNet+DSNet) is loaded; otherwise, the full OmegaNet is loaded. For instance, 91 | when testing a Baseline model the SD-OFNet weights are not available, so they should not be loaded. 92 | * `height`: height of the resized image. Default `192` 93 | * `width`: width of the resized image. Default `640` 94 | * `dest`: where to save artifacts.
Default `artifacts` 95 | * `cpu`: run the test on CPU 96 | 97 | #### Depth Artifacts 98 | You can generate depth artifacts using the following script: 99 | ``` 100 | export datapath="/path/to/full_kitti/" 101 | python test.py --task depth \ 102 | --datapath $datapath \ 103 | --filenames_file filenames/eigen_test.txt \ 104 | --ckpt models/omeganet \ 105 | --load_only_baseline 106 | ``` 107 | where: 108 | * `datapath`: path to your full KITTI dataset 109 | 110 | #### Flow Artifacts 111 | Artifacts for KITTI can be produced with the following command: 112 | 113 | ``` 114 | export datapath="/path/to/3-frames-KITTI/" 115 | python test.py --task flow \ 116 | --datapath $datapath \ 117 | --filenames_file filenames/kitti_2015_test.txt \ 118 | --ckpt models/omeganet 119 | ``` 120 | 121 | where: 122 | * `datapath`: path to your 3-frames extended KITTI dataset 123 | 124 | #### Semantic Artifacts 125 | Artifacts for KITTI can be produced with the following command: 126 | 127 | ``` 128 | export datapath="/path_to_kitti/data_semantics/training/image_2" 129 | python test.py --task semantic \ 130 | --datapath $datapath \ 131 | --filenames_file filenames/kitti_2015_test_semantic.txt \ 132 | --ckpt path_to_ckpts/dsnet \ 133 | --load_only_baseline 134 | ``` 135 | where: 136 | * `datapath`: path to the `image_2` folder of the KITTI 2015 semantic dataset (`data_semantics`) 137 | 138 | #### Motion Mask Artifacts 139 | Artifacts for KITTI can be produced with the following command: 140 | 141 | ``` 142 | export datapath="/path/to/kitti/2015/" 143 | python test.py --task mask \ 144 | --ckpt path_to_ckpts/omeganet \ 145 | --datapath $datapath \ 146 | --filenames_file filenames/kitti_2015_test.txt 147 | ``` 148 | where: 149 | * `datapath`: path to your 3-frames extended KITTI dataset 150 | 151 | ### Run tests 152 | 153 | #### Depth 154 | You can evaluate the depth maps by running: 155 | 156 | ``` 157 | cd evaluators 158 | python depth.py --datapath $datapath \ 159 | --prediction_folder $prediction_folder 160 | ``` 161 | 162 | where: 163 | * `datapath`: path to the full KITTI dataset 164 | * `prediction_folder`: path to the folder with `.npy` files, e.g. `../artifacts/depth/` (a quick sketch for inspecting these files is given at the end of this README) 165 | 166 | #### Flow 167 | To test optical flow artifacts, run the command: 168 | 169 | ``` 170 | cd evaluators 171 | python flow.py --datapath $datapath \ 172 | --prediction_folder $prediction_folder 173 | ``` 174 | 175 | where: 176 | * `datapath`: path to KITTI/2015 177 | * `prediction_folder`: path to flow predictions, e.g. `../artifacts/flow/` 178 | 179 | #### Semantic 180 | To test semantic segmentation, run the following command: 181 | 182 | ``` 183 | cd evaluators 184 | python semantic.py --datapath $datapath \ 185 | --prediction_folder $prediction_folder 186 | ``` 187 | 188 | where: 189 | * `datapath`: path to KITTI/2015/data_semantics 190 | * `prediction_folder`: path to semantic predictions, e.g. `../artifacts/semantic/` 191 | 192 | #### Motion Mask 193 | When motion mask artifacts are ready, you can test them on KITTI: 194 | 195 | ``` 196 | cd evaluators 197 | python mask.py --datapath $datapath \ 198 | --prediction_folder $prediction_folder 199 | ``` 200 | 201 | where: 202 | * `datapath`: path to the KITTI/2015 folder 203 | * `prediction_folder`: path to the predicted motion masks, e.g.
`../artifacts/mask` 204 | 205 | ## Citation 206 | If you find this code useful in your research, please cite: 207 | 208 | ``` 209 | @inproceedings{tosi2020distilled, 210 | title={Distilled semantics for comprehensive scene understanding from videos}, 211 | author={Tosi, Fabio and Aleotti, Filippo and Ramirez, Pierluigi Zama and Poggi, Matteo and Salti, Samuele and Di Stefano, Luigi and Mattoccia, Stefano}, 212 | booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition}, 213 | year={2020} 214 | } 215 | ``` 216 | 217 | ## License 218 | The code is licensed under the Apache 2.0 License. More information is in the `LICENSE` file. 219 | 220 | ## Acknowledgements 221 | 222 | Portions of our code are from other repositories: 223 | 224 | * `Depth evaluation` is from [monodepth](https://github.com/mrharicot/monodepth), for "Unsupervised Monocular Depth Estimation with Left-Right Consistency, by C. Godard, O. Mac Aodha, G. Brostow, CVPR 2017". 225 | * `Flow Tools` are from https://github.com/liruoteng/OpticalFlowToolkit, licensed under the MIT License. 226 | * `Rigid flow estimation` is from [SfMLearner](https://github.com/tinghuiz/SfMLearner/blob/master), for "Unsupervised Learning of Depth and Ego-Motion from Video, by T. Zhou, M. Brown, N. Snavely, D. G. Lowe, CVPR 2017". Code is licensed under the MIT License. 227 | * The `SelFlow` network and utilities are from [SelFlow](https://github.com/ppliuboy/SelFlow), for "SelFlow: Self-Supervised Learning of Optical Flow, by P. Liu, M. Lyu, 228 | I. King, J. Xu, CVPR 2019". Code is licensed under the MIT License. 229 | * The `Teacher semantic network` is [DPC](https://github.com/tensorflow/models/tree/master/research/deeplab), for "Searching for Efficient Multi-Scale Architectures for Dense Image Prediction, by L. C. Chen, M. D. Collins, Y. Zhu, G. Papandreou, B. Zoph, F. Schroff, H. Adam, J. Shlens, Advances in Neural Information Processing Systems 2018". Code is licensed under the Apache v2 License. We used this network to generate proxy semantic maps. 230 | 231 | 232 | We would like to thank all these authors for making their code publicly available and, where applicable, for sharing their pretrained models.
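## Inspecting Depth Artifacts

As a quick sanity check on the depth artifacts generated by `test.py`, the sketch below loads one of the predicted `.npy` disparity maps and saves a color-mapped preview. This is only an illustrative snippet, not part of the original pipeline: the `artifacts/depth/0.npy` path assumes the default `dest` folder and the `<index>.npy` naming read by `evaluators/depth.py`, and `matplotlib` is assumed to be installed.

```python
# Illustrative sketch (not part of the original pipeline): inspect one of the
# depth artifacts produced by `python test.py --task depth ...`.
import numpy as np
import matplotlib.pyplot as plt

# Assumed location: default `dest` folder and the `<index>.npy` naming
# expected by evaluators/depth.py.
disparity = np.load("artifacts/depth/0.npy").squeeze()

print("shape:", disparity.shape, "range:", disparity.min(), "-", disparity.max())
plt.imsave("disparity_0.png", disparity, cmap="magma")  # save a color-mapped preview
```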
233 | -------------------------------------------------------------------------------- /assets/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVLAB-Unibo/omeganet/7e23372923ee53745ba6bbb0c7921d7bb4eea01a/assets/architecture.png -------------------------------------------------------------------------------- /assets/banner.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVLAB-Unibo/omeganet/7e23372923ee53745ba6bbb0c7921d7bb4eea01a/assets/banner.gif -------------------------------------------------------------------------------- /assets/example/000018_09.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVLAB-Unibo/omeganet/7e23372923ee53745ba6bbb0c7921d7bb4eea01a/assets/example/000018_09.png -------------------------------------------------------------------------------- /assets/example/000018_10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVLAB-Unibo/omeganet/7e23372923ee53745ba6bbb0c7921d7bb4eea01a/assets/example/000018_10.png -------------------------------------------------------------------------------- /assets/example/000018_11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVLAB-Unibo/omeganet/7e23372923ee53745ba6bbb0c7921d7bb4eea01a/assets/example/000018_11.png -------------------------------------------------------------------------------- /assets/example/README.md: -------------------------------------------------------------------------------- 1 | Images in this folder belong to the [KITTI](http://www.cvlibs.net/datasets/kitti/) 2015 dataset, and are used for demonstration purposes only.
2 | 3 | Citations: 4 | 5 | ``` 6 | @ARTICLE{Menze2018JPRS, 7 | author = {Moritz Menze and Christian Heipke and Andreas Geiger}, 8 | title = {Object Scene Flow}, 9 | journal = {ISPRS Journal of Photogrammetry and Remote Sensing (JPRS)}, 10 | year = {2018} 11 | } 12 | @INPROCEEDINGS{Menze2015ISA, 13 | author = {Moritz Menze and Christian Heipke and Andreas Geiger}, 14 | title = {Joint 3D Estimation of Vehicles and Scene Flow}, 15 | booktitle = {ISPRS Workshop on Image Sequence Analysis (ISA)}, 16 | year = {2015} 17 | } 18 | ``` -------------------------------------------------------------------------------- /assets/poster.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVLAB-Unibo/omeganet/7e23372923ee53745ba6bbb0c7921d7bb4eea01a/assets/poster.pdf -------------------------------------------------------------------------------- /assets/spotlight.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVLAB-Unibo/omeganet/7e23372923ee53745ba6bbb0c7921d7bb4eea01a/assets/spotlight.mp4 -------------------------------------------------------------------------------- /dataloaders/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVLAB-Unibo/omeganet/7e23372923ee53745ba6bbb0c7921d7bb4eea01a/dataloaders/__init__.py -------------------------------------------------------------------------------- /dataloaders/dataloader_one_frame.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Fabio Tosi, Filippo Aleotti, Pierluigi Zama Ramirez, Matteo Poggi, 2 | # Samuele Salti, Luigi Di Stefano, Stefano Mattoccia 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | """ 17 | Dataloader suited for 1 frame tasks 18 | """ 19 | import tensorflow as tf 20 | import numpy as np 21 | from dataloaders.general_dataloader import GeneralDataloader 22 | 23 | 24 | class TestDataloader(GeneralDataloader): 25 | def build(self): 26 | input_queue = tf.train.string_input_producer( 27 | [self.filenames_file], shuffle=False 28 | ) 29 | line_reader = tf.TextLineReader() 30 | _, line = line_reader.read(input_queue) 31 | split_line = tf.string_split([line]).values 32 | 33 | with tf.variable_scope("tester_dataloader_one_frame"): 34 | tgt_img_path = tf.string_join([self.datapath, split_line[0]]) 35 | tgt_img_o = self.read_image(tgt_img_path) 36 | self.tgt_img_batch = tf.stack([tgt_img_o], 0) 37 | self.tgt_img_batch.set_shape([1, None, None, 3]) 38 | 39 | def get_next_batch(self): 40 | with tf.variable_scope("get_next_batch"): 41 | batch = { 42 | "src_img_1": self.tgt_img_batch, 43 | "tgt_img": self.tgt_img_batch, 44 | "src_img_2": self.tgt_img_batch, 45 | } 46 | return batch 47 | -------------------------------------------------------------------------------- /dataloaders/dataloader_three_frames.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Fabio Tosi, Filippo Aleotti, Pierluigi Zama Ramirez, Matteo Poggi, 2 | # Samuele Salti, Luigi Di Stefano, Stefano Mattoccia 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | """ 17 | Dataloader suited for 3 frames tasks 18 | """ 19 | import tensorflow as tf 20 | import numpy as np 21 | from dataloaders.general_dataloader import GeneralDataloader 22 | 23 | 24 | class TestDataloader(GeneralDataloader): 25 | def build(self): 26 | input_queue = tf.train.string_input_producer( 27 | [self.filenames_file], shuffle=False 28 | ) 29 | line_reader = tf.TextLineReader() 30 | _, line = line_reader.read(input_queue) 31 | split_line = tf.string_split([line]).values 32 | 33 | with tf.variable_scope("tester_dataloader_three_frames"): 34 | with tf.variable_scope("image_reader"): 35 | src_img_1_path = tf.string_join([self.datapath, split_line[0]]) 36 | tgt_img_path = tf.string_join([self.datapath, split_line[1]]) 37 | src_img_2_path = tf.string_join([self.datapath, split_line[2]]) 38 | 39 | src_img_1_o = self.read_image(src_img_1_path) 40 | tgt_img_o = self.read_image(tgt_img_path) 41 | src_img_2_o = self.read_image(src_img_2_path) 42 | 43 | with tf.variable_scope("batch_creator"): 44 | self.src_img_1_batch = tf.stack([src_img_1_o], 0) 45 | self.tgt_img_batch = tf.stack([tgt_img_o], 0) 46 | self.src_img_2_batch = tf.stack([src_img_2_o], 0) 47 | 48 | with tf.variable_scope("shape_setter"): 49 | self.src_img_1_batch.set_shape([1, None, None, 3]) 50 | self.tgt_img_batch.set_shape([1, None, None, 3]) 51 | self.src_img_2_batch.set_shape([1, None, None, 3]) 52 | 53 | def get_next_batch(self): 54 | with tf.variable_scope("get_next_batch"): 55 | batch = { 56 | "src_img_1": self.src_img_1_batch, 57 | "tgt_img": self.tgt_img_batch, 58 | "src_img_2": self.src_img_2_batch, 59 | } 60 | return batch 61 | -------------------------------------------------------------------------------- /dataloaders/factory.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Fabio Tosi, Filippo Aleotti, Pierluigi Zama Ramirez, Matteo Poggi, 2 | # Samuele Salti, Luigi Di Stefano, Stefano Mattoccia 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """ 17 | Factory for dataloaders 18 | """ 19 | 20 | import tensorflow as tf 21 | import numpy as np 22 | from dataloaders import dataloader_one_frame, dataloader_three_frames 23 | 24 | 25 | TESTER_DATALOADERS_FACTORY = { 26 | "semantic": dataloader_one_frame.TestDataloader, 27 | "depth": dataloader_one_frame.TestDataloader, 28 | "flow": dataloader_three_frames.TestDataloader, 29 | "mask": dataloader_three_frames.TestDataloader, 30 | } 31 | 32 | 33 | def get_dataloader(task): 34 | """Return the desired dataloader. 
35 | :param task: task to perform 36 | :return dataloader: dataloader suited for the task 37 | """ 38 | assert task in TESTER_DATALOADERS_FACTORY.keys() 39 | return TESTER_DATALOADERS_FACTORY[task] 40 | -------------------------------------------------------------------------------- /dataloaders/general_dataloader.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Fabio Tosi, Filippo Aleotti, Pierluigi Zama Ramirez, Matteo Poggi, 2 | # Samuele Salti, Luigi Di Stefano, Stefano Mattoccia 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """ 17 | Dataloader for Test 18 | """ 19 | import tensorflow as tf 20 | import numpy as np 21 | from collections import namedtuple 22 | 23 | 24 | dataloader_parameters = namedtuple("dataloader_parameters", "height, width, task") 25 | 26 | 27 | def string_length_tf(t): 28 | return tf.py_func(len, [t], [tf.int64]) 29 | 30 | 31 | class GeneralDataloader(object): 32 | def __init__( 33 | self, datapath, filenames_file, params, 34 | ): 35 | if not datapath.endswith("/"): 36 | datapath = datapath + "/" 37 | self.datapath = datapath 38 | self.params = params 39 | self.filenames_file = filenames_file 40 | self.src_img_1_batch = None 41 | self.src_img_2_batch = None 42 | self.tgt_img_batch = None 43 | self.build() 44 | 45 | def build(self): 46 | pass 47 | 48 | def get_next_batch(self): 49 | pass 50 | 51 | def read_image(self, image_path): 52 | """Read an image from the file system 53 | :params image_path: string, path to image 54 | """ 55 | with tf.variable_scope("read_image"): 56 | path_length = string_length_tf(image_path)[0] 57 | file_extension = tf.substr(image_path, path_length - 3, 3) 58 | file_cond = tf.equal(file_extension, "jpg") 59 | 60 | image = tf.cond( 61 | file_cond, 62 | lambda: tf.image.decode_jpeg(tf.read_file(image_path)), 63 | lambda: tf.image.decode_png(tf.read_file(image_path)), 64 | ) 65 | 66 | self.image_w = tf.shape(image)[1] 67 | self.image_h = tf.shape(image)[0] 68 | 69 | image = tf.image.convert_image_dtype(image, tf.float32) 70 | image = tf.image.resize_images( 71 | image, 72 | [self.params.height, self.params.width], 73 | tf.image.ResizeMethod.AREA, 74 | ) 75 | return image 76 | -------------------------------------------------------------------------------- /evaluators/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Fabio Tosi, Filippo Aleotti, Pierluigi Zama Ramirez, Matteo Poggi, 2 | # Samuele Salti, Luigi Di Stefano, Stefano Mattoccia 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | -------------------------------------------------------------------------------- /evaluators/depth.py: -------------------------------------------------------------------------------- 1 | """ 2 | Depth evaluation for KITTI Eigen split 3 | This code is based on https://github.com/mrharicot/monodepth/blob/master/utils/evaluate_kitti.py 4 | We would like to thank C. Godard and other authors for sharing their code 5 | """ 6 | from __future__ import division 7 | import os 8 | import argparse 9 | import numpy as np 10 | from tqdm import tqdm 11 | import sys 12 | 13 | sys.path.insert(0, os.path.abspath("..")) 14 | from helpers import depth_utils 15 | 16 | 17 | parser = argparse.ArgumentParser(description="Evaluation on the KITTI dataset") 18 | parser.add_argument( 19 | "--prediction_folder", type=str, help="path to estimated disparities", required=True 20 | ) 21 | parser.add_argument( 22 | "--datapath", type=str, help="path to ground truth disparities", required=True 23 | ) 24 | parser.add_argument( 25 | "--min_depth", type=float, help="minimum depth for evaluation", default=1e-3 26 | ) 27 | parser.add_argument( 28 | "--max_depth", type=float, help="maximum depth for evaluation", default=80 29 | ) 30 | parser.add_argument( 31 | "--filename_file", 32 | type=str, 33 | help="path to filename file", 34 | default="../filenames/eigen_test.txt", 35 | ) 36 | args = parser.parse_args() 37 | 38 | if __name__ == "__main__": 39 | print("Depth evaluation is started: loading ground-truths and predictions") 40 | pred_disparities = [] 41 | num_samples = 697 42 | 43 | for t_id in range(num_samples): 44 | pred_disparities.append( 45 | np.load(os.path.join(args.prediction_folder, str(t_id) + ".npy")) 46 | ) 47 | datapath = args.datapath 48 | if not datapath.endswith("/"): 49 | datapath += "/" 50 | test_files = depth_utils.read_text_lines(args.filename_file) 51 | gt_files, gt_calib, im_sizes, im_files, cams = depth_utils.read_file_data( 52 | test_files, datapath 53 | ) 54 | 55 | num_test = len(im_files) 56 | gt_depths = [] 57 | pred_depths = [] 58 | for t_id in range(num_samples): 59 | camera_id = cams[t_id] # 2 is left, 3 is right 60 | depth = depth_utils.generate_depth_map( 61 | gt_calib[t_id], gt_files[t_id], im_sizes[t_id], camera_id, False, True 62 | ) 63 | gt_depths.append(depth.astype(np.float32)) 64 | 65 | disp_pred = pred_disparities[t_id].squeeze() 66 | 67 | # need to convert from disparity to depth 68 | focal_length, baseline = depth_utils.get_focal_length_baseline( 69 | gt_calib[t_id], camera_id 70 | ) 71 | depth_pred = (baseline * focal_length) / disp_pred 72 | depth_pred[np.isinf(depth_pred)] = 0 73 | 74 | pred_depths.append(depth_pred) 75 | 76 | rms = np.zeros(num_samples, np.float32) 77 | log_rms = np.zeros(num_samples, np.float32) 78 | abs_rel = np.zeros(num_samples, np.float32) 79 | sq_rel = np.zeros(num_samples, np.float32) 80 | d1_all = np.zeros(num_samples, np.float32) 81 | a1 = np.zeros(num_samples, np.float32) 82 | a2 = np.zeros(num_samples, np.float32) 83 | a3 = np.zeros(num_samples, np.float32) 84 | 85 | with tqdm(total=num_samples) as pbar: 86 | 
for i in range(num_samples): 87 | 88 | gt_depth = gt_depths[i] 89 | pred_depth = pred_depths[i] 90 | mask = np.logical_and(gt_depth > args.min_depth, gt_depth < args.max_depth) 91 | 92 | gt_height, gt_width = gt_depth.shape 93 | crop = np.array( 94 | [ 95 | 0.40810811 * gt_height, 96 | 0.99189189 * gt_height, 97 | 0.03594771 * gt_width, 98 | 0.96405229 * gt_width, 99 | ] 100 | ).astype(np.int32) 101 | 102 | crop_mask = np.zeros(mask.shape) 103 | crop_mask[crop[0] : crop[1], crop[2] : crop[3]] = 1 104 | mask = np.logical_and(mask, crop_mask) 105 | 106 | # Scale matching 107 | scalor = np.median(gt_depth[mask]) / np.median(pred_depth[mask]) 108 | pred_depth[mask] *= scalor 109 | 110 | pred_depth[pred_depth < args.min_depth] = args.min_depth 111 | pred_depth[pred_depth > args.max_depth] = args.max_depth 112 | 113 | ( 114 | abs_rel[i], 115 | sq_rel[i], 116 | rms[i], 117 | log_rms[i], 118 | a1[i], 119 | a2[i], 120 | a3[i], 121 | ) = depth_utils.compute_errors(gt_depth[mask], pred_depth[mask]) 122 | pbar.update(1) 123 | print( 124 | "{:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}".format( 125 | "abs_rel", "sq_rel", "rms", "log_rms", "d1_all", "a1", "a2", "a3" 126 | ) 127 | ) 128 | print( 129 | "{:10.4f}, {:10.4f}, {:10.3f}, {:10.3f}, {:10.3f}, {:10.3f}, {:10.3f}, {:10.3f}".format( 130 | abs_rel.mean(), 131 | sq_rel.mean(), 132 | rms.mean(), 133 | log_rms.mean(), 134 | d1_all.mean(), 135 | a1.mean(), 136 | a2.mean(), 137 | a3.mean(), 138 | ) 139 | ) 140 | -------------------------------------------------------------------------------- /evaluators/flow.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import cv2 3 | import os 4 | import numpy as np 5 | import argparse 6 | import sys 7 | 8 | sys.path.insert(0, os.path.abspath("..")) 9 | import helpers.flow_tool.flowlib as fl 10 | 11 | parser = argparse.ArgumentParser() 12 | parser.add_argument("--datapath", type=str, help="Path to kitti stereo dataset") 13 | parser.add_argument("--prediction_folder", type=str, help="Path to the flow prediction") 14 | args = parser.parse_args() 15 | 16 | 17 | def main(): 18 | img_num = 200 19 | noc_epe = np.zeros(img_num, dtype=np.float) 20 | noc_acc = np.zeros(img_num, dtype=np.float) 21 | occ_epe = np.zeros(img_num, dtype=np.float) 22 | occ_acc = np.zeros(img_num, dtype=np.float) 23 | 24 | eval_log = os.path.join(args.prediction_folder, "flow_result.txt") 25 | with open(eval_log, "w") as el: 26 | for idx in range(img_num): 27 | # read groundtruth flow 28 | gt_noc_fn = args.datapath + "training/flow_noc/%.6d_10.png" % idx 29 | gt_occ_fn = args.datapath + "training/flow_occ/%.6d_10.png" % idx 30 | gt_noc_flow = fl.read_flow(gt_noc_fn) 31 | gt_occ_flow = fl.read_flow(gt_occ_fn) 32 | 33 | # read predicted flow (in png format) 34 | pred_flow_fn = args.prediction_folder + "%.6d_10.png" % idx 35 | pred_flow = fl.read_flow(pred_flow_fn) 36 | 37 | # resize pred_flow to the same size as gt_flow 38 | dst_h = gt_noc_flow.shape[0] 39 | dst_w = gt_noc_flow.shape[1] 40 | 41 | # evaluation 42 | (single_noc_epe, single_noc_acc) = fl.evaluate_kitti_flow( 43 | gt_noc_flow, pred_flow, None 44 | ) 45 | (single_occ_epe, single_occ_acc) = fl.evaluate_kitti_flow( 46 | gt_occ_flow, pred_flow, None 47 | ) 48 | noc_epe[idx] = single_noc_epe 49 | noc_acc[idx] = single_noc_acc 50 | occ_epe[idx] = single_occ_epe 51 | occ_acc[idx] = single_occ_acc 52 | output_line = ( 53 | "Flow %.6d Noc EPE = %.4f" 54 | + " Noc ACC = %.4f" 55 | + " Occ EPE = %.4f" 56 | + 
" Occ ACC = %.4f\n" 57 | ) 58 | el.write( 59 | output_line 60 | % (idx, noc_epe[idx], noc_acc[idx], occ_epe[idx], occ_acc[idx]) 61 | ) 62 | 63 | noc_mean_epe = np.mean(noc_epe) 64 | noc_mean_acc = (1 - np.mean(noc_acc)) * 100.0 65 | occ_mean_epe = np.mean(occ_epe) 66 | occ_mean_acc = (1 - np.mean(occ_acc)) * 100.0 67 | 68 | print("Mean Noc EPE = %.2f " % noc_mean_epe) 69 | print("F1 Noc = %.2f " % noc_mean_acc) 70 | print("Mean Occ EPE = %.2f " % occ_mean_epe) 71 | print("F1 Occ = %.2f " % occ_mean_acc) 72 | 73 | 74 | main() 75 | -------------------------------------------------------------------------------- /evaluators/mask.py: -------------------------------------------------------------------------------- 1 | """ 2 | Adopted from https://github.com/martinkersner/py_img_seg_eval 3 | """ 4 | import os 5 | import numpy as np 6 | import scipy.misc as sm 7 | import cv2 8 | import matplotlib.pyplot as plt 9 | import multiprocessing 10 | import argparse 11 | from PIL import Image 12 | 13 | from tensorflow.python.platform import flags 14 | 15 | parser = argparse.ArgumentParser(description="Argument parser") 16 | parser.add_argument("--datapath", type=str, help="path to KITTI folder") 17 | parser.add_argument("--prediction_folder", type=str, help="path to predicted masks") 18 | parser.add_argument("--rescale", action="store_true", help="upsample motion mask") 19 | 20 | args = parser.parse_args() 21 | 22 | 23 | class EvalSegErr(Exception): 24 | def __init__(self, value): 25 | self.value = value 26 | 27 | def __str__(self): 28 | return repr(self.value) 29 | 30 | 31 | def pixel_accuracy(eval_segm, gt_segm): 32 | """ 33 | sum_i(n_ii) / sum_i(t_i) 34 | """ 35 | 36 | check_size(eval_segm, gt_segm) 37 | 38 | cl, n_cl = extract_classes(gt_segm) 39 | eval_mask, gt_mask = extract_both_masks(eval_segm, gt_segm, cl, n_cl) 40 | 41 | sum_n_ii = 0 42 | sum_t_i = 0 43 | 44 | for i, c in enumerate(cl): 45 | curr_eval_mask = eval_mask[i, :, :] 46 | curr_gt_mask = gt_mask[i, :, :] 47 | 48 | sum_n_ii += np.sum(np.logical_and(curr_eval_mask, curr_gt_mask)) 49 | sum_t_i += np.sum(curr_gt_mask) 50 | 51 | if sum_t_i == 0: 52 | pixel_accuracy_ = 0 53 | else: 54 | pixel_accuracy_ = sum_n_ii / sum_t_i 55 | 56 | return pixel_accuracy_ 57 | 58 | 59 | def mean_accuracy(eval_segm, gt_segm): 60 | """ 61 | (1/n_cl) sum_i(n_ii/t_i) 62 | """ 63 | 64 | check_size(eval_segm, gt_segm) 65 | 66 | cl, n_cl = extract_classes(gt_segm) 67 | eval_mask, gt_mask = extract_both_masks(eval_segm, gt_segm, cl, n_cl) 68 | 69 | accuracy = list([0]) * n_cl 70 | 71 | for i, c in enumerate(cl): 72 | curr_eval_mask = eval_mask[i, :, :] 73 | curr_gt_mask = gt_mask[i, :, :] 74 | 75 | n_ii = np.sum(np.logical_and(curr_eval_mask, curr_gt_mask)) 76 | t_i = np.sum(curr_gt_mask) 77 | 78 | if t_i != 0: 79 | accuracy[i] = n_ii / t_i 80 | 81 | mean_accuracy_ = np.mean(accuracy) 82 | return mean_accuracy_ 83 | 84 | 85 | def mean_IU(eval_segm, gt_segm): 86 | """ 87 | (1/n_cl) * sum_i(n_ii / (t_i + sum_j(n_ji) - n_ii)) 88 | """ 89 | 90 | check_size(eval_segm, gt_segm) 91 | 92 | cl, n_cl = union_classes(eval_segm, gt_segm) 93 | _, n_cl_gt = extract_classes(gt_segm) 94 | eval_mask, gt_mask = extract_both_masks(eval_segm, gt_segm, cl, n_cl) 95 | 96 | IU = list([0]) * n_cl 97 | 98 | for i, c in enumerate(cl): 99 | curr_eval_mask = eval_mask[i, :, :] 100 | curr_gt_mask = gt_mask[i, :, :] 101 | 102 | if (np.sum(curr_eval_mask) == 0) or (np.sum(curr_gt_mask) == 0): 103 | continue 104 | 105 | n_ii = np.sum(np.logical_and(curr_eval_mask, curr_gt_mask)) 106 | t_i = 
np.sum(curr_gt_mask) 107 | n_ij = np.sum(curr_eval_mask) 108 | 109 | IU[i] = n_ii / (t_i + n_ij - n_ii) 110 | 111 | mean_IU_ = np.sum(IU) / n_cl_gt 112 | return mean_IU_, np.array(IU) 113 | 114 | 115 | def frequency_weighted_IU(eval_segm, gt_segm): 116 | """ 117 | sum_k(t_k)^(-1) * sum_i((t_i*n_ii)/(t_i + sum_j(n_ji) - n_ii)) 118 | """ 119 | 120 | check_size(eval_segm, gt_segm) 121 | 122 | cl, n_cl = union_classes(eval_segm, gt_segm) 123 | eval_mask, gt_mask = extract_both_masks(eval_segm, gt_segm, cl, n_cl) 124 | 125 | frequency_weighted_IU_ = list([0]) * n_cl 126 | 127 | for i, c in enumerate(cl): 128 | curr_eval_mask = eval_mask[i, :, :] 129 | curr_gt_mask = gt_mask[i, :, :] 130 | 131 | if (np.sum(curr_eval_mask) == 0) or (np.sum(curr_gt_mask) == 0): 132 | continue 133 | 134 | n_ii = np.sum(np.logical_and(curr_eval_mask, curr_gt_mask)) 135 | t_i = np.sum(curr_gt_mask) 136 | n_ij = np.sum(curr_eval_mask) 137 | 138 | frequency_weighted_IU_[i] = (t_i * n_ii) / (t_i + n_ij - n_ii) 139 | 140 | sum_k_t_k = get_pixel_area(eval_segm) 141 | 142 | frequency_weighted_IU_ = np.sum(frequency_weighted_IU_) / sum_k_t_k 143 | return frequency_weighted_IU_ 144 | 145 | 146 | """ 147 | Auxiliary functions used during evaluation. 148 | """ 149 | 150 | 151 | def get_pixel_area(segm): 152 | return segm.shape[0] * segm.shape[1] 153 | 154 | 155 | def extract_both_masks(eval_segm, gt_segm, cl, n_cl): 156 | eval_mask = extract_masks(eval_segm, cl, n_cl) 157 | gt_mask = extract_masks(gt_segm, cl, n_cl) 158 | 159 | return eval_mask, gt_mask 160 | 161 | 162 | def extract_classes(segm): 163 | cl = np.unique(segm) 164 | n_cl = len(cl) 165 | 166 | return cl, n_cl 167 | 168 | 169 | def union_classes(eval_segm, gt_segm): 170 | eval_cl, _ = extract_classes(eval_segm) 171 | gt_cl, _ = extract_classes(gt_segm) 172 | 173 | cl = np.union1d(eval_cl, gt_cl) 174 | n_cl = len(cl) 175 | 176 | return cl, n_cl 177 | 178 | 179 | def extract_masks(segm, cl, n_cl): 180 | h, w = segm_size(segm) 181 | masks = np.zeros((n_cl, h, w)) 182 | 183 | for i, c in enumerate(cl): 184 | masks[i, :, :] = segm == c 185 | 186 | return masks 187 | 188 | 189 | def segm_size(segm): 190 | try: 191 | height = segm.shape[0] 192 | width = segm.shape[1] 193 | except IndexError: 194 | raise 195 | 196 | return height, width 197 | 198 | 199 | def check_size(eval_segm, gt_segm): 200 | h_e, w_e = segm_size(eval_segm) 201 | h_g, w_g = segm_size(gt_segm) 202 | 203 | if (h_e != h_g) or (w_e != w_g): 204 | raise EvalSegErr("DiffDim: Different dimensions of matrices!") 205 | 206 | 207 | def read_mask_gt_worker(i): 208 | path = os.path.join(args.datapath, "training/obj_map", str(i).zfill(6) + "_10.png") 209 | return sm.imread(path, -1) 210 | 211 | 212 | def load_gt_mask(): 213 | results = [read_mask_gt_worker(i) for i in range(200)] 214 | gt_masks = [] 215 | for m in results: 216 | m[m > 0.0] = 1.0 217 | gt_masks.append(m) 218 | 219 | return gt_masks 220 | 221 | 222 | def eval_mask(pred_masks, gt_masks): 223 | grey_cmap = plt.get_cmap("Greys") 224 | 225 | pa_res, ma_res, mIU_res, fwIU_res = 0.0, 0.0, 0.0, 0.0 226 | IU_res = np.array([0.0, 0.0]) 227 | 228 | for i in range(200): 229 | gt_mask = gt_masks[i] 230 | pred_mask = pred_masks[i] 231 | 232 | if args.rescale: 233 | H, W = gt_mask.shape[0:2] 234 | pred_mask = cv2.resize(pred_mask, (W, H), interpolation=cv2.INTER_NEAREST) 235 | 236 | th = 0.5 237 | 238 | pred_mask[pred_mask > th] = 1.0 239 | pred_mask[pred_mask <= th] = 0.0 240 | # pred_mask = 1.0 - pred_mask 241 | 242 | pa_res += pixel_accuracy(pred_mask, gt_mask) 
243 | ma_res += mean_accuracy(pred_mask, gt_mask) 244 | 245 | mIU, IU = mean_IU(pred_mask, gt_mask) 246 | mIU_res += mIU 247 | IU_res += IU 248 | 249 | fwIU_res += frequency_weighted_IU(pred_mask, gt_mask) 250 | 251 | return ( 252 | pa_res / 200.0, 253 | ma_res / 200.0, 254 | mIU_res / 200.0, 255 | fwIU_res / 200.0, 256 | IU_res / 200.0, 257 | ) 258 | 259 | 260 | def read_mask_pred_worker(i): 261 | img = ( 262 | cv2.imread(args.prediction_folder + "/" + str(i).zfill(6) + "_10.png", -1) / 255.0 263 | ) 264 | return img 265 | 266 | 267 | def load_pred_mask(): 268 | results = [read_mask_pred_worker(i) for i in range(200)] 269 | 270 | pred_masks = [] 271 | for m in results: 272 | pred_masks.append(m) 273 | 274 | return pred_masks 275 | 276 | 277 | def evaluate(): 278 | gt_masks = load_gt_mask() 279 | predicted_masks = load_pred_mask() 280 | 281 | pa, ma, miu, fwiu, iu = eval_mask(predicted_masks, gt_masks) 282 | print( 283 | "PA:{:3.2f} MA:{:3.2f} mIU:{:3.2f} fwIU:{:3.2f} IU:[{:3.2f}, {:3.2f}]".format( 284 | pa, ma, miu, fwiu, iu[0], iu[1] 285 | ) 286 | ) 287 | 288 | 289 | if __name__ == "__main__": 290 | evaluate() 291 | -------------------------------------------------------------------------------- /evaluators/semantic.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Fabio Tosi, Filippo Aleotti, Pierluigi Zama Ramirez, Matteo Poggi, 2 | # Samuele Salti, Luigi Di Stefano, Stefano Mattoccia 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | 17 | from __future__ import print_function 18 | 19 | import tensorflow as tf 20 | import cv2 21 | import numpy as np 22 | import argparse 23 | import os 24 | 25 | id2trainId = { 26 | 0 : 255, 27 | 1 : 255, 28 | 2 : 255, 29 | 3 : 255, 30 | 4 : 255, 31 | 5 : 255, 32 | 6 : 255, 33 | 7 : 0, 34 | 8 : 1, 35 | 9 : 255, 36 | 10 : 255, 37 | 11 : 2, 38 | 12 : 3, 39 | 13 : 4, 40 | 14 : 255, 41 | 15 : 255, 42 | 16 : 255, 43 | 17 : 5, 44 | 18 : 255, 45 | 19 : 6, 46 | 20 : 7, 47 | 21 : 8, 48 | 22 : 9, 49 | 23 : 10, 50 | 24 : 11, 51 | 25 : 12, 52 | 26 : 13, 53 | 27 : 14, 54 | 28 : 15, 55 | 29 : 255, 56 | 30 : 255, 57 | 31 : 16, 58 | 32 : 17, 59 | 33 : 18 60 | } 61 | 62 | 63 | trainId2cat = { 64 | 0 : 0, 65 | 1 : 0, 66 | 2 : 1, 67 | 3 : 1, 68 | 4 : 1, 69 | 5 : 2, 70 | 6 : 2, 71 | 7 : 2, 72 | 8 : 3, 73 | 9 : 3, 74 | 10 : 4, 75 | 11 : 5, 76 | 12 : 5, 77 | 13 : 6, 78 | 14 : 6, 79 | 15 : 6, 80 | 16 : 6, 81 | 17 : 6, 82 | 18 : 6, 83 | } 84 | 85 | trainId2name = { 86 | 0 : "road", 87 | 1 : "sidewalk", 88 | 2 : "building", 89 | 3 : "wall", 90 | 4 : "fence", 91 | 5 : "pole", 92 | 6 : "traffic_light", 93 | 7 : "traffic_sign", 94 | 8 : "vegetation", 95 | 9 : "terrain", 96 | 10 : "sky", 97 | 11 : "person", 98 | 12 : "rider", 99 | 13 : "car", 100 | 14 : "truck", 101 | 15 : "bus", 102 | 16 : "train", 103 | 17 : "motorcycle", 104 | 18 : "bicycle" 105 | } 106 | 107 | num_train_classes = 19 108 | num_categories = 7 109 | num_total_classes = 34 110 | 111 | parser = argparse.ArgumentParser(description="Evaluation Semantic") 112 | ### PATHS 113 | parser.add_argument( 114 | "--dataset", 115 | dest="dataset", 116 | choices=["kitti", "cityscapes"], 117 | default="kitti", 118 | help="kitti, cityscapes", 119 | ) 120 | parser.add_argument( 121 | "--datapath", 122 | type=str, 123 | help="Path to dataset (e.g. 
data_semantics Kitti 2015)" 124 | ) 125 | parser.add_argument( 126 | "--prediction_folder", 127 | type=str, 128 | help="Path to predictions" 129 | ) 130 | parser.add_argument( 131 | "--filename_file", 132 | default="../filenames/kitti_2015_test_semantic.txt", 133 | help="Path to txt input list" 134 | ) 135 | ### PARAMS 136 | parser.add_argument( 137 | "--ignore_label", 138 | type=int, 139 | default=255, 140 | help="label to ignore in evaluation", 141 | ) 142 | parser.add_argument( 143 | "--format_pred", 144 | type=str, 145 | choices=["id", "trainId"], 146 | default="trainId", 147 | help="encoding of predictions, trainId or id", 148 | ) 149 | parser.add_argument( 150 | "--format_gt", 151 | type=str, 152 | choices=["id", "trainId"], 153 | default="id", 154 | help="encoding of gt, trainId or id", 155 | ) 156 | args = parser.parse_args() 157 | 158 | 159 | def convert_labels(sem, mapping): 160 | p = tf.cast(sem, tf.uint8) 161 | m = tf.ones_like(p) * 255 162 | for i in range(0, len(mapping)): 163 | mi = tf.multiply(tf.ones_like(p), mapping[i]) 164 | m = tf.where(tf.equal(p, i), mi, m) 165 | return m 166 | 167 | 168 | prediction_placeholder = tf.placeholder(tf.int32) 169 | prediction_placeholder.set_shape([None, None, 1]) 170 | gt_placeholder = tf.placeholder(tf.int32) 171 | 172 | gt = gt_placeholder 173 | prediction = prediction_placeholder 174 | 175 | if args.format_pred == "id": 176 | prediction = convert_labels(prediction, id2trainId) 177 | if args.format_gt == "id": 178 | gt = convert_labels(gt, id2trainId) 179 | 180 | 181 | pred_cat = convert_labels(prediction, trainId2cat) 182 | gt_cat = convert_labels(gt, trainId2cat) 183 | 184 | ### INIT WEIGHTS MIOU 185 | weightsValue = tf.to_float(tf.not_equal(gt, args.ignore_label)) 186 | ### IGNORE LABELS TO 0, WE HAVE ALREADY MASKED THOSE PIXELS WITH WEIGHTS 0### 187 | gt = tf.where(tf.equal(gt, args.ignore_label), tf.zeros_like(gt), gt) 188 | prediction = tf.where( 189 | tf.equal(prediction, args.ignore_label), tf.zeros_like(prediction), prediction 190 | ) 191 | ### ACCURACY ### 192 | acc, update_op_acc = tf.metrics.accuracy(gt, prediction, weights=weightsValue) 193 | ### MIOU ### 194 | miou, update_op = tf.metrics.mean_iou( 195 | labels=tf.reshape(gt, [-1]), 196 | predictions=tf.reshape(prediction, [-1]), 197 | num_classes=num_train_classes, 198 | weights=tf.reshape(weightsValue, [-1]), 199 | ) 200 | 201 | # CATEGORIES 202 | ### INIT WEIGHTS MIOU 203 | weightsValue_cat = tf.to_float(tf.not_equal(gt_cat, args.ignore_label)) 204 | ### IGNORE LABELS TO 0, WE HAVE ALREADY MASKED THOSE PIXELS WITH WEIGHTS 0### 205 | gt_cat = tf.where(tf.equal(gt_cat, args.ignore_label), tf.zeros_like(gt_cat), gt_cat) 206 | pred_cat = tf.where( 207 | tf.equal(pred_cat, args.ignore_label), tf.zeros_like(pred_cat), pred_cat 208 | ) 209 | ### MIOU ### 210 | miou_cat, update_op_cat = tf.metrics.mean_iou( 211 | labels=tf.reshape(gt_cat, [-1]), 212 | predictions=tf.reshape(pred_cat, [-1]), 213 | num_classes=num_categories, 214 | weights=tf.reshape(weightsValue_cat, [-1]), 215 | name="mean_iou_cat" 216 | ) 217 | 218 | init_op = [tf.global_variables_initializer(), tf.local_variables_initializer()] 219 | 220 | miou_value = 0 221 | with tf.Session() as sess: 222 | sess.run(init_op) 223 | lines = open(args.filename_file).readlines() 224 | lenght = len(lines) 225 | 226 | for idx, line in enumerate(lines): 227 | base_path = line.strip() 228 | prediction_folder = os.path.join(args.prediction_folder, base_path) 229 | datapath = os.path.join(args.datapath, "training/semantic", 
base_path) 230 | print("GT: ", datapath, " Pred: ", prediction_folder, idx, "/", lenght, end="\r") 231 | 232 | gt_value = cv2.imread(datapath, cv2.IMREAD_GRAYSCALE) 233 | pred_value = cv2.imread(prediction_folder, cv2.IMREAD_GRAYSCALE) 234 | 235 | image_w = gt_value.shape[1] 236 | image_h = gt_value.shape[0] 237 | 238 | if args.dataset == "cityscapes": 239 | crop_height = (image_h * 4) // 5 240 | gt_value = gt_value[:crop_height, :] 241 | gt_value = cv2.resize( 242 | gt_value, (image_w, image_h), interpolation=cv2.INTER_NEAREST 243 | ) 244 | 245 | _, _, _ = sess.run( 246 | [update_op_acc, update_op, update_op_cat], 247 | feed_dict={ 248 | prediction_placeholder: np.expand_dims(pred_value, axis=-1), 249 | gt_placeholder: np.expand_dims(gt_value, axis=-1), 250 | }, 251 | ) 252 | acc_value, miou_value, miou_cat_value = sess.run( 253 | [acc, miou, miou_cat], 254 | feed_dict={ 255 | prediction_placeholder: np.expand_dims(pred_value, axis=-1), 256 | gt_placeholder: np.expand_dims(gt_value, axis=-1), 257 | }, 258 | ) 259 | 260 | confusion_matrix = ( 261 | tf.get_default_graph() 262 | .get_tensor_by_name("mean_iou/total_confusion_matrix:0") 263 | .eval() 264 | ) 265 | print("") 266 | for cl in range(confusion_matrix.shape[0]): 267 | tp_fn = np.sum(confusion_matrix[cl, :]) 268 | tp_fp = np.sum(confusion_matrix[:, cl]) 269 | tp = confusion_matrix[cl, cl] 270 | if tp == 0 and (tp_fn + tp_fp - tp) == 0: 271 | IoU_cl = float("nan") 272 | else: 273 | IoU_cl = tp / (tp_fn + tp_fp - tp) 274 | print(trainId2name[cl] + ": {:.8f}".format(IoU_cl)) 275 | print("mIoU: " + str(miou_value)) 276 | print("mIoU Categories: " + str(miou_cat_value)) 277 | print("Pix. Acc.: " + str(acc_value)) 278 | -------------------------------------------------------------------------------- /filenames/kitti_2015_test.txt: -------------------------------------------------------------------------------- 1 | 000000_09.png 000000_10.png 000000_11.png 2 | 000001_09.png 000001_10.png 000001_11.png 3 | 000002_09.png 000002_10.png 000002_11.png 4 | 000003_09.png 000003_10.png 000003_11.png 5 | 000004_09.png 000004_10.png 000004_11.png 6 | 000005_09.png 000005_10.png 000005_11.png 7 | 000006_09.png 000006_10.png 000006_11.png 8 | 000007_09.png 000007_10.png 000007_11.png 9 | 000008_09.png 000008_10.png 000008_11.png 10 | 000009_09.png 000009_10.png 000009_11.png 11 | 000010_09.png 000010_10.png 000010_11.png 12 | 000011_09.png 000011_10.png 000011_11.png 13 | 000012_09.png 000012_10.png 000012_11.png 14 | 000013_09.png 000013_10.png 000013_11.png 15 | 000014_09.png 000014_10.png 000014_11.png 16 | 000015_09.png 000015_10.png 000015_11.png 17 | 000016_09.png 000016_10.png 000016_11.png 18 | 000017_09.png 000017_10.png 000017_11.png 19 | 000018_09.png 000018_10.png 000018_11.png 20 | 000019_09.png 000019_10.png 000019_11.png 21 | 000020_09.png 000020_10.png 000020_11.png 22 | 000021_09.png 000021_10.png 000021_11.png 23 | 000022_09.png 000022_10.png 000022_11.png 24 | 000023_09.png 000023_10.png 000023_11.png 25 | 000024_09.png 000024_10.png 000024_11.png 26 | 000025_09.png 000025_10.png 000025_11.png 27 | 000026_09.png 000026_10.png 000026_11.png 28 | 000027_09.png 000027_10.png 000027_11.png 29 | 000028_09.png 000028_10.png 000028_11.png 30 | 000029_09.png 000029_10.png 000029_11.png 31 | 000030_09.png 000030_10.png 000030_11.png 32 | 000031_09.png 000031_10.png 000031_11.png 33 | 000032_09.png 000032_10.png 000032_11.png 34 | 000033_09.png 000033_10.png 000033_11.png 35 | 000034_09.png 000034_10.png 000034_11.png 36 | 000035_09.png 
000035_10.png 000035_11.png 37 | 000036_09.png 000036_10.png 000036_11.png 38 | 000037_09.png 000037_10.png 000037_11.png 39 | 000038_09.png 000038_10.png 000038_11.png 40 | 000039_09.png 000039_10.png 000039_11.png 41 | 000040_09.png 000040_10.png 000040_11.png 42 | 000041_09.png 000041_10.png 000041_11.png 43 | 000042_09.png 000042_10.png 000042_11.png 44 | 000043_09.png 000043_10.png 000043_11.png 45 | 000044_09.png 000044_10.png 000044_11.png 46 | 000045_09.png 000045_10.png 000045_11.png 47 | 000046_09.png 000046_10.png 000046_11.png 48 | 000047_09.png 000047_10.png 000047_11.png 49 | 000048_09.png 000048_10.png 000048_11.png 50 | 000049_09.png 000049_10.png 000049_11.png 51 | 000050_09.png 000050_10.png 000050_11.png 52 | 000051_09.png 000051_10.png 000051_11.png 53 | 000052_09.png 000052_10.png 000052_11.png 54 | 000053_09.png 000053_10.png 000053_11.png 55 | 000054_09.png 000054_10.png 000054_11.png 56 | 000055_09.png 000055_10.png 000055_11.png 57 | 000056_09.png 000056_10.png 000056_11.png 58 | 000057_09.png 000057_10.png 000057_11.png 59 | 000058_09.png 000058_10.png 000058_11.png 60 | 000059_09.png 000059_10.png 000059_11.png 61 | 000060_09.png 000060_10.png 000060_11.png 62 | 000061_09.png 000061_10.png 000061_11.png 63 | 000062_09.png 000062_10.png 000062_11.png 64 | 000063_09.png 000063_10.png 000063_11.png 65 | 000064_09.png 000064_10.png 000064_11.png 66 | 000065_09.png 000065_10.png 000065_11.png 67 | 000066_09.png 000066_10.png 000066_11.png 68 | 000067_09.png 000067_10.png 000067_11.png 69 | 000068_09.png 000068_10.png 000068_11.png 70 | 000069_09.png 000069_10.png 000069_11.png 71 | 000070_09.png 000070_10.png 000070_11.png 72 | 000071_09.png 000071_10.png 000071_11.png 73 | 000072_09.png 000072_10.png 000072_11.png 74 | 000073_09.png 000073_10.png 000073_11.png 75 | 000074_09.png 000074_10.png 000074_11.png 76 | 000075_09.png 000075_10.png 000075_11.png 77 | 000076_09.png 000076_10.png 000076_11.png 78 | 000077_09.png 000077_10.png 000077_11.png 79 | 000078_09.png 000078_10.png 000078_11.png 80 | 000079_09.png 000079_10.png 000079_11.png 81 | 000080_09.png 000080_10.png 000080_11.png 82 | 000081_09.png 000081_10.png 000081_11.png 83 | 000082_09.png 000082_10.png 000082_11.png 84 | 000083_09.png 000083_10.png 000083_11.png 85 | 000084_09.png 000084_10.png 000084_11.png 86 | 000085_09.png 000085_10.png 000085_11.png 87 | 000086_09.png 000086_10.png 000086_11.png 88 | 000087_09.png 000087_10.png 000087_11.png 89 | 000088_09.png 000088_10.png 000088_11.png 90 | 000089_09.png 000089_10.png 000089_11.png 91 | 000090_09.png 000090_10.png 000090_11.png 92 | 000091_09.png 000091_10.png 000091_11.png 93 | 000092_09.png 000092_10.png 000092_11.png 94 | 000093_09.png 000093_10.png 000093_11.png 95 | 000094_09.png 000094_10.png 000094_11.png 96 | 000095_09.png 000095_10.png 000095_11.png 97 | 000096_09.png 000096_10.png 000096_11.png 98 | 000097_09.png 000097_10.png 000097_11.png 99 | 000098_09.png 000098_10.png 000098_11.png 100 | 000099_09.png 000099_10.png 000099_11.png 101 | 000100_09.png 000100_10.png 000100_11.png 102 | 000101_09.png 000101_10.png 000101_11.png 103 | 000102_09.png 000102_10.png 000102_11.png 104 | 000103_09.png 000103_10.png 000103_11.png 105 | 000104_09.png 000104_10.png 000104_11.png 106 | 000105_09.png 000105_10.png 000105_11.png 107 | 000106_09.png 000106_10.png 000106_11.png 108 | 000107_09.png 000107_10.png 000107_11.png 109 | 000108_09.png 000108_10.png 000108_11.png 110 | 000109_09.png 000109_10.png 000109_11.png 111 | 000110_09.png 000110_10.png 
000110_11.png 112 | 000111_09.png 000111_10.png 000111_11.png 113 | 000112_09.png 000112_10.png 000112_11.png 114 | 000113_09.png 000113_10.png 000113_11.png 115 | 000114_09.png 000114_10.png 000114_11.png 116 | 000115_09.png 000115_10.png 000115_11.png 117 | 000116_09.png 000116_10.png 000116_11.png 118 | 000117_09.png 000117_10.png 000117_11.png 119 | 000118_09.png 000118_10.png 000118_11.png 120 | 000119_09.png 000119_10.png 000119_11.png 121 | 000120_09.png 000120_10.png 000120_11.png 122 | 000121_09.png 000121_10.png 000121_11.png 123 | 000122_09.png 000122_10.png 000122_11.png 124 | 000123_09.png 000123_10.png 000123_11.png 125 | 000124_09.png 000124_10.png 000124_11.png 126 | 000125_09.png 000125_10.png 000125_11.png 127 | 000126_09.png 000126_10.png 000126_11.png 128 | 000127_09.png 000127_10.png 000127_11.png 129 | 000128_09.png 000128_10.png 000128_11.png 130 | 000129_09.png 000129_10.png 000129_11.png 131 | 000130_09.png 000130_10.png 000130_11.png 132 | 000131_09.png 000131_10.png 000131_11.png 133 | 000132_09.png 000132_10.png 000132_11.png 134 | 000133_09.png 000133_10.png 000133_11.png 135 | 000134_09.png 000134_10.png 000134_11.png 136 | 000135_09.png 000135_10.png 000135_11.png 137 | 000136_09.png 000136_10.png 000136_11.png 138 | 000137_09.png 000137_10.png 000137_11.png 139 | 000138_09.png 000138_10.png 000138_11.png 140 | 000139_09.png 000139_10.png 000139_11.png 141 | 000140_09.png 000140_10.png 000140_11.png 142 | 000141_09.png 000141_10.png 000141_11.png 143 | 000142_09.png 000142_10.png 000142_11.png 144 | 000143_09.png 000143_10.png 000143_11.png 145 | 000144_09.png 000144_10.png 000144_11.png 146 | 000145_09.png 000145_10.png 000145_11.png 147 | 000146_09.png 000146_10.png 000146_11.png 148 | 000147_09.png 000147_10.png 000147_11.png 149 | 000148_09.png 000148_10.png 000148_11.png 150 | 000149_09.png 000149_10.png 000149_11.png 151 | 000150_09.png 000150_10.png 000150_11.png 152 | 000151_09.png 000151_10.png 000151_11.png 153 | 000152_09.png 000152_10.png 000152_11.png 154 | 000153_09.png 000153_10.png 000153_11.png 155 | 000154_09.png 000154_10.png 000154_11.png 156 | 000155_09.png 000155_10.png 000155_11.png 157 | 000156_09.png 000156_10.png 000156_11.png 158 | 000157_09.png 000157_10.png 000157_11.png 159 | 000158_09.png 000158_10.png 000158_11.png 160 | 000159_09.png 000159_10.png 000159_11.png 161 | 000160_09.png 000160_10.png 000160_11.png 162 | 000161_09.png 000161_10.png 000161_11.png 163 | 000162_09.png 000162_10.png 000162_11.png 164 | 000163_09.png 000163_10.png 000163_11.png 165 | 000164_09.png 000164_10.png 000164_11.png 166 | 000165_09.png 000165_10.png 000165_11.png 167 | 000166_09.png 000166_10.png 000166_11.png 168 | 000167_09.png 000167_10.png 000167_11.png 169 | 000168_09.png 000168_10.png 000168_11.png 170 | 000169_09.png 000169_10.png 000169_11.png 171 | 000170_09.png 000170_10.png 000170_11.png 172 | 000171_09.png 000171_10.png 000171_11.png 173 | 000172_09.png 000172_10.png 000172_11.png 174 | 000173_09.png 000173_10.png 000173_11.png 175 | 000174_09.png 000174_10.png 000174_11.png 176 | 000175_09.png 000175_10.png 000175_11.png 177 | 000176_09.png 000176_10.png 000176_11.png 178 | 000177_09.png 000177_10.png 000177_11.png 179 | 000178_09.png 000178_10.png 000178_11.png 180 | 000179_09.png 000179_10.png 000179_11.png 181 | 000180_09.png 000180_10.png 000180_11.png 182 | 000181_09.png 000181_10.png 000181_11.png 183 | 000182_09.png 000182_10.png 000182_11.png 184 | 000183_09.png 000183_10.png 000183_11.png 185 | 000184_09.png 000184_10.png 
000184_11.png 186 | 000185_09.png 000185_10.png 000185_11.png 187 | 000186_09.png 000186_10.png 000186_11.png 188 | 000187_09.png 000187_10.png 000187_11.png 189 | 000188_09.png 000188_10.png 000188_11.png 190 | 000189_09.png 000189_10.png 000189_11.png 191 | 000190_09.png 000190_10.png 000190_11.png 192 | 000191_09.png 000191_10.png 000191_11.png 193 | 000192_09.png 000192_10.png 000192_11.png 194 | 000193_09.png 000193_10.png 000193_11.png 195 | 000194_09.png 000194_10.png 000194_11.png 196 | 000195_09.png 000195_10.png 000195_11.png 197 | 000196_09.png 000196_10.png 000196_11.png 198 | 000197_09.png 000197_10.png 000197_11.png 199 | 000198_09.png 000198_10.png 000198_11.png 200 | 000199_09.png 000199_10.png 000199_11.png 201 | -------------------------------------------------------------------------------- /filenames/kitti_2015_test_semantic.txt: -------------------------------------------------------------------------------- 1 | 000000_10.png 2 | 000001_10.png 3 | 000002_10.png 4 | 000003_10.png 5 | 000004_10.png 6 | 000005_10.png 7 | 000006_10.png 8 | 000007_10.png 9 | 000008_10.png 10 | 000009_10.png 11 | 000010_10.png 12 | 000011_10.png 13 | 000012_10.png 14 | 000013_10.png 15 | 000014_10.png 16 | 000015_10.png 17 | 000016_10.png 18 | 000017_10.png 19 | 000018_10.png 20 | 000019_10.png 21 | 000020_10.png 22 | 000021_10.png 23 | 000022_10.png 24 | 000023_10.png 25 | 000024_10.png 26 | 000025_10.png 27 | 000026_10.png 28 | 000027_10.png 29 | 000028_10.png 30 | 000029_10.png 31 | 000030_10.png 32 | 000031_10.png 33 | 000032_10.png 34 | 000033_10.png 35 | 000034_10.png 36 | 000035_10.png 37 | 000036_10.png 38 | 000037_10.png 39 | 000038_10.png 40 | 000039_10.png 41 | 000040_10.png 42 | 000041_10.png 43 | 000042_10.png 44 | 000043_10.png 45 | 000044_10.png 46 | 000045_10.png 47 | 000046_10.png 48 | 000047_10.png 49 | 000048_10.png 50 | 000049_10.png 51 | 000050_10.png 52 | 000051_10.png 53 | 000052_10.png 54 | 000053_10.png 55 | 000054_10.png 56 | 000055_10.png 57 | 000056_10.png 58 | 000057_10.png 59 | 000058_10.png 60 | 000059_10.png 61 | 000060_10.png 62 | 000061_10.png 63 | 000062_10.png 64 | 000063_10.png 65 | 000064_10.png 66 | 000065_10.png 67 | 000066_10.png 68 | 000067_10.png 69 | 000068_10.png 70 | 000069_10.png 71 | 000070_10.png 72 | 000071_10.png 73 | 000072_10.png 74 | 000073_10.png 75 | 000074_10.png 76 | 000075_10.png 77 | 000076_10.png 78 | 000077_10.png 79 | 000078_10.png 80 | 000079_10.png 81 | 000080_10.png 82 | 000081_10.png 83 | 000082_10.png 84 | 000083_10.png 85 | 000084_10.png 86 | 000085_10.png 87 | 000086_10.png 88 | 000087_10.png 89 | 000088_10.png 90 | 000089_10.png 91 | 000090_10.png 92 | 000091_10.png 93 | 000092_10.png 94 | 000093_10.png 95 | 000094_10.png 96 | 000095_10.png 97 | 000096_10.png 98 | 000097_10.png 99 | 000098_10.png 100 | 000099_10.png 101 | 000100_10.png 102 | 000101_10.png 103 | 000102_10.png 104 | 000103_10.png 105 | 000104_10.png 106 | 000105_10.png 107 | 000106_10.png 108 | 000107_10.png 109 | 000108_10.png 110 | 000109_10.png 111 | 000110_10.png 112 | 000111_10.png 113 | 000112_10.png 114 | 000113_10.png 115 | 000114_10.png 116 | 000115_10.png 117 | 000116_10.png 118 | 000117_10.png 119 | 000118_10.png 120 | 000119_10.png 121 | 000120_10.png 122 | 000121_10.png 123 | 000122_10.png 124 | 000123_10.png 125 | 000124_10.png 126 | 000125_10.png 127 | 000126_10.png 128 | 000127_10.png 129 | 000128_10.png 130 | 000129_10.png 131 | 000130_10.png 132 | 000131_10.png 133 | 000132_10.png 134 | 000133_10.png 135 | 000134_10.png 136 | 000135_10.png 
137 | 000136_10.png 138 | 000137_10.png 139 | 000138_10.png 140 | 000139_10.png 141 | 000140_10.png 142 | 000141_10.png 143 | 000142_10.png 144 | 000143_10.png 145 | 000144_10.png 146 | 000145_10.png 147 | 000146_10.png 148 | 000147_10.png 149 | 000148_10.png 150 | 000149_10.png 151 | 000150_10.png 152 | 000151_10.png 153 | 000152_10.png 154 | 000153_10.png 155 | 000154_10.png 156 | 000155_10.png 157 | 000156_10.png 158 | 000157_10.png 159 | 000158_10.png 160 | 000159_10.png 161 | 000160_10.png 162 | 000161_10.png 163 | 000162_10.png 164 | 000163_10.png 165 | 000164_10.png 166 | 000165_10.png 167 | 000166_10.png 168 | 000167_10.png 169 | 000168_10.png 170 | 000169_10.png 171 | 000170_10.png 172 | 000171_10.png 173 | 000172_10.png 174 | 000173_10.png 175 | 000174_10.png 176 | 000175_10.png 177 | 000176_10.png 178 | 000177_10.png 179 | 000178_10.png 180 | 000179_10.png 181 | 000180_10.png 182 | 000181_10.png 183 | 000182_10.png 184 | 000183_10.png 185 | 000184_10.png 186 | 000185_10.png 187 | 000186_10.png 188 | 000187_10.png 189 | 000188_10.png 190 | 000189_10.png 191 | 000190_10.png 192 | 000191_10.png 193 | 000192_10.png 194 | 000193_10.png 195 | 000194_10.png 196 | 000195_10.png 197 | 000196_10.png 198 | 000197_10.png 199 | 000198_10.png 200 | 000199_10.png 201 | -------------------------------------------------------------------------------- /helpers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVLAB-Unibo/omeganet/7e23372923ee53745ba6bbb0c7921d7bb4eea01a/helpers/__init__.py -------------------------------------------------------------------------------- /helpers/bilinear_sampler.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility functions 3 | 4 | Part of this code is based on https://github.com/tinghuiz/SfMLearner/blob/master/utils.py, 5 | published under MIT License. 6 | We would like to thank T. 
Zhou and other authors for sharing their code 7 | 8 | """ 9 | from __future__ import division 10 | import numpy as np 11 | import tensorflow as tf 12 | 13 | 14 | def euler2mat(z, y, x): 15 | """Converts euler angles to rotation matrix 16 | TODO: remove the dimension for 'N' (deprecated for converting all source 17 | poses altogether) 18 | Reference: https://github.com/pulkitag/pycaffe-utils/blob/master/rot_utils.py#L174 19 | Args: 20 | z: rotation angle along z axis (in radians) -- size = [B, N] 21 | y: rotation angle along y axis (in radians) -- size = [B, N] 22 | x: rotation angle along x axis (in radians) -- size = [B, N] 23 | Returns: 24 | Rotation matrix corresponding to the euler angles -- size = [B, N, 3, 3] 25 | """ 26 | with tf.variable_scope("euler2mat"): 27 | B = tf.shape(z)[0] 28 | N = 1 29 | z = tf.clip_by_value(z, -np.pi, np.pi) 30 | y = tf.clip_by_value(y, -np.pi, np.pi) 31 | x = tf.clip_by_value(x, -np.pi, np.pi) 32 | 33 | # Expand to B x N x 1 x 1 34 | z = tf.expand_dims(tf.expand_dims(z, -1), -1) 35 | y = tf.expand_dims(tf.expand_dims(y, -1), -1) 36 | x = tf.expand_dims(tf.expand_dims(x, -1), -1) 37 | 38 | zeros = tf.zeros([B, N, 1, 1]) 39 | ones = tf.ones([B, N, 1, 1]) 40 | 41 | cosz = tf.cos(z) 42 | sinz = tf.sin(z) 43 | rotz_1 = tf.concat([cosz, -sinz, zeros], axis=3) 44 | rotz_2 = tf.concat([sinz, cosz, zeros], axis=3) 45 | rotz_3 = tf.concat([zeros, zeros, ones], axis=3) 46 | zmat = tf.concat([rotz_1, rotz_2, rotz_3], axis=2) 47 | 48 | cosy = tf.cos(y) 49 | siny = tf.sin(y) 50 | roty_1 = tf.concat([cosy, zeros, siny], axis=3) 51 | roty_2 = tf.concat([zeros, ones, zeros], axis=3) 52 | roty_3 = tf.concat([-siny, zeros, cosy], axis=3) 53 | ymat = tf.concat([roty_1, roty_2, roty_3], axis=2) 54 | 55 | cosx = tf.cos(x) 56 | sinx = tf.sin(x) 57 | rotx_1 = tf.concat([ones, zeros, zeros], axis=3) 58 | rotx_2 = tf.concat([zeros, cosx, -sinx], axis=3) 59 | rotx_3 = tf.concat([zeros, sinx, cosx], axis=3) 60 | xmat = tf.concat([rotx_1, rotx_2, rotx_3], axis=2) 61 | 62 | rotMat = tf.matmul(tf.matmul(xmat, ymat), zmat) 63 | return rotMat 64 | 65 | 66 | def pose_vec2mat(vec): 67 | """Converts 6DoF parameters to transformation matrix 68 | Args: 69 | vec: 6DoF parameters in the order of tx, ty, tz, rx, ry, rz -- [B, 6] 70 | Returns: 71 | A transformation matrix -- [B, 4, 4] 72 | """ 73 | with tf.variable_scope("vec2mat"): 74 | batch_size, _ = vec.get_shape().as_list() 75 | translation = tf.slice(vec, [0, 0], [-1, 3]) 76 | translation = tf.expand_dims(translation, -1) 77 | rx = tf.slice(vec, [0, 3], [-1, 1]) 78 | ry = tf.slice(vec, [0, 4], [-1, 1]) 79 | rz = tf.slice(vec, [0, 5], [-1, 1]) 80 | rot_mat = euler2mat(rz, ry, rx) 81 | rot_mat = tf.squeeze(rot_mat, axis=[1]) 82 | filler = tf.constant([0.0, 0.0, 0.0, 1.0], shape=[1, 1, 4]) 83 | filler = tf.tile(filler, [batch_size, 1, 1]) 84 | transform_mat = tf.concat([rot_mat, translation], axis=2) 85 | transform_mat = tf.concat([transform_mat, filler], axis=1) 86 | return transform_mat 87 | 88 | 89 | def pixel2cam(depth, pixel_coords, intrinsics, is_homogeneous=True): 90 | """Transforms coordinates in the pixel frame to the camera frame. 
91 | Args: 92 | depth: [batch, height, width] 93 | pixel_coords: homogeneous pixel coordinates [batch, 3, height, width] 94 | intrinsics: camera intrinsics [batch, 3, 3] 95 | is_homogeneous: return in homogeneous coordinates 96 | Returns: 97 | Coords in the camera frame [batch, 3 (4 if homogeneous), height, width] 98 | """ 99 | with tf.variable_scope("pixel2cam"): 100 | batch, height, width = depth.get_shape().as_list() 101 | depth = tf.reshape(depth, [batch, 1, -1]) 102 | pixel_coords = tf.reshape(pixel_coords, [batch, 3, -1]) 103 | cam_coords = tf.matmul(tf.matrix_inverse(intrinsics), pixel_coords) * depth 104 | if is_homogeneous: 105 | ones = tf.ones([batch, 1, height * width]) 106 | cam_coords = tf.concat([cam_coords, ones], axis=1) 107 | cam_coords = tf.reshape(cam_coords, [batch, -1, height, width]) 108 | return cam_coords 109 | 110 | 111 | def cam2pixel(cam_coords, proj): 112 | """Transforms coordinates in a camera frame to the pixel frame. 113 | Args: 114 | cam_coords: [batch, 4, height, width] 115 | proj: [batch, 4, 4] 116 | Returns: 117 | Pixel coordinates projected from the camera frame [batch, height, width, 2] 118 | """ 119 | with tf.variable_scope("cam2pixel"): 120 | batch, _, height, width = cam_coords.get_shape().as_list() 121 | cam_coords = tf.reshape(cam_coords, [batch, 4, -1]) 122 | unnormalized_pixel_coords = tf.matmul(proj, cam_coords) 123 | x_u = tf.slice(unnormalized_pixel_coords, [0, 0, 0], [-1, 1, -1]) 124 | y_u = tf.slice(unnormalized_pixel_coords, [0, 1, 0], [-1, 1, -1]) 125 | z_u = tf.slice(unnormalized_pixel_coords, [0, 2, 0], [-1, 1, -1]) 126 | x_n = x_u / (z_u + 1e-10) 127 | y_n = y_u / (z_u + 1e-10) 128 | pixel_coords = tf.concat([x_n, y_n], axis=1) 129 | pixel_coords = tf.reshape(pixel_coords, [batch, 2, height, width]) 130 | return tf.transpose(pixel_coords, perm=[0, 2, 3, 1]) 131 | 132 | 133 | def meshgrid(batch, height, width, is_homogeneous=True): 134 | """Construct a 2D meshgrid. 
135 | Args: 136 | batch: batch size 137 | height: height of the grid 138 | width: width of the grid 139 | is_homogeneous: whether to return in homogeneous coordinates 140 | Returns: 141 | x,y grid coordinates [batch, 2 (3 if homogeneous), height, width] 142 | """ 143 | x_t = tf.matmul( 144 | tf.ones(shape=tf.stack([height, 1])), 145 | tf.transpose(tf.expand_dims(tf.linspace(-1.0, 1.0, width), 1), [1, 0]), 146 | ) 147 | y_t = tf.matmul( 148 | tf.expand_dims(tf.linspace(-1.0, 1.0, height), 1), 149 | tf.ones(shape=tf.stack([1, width])), 150 | ) 151 | x_t = (x_t + 1.0) * 0.5 * tf.cast(width - 1, tf.float32) 152 | y_t = (y_t + 1.0) * 0.5 * tf.cast(height - 1, tf.float32) 153 | if is_homogeneous: 154 | ones = tf.ones_like(x_t) 155 | coords = tf.stack([x_t, y_t, ones], axis=0) 156 | else: 157 | coords = tf.stack([x_t, y_t], axis=0) 158 | coords = tf.tile(tf.expand_dims(coords, 0), [batch, 1, 1, 1]) 159 | return coords 160 | 161 | 162 | def flow_warp(src_img, flow): 163 | """ inverse warp a source image to the target image plane based on flow field 164 | Args: 165 | src_img: the source image [batch, height_s, width_s, 3] 166 | flow: target image to source image flow [batch, height_t, width_t, 2] 167 | Returns: 168 | Source image inverse warped to the target image plane [batch, height_t, width_t, 3] 169 | """ 170 | with tf.variable_scope("flow_warp"): 171 | batch, height, width, _ = src_img.get_shape().as_list() 172 | tgt_pixel_coords = tf.transpose( 173 | meshgrid(batch, height, width, False), [0, 2, 3, 1] 174 | ) 175 | src_pixel_coords = tgt_pixel_coords + flow 176 | output_img = bilinear_sampler(src_img, src_pixel_coords) 177 | return output_img 178 | 179 | 180 | def compute_rigid_flow(depth, pose, intrinsics, reverse_pose=False): 181 | """Compute the rigid flow from target image plane to source image 182 | Args: 183 | depth: depth map of the target image [batch, height_t, width_t] 184 | pose: target to source (or source to target if reverse_pose=True) 185 | camera transformation matrix [batch, 6], in the order of 186 | tx, ty, tz, rx, ry, rz; 187 | intrinsics: camera intrinsics [batch, 3, 3] 188 | Returns: 189 | Rigid flow from target image to source image [batch, height_t, width_t, 2] 190 | """ 191 | with tf.variable_scope("compute_rigid_flow"): 192 | batch, height, width = depth.get_shape().as_list() 193 | # Convert pose vector to matrix 194 | pose = pose_vec2mat(pose) 195 | if reverse_pose: 196 | pose = tf.matrix_inverse(pose) 197 | # Construct pixel grid coordinates 198 | pixel_coords = meshgrid(batch, height, width) 199 | tgt_pixel_coords = tf.transpose(pixel_coords[:, :2, :, :], [0, 2, 3, 1]) 200 | # Convert pixel coordinates to the camera frame 201 | cam_coords = pixel2cam(depth, pixel_coords, intrinsics) 202 | # Construct a 4x4 intrinsic matrix 203 | filler = tf.constant([0.0, 0.0, 0.0, 1.0], shape=[1, 1, 4]) 204 | filler = tf.tile(filler, [batch, 1, 1]) 205 | intrinsics = tf.concat([intrinsics, tf.zeros([batch, 3, 1])], axis=2) 206 | intrinsics = tf.concat([intrinsics, filler], axis=1) 207 | # Get a 4x4 transformation matrix from 'target' camera frame to 'source' 208 | # pixel frame. 209 | proj_tgt_cam_to_src_pixel = tf.matmul(intrinsics, pose) 210 | src_pixel_coords = cam2pixel(cam_coords, proj_tgt_cam_to_src_pixel) 211 | rigid_flow = src_pixel_coords - tgt_pixel_coords 212 | return rigid_flow 213 | 214 | 215 | def bilinear_sampler(imgs, coords): 216 | """Construct a new image by bilinear sampling from the input image. 
217 | Points falling outside the source image boundary have value 0. 218 | Args: 219 | imgs: source image to be sampled from [batch, height_s, width_s, channels] 220 | coords: coordinates of source pixels to sample from [batch, height_t, 221 | width_t, 2]. height_t/width_t correspond to the dimensions of the output 222 | image (don't need to be the same as height_s/width_s). The two channels 223 | correspond to x and y coordinates respectively. 224 | Returns: 225 | A new sampled image [batch, height_t, width_t, channels] 226 | """ 227 | 228 | def _repeat(x, n_repeats): 229 | rep = tf.transpose( 230 | tf.expand_dims(tf.ones(shape=tf.stack([n_repeats,])), 1), [1, 0] 231 | ) 232 | rep = tf.cast(rep, "float32") 233 | x = tf.matmul(tf.reshape(x, (-1, 1)), rep) 234 | return tf.reshape(x, [-1]) 235 | 236 | with tf.name_scope("image_sampling"): 237 | coords_x, coords_y = tf.split(coords, [1, 1], axis=3) 238 | inp_size = imgs.get_shape() 239 | coord_size = coords.get_shape() 240 | out_size = coords.get_shape().as_list() 241 | out_size[3] = imgs.get_shape().as_list()[3] 242 | 243 | coords_x = tf.cast(coords_x, "float32") 244 | coords_y = tf.cast(coords_y, "float32") 245 | 246 | x0 = tf.floor(coords_x) 247 | x1 = x0 + 1 248 | y0 = tf.floor(coords_y) 249 | y1 = y0 + 1 250 | 251 | y_max = tf.cast(tf.shape(imgs)[1] - 1, "float32") 252 | x_max = tf.cast(tf.shape(imgs)[2] - 1, "float32") 253 | # zero = tf.zeros([1], dtype='float32') 254 | zero = tf.constant(0, dtype=tf.float32) 255 | 256 | x0_safe = tf.clip_by_value(x0, zero, x_max) 257 | y0_safe = tf.clip_by_value(y0, zero, y_max) 258 | x1_safe = tf.clip_by_value(x1, zero, x_max) 259 | y1_safe = tf.clip_by_value(y1, zero, y_max) 260 | 261 | wt_x0 = x1_safe - coords_x 262 | wt_x1 = coords_x - x0_safe 263 | wt_y0 = y1_safe - coords_y 264 | wt_y1 = coords_y - y0_safe 265 | 266 | ## indices in the flat image to sample from 267 | dim2 = tf.cast(inp_size[2], "float32") 268 | dim1 = tf.cast(inp_size[2] * inp_size[1], "float32") 269 | base = tf.reshape( 270 | _repeat( 271 | tf.cast(tf.range(coord_size[0]), "float32") * dim1, 272 | coord_size[1] * coord_size[2], 273 | ), 274 | [out_size[0], out_size[1], out_size[2], 1], 275 | ) 276 | 277 | base_y0 = base + y0_safe * dim2 278 | base_y1 = base + y1_safe * dim2 279 | idx00 = tf.reshape(x0_safe + base_y0, [-1]) 280 | idx01 = x0_safe + base_y1 281 | idx10 = x1_safe + base_y0 282 | idx11 = x1_safe + base_y1 283 | 284 | ## sample from imgs 285 | imgs_flat = tf.reshape(imgs, tf.stack([-1, inp_size[3]])) 286 | imgs_flat = tf.cast(imgs_flat, "float32") 287 | im00 = tf.reshape(tf.gather(imgs_flat, tf.cast(idx00, "int32")), out_size) 288 | im01 = tf.reshape(tf.gather(imgs_flat, tf.cast(idx01, "int32")), out_size) 289 | im10 = tf.reshape(tf.gather(imgs_flat, tf.cast(idx10, "int32")), out_size) 290 | im11 = tf.reshape(tf.gather(imgs_flat, tf.cast(idx11, "int32")), out_size) 291 | 292 | w00 = wt_x0 * wt_y0 293 | w01 = wt_x0 * wt_y1 294 | w10 = wt_x1 * wt_y0 295 | w11 = wt_x1 * wt_y1 296 | 297 | output = tf.add_n([w00 * im00, w01 * im01, w10 * im10, w11 * im11]) 298 | return output 299 | -------------------------------------------------------------------------------- /helpers/depth_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Evaluation utils. 3 | This code is based on https://github.com/mrharicot/monodepth/blob/master/utils/evaluation_utils.py 4 | We would like to thank C. 
Godard and other authors for sharing their code 5 | """ 6 | 7 | import os 8 | import numpy as np 9 | import pandas as pd 10 | import cv2 11 | from collections import Counter 12 | import pickle 13 | 14 | 15 | def compute_errors(gt, pred): 16 | thresh = np.maximum((gt / pred), (pred / gt)) 17 | a1 = (thresh < 1.25).mean() 18 | a2 = (thresh < 1.25 ** 2).mean() 19 | a3 = (thresh < 1.25 ** 3).mean() 20 | 21 | rmse = (gt - pred) ** 2 22 | rmse = np.sqrt(rmse.mean()) 23 | 24 | rmse_log = (np.log(gt) - np.log(pred)) ** 2 25 | rmse_log = np.sqrt(rmse_log.mean()) 26 | 27 | abs_rel = np.mean(np.abs(gt - pred) / gt) 28 | 29 | sq_rel = np.mean(((gt - pred) ** 2) / gt) 30 | 31 | return abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3 32 | 33 | 34 | ############################################################################### 35 | ####################### KITTI 36 | 37 | width_to_focal = dict() 38 | width_to_focal[1242] = 721.5377 39 | width_to_focal[1241] = 718.856 40 | width_to_focal[1224] = 707.0493 41 | width_to_focal[1238] = 718.3351 42 | 43 | 44 | def load_gt_disp_kitti(path): 45 | gt_disparities = [] 46 | for i in range(200): 47 | disp = cv2.imread( 48 | path + "/training/disp_noc_0/" + str(i).zfill(6) + "_10.png", -1 49 | ) 50 | disp = disp.astype(np.float32) / 256 51 | gt_disparities.append(disp) 52 | return gt_disparities 53 | 54 | 55 | def convert_disps_to_depths_kitti(gt_disparities, pred_disparities): 56 | gt_depths = [] 57 | pred_depths = [] 58 | pred_disparities_resized = [] 59 | 60 | for i in range(len(gt_disparities)): 61 | gt_disp = gt_disparities[i] 62 | height, width = gt_disp.shape 63 | 64 | pred_disp = pred_disparities[i] 65 | pred_disparities_resized.append(pred_disp) 66 | 67 | mask = gt_disp > 0 68 | 69 | gt_depth = width_to_focal[width] * 0.54 / (gt_disp + (1.0 - mask)) 70 | pred_depth = width_to_focal[width] * 0.54 / pred_disp 71 | 72 | gt_depths.append(gt_depth) 73 | pred_depths.append(pred_depth) 74 | return gt_depths, pred_depths, pred_disparities_resized 75 | 76 | 77 | ############################################################################### 78 | ####################### EIGEN 79 | 80 | 81 | def read_text_lines(file_path): 82 | f = open(file_path, "r") 83 | lines = f.readlines() 84 | f.close() 85 | lines = [l.rstrip() for l in lines] 86 | return lines 87 | 88 | 89 | def read_file_data(files, data_root): 90 | gt_files = [] 91 | gt_calib = [] 92 | im_sizes = [] 93 | im_files = [] 94 | cams = [] 95 | num_probs = 0 96 | for filename in files: 97 | filename = filename.split()[0] 98 | splits = filename.split("/") 99 | camera_id = np.int32(splits[2][-1:]) # 2 is left, 3 is right 100 | date = splits[0] 101 | im_id = splits[4][:10] 102 | file_root = "{}/{}" 103 | 104 | im = filename 105 | vel = "{}/{}/velodyne_points/data/{}.bin".format(splits[0], splits[1], im_id) 106 | 107 | if os.path.isfile(data_root + im): 108 | gt_files.append(data_root + vel) 109 | gt_calib.append(data_root + date + "/") 110 | im_sizes.append(cv2.imread(data_root + im).shape[:2]) 111 | im_files.append(data_root + im) 112 | cams.append(2) 113 | else: 114 | num_probs += 1 115 | print("{} missing".format(data_root + im)) 116 | 117 | return gt_files, gt_calib, im_sizes, im_files, cams 118 | 119 | 120 | def load_velodyne_points(file_name): 121 | # adapted from https://github.com/hunse/kitti 122 | points = np.fromfile(file_name, dtype=np.float32).reshape(-1, 4) 123 | points[:, 3] = 1.0 # homogeneous 124 | return points 125 | 126 | 127 | def lin_interp(shape, xyd): 128 | # taken from https://github.com/hunse/kitti 
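# NOTE (added comment, not in the upstream file): lin_interp below calls
# LinearNDInterpolator, which is never imported in this module. Presumably a
# `from scipy.interpolate import LinearNDInterpolator` is expected near the top
# of the file, as in the monodepth/kitti evaluation utilities credited above;
# without it, generate_depth_map(..., interp=True) raises a NameError.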
129 | m, n = shape 130 | ij, d = xyd[:, 1::-1], xyd[:, 2] 131 | f = LinearNDInterpolator(ij, d, fill_value=0) 132 | J, I = np.meshgrid(np.arange(n), np.arange(m)) 133 | IJ = np.vstack([I.flatten(), J.flatten()]).T 134 | disparity = f(IJ).reshape(shape) 135 | return disparity 136 | 137 | 138 | def read_calib_file(path): 139 | # taken from https://github.com/hunse/kitti 140 | float_chars = set("0123456789.e+- ") 141 | data = {} 142 | with open(path, "r") as f: 143 | for line in f.readlines(): 144 | key, value = line.split(":", 1) 145 | value = value.strip() 146 | data[key] = value 147 | if float_chars.issuperset(value): 148 | # try to cast to float array 149 | try: 150 | # NOTE: as reported in ISSUE #224 of Monodepth 151 | # https://github.com/mrharicot/monodepth/issues/224 152 | 153 | # data[key] = np.array(map(float, value.split(" "))) 154 | data[key] = np.array(list(map(float, value.split(" ")))) 155 | 156 | except ValueError: 157 | # casting error: data[key] already eq. value, so pass 158 | pass 159 | 160 | return data 161 | 162 | 163 | def get_focal_length_baseline(calib_dir, cam): 164 | cam2cam = read_calib_file(calib_dir + "calib_cam_to_cam.txt") 165 | P2_rect = cam2cam["P_rect_02"].reshape(3, 4) 166 | P3_rect = cam2cam["P_rect_03"].reshape(3, 4) 167 | 168 | # cam 2 is left of camera 0 -6cm 169 | # cam 3 is to the right +54cm 170 | b2 = P2_rect[0, 3] / -P2_rect[0, 0] 171 | b3 = P3_rect[0, 3] / -P3_rect[0, 0] 172 | baseline = b3 - b2 173 | 174 | if cam == 2: 175 | focal_length = P2_rect[0, 0] 176 | elif cam == 3: 177 | focal_length = P3_rect[0, 0] 178 | 179 | return focal_length, baseline 180 | 181 | 182 | def sub2ind(matrixSize, rowSub, colSub): 183 | m, n = matrixSize 184 | return rowSub * (n - 1) + colSub - 1 185 | 186 | 187 | def generate_depth_map( 188 | calib_dir, velo_file_name, im_shape, cam=2, interp=False, vel_depth=False 189 | ): 190 | # load calibration files 191 | cam2cam = read_calib_file(calib_dir + "calib_cam_to_cam.txt") 192 | velo2cam = read_calib_file(calib_dir + "calib_velo_to_cam.txt") 193 | velo2cam = np.hstack((velo2cam["R"].reshape(3, 3), velo2cam["T"][..., np.newaxis])) 194 | velo2cam = np.vstack((velo2cam, np.array([0, 0, 0, 1.0]))) 195 | 196 | # compute projection matrix velodyne->image plane 197 | R_cam2rect = np.eye(4) 198 | R_cam2rect[:3, :3] = cam2cam["R_rect_00"].reshape(3, 3) 199 | P_rect = cam2cam["P_rect_0" + str(cam)].reshape(3, 4) 200 | P_velo2im = np.dot(np.dot(P_rect, R_cam2rect), velo2cam) 201 | 202 | # load velodyne points and remove all behind image plane (approximation) 203 | # each row of the velodyne data is forward, left, up, reflectance 204 | velo = load_velodyne_points(velo_file_name) 205 | velo = velo[velo[:, 0] >= 0, :] 206 | 207 | # project the points to the camera 208 | velo_pts_im = np.dot(P_velo2im, velo.T).T 209 | velo_pts_im[:, :2] = velo_pts_im[:, :2] / velo_pts_im[:, 2][..., np.newaxis] 210 | 211 | if vel_depth: 212 | velo_pts_im[:, 2] = velo[:, 0] 213 | 214 | # check if in bounds 215 | # use minus 1 to get the exact same value as KITTI matlab code 216 | velo_pts_im[:, 0] = np.round(velo_pts_im[:, 0]) - 1 217 | velo_pts_im[:, 1] = np.round(velo_pts_im[:, 1]) - 1 218 | val_inds = (velo_pts_im[:, 0] >= 0) & (velo_pts_im[:, 1] >= 0) 219 | val_inds = ( 220 | val_inds & (velo_pts_im[:, 0] < im_shape[1]) & (velo_pts_im[:, 1] < im_shape[0]) 221 | ) 222 | velo_pts_im = velo_pts_im[val_inds, :] 223 | 224 | # project to image 225 | depth = np.zeros((im_shape)) 226 | depth[ 227 | velo_pts_im[:, 1].astype(np.int), velo_pts_im[:, 
0].astype(np.int) 228 | ] = velo_pts_im[:, 2] 229 | 230 | # find the duplicate points and choose the closest depth 231 | inds = sub2ind(depth.shape, velo_pts_im[:, 1], velo_pts_im[:, 0]) 232 | dupe_inds = [item for item, count in Counter(inds).items() if count > 1] 233 | for dd in dupe_inds: 234 | pts = np.where(inds == dd)[0] 235 | x_loc = int(velo_pts_im[pts[0], 0]) 236 | y_loc = int(velo_pts_im[pts[0], 1]) 237 | depth[y_loc, x_loc] = velo_pts_im[pts, 2].min() 238 | depth[depth < 0] = 0 239 | 240 | if interp: 241 | # interpolate the depth map to fill in holes 242 | depth_interp = lin_interp(im_shape, velo_pts_im) 243 | return depth, depth_interp 244 | else: 245 | return depth 246 | 247 | 248 | def load_priors(path, num_samples, split): 249 | """ load semantic priors """ 250 | priors = [] 251 | for t_id in range(num_samples): 252 | name = ( 253 | str(t_id).zfill(6) + "_10.png" if split == "kitti" else str(t_id) + ".png" 254 | ) 255 | prior = cv2.imread(os.path.join(path, name), cv2.IMREAD_GRAYSCALE) 256 | assert prior is not None, "{} not found".format(os.path.join(path, name)) 257 | priors.append(prior) 258 | return priors 259 | 260 | 261 | def load_objects_mask(path, num_samples): 262 | """ Load object mask from kitti dataset """ 263 | priors = [] 264 | for t_id in range(num_samples): 265 | name = str(t_id).zfill(6) + "_10.png" 266 | full_name = os.path.join(path, "training", "obj_map", name) 267 | prior = cv2.imread(full_name, cv2.IMREAD_GRAYSCALE) 268 | assert prior is not None, "{} not found".format(full_name) 269 | priors.append(prior) 270 | return priors 271 | -------------------------------------------------------------------------------- /helpers/flow_tool/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2019 LI RUOTENG 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 
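The depth helpers shown earlier (compute_errors, load_gt_disp_kitti and convert_disps_to_depths_kitti in helpers/depth_utils.py) are meant to be chained when scoring disparity predictions on KITTI 2015. Below is a minimal sketch of that chain; it is not part of the repository, and the prediction list, the 80 m depth cap and the validity masking are assumptions mirroring common KITTI evaluation practice rather than this project's test.py.

import numpy as np
from helpers import depth_utils

def evaluate_kitti_depth(kitti_path, pred_disps, max_depth=80.0):
    """pred_disps: 200 predicted disparity maps (in pixels), already resized to GT size."""
    gt_disps = depth_utils.load_gt_disp_kitti(kitti_path)
    gt_depths, pred_depths, _ = depth_utils.convert_disps_to_depths_kitti(
        gt_disps, pred_disps
    )
    errors = []
    for i in range(len(gt_depths)):
        valid = gt_disps[i] > 0                      # score valid GT pixels only
        gt = np.clip(gt_depths[i][valid], 1e-3, max_depth)
        pred = np.clip(pred_depths[i][valid], 1e-3, max_depth)
        errors.append(depth_utils.compute_errors(gt, pred))
    # abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3 averaged over the 200 frames
    return np.mean(np.array(errors), axis=0)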
-------------------------------------------------------------------------------- /helpers/flow_tool/README.md: -------------------------------------------------------------------------------- 1 | Code from https://github.com/liruoteng/OpticalFlowToolkit, licensed with MIT license -------------------------------------------------------------------------------- /helpers/flow_tool/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVLAB-Unibo/omeganet/7e23372923ee53745ba6bbb0c7921d7bb4eea01a/helpers/flow_tool/__init__.py -------------------------------------------------------------------------------- /helpers/utilities.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Fabio Tosi, Filippo Aleotti, Pierluigi Zama Ramirez, Matteo Poggi, 2 | # Samuele Salti, Luigi Di Stefano, Stefano Mattoccia 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | """ Utility functions 18 | """ 19 | from collections import namedtuple 20 | import numpy as np 21 | import tensorflow as tf 22 | import cv2 23 | import os 24 | import matplotlib 25 | import matplotlib.cm 26 | 27 | Label = namedtuple( 28 | "Label", 29 | [ 30 | "name", # The identifier of this label, e.g. 'car', 'person', ... . 31 | # We use them to uniquely name a class 32 | "id", # An integer ID that is associated with this label. 33 | # The IDs are used to represent the label in ground truth images 34 | # An ID of -1 means that this label does not have an ID and thus 35 | # is ignored when creating ground truth images (e.g. license plate). 36 | "trainId", # An integer ID that overwrites the ID above, when creating ground truth 37 | # images for training. 38 | # For training, multiple labels might have the same ID. Then, these labels 39 | # are mapped to the same class in the ground truth images. For the inverse 40 | # mapping, we use the label that is defined first in the list below. 41 | # For example, mapping all void-type classes to the same ID in training, 42 | # might make sense for some approaches. 43 | "category", # The name of the category that this label belongs to 44 | "categoryId", # The ID of this category. Used to create ground truth images 45 | # on category level. 
46 | "hasInstances", # Whether this label distinguishes between single instances or not 47 | "ignoreInEval", # Whether pixels having this class as ground truth label are ignored 48 | # during evaluations or not 49 | "color", # The color of this label 50 | ], 51 | ) 52 | 53 | labels_all = [ 54 | # name id trainId category catId hasInstances ignoreInEval color 55 | Label("unlabeled", 0, 255, "void", 0, False, True, (0, 0, 0)), 56 | Label("ego vehicle", 1, 255, "void", 0, False, True, (0, 0, 0)), 57 | Label("rectification border", 2, 255, "void", 0, False, True, (0, 0, 0)), 58 | Label("out of roi", 3, 255, "void", 0, False, True, (0, 0, 0)), 59 | Label("static", 4, 255, "void", 0, False, True, (0, 0, 0)), 60 | Label("dynamic", 5, 255, "void", 0, False, True, (111, 74, 0)), 61 | Label("ground", 6, 255, "void", 0, False, True, (81, 0, 81)), 62 | Label("road", 7, 0, "flat", 1, False, False, (128, 64, 128)), 63 | Label("sidewalk", 8, 1, "flat", 1, False, False, (244, 35, 232)), 64 | Label("parking", 9, 255, "flat", 1, False, True, (250, 170, 160)), 65 | Label("rail track", 10, 255, "flat", 1, False, True, (230, 150, 140)), 66 | Label("building", 11, 2, "construction", 2, False, False, (70, 70, 70)), 67 | Label("wall", 12, 3, "construction", 2, False, False, (102, 102, 156)), 68 | Label("fence", 13, 4, "construction", 2, False, False, (190, 153, 153)), 69 | Label("guard rail", 14, 255, "construction", 2, False, True, (180, 165, 180)), 70 | Label("bridge", 15, 255, "construction", 2, False, True, (150, 100, 100)), 71 | Label("tunnel", 16, 255, "construction", 2, False, True, (150, 120, 90)), 72 | Label("pole", 17, 5, "object", 3, False, False, (153, 153, 153)), 73 | Label("polegroup", 18, 255, "object", 3, False, True, (153, 153, 153)), 74 | Label("traffic light", 19, 6, "object", 3, False, False, (250, 170, 30)), 75 | Label("traffic sign", 20, 7, "object", 3, False, False, (220, 220, 0)), 76 | Label("vegetation", 21, 8, "nature", 4, False, False, (107, 142, 35)), 77 | Label("terrain", 22, 9, "nature", 4, False, False, (152, 251, 152)), 78 | Label("sky", 23, 10, "sky", 5, False, False, (70, 130, 180)), 79 | Label("person", 24, 11, "human", 6, True, False, (220, 20, 60)), 80 | Label("rider", 25, 12, "human", 6, True, False, (255, 0, 0)), 81 | Label("car", 26, 13, "vehicle", 7, True, False, (0, 0, 142)), 82 | Label("truck", 27, 14, "vehicle", 7, True, False, (0, 0, 70)), 83 | Label("bus", 28, 15, "vehicle", 7, True, False, (0, 60, 100)), 84 | Label("caravan", 29, 255, "vehicle", 7, True, True, (0, 0, 90)), 85 | Label("trailer", 30, 255, "vehicle", 7, True, True, (0, 0, 110)), 86 | Label("train", 31, 16, "vehicle", 7, True, False, (0, 80, 100)), 87 | Label("motorcycle", 32, 17, "vehicle", 7, True, False, (0, 0, 230)), 88 | Label("bicycle", 33, 18, "vehicle", 7, True, False, (119, 11, 32)), 89 | ] 90 | 91 | labels_train = [ 92 | # name id trainId category catId hasInstances ignoreInEval color 93 | Label("road", 0, 0, "flat", 1, False, False, (128, 64, 128)), 94 | Label("sidewalk", 1, 1, "flat", 1, False, False, (244, 35, 232)), 95 | Label("building", 2, 2, "construction", 2, False, False, (70, 70, 70)), 96 | Label("wall", 3, 3, "construction", 2, False, False, (102, 102, 156)), 97 | Label("fence", 4, 4, "construction", 2, False, False, (190, 153, 153)), 98 | Label("pole", 5, 5, "object", 3, False, False, (153, 153, 153)), 99 | Label("traffic light", 6, 6, "object", 3, False, False, (250, 170, 30)), 100 | Label("traffic sign", 7, 7, "object", 3, False, False, (220, 220, 0)), 101 | Label("vegetation", 8, 
8, "nature", 4, False, False, (107, 142, 35)), 102 | Label("terrain", 9, 9, "nature", 4, False, False, (152, 251, 152)), 103 | Label("sky", 10, 10, "sky", 5, False, False, (70, 130, 180)), 104 | Label("person", 11, 11, "human", 6, True, False, (220, 20, 60)), 105 | Label("rider", 12, 12, "human", 6, True, False, (255, 0, 0)), 106 | Label("car", 13, 13, "vehicle", 7, True, False, (0, 0, 142)), 107 | Label("truck", 14, 14, "vehicle", 7, True, False, (0, 0, 70)), 108 | Label("bus", 15, 15, "vehicle", 7, True, False, (0, 60, 100)), 109 | Label("train", 16, 16, "vehicle", 7, True, False, (0, 80, 100)), 110 | Label("motorcycle", 17, 17, "vehicle", 7, True, False, (0, 0, 230)), 111 | Label("bicycle", 18, 18, "vehicle", 7, True, False, (119, 11, 32)), 112 | ] 113 | 114 | 115 | labels_static_dynamic_trainIds = [ 116 | # name id trainId category catId hasInstances ignoreInEval color 117 | Label("road", 0, 0, "flat", 1, False, False, (128, 64, 128)), 118 | Label("sidewalk", 1, 0, "flat", 1, False, False, (244, 35, 232)), 119 | Label("building", 2, 0, "construction", 2, False, False, (70, 70, 70)), 120 | Label("wall", 3, 0, "construction", 2, False, False, (102, 102, 156)), 121 | Label("fence", 4, 0, "construction", 2, False, False, (190, 153, 153)), 122 | Label("pole", 5, 0, "object", 3, False, False, (153, 153, 153)), 123 | Label("traffic light", 6, 0, "object", 3, False, False, (250, 170, 30)), 124 | Label("traffic sign", 7, 0, "object", 3, False, False, (220, 220, 0)), 125 | Label("vegetation", 8, 0, "nature", 4, False, False, (107, 142, 35)), 126 | Label("terrain", 9, 0, "nature", 4, False, False, (152, 251, 152)), 127 | Label("sky", 10, 0, "sky", 5, False, False, (70, 130, 180)), 128 | Label("person", 11, 1, "human", 6, True, False, (220, 20, 60)), 129 | Label("rider", 12, 1, "human", 6, True, False, (255, 0, 0)), 130 | Label("car", 13, 1, "vehicle", 7, True, False, (0, 0, 142)), 131 | Label("truck", 14, 1, "vehicle", 7, True, False, (0, 0, 70)), 132 | Label("bus", 15, 1, "vehicle", 7, True, False, (0, 60, 100)), 133 | Label("train", 16, 1, "vehicle", 7, True, False, (0, 80, 100)), 134 | Label("motorcycle", 17, 1, "vehicle", 7, True, False, (0, 0, 230)), 135 | Label("bicycle", 18, 1, "vehicle", 7, True, False, (119, 11, 32)), 136 | ] 137 | 138 | labels = labels_train 139 | id2Color = {label.id: label.color for label in labels} 140 | id2trainId = {label.id: label.trainId for label in labels_all} 141 | id2name = {label.id: label.name for label in labels} 142 | 143 | labels2priors = np.array( 144 | [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] 145 | ) # labels_static_dynamic_trainIds.trainId 146 | 147 | 148 | def extract_semantic_priors(predictions): 149 | """ Extract priors from a semantic map 150 | Return a new map, with the same shape of the input, with 1 for possibly moving 151 | objects and 0 otherwise. 
152 | Params: 153 | predictions: BxHxWx1 154 | Returns: 155 | priors: BxHxWx1 156 | """ 157 | priors = [] 158 | b, h, w, _ = predictions.shape 159 | for i in range(b): 160 | p = tf.py_func(label_to_priors, [predictions[i]], tf.uint8) 161 | p = tf.cast(p, tf.float32) 162 | priors.append(p) 163 | priors = tf.stack(priors, axis=0) 164 | priors.set_shape(predictions.get_shape()) 165 | return priors 166 | 167 | 168 | def label_to_priors(predictions): 169 | predictions = predictions.astype(np.uint8) 170 | predictions = predictions.squeeze() 171 | priors = labels2priors[predictions] 172 | priors = np.expand_dims(priors, -1) 173 | return priors.astype(np.uint8) 174 | 175 | 176 | def colormap_semantic(pred_sem, dict_id2color=id2Color): 177 | p = tf.squeeze(tf.cast(pred_sem, tf.uint8), axis=-1) 178 | p = tf.stack([p, p, p], axis=-1) 179 | m = tf.zeros_like(p) 180 | for i in range(0, len(dict_id2color)): 181 | mi = tf.multiply(tf.ones_like(p), dict_id2color[i]) 182 | m = tf.where(tf.equal(p, i), mi, m) 183 | return m 184 | 185 | 186 | def get_num_classes(): 187 | return len(labels) 188 | 189 | 190 | def colorize(value, vmin=None, vmax=None, cmap=None): 191 | """ 192 | A utility function for TensorFlow that maps a grayscale image to a matplotlib 193 | colormap for use with TensorBoard image summaries. 194 | By default it will normalize the input value to the range 0..1 before mapping 195 | to a grayscale colormap. 196 | Arguments: 197 | - value: 2D Tensor of shape [height, width] or 3D Tensor of shape 198 | [height, width, 1]. 199 | - vmin: the minimum value of the range used for normalization. 200 | (Default: value minimum) 201 | - vmax: the maximum value of the range used for normalization. 202 | (Default: value maximum) 203 | - cmap: a valid cmap named for use with matplotlib's `get_cmap`. 204 | (Default: 'gray') 205 | Example usage: 206 | ``` 207 | output = tf.random_uniform(shape=[256, 256, 1]) 208 | output_color = colorize(output, vmin=0.0, vmax=1.0, cmap='viridis') 209 | tf.summary.image('output', output_color) 210 | ``` 211 | 212 | Returns a 3D tensor of shape [height, width, 3]. 213 | """ 214 | 215 | # normalize 216 | vmin = tf.reduce_min(value) if vmin is None else vmin 217 | vmax = tf.reduce_max(value) if vmax is None else vmax 218 | value = (value - vmin) / (vmax - vmin) # vmin..vmax 219 | 220 | # squeeze last dim if it exists 221 | value = tf.squeeze(value) 222 | 223 | # quantize 224 | indices = tf.to_int32(tf.round(value * 255)) 225 | 226 | # gather 227 | cm = matplotlib.cm.get_cmap(cmap if cmap is not None else "gray") 228 | colors = tf.constant(cm.colors, dtype=tf.float32) 229 | value = tf.gather(colors, indices) 230 | 231 | return value 232 | 233 | 234 | def count_text_lines(file_path): 235 | f = open(file_path, "r") 236 | lines = f.readlines() 237 | f.close() 238 | return len(lines) 239 | 240 | 241 | def flow_to_color(flow, mask=None, max_flow=None): 242 | """ 243 | From Unflow by Meister et al 244 | https://arxiv.org/pdf/1711.07837.pdf 245 | https://github.com/simonmeister/UnFlow 246 | 247 | Converts flow to 3-channel color image. 248 | Args: 249 | flow: tensor of shape [num_batch, height, width, 2]. 250 | mask: flow validity mask of shape [num_batch, height, width, 1]. 
251 | """ 252 | n = 8 253 | num_batch, height, width, _ = tf.unstack(tf.shape(flow)) 254 | mask = tf.ones([num_batch, height, width, 1]) if mask is None else mask 255 | flow_u, flow_v = tf.unstack(flow, axis=3) 256 | if max_flow is not None: 257 | max_flow = tf.maximum(max_flow, 1) 258 | else: 259 | max_flow = tf.reduce_max(tf.abs(flow * mask)) 260 | mag = tf.sqrt(tf.reduce_sum(tf.square(flow), 3)) 261 | angle = tf.atan2(flow_v, flow_u) 262 | 263 | im_h = tf.mod(angle / (2 * np.pi) + 1.0, 1.0) 264 | im_s = tf.clip_by_value(mag * n / max_flow, 0, 1) 265 | im_v = tf.clip_by_value(n - im_s, 0, 1) 266 | im_hsv = tf.stack([im_h, im_s, im_v], 3) 267 | im = tf.image.hsv_to_rgb(im_hsv) 268 | return im * mask 269 | 270 | 271 | def tf_color_prior(prior): 272 | mapping = {0: (0, 0, 255), 1: (0, 255, 0)} 273 | return colormap_semantic(prior, mapping) 274 | 275 | 276 | def get_height_width(img): 277 | s = tf.shape(img) 278 | h = tf.to_int32(s[1]) 279 | w = tf.to_int32(s[2]) 280 | return h, w 281 | 282 | 283 | def get_priors_or_default(priors, img, params, mode): 284 | return ( 285 | priors 286 | if (params.use_priors and mode == "semantic") 287 | else tf.zeros_like(img[:, :, :, 0:1]) 288 | ) 289 | 290 | 291 | def create_dir(dirname): 292 | """Create a directory if not exists 293 | :param dirname: path of the directory to create 294 | """ 295 | if not os.path.exists(dirname): 296 | os.makedirs(dirname) 297 | 298 | 299 | def mask(img, mask, active): 300 | with tf.variable_scope("mask"): 301 | if active: 302 | return img * mask 303 | return img 304 | 305 | 306 | def flow_resize(flow, out_size, is_scale=True, method=0): 307 | """ 308 | method: 0 mean bilinear, 1 means nearest 309 | """ 310 | flow_size = tf.to_float(tf.shape(flow)[-3:-1]) 311 | b, _, _, c = flow.get_shape().as_list() 312 | flow = tf.image.resize_images(flow, out_size, method=method, align_corners=True) 313 | if is_scale: 314 | scale = tf.to_float(out_size) / flow_size 315 | scale = tf.stack([scale[1], scale[0]]) 316 | flow = tf.multiply(flow, scale) 317 | return flow 318 | 319 | 320 | def color_semantic(semantic_map, mapping=None): 321 | """Color a semantic map in numpy 322 | :param x: input semantic map 323 | :param mapping: optional color scheme. If not set, a default 324 | color scheme will be applied 325 | :return colored: colored semantic map 326 | """ 327 | if mapping is None: 328 | mapping = [ 329 | (128, 64, 128), 330 | (244, 35, 232), 331 | (70, 70, 70), 332 | (102, 102, 156), 333 | (190, 153, 153), 334 | (153, 153, 153), 335 | (250, 170, 30), 336 | (220, 220, 0), 337 | (107, 142, 35), 338 | (152, 251, 152), 339 | (70, 130, 180), 340 | (220, 20, 60), 341 | (255, 0, 0), 342 | (0, 0, 142), 343 | (0, 0, 70), 344 | (0, 60, 100), 345 | (0, 80, 100), 346 | (0, 0, 230), 347 | (119, 11, 32), 348 | ] 349 | h, w = semantic_map.shape[:2] 350 | colored = np.ones([h, w, 3], np.uint8) 351 | for x in range(len(mapping)): 352 | 353 | color = np.ones_like(colored) * mapping[x] 354 | current_sem = np.stack((semantic_map, semantic_map, semantic_map), axis=-1) 355 | index = np.ones_like(current_sem) * x 356 | colored = np.where(current_sem == index, color, colored) 357 | return colored 358 | 359 | 360 | def check_model_exists(ckpt): 361 | """Check if model exists 362 | :param ckpt: path to checkpoint 363 | :return exist: flag. 
True if model exists 364 | """ 365 | expected_data = ckpt + ".data-00000-of-00001" 366 | return os.path.exists(expected_data) 367 | 368 | 369 | def write_kitti_png_flow(dest, flow_data, mask_data=None): 370 | """Save optical flow in KITTI format, ie 16 bit png image" 371 | :param dest: where image will be saved 372 | :param flow_data: optical flow field. Array with shape (H,W,2) 373 | :param mask_data: optional mask 374 | """ 375 | flow_img = np.zeros((flow_data.shape[0], flow_data.shape[1], 3), dtype=np.uint16) 376 | flow_img[:, :, 2] = flow_data[:, :, 0] * 64.0 + 2 ** 15 377 | flow_img[:, :, 1] = flow_data[:, :, 1] * 64.0 + 2 ** 15 378 | if mask_data is None: 379 | mask_data = np.ones_like(flow_img[:, :, 2]) 380 | flow_img[:, :, 0] = mask_data[:, :] 381 | cv2.imwrite(dest, flow_img) 382 | 383 | 384 | def color_motion_mask(mask, color=None): 385 | """Apply a color scheme to a motion mask 386 | :param mask: input motion mask 387 | :param color: RGB tuple, color applied to moving objects. Default (220, 20, 60) 388 | :return final_mask: colored mask, as np.uint8 389 | """ 390 | if color is None: 391 | color = (220, 20, 60) 392 | h, w = mask.shape 393 | ext_mask = np.stack([mask, mask, mask], -1).astype(np.uint8) 394 | color = np.ones_like(ext_mask) * color 395 | index = np.ones_like(ext_mask) * 1.0 396 | final_mask = np.where(ext_mask == index, color, ext_mask).astype(np.uint8) 397 | return final_mask 398 | -------------------------------------------------------------------------------- /networks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVLAB-Unibo/omeganet/7e23372923ee53745ba6bbb0c7921d7bb4eea01a/networks/__init__.py -------------------------------------------------------------------------------- /networks/baseline.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Fabio Tosi, Filippo Aleotti, Pierluigi Zama Ramirez, Matteo Poggi, 2 | # Samuele Salti, Luigi Di Stefano, Stefano Mattoccia 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | 17 | """ 18 | Baseline network 19 | We learn to predict depth, pose of the camera, intrinsics and the semantic 20 | """ 21 | 22 | import tensorflow as tf 23 | import os 24 | from networks.ops import * 25 | from networks.general_network import GeneralNetwork 26 | from networks.network_components import * 27 | 28 | 29 | class BaselineNet(GeneralNetwork): 30 | """Baseline network, w/o OFNet and SD-OFNet. 31 | It contains DSNet and CameraNet 32 | """ 33 | 34 | def __init__(self, batch, is_training, params): 35 | """BaselineNet constructor: 36 | :param batch: input of the network. Dictionary 37 | :param is_training: training flag. 
For batchnorm 38 | :params: network settings 39 | """ 40 | super(BaselineNet, self).__init__(batch, is_training, params) 41 | self.name = "MultiViewNetwork" 42 | self.depth_tgt = None 43 | self.disp_tgt = None 44 | self.semantic_tgt = None 45 | 46 | def get_features(self, src_img_1, tgt_img, src_img_2, is_training, scope): 47 | """Extract features from images 48 | :param src_img_1: tensor with src1 image, (B,H,W,3) 49 | :param tgt_img: tensor with tgt image, (B,H,W,3) 50 | :param src_img_2: tensor with src1 image, (B,H,W,3) 51 | :param is_training: training flag. For batchnorm 52 | :param scope: name used in the feature extractor 53 | :return features: list of extracted features 54 | """ 55 | return feature_extractor(src_img_1, tgt_img, src_img_2, is_training, scope) 56 | 57 | def get_DSNet(self, features, classes, is_training): 58 | """Build DSNet, in charge of depth and semantic estimation 59 | :return DSNet: DSNet network 60 | """ 61 | return DSNet(features, classes, is_training) 62 | 63 | def get_CameraNet(self, src_img_1, tgt_img, src_img_2, is_training, scope="pose"): 64 | """Build CameraNet, in charge of pose and intrinsic estimation 65 | :return CameraNet: CameraNet network 66 | """ 67 | features = self.get_features(src_img_1, tgt_img, src_img_2, is_training, scope) 68 | return CameraNet(features, self.is_training) 69 | 70 | def disp_normalize(self, disp): 71 | """Apply spatial normalizer defined in 72 | :param disp: disparity (inverse depth) 73 | :return normalized_disp: tensor with same shape of disp 74 | """ 75 | with tf.variable_scope("disp_normalize"): 76 | return spatial_normalize(disp) 77 | 78 | def disp2depth(self, disp): 79 | """Turn disparity into depth 80 | :param disp: disparity (inverse depth) 81 | :return depth: tensor with same shape of disp 82 | """ 83 | with tf.variable_scope("disp2depth"): 84 | return 1.0 / disp 85 | 86 | def get_rigid_flow(self, depth, pose, intrinsics, pose_index, reversed_pose): 87 | """ 88 | Get rigid flow using depth and pose projection 89 | :param depth: depth estimated by DSNet. Tensor with shape (B,H,W) 90 | :param pose: pose estimated by CameraNet. Tensor with shape (1,2,6) 91 | :param pose_index: index of pose to use 92 | :param reversed_pose: if True, use reversed pose 93 | :return rigid flow: BxHxWx2 rigid optical flow 94 | :raise ValueError: if pose_index is not in [0,1] 95 | """ 96 | with tf.variable_scope("get_rigid_flow"): 97 | if pose_index not in [0, 1]: 98 | raise ValueError("pose index must be in [0,1]") 99 | rigid_flow = compute_rigid_flow( 100 | depth, pose[:, pose_index, :], intrinsics[:, 0, :, :], reversed_pose 101 | ) 102 | return rigid_flow 103 | 104 | def prepare_depth(self, disp): 105 | """ 106 | Turn disp into depth 107 | :param disp: tensor with disparity estimations 108 | """ 109 | with tf.variable_scope("prepare_depth"): 110 | normalized = tf.image.resize_bilinear( 111 | self.disp_normalize(disp), [self.h, self.w] 112 | ) 113 | depth = self.disp2depth(normalized) 114 | depth.set_shape([None, self.params.height, self.params.width, 1]) 115 | depth = tf.squeeze(depth, axis=3) 116 | return depth 117 | 118 | def prepare_disp(self, disp): 119 | """ First, normalization is applied to disp, then the result is 120 | upsampled to (self.params.height, self.params.width). 
121 | :param disp: tensor with shape (B,H,W) 122 | :return upsampled_normalized_disp: tensor with shape (B, self.params.height, self.params.width) 123 | """ 124 | with tf.variable_scope("prepare_disp"): 125 | disp = tf.image.resize_bilinear(self.disp_normalize(disp), [self.h, self.w]) 126 | disp.set_shape([None, self.params.height, self.params.width, 1]) 127 | return disp 128 | 129 | def upsample_semantic(self, semantic): 130 | """Upsample semantic to [self.params.height,self.params.width] 131 | :param semantic: tensor with logits or semantic labels 132 | """ 133 | with tf.variable_scope("upsample_semantic"): 134 | semantic = tf.image.resize_images( 135 | semantic, [self.params.height, self.params.width] 136 | ) 137 | return semantic 138 | 139 | def build_network(self): 140 | """Build baseline network, 141 | composed of DSNet and CameraNet 142 | """ 143 | with tf.variable_scope(self.name): 144 | 145 | self.features = self.get_features( 146 | self.src_img_1, 147 | self.tgt_img, 148 | self.src_img_2, 149 | self.is_training, 150 | scope=None, 151 | ) 152 | self.pred_disp_tgt, self.pred_semantic_logits_tgt = self.get_DSNet( 153 | self.features[1], self.classes, self.is_training 154 | ) 155 | print(" [*] Building DSNet: SUCCESS") 156 | 157 | self.pose, self.intrinsics = self.get_CameraNet( 158 | self.src_img_1, self.tgt_img, self.src_img_2, self.is_training 159 | ) 160 | print(" [*] Building CameraNet: SUCCESS") 161 | 162 | def build_outputs(self): 163 | """ Output generated by the network. 164 | Attributes semantic_tgt, depth_tgt and disp_tgt are updated 165 | """ 166 | with tf.variable_scope("build_baseline_outputs"): 167 | self.semantic_tgt = self.upsample_semantic(self.pred_semantic_logits_tgt) 168 | self.depth_tgt = self.prepare_depth(self.pred_disp_tgt[0]) 169 | self.disp_tgt = self.prepare_disp(self.pred_disp_tgt[0]) 170 | 171 | def get_network_params(self): 172 | """Get network variables to load. 173 | This function is valid only in the case test, since 174 | no Adam state is loaded and training from scratch 175 | is not supported. 176 | Note that also Batchnorm params are loaded 177 | """ 178 | with tf.variable_scope("get_network_params"): 179 | var = [x for x in tf.trainable_variables() if self.name in x.name] 180 | batch_norm_variables = [ 181 | x 182 | for x in tf.all_variables() 183 | if "moving_mean" in x.name or "moving_variance" in x.name 184 | ] 185 | var += batch_norm_variables 186 | return var 187 | -------------------------------------------------------------------------------- /networks/complete_network.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Fabio Tosi, Filippo Aleotti, Pierluigi Zama Ramirez, Matteo Poggi, 2 | # Samuele Salti, Luigi Di Stefano, Stefano Mattoccia 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
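# --- Editor's usage sketch (not part of the original sources) -----------------
# A minimal, hypothetical way to drive the BaselineNet defined above on its own,
# using TF 1.x placeholders shaped like the ones built later in single_inference.py.
# The sizes, feed names and parameter values below are illustrative assumptions.
#
# import tensorflow as tf
# from networks.baseline import BaselineNet
# from networks.general_network import network_parameters
#
# h, w = 192, 640
# params = network_parameters(height=h, width=w, load_only_baseline=True, tau=0.5)
# is_training = tf.placeholder(tf.bool)
# batch = {
#     "src_img_1": tf.placeholder(tf.float32, (1, h, w, 3)),
#     "tgt_img": tf.placeholder(tf.float32, (1, h, w, 3)),
#     "src_img_2": tf.placeholder(tf.float32, (1, h, w, 3)),
# }
# net = BaselineNet(batch, is_training, params)
# net.build()  # builds DSNet + CameraNet, then exposes depth_tgt, disp_tgt, semantic_tgt
# -------------------------------------------------------------------------------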
15 | 16 | 17 | """ 18 | Complete OmegaNet 19 | """ 20 | import tensorflow as tf 21 | import os 22 | 23 | from networks.general_network import GeneralNetwork 24 | from networks.baseline import BaselineNet 25 | from helpers import bilinear_sampler 26 | from networks.selflow.selflow_network import flownet 27 | from helpers.utilities import extract_semantic_priors 28 | 29 | 30 | class OmegaNet(GeneralNetwork): 31 | """OmegaNet. It contains DSNet, CameraNet and SD-OFNet 32 | """ 33 | 34 | def __init__(self, batch, is_training, params): 35 | """OmegaNet constructor: 36 | :param batch: input of the network. Dictionary 37 | :param is_training: training flag. For batchnorm 38 | :params: network settings 39 | """ 40 | super(OmegaNet, self).__init__(batch, is_training, params) 41 | self.name = "OmegaNet" 42 | self.disp = None 43 | self.optical_flow = None 44 | self.semantic_logits = None 45 | self.motion_mask = None 46 | 47 | def build_network(self): 48 | """Build OmegaNet: first, DSNet and CameraNet are instantiated, 49 | then SD-OFNet 50 | """ 51 | self.baselineNet = BaselineNet(self.batch, self.is_training, self.params) 52 | self.baselineNet.build_network() 53 | self.baselineNet.build_outputs() 54 | 55 | # prepare semantic stuff 56 | self.semantic_logits = self.baselineNet.pred_semantic_logits_tgt 57 | self.__semantic = self.prepare_semantic(self.semantic_logits) 58 | self.__priors = extract_semantic_priors(self.__semantic) 59 | self.__dynamic_tgt_mask, self.__static_tgt_mask = self.build_semantic_masks() 60 | 61 | # get rigid flow using depth and pose 62 | self.__sflow_src2_tgt = self.baselineNet.get_rigid_flow( 63 | self.baselineNet.depth_tgt, 64 | self.baselineNet.pose, 65 | self.baselineNet.intrinsics, 66 | pose_index=1, 67 | reversed_pose=False, 68 | ) 69 | 70 | # self-distilled optical flow network 71 | load_flow = not self.params.load_only_baseline 72 | self.__optical_flow_src2_tgt, _ = flownet( 73 | self.tgt_img.shape, 74 | self.src_img_1, 75 | self.tgt_img, 76 | self.src_img_2, 77 | train=False, 78 | trainable=load_flow, 79 | reuse=tf.AUTO_REUSE, 80 | regularizer=None, 81 | is_scale=True, 82 | scope="superflow", 83 | ) 84 | 85 | def prepare_final_motion_mask(self): 86 | """ 87 | :return final_motion_mask: motion binary mask. 1 if pixel is moving 88 | """ 89 | moving_src2_tgt = self.build_moving_probability_mask( 90 | self.__optical_flow_src2_tgt, self.__sflow_src2_tgt 91 | ) 92 | final_motion_mask = self.__dynamic_tgt_mask * tf.where( 93 | moving_src2_tgt > self.params.tau, 94 | tf.ones_like(moving_src2_tgt), 95 | tf.zeros_like(moving_src2_tgt), 96 | ) 97 | return final_motion_mask 98 | 99 | def prepare_semantic(self, logits, height=None, width=None): 100 | """Extract semantic map from logits. 101 | :param logits: semantic logits 102 | :param height: height of image. Optional (default is params.height) 103 | :param width: width of image. 
Optional (default is params.width) 104 | """ 105 | with tf.variable_scope("prepare_semantic"): 106 | if height is None: 107 | height = self.params.height 108 | if width is None: 109 | width = self.params.width 110 | logits = tf.image.resize_images(logits, [height, width]) 111 | semantic = tf.argmax(logits, axis=-1) 112 | semantic = tf.expand_dims(semantic, -1) 113 | semantic = tf.cast(semantic, tf.float32) 114 | return semantic 115 | 116 | def build_outputs(self): 117 | """Build outputs of the network 118 | """ 119 | with tf.variable_scope("build_outputs"): 120 | 121 | self.optical_flow = self.__optical_flow_src2_tgt 122 | self.disp = self.baselineNet.disp_tgt 123 | self.semantic = self.__semantic 124 | self.motion_mask = self.prepare_final_motion_mask() 125 | 126 | def tf_cosine_distance(self, a, b): 127 | """Measure cosine distance between a and b 128 | :param a: tensor 129 | :param b: tensor 130 | :return cosine similarity 131 | """ 132 | normalize_a = tf.nn.l2_normalize(a, -1) 133 | normalize_b = tf.nn.l2_normalize(b, -1) 134 | cos_similarity = tf.reduce_sum( 135 | tf.multiply(normalize_a, normalize_b), axis=-1, keep_dims=True 136 | ) 137 | return (1.0 - cos_similarity) / 2.0 138 | 139 | def get_occlusion_mask_from_rigid_flow(self, rigid_flow): 140 | """Prepare occlusion mask due to rigid motion 141 | :param rigid_flow: Tensor with rigid flow 142 | :return mask: mask of occlusions due to rigid camera motion 143 | """ 144 | with tf.variable_scope("get_occlusion_mask_from_rigid_flow"): 145 | b, h, w, _ = rigid_flow.shape 146 | rigid_flow = tf.stop_gradient(rigid_flow) 147 | mask = bilinear_sampler.flow_warp( 148 | tf.ones([b, h, w, 1], dtype=tf.float32), rigid_flow 149 | ) 150 | mask = tf.clip_by_value(mask, 0.0, 1.0) 151 | return mask 152 | 153 | def build_moving_probability_mask(self, optical_flow, rigid_flow): 154 | """ 155 | Masks of moving objects 156 | If the object is moving, this value should be low. 157 | """ 158 | with tf.variable_scope("build_moving_probability_mask"): 159 | epsylon = 1e-7 160 | optical_flow = tf.stop_gradient(optical_flow) 161 | rigid_flow = tf.stop_gradient(rigid_flow) 162 | normalized_optical_flow = tf.norm( 163 | optical_flow, axis=-1, keep_dims=True, name="optical_flow_norm" 164 | ) 165 | normalized_rigid_flow = tf.norm( 166 | rigid_flow, axis=-1, keep_dims=True, name="rigid_flow_norm" 167 | ) 168 | cosine_distance = self.tf_cosine_distance(optical_flow, rigid_flow) 169 | ratio = ( 170 | epsylon + tf.minimum(normalized_optical_flow, normalized_rigid_flow) 171 | ) / (epsylon + tf.maximum(normalized_optical_flow, normalized_rigid_flow)) 172 | ratio_distance = 1.0 - ratio 173 | moving_probability = tf.maximum(cosine_distance, ratio_distance) 174 | return moving_probability 175 | 176 | def get_network_params(self): 177 | """Load network params. 
178 | In particular, OmegaNet relies on DSNet, Camnet and self-distilled OFNet 179 | """ 180 | with tf.variable_scope("get_network_params"): 181 | baseline_vars = self.baselineNet.get_network_params() 182 | reflownet_vars = [ 183 | x for x in tf.trainable_variables() if "superflow" in x.name 184 | ] 185 | return baseline_vars + reflownet_vars 186 | 187 | def build_semantic_masks(self): 188 | """ 189 | Prepare masks based on semantic priors 190 | :return dynamic_tgt_mask: mask of potentially dinamyc objects 191 | :return static_tgt_mask: mask of potentially static objects 192 | """ 193 | with tf.variable_scope("build_semantic_masks"): 194 | dynamic_tgt_mask = self.__priors 195 | static_tgt_mask = 1.0 - dynamic_tgt_mask 196 | return dynamic_tgt_mask, static_tgt_mask 197 | -------------------------------------------------------------------------------- /networks/general_network.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Fabio Tosi, Filippo Aleotti, Pierluigi Zama Ramirez, Matteo Poggi, 2 | # Samuele Salti, Luigi Di Stefano, Stefano Mattoccia 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """ 17 | General network, superclass for other networks 18 | """ 19 | 20 | from abc import ABCMeta, abstractmethod 21 | import tensorflow as tf 22 | from helpers.utilities import get_num_classes, get_height_width, colormap_semantic 23 | from collections import namedtuple 24 | 25 | network_parameters = namedtuple( 26 | "network_parameters", "height, width, load_only_baseline, tau", 27 | ) 28 | 29 | 30 | class GeneralNetwork(object): 31 | """Template for other networks 32 | """ 33 | 34 | __metaclass__ = ABCMeta 35 | 36 | def __init__(self, batch, is_training, params): 37 | """ Prepare the network and create the graph""" 38 | self.is_training = is_training 39 | self.classes = get_num_classes() 40 | self.params = params 41 | self.src_img_1 = batch["src_img_1"] 42 | self.tgt_img = batch["tgt_img"] 43 | self.src_img_2 = batch["src_img_2"] 44 | self.h, self.w = get_height_width(self.tgt_img) 45 | self.batch = batch 46 | 47 | def build(self): 48 | """ Build the model and the outputs """ 49 | self.build_network() 50 | self.build_outputs() 51 | 52 | @abstractmethod 53 | def build_network(self): 54 | """ Network specification""" 55 | 56 | @abstractmethod 57 | def build_outputs(self): 58 | """ Output generated by the network. """ 59 | 60 | def build_masks(self): 61 | """ Build masks used in the stage """ 62 | -------------------------------------------------------------------------------- /networks/network_components.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Fabio Tosi, Filippo Aleotti, Pierluigi Zama Ramirez, Matteo Poggi, 2 | # Samuele Salti, Luigi Di Stefano, Stefano Mattoccia 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
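# --- Editor's numeric sketch (not part of the original sources) ---------------
# Per-pixel moving probability as computed by OmegaNet.build_moving_probability_mask
# in networks/complete_network.py above: the maximum of the cosine distance between
# the optical and rigid flow vectors and one minus their magnitude ratio. The flow
# vectors below are made-up values.
#
# import numpy as np
# EPS = 1e-7
# def moving_probability(of, rf):
#     of, rf = np.asarray(of, np.float64), np.asarray(rf, np.float64)
#     cos_dist = (1.0 - np.dot(of, rf) / (np.linalg.norm(of) * np.linalg.norm(rf))) / 2.0
#     n_of, n_rf = np.linalg.norm(of), np.linalg.norm(rf)
#     ratio_dist = 1.0 - (EPS + min(n_of, n_rf)) / (EPS + max(n_of, n_rf))
#     return max(cos_dist, ratio_dist)
#
# moving_probability([5.0, 0.0], [5.0, 0.0])   # ~0.0: flows agree, pixel follows camera motion
# moving_probability([5.0, 0.0], [1.0, 0.0])   # 0.8 : magnitudes disagree, likely moving
# moving_probability([5.0, 0.0], [-5.0, 0.0])  # 1.0 : directions disagree, likely moving
# -------------------------------------------------------------------------------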
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from networks.ops import * 17 | from helpers.bilinear_sampler import * 18 | 19 | NUM_FEATURES = 16 20 | FLOW_SCALING = 0.1 21 | DISP_SCALING = 10.0 22 | MIN_DISP = 0.01 23 | POSE_SCALING = 0.01 24 | 25 | 26 | def feature_extractor(src_img_1, tgt_img, src_img_2, is_training, name=None): 27 | """Features extractor 28 | :param src_img_1: image at time t-1. Tensor with shape [1,H,W,3], dtype=tf.float32 29 | :param tgt_img: image at time t. Tensor with shape [1,H,W,3], dtype=tf.float32 30 | :param src_img_2: image at time t+1. Tensor with shape [1,H,W,3], dtype=tf.float32 31 | :param is_training: training flag. For batchnorm 32 | :param name: name of the extractor. If name is not None, the name will be feature_extractor_NAME 33 | """ 34 | batch_norm_params = {"is_training": is_training} 35 | final_name = "feature_extractor" 36 | if name is not None: 37 | final_name = "{}_{}".format(final_name, name) 38 | with tf.variable_scope(final_name): 39 | pyramid_src_img_1 = build_pyramid( 40 | src_img_1, normalizer_params=batch_norm_params 41 | ) 42 | pyramid_tgt_img = build_pyramid(tgt_img, normalizer_params=batch_norm_params) 43 | pyramid_src_img_2 = build_pyramid( 44 | src_img_2, normalizer_params=batch_norm_params 45 | ) 46 | return pyramid_src_img_1, pyramid_tgt_img, pyramid_src_img_2 47 | 48 | 49 | def CameraNet(features, is_training): 50 | """CameraNet 51 | It estimates both the pose and camera intrinsics. 52 | :param features: list of features from [src1, tgt, src2] 53 | :param is_training: training flag. 
For batchnorm 54 | 55 | :return pose_final: tensor with shape (1, 2, 6) 56 | :return intrinsics_mat: tensor with shape (1, 1, 3, 3) 57 | """ 58 | with tf.variable_scope("pose_net"): 59 | batch_norm_params = {"is_training": is_training} 60 | 61 | pyramid_src_img_1 = features[0] 62 | pyramid_tgt_img = features[1] 63 | pyramid_src_img_2 = features[2] 64 | input_batch = tf.concat( 65 | [pyramid_src_img_1[4], pyramid_tgt_img[4], pyramid_src_img_2[4]], axis=3 66 | ) 67 | 68 | with tf.variable_scope("conv1_a"): 69 | conv1_a = conv2d( 70 | input_batch, 71 | NUM_FEATURES * 8, 72 | 3, 73 | 1, 74 | normalizer_params=batch_norm_params, 75 | activation_fn=tf.nn.relu, 76 | ) 77 | with tf.variable_scope("conv1_b"): 78 | conv1_b = conv2d( 79 | conv1_a, 80 | NUM_FEATURES * 8, 81 | 3, 82 | 2, 83 | normalizer_params=batch_norm_params, 84 | activation_fn=tf.nn.relu, 85 | ) 86 | with tf.variable_scope("conv2_a"): 87 | conv2_a = conv2d( 88 | conv1_b, 89 | NUM_FEATURES * 16, 90 | 3, 91 | 1, 92 | normalizer_params=batch_norm_params, 93 | activation_fn=tf.nn.relu, 94 | ) 95 | with tf.variable_scope("conv2_b"): 96 | conv2_b = conv2d( 97 | conv2_a, 98 | NUM_FEATURES * 16, 99 | 3, 100 | 2, 101 | normalizer_params=batch_norm_params, 102 | activation_fn=tf.nn.relu, 103 | ) 104 | 105 | # POSE ESTIMATOR 106 | with tf.variable_scope("pred"): 107 | pose_pred = conv2d( 108 | conv2_b, 12, 1, 1, normalizer_fn=None, activation_fn=None 109 | ) 110 | pose_avg = tf.reduce_mean(pose_pred, [1, 2]) 111 | pose_final = POSE_SCALING * tf.reshape(pose_avg, [-1, 2, 6]) 112 | 113 | # INTRINSIC ESTIMATOR 114 | s = tf.shape(pyramid_tgt_img[0]) 115 | h = tf.to_float(s[1]) 116 | w = tf.to_float(s[2]) 117 | intrinsics_mat = _estimate_intrinsics(conv2_b, w, h) 118 | 119 | return pose_final, intrinsics_mat 120 | 121 | 122 | def _estimate_intrinsics(bottleneck, image_width, image_height): 123 | """Estimate intrinsic 124 | :param bottleneck: feature bottleneck tensor 125 | :param image_width: width of the resized image 126 | :param image_height: height of the resized image 127 | 128 | :return intrinsic_mat: tensor with shape (1, 1, 3, 3) 129 | """ 130 | with tf.variable_scope("intrinsics"): 131 | bottleneck = tf.reduce_mean(bottleneck, axis=[1, 2], keepdims=True) 132 | focal_lengths = tf.squeeze( 133 | tf.contrib.layers.conv2d( 134 | bottleneck, 135 | 2, 136 | [1, 1], 137 | stride=1, 138 | activation_fn=tf.nn.softplus, 139 | weights_regularizer=None, 140 | scope="foci", 141 | ), 142 | axis=(1, 2), 143 | ) * tf.to_float(tf.convert_to_tensor([[image_width, image_height]])) 144 | 145 | offsets = ( 146 | tf.squeeze( 147 | tf.contrib.layers.conv2d( 148 | bottleneck, 149 | 2, 150 | [1, 1], 151 | stride=1, 152 | activation_fn=None, 153 | weights_regularizer=None, 154 | biases_initializer=None, 155 | scope="offsets", 156 | ), 157 | axis=(1, 2), 158 | ) 159 | + 0.5 160 | ) * tf.to_float(tf.convert_to_tensor([[image_width, image_height]])) 161 | 162 | foci = tf.linalg.diag(focal_lengths) 163 | intrinsic_mat = tf.concat([foci, tf.expand_dims(offsets, -1)], axis=2) 164 | batch_size = tf.shape(bottleneck)[0] 165 | last_row = tf.tile([[[0.0, 0.0, 1.0]]], [batch_size, 1, 1]) 166 | intrinsic_mat = tf.concat([intrinsic_mat, last_row], axis=1) 167 | intrinsic_mat = tf.expand_dims(intrinsic_mat, axis=1) 168 | return intrinsic_mat 169 | 170 | 171 | def DSNet(pyramid_tgt_img, classes, is_training): 172 | """DSNet 173 | """ 174 | with tf.variable_scope("monocular_depthnet", reuse=tf.AUTO_REUSE): 175 | 176 | batch_norm_params = {"is_training": is_training} 177 | 178 | # 
SCALE 5 179 | with tf.variable_scope("L5"): 180 | with tf.variable_scope("estimator"): 181 | conv5 = build_estimator( 182 | pyramid_tgt_img[5], normalizer_params=batch_norm_params 183 | ) 184 | with tf.variable_scope("disparity"): 185 | disp5 = get_disp(conv5, normalizer_params=batch_norm_params) 186 | updisp5 = depth_upsampling(disp5, 1) 187 | with tf.variable_scope("upsampler"): 188 | upconv5 = bilinear_upsampling_by_convolution( 189 | conv5, 2, normalizer_params=batch_norm_params 190 | ) 191 | # SCALE 4 192 | with tf.variable_scope("L4"): 193 | with tf.variable_scope("estimator"): 194 | conv4 = build_estimator( 195 | pyramid_tgt_img[4], upconv5, normalizer_params=batch_norm_params 196 | ) 197 | with tf.variable_scope("disparity"): 198 | disp4 = ( 199 | get_disp(conv4, normalizer_params=batch_norm_params) + updisp5[0] 200 | ) 201 | updisp4 = depth_upsampling(disp4, 1) 202 | with tf.variable_scope("upsampler"): 203 | upconv4 = bilinear_upsampling_by_convolution( 204 | conv4, 2, normalizer_params=batch_norm_params 205 | ) 206 | # SCALE 3 207 | with tf.variable_scope("L3"): 208 | with tf.variable_scope("estimator"): 209 | conv3 = build_estimator( 210 | pyramid_tgt_img[3], upconv4, normalizer_params=batch_norm_params 211 | ) 212 | with tf.variable_scope("disparity"): 213 | disp3 = ( 214 | get_disp(conv3, normalizer_params=batch_norm_params) + updisp4[0] 215 | ) 216 | updisp3 = depth_upsampling(disp3, 1) 217 | with tf.variable_scope("upsampler"): 218 | upconv3 = bilinear_upsampling_by_convolution( 219 | conv3, 2, normalizer_params=batch_norm_params 220 | ) 221 | # SCALE 2 222 | with tf.variable_scope("L2"): 223 | with tf.variable_scope("estimator"): 224 | conv2 = build_estimator( 225 | pyramid_tgt_img[2], upconv3, normalizer_params=batch_norm_params 226 | ) 227 | with tf.variable_scope("disparity"): 228 | disp2 = ( 229 | get_disp(conv2, normalizer_params=batch_norm_params) + updisp3[0] 230 | ) 231 | updisp2 = depth_upsampling(disp2, 1) 232 | with tf.variable_scope("upsampler"): 233 | upconv2 = bilinear_upsampling_by_convolution( 234 | conv2, 2, normalizer_params=batch_norm_params 235 | ) 236 | # SCALE 1 237 | with tf.variable_scope("L1"): 238 | with tf.variable_scope("estimator"): 239 | conv1 = build_estimator( 240 | pyramid_tgt_img[1], upconv2, normalizer_params=batch_norm_params 241 | ) 242 | with tf.variable_scope("disparity"): 243 | disp1 = ( 244 | get_disp(conv1, normalizer_params=batch_norm_params) + updisp2[0] 245 | ) 246 | 247 | with tf.variable_scope("semantic"): 248 | sem1 = get_semantic(conv1, classes, normalizer_params=batch_norm_params) 249 | 250 | return [disp1, disp2, disp3, disp4, disp5], sem1 251 | 252 | 253 | def build_pyramid(input_batch, normalizer_params=None, scope="img_pyramid"): 254 | """Pyramidal feature extractor 255 | """ 256 | with tf.variable_scope(scope, reuse=tf.AUTO_REUSE): 257 | features = [] 258 | features.append(input_batch) 259 | 260 | with tf.variable_scope("conv1a"): 261 | conv1a = conv2d( 262 | input_batch, NUM_FEATURES, 3, 2, normalizer_params=normalizer_params 263 | ) 264 | with tf.variable_scope("conv1b"): 265 | conv1b = conv2d( 266 | conv1a, NUM_FEATURES, 3, 1, normalizer_params=normalizer_params 267 | ) 268 | features.append(conv1b) 269 | with tf.variable_scope("conv2a"): 270 | conv2a = conv2d( 271 | conv1b, NUM_FEATURES * 2, 3, 2, normalizer_params=normalizer_params 272 | ) 273 | with tf.variable_scope("conv2b"): 274 | conv2b = conv2d( 275 | conv2a, NUM_FEATURES * 2, 3, 1, normalizer_params=normalizer_params 276 | ) 277 | features.append(conv2b) 
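        # Each conv{N}a below halves the spatial resolution again, so `features` ends up
        # holding [input, 1/2, 1/4, 1/8, 1/16, 1/32] resolution maps, the downsampled ones
        # with NUM_FEATURES * 2**k channels (k = 0..4).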
278 | with tf.variable_scope("conv3a"): 279 | conv3a = conv2d( 280 | conv2b, NUM_FEATURES * 4, 3, 2, normalizer_params=normalizer_params 281 | ) 282 | with tf.variable_scope("conv3b"): 283 | conv3b = conv2d( 284 | conv3a, NUM_FEATURES * 4, 3, 1, normalizer_params=normalizer_params 285 | ) 286 | features.append(conv3b) 287 | with tf.variable_scope("conv4a"): 288 | conv4a = conv2d( 289 | conv3b, NUM_FEATURES * 8, 3, 2, normalizer_params=normalizer_params 290 | ) 291 | with tf.variable_scope("conv4b"): 292 | conv4b = conv2d( 293 | conv4a, NUM_FEATURES * 8, 3, 1, normalizer_params=normalizer_params 294 | ) 295 | features.append(conv4b) 296 | with tf.variable_scope("conv5a"): 297 | conv5a = conv2d( 298 | conv4b, NUM_FEATURES * 16, 3, 2, normalizer_params=normalizer_params 299 | ) 300 | with tf.variable_scope("conv5b"): 301 | conv5b = conv2d( 302 | conv5a, NUM_FEATURES * 16, 3, 1, normalizer_params=normalizer_params 303 | ) 304 | features.append(conv5b) 305 | return features 306 | 307 | 308 | def build_estimator(features, upsampled_disp=None, normalizer_params=None): 309 | """Single scale estimator 310 | """ 311 | with tf.variable_scope("build_estimator"): 312 | if upsampled_disp is not None: 313 | disp2 = tf.concat([features, upsampled_disp], -1) 314 | else: 315 | disp2 = features 316 | with tf.variable_scope("disp-3"): 317 | disp3 = conv2d( 318 | disp2, NUM_FEATURES * 4, 3, 1, normalizer_params=normalizer_params 319 | ) 320 | with tf.variable_scope("disp-4"): 321 | disp4 = conv2d( 322 | disp3, NUM_FEATURES * 3, 3, 1, normalizer_params=normalizer_params 323 | ) 324 | with tf.variable_scope("disp-5"): 325 | disp5 = conv2d( 326 | disp4, NUM_FEATURES * 2, 3, 1, normalizer_params=normalizer_params 327 | ) 328 | with tf.variable_scope("disp-6"): 329 | disp6 = conv2d( 330 | disp5, NUM_FEATURES, 3, 1, normalizer_params=normalizer_params 331 | ) 332 | return disp6 333 | 334 | 335 | def get_disp(x, normalizer_params=None, rates=[1, 1]): 336 | """Disparity prediction layer 337 | """ 338 | with tf.variable_scope("disparity_estimator"): 339 | with tf.variable_scope("conv1"): 340 | conv1 = conv2d( 341 | x, NUM_FEATURES * 4, 3, 1, normalizer_params=normalizer_params 342 | ) 343 | with tf.variable_scope("conv2"): 344 | conv2 = conv2d( 345 | conv1, 346 | NUM_FEATURES * 2, 347 | 3, 348 | 1, 349 | normalizer_params=normalizer_params, 350 | rate=rates[0], 351 | ) 352 | with tf.variable_scope("conv3"): 353 | conv3 = conv2d( 354 | conv2, 355 | NUM_FEATURES, 356 | 3, 357 | 1, 358 | normalizer_params=normalizer_params, 359 | rate=rates[1], 360 | ) 361 | with tf.variable_scope("disparity"): 362 | disparity = ( 363 | DISP_SCALING 364 | * conv2d( 365 | conv3, 1, 3, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None 366 | ) 367 | + MIN_DISP 368 | ) 369 | return disparity 370 | 371 | 372 | def get_semantic(x, classes, normalizer_params=None, rates=[1, 1]): 373 | """Semantic estimator layer 374 | """ 375 | with tf.variable_scope("semantic_estimator"): 376 | with tf.variable_scope("conv1"): 377 | conv1 = conv2d( 378 | x, NUM_FEATURES * 4, 3, 1, normalizer_params=normalizer_params 379 | ) 380 | with tf.variable_scope("conv2"): 381 | conv2 = conv2d( 382 | conv1, 383 | NUM_FEATURES * 2, 384 | 3, 385 | 1, 386 | normalizer_params=normalizer_params, 387 | rate=rates[0], 388 | ) 389 | with tf.variable_scope("conv3"): 390 | conv3 = conv2d( 391 | conv2, 392 | NUM_FEATURES, 393 | 3, 394 | 1, 395 | normalizer_params=normalizer_params, 396 | rate=rates[1], 397 | ) 398 | with tf.variable_scope("disparity"): 399 | sem = conv2d(conv3, 
classes, 3, 1, normalizer_params=normalizer_params) 400 | return sem 401 | -------------------------------------------------------------------------------- /networks/ops.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Fabio Tosi, Filippo Aleotti, Pierluigi Zama Ramirez, Matteo Poggi, 2 | # Samuele Salti, Luigi Di Stefano, Stefano Mattoccia 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | import tensorflow.contrib.slim as slim 20 | 21 | import tensorflow as tf 22 | 23 | 24 | def upsample_nn(x, ratio): 25 | s = x.get_shape().as_list() 26 | h = s[1] 27 | w = s[2] 28 | return tf.image.resize_nearest_neighbor(x, [h * ratio, w * ratio]) 29 | 30 | 31 | def conv2d( 32 | inputs, 33 | num_outputs, 34 | kernel_size, 35 | stride, 36 | normalizer_fn=slim.batch_norm, 37 | activation_fn=tf.nn.relu, 38 | weights_regularizer=slim.l2_regularizer(0.0001), 39 | normalizer_params=True, 40 | padding=(1, 1), 41 | reflect=True, 42 | rate=1, 43 | ): 44 | 45 | if rate > 1: 46 | w_pad, h_pad = (rate, rate) 47 | else: 48 | w_pad, h_pad = tuple(padding) 49 | 50 | if reflect: 51 | inputs = tf.pad( 52 | inputs, [[0, 0], [h_pad, h_pad], [w_pad, w_pad], [0, 0]], "REFLECT" 53 | ) 54 | 55 | return tf.contrib.layers.conv2d( 56 | inputs, 57 | num_outputs, 58 | kernel_size, 59 | stride, 60 | padding="VALID", 61 | normalizer_fn=normalizer_fn, 62 | activation_fn=activation_fn, 63 | weights_regularizer=weights_regularizer, 64 | normalizer_params=normalizer_params, 65 | rate=rate, 66 | ) 67 | 68 | 69 | def upconv( 70 | inputs, 71 | num_outputs, 72 | kernel_size, 73 | stride, 74 | normalizer_fn=slim.batch_norm, 75 | activation_fn=tf.nn.relu, 76 | weights_regularizer=slim.l2_regularizer(0.0001), 77 | normalizer_params=True, 78 | padding=(1, 1), 79 | ): 80 | upsample = upsample_nn(inputs, stride) 81 | return conv2d( 82 | upsample, 83 | num_outputs, 84 | kernel_size, 85 | 1, 86 | padding=padding, 87 | normalizer_fn=normalizer_fn, 88 | activation_fn=activation_fn, 89 | weights_regularizer=weights_regularizer, 90 | normalizer_params=normalizer_params, 91 | ) 92 | 93 | 94 | def gradient_x(img): 95 | gx = img[:, :, :-1, :] - img[:, :, 1:, :] 96 | return gx 97 | 98 | 99 | def gradient_y(img): 100 | gy = img[:, :-1, :, :] - img[:, 1:, :, :] 101 | return gy 102 | 103 | 104 | def L2_norm(x, axis=3, keepdims=True): 105 | curr_offset = 1e-10 106 | l2_norm = tf.norm(tf.abs(x) + curr_offset, axis=axis, keepdims=keepdims) 107 | return l2_norm 108 | 109 | 110 | def spatial_normalize(disp): 111 | with tf.variable_scope("spatial_normalizer"): 112 | _, curr_h, curr_w, curr_c = disp.get_shape().as_list() 113 | disp_mean = tf.reduce_mean(disp, axis=[1, 2, 3], keepdims=True) 114 | disp_mean = tf.tile(disp_mean, [1, curr_h, curr_w, curr_c]) 115 | return disp / disp_mean 116 | 117 | 118 | def post_process_disparity(disp): 119 | _, 
h, w = disp.shape 120 | l_disp = disp[0, :, :] 121 | r_disp = np.fliplr(disp[1, :, :]) 122 | m_disp = 0.5 * (l_disp + r_disp) 123 | l, _ = np.meshgrid(np.linspace(0, 1, w), np.linspace(0, 1, h)) 124 | l_mask = 1.0 - np.clip(20 * (l - 0.05), 0, 1) 125 | r_mask = np.fliplr(l_mask) 126 | return r_mask * l_disp + l_mask * r_disp + (1.0 - l_mask - r_mask) * m_disp 127 | 128 | 129 | def reduce_mean_masked(tensor, mask): 130 | with tf.variable_scope("reduce_mean_masked"): 131 | valid_points = tf.maximum(tf.reduce_sum(mask), 1) 132 | loss = tf.reduce_sum(tensor * mask) / valid_points 133 | return loss 134 | 135 | 136 | def reduce_mean_probability_masked(tensor, mask, probability): 137 | with tf.variable_scope("reduce_mean_masked"): 138 | valid_points = tf.maximum(tf.reduce_sum(mask), 1) 139 | loss = tf.reduce_sum(tensor * mask * probability) / valid_points 140 | return loss 141 | 142 | 143 | # Upsampling layer 144 | def bilinear_upsampling_by_convolution(x, stride, normalizer_params=None): 145 | with tf.variable_scope("bilinear_upsampling_by_convolution"): 146 | f = x.get_shape().as_list()[-1] 147 | return upconv(x, f, 3, stride, normalizer_params=normalizer_params) 148 | 149 | 150 | def depth_upsampling(x, scales): 151 | with tf.variable_scope("depth_upsampling"): 152 | features = [] 153 | for i in range(1, scales + 1): 154 | with tf.variable_scope("upsampler_pred_" + str(i)): 155 | up = tf.image.resize_bilinear( 156 | x, 157 | [ 158 | x.get_shape().as_list()[1] * (2 ** i), 159 | x.get_shape().as_list()[2] * (2 ** i), 160 | ], 161 | ) 162 | features.append(up) 163 | return features 164 | 165 | 166 | def stop_features_gradient(features): 167 | with tf.variable_scope("stop_features_gradient"): 168 | new_features = [] 169 | for img_x_features in features: 170 | new_img_x_features = [] 171 | for feat in img_x_features: 172 | new_img_x_features.append(tf.stop_gradient(feat)) 173 | new_features.append(new_img_x_features) 174 | return new_features 175 | 176 | 177 | def couple_imgs_features(features): 178 | with tf.variable_scope("couple_imgs_features"): 179 | coupled_features = [] 180 | for tgt_feat, src2_feat in zip(features[1], features[2]): 181 | couple_feat = tf.concat([tgt_feat, src2_feat], axis=-1) 182 | coupled_features.append(couple_feat) 183 | return coupled_features 184 | -------------------------------------------------------------------------------- /networks/selflow/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Pengpeng Liu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /networks/selflow/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVLAB-Unibo/omeganet/7e23372923ee53745ba6bbb0c7921d7bb4eea01a/networks/selflow/__init__.py -------------------------------------------------------------------------------- /networks/selflow/selflow_network.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.contrib import slim 3 | from helpers.utilities import flow_resize 4 | from networks.selflow.warp import tf_warp 5 | 6 | 7 | def lrelu(x, leak=0.2, name="leaky_relu"): 8 | return tf.maximum(x, leak * x) 9 | 10 | 11 | def feature_extractor( 12 | x, 13 | train=True, 14 | trainable=True, 15 | reuse=None, 16 | regularizer=None, 17 | name="feature_extractor", 18 | ): 19 | with tf.variable_scope(name, reuse=reuse, regularizer=regularizer): 20 | with slim.arg_scope( 21 | [slim.conv2d], 22 | activation_fn=lrelu, 23 | kernel_size=3, 24 | padding="SAME", 25 | trainable=trainable, 26 | ): 27 | net = {} 28 | net["conv1_1"] = slim.conv2d(x, 16, stride=2, scope="conv1_1") 29 | net["conv1_2"] = slim.conv2d(net["conv1_1"], 16, stride=1, scope="conv1_2") 30 | 31 | net["conv2_1"] = slim.conv2d(net["conv1_2"], 32, stride=2, scope="conv2_1") 32 | net["conv2_2"] = slim.conv2d(net["conv2_1"], 32, stride=1, scope="conv2_2") 33 | 34 | net["conv3_1"] = slim.conv2d(net["conv2_2"], 64, stride=2, scope="conv3_1") 35 | net["conv3_2"] = slim.conv2d(net["conv3_1"], 64, stride=1, scope="conv3_2") 36 | 37 | net["conv4_1"] = slim.conv2d(net["conv3_2"], 96, stride=2, scope="conv4_1") 38 | net["conv4_2"] = slim.conv2d(net["conv4_1"], 96, stride=1, scope="conv4_2") 39 | 40 | net["conv5_1"] = slim.conv2d(net["conv4_2"], 128, stride=2, scope="conv5_1") 41 | net["conv5_2"] = slim.conv2d(net["conv5_1"], 128, stride=1, scope="conv5_2") 42 | 43 | net["conv6_1"] = slim.conv2d(net["conv5_2"], 192, stride=2, scope="conv6_1") 44 | net["conv6_2"] = slim.conv2d(net["conv6_1"], 192, stride=1, scope="conv6_2") 45 | 46 | return net 47 | 48 | 49 | def context_network( 50 | x, 51 | flow, 52 | train=True, 53 | trainable=True, 54 | reuse=None, 55 | regularizer=None, 56 | name="context_network", 57 | ): 58 | x_input = tf.concat([x, flow], axis=-1) 59 | with tf.variable_scope(name, reuse=reuse, regularizer=regularizer): 60 | with slim.arg_scope( 61 | [slim.conv2d], 62 | activation_fn=lrelu, 63 | kernel_size=3, 64 | padding="SAME", 65 | trainable=trainable, 66 | ): 67 | net = {} 68 | net["dilated_conv1"] = slim.conv2d( 69 | x_input, 128, rate=1, scope="dilated_conv1" 70 | ) 71 | net["dilated_conv2"] = slim.conv2d( 72 | net["dilated_conv1"], 128, rate=2, scope="dilated_conv2" 73 | ) 74 | net["dilated_conv3"] = slim.conv2d( 75 | net["dilated_conv2"], 128, rate=4, scope="dilated_conv3" 76 | ) 77 | net["dilated_conv4"] = slim.conv2d( 78 | net["dilated_conv3"], 96, rate=8, scope="dilated_conv4" 79 | ) 80 | net["dilated_conv5"] = slim.conv2d( 81 | net["dilated_conv4"], 64, rate=16, scope="dilated_conv5" 82 | ) 83 | net["dilated_conv6"] = slim.conv2d( 84 | net["dilated_conv5"], 32, rate=1, scope="dilated_conv6" 85 | ) 86 | 
net["dilated_conv7"] = slim.conv2d( 87 | net["dilated_conv6"], 88 | 2, 89 | rate=1, 90 | activation_fn=None, 91 | scope="dilated_conv7", 92 | ) 93 | 94 | refined_flow = net["dilated_conv7"] 95 | return refined_flow 96 | 97 | 98 | def estimator_network( 99 | x1, 100 | cost_volume, 101 | flow, 102 | train=True, 103 | trainable=True, 104 | reuse=None, 105 | regularizer=None, 106 | name="estimator", 107 | ): 108 | net_input = tf.concat([cost_volume, x1, flow], axis=-1) 109 | with tf.variable_scope(name, reuse=reuse, regularizer=regularizer): 110 | with slim.arg_scope( 111 | [slim.conv2d], 112 | activation_fn=lrelu, 113 | kernel_size=3, 114 | padding="SAME", 115 | trainable=trainable, 116 | ): 117 | net = {} 118 | net["conv1"] = slim.conv2d(net_input, 128, scope="conv1") 119 | net["conv2"] = slim.conv2d(net["conv1"], 128, scope="conv2") 120 | net["conv3"] = slim.conv2d(net["conv2"], 96, scope="conv3") 121 | net["conv4"] = slim.conv2d(net["conv3"], 64, scope="conv4") 122 | net["conv5"] = slim.conv2d(net["conv4"], 32, scope="conv5") 123 | net["conv6"] = slim.conv2d( 124 | net["conv5"], 2, activation_fn=None, scope="conv6" 125 | ) 126 | 127 | return net 128 | 129 | 130 | def compute_cost_volume(x1, x2, H, W, channel, d=9): 131 | x1 = tf.nn.l2_normalize(x1, axis=3) 132 | x2 = tf.nn.l2_normalize(x2, axis=3) 133 | 134 | x2_patches = tf.extract_image_patches( 135 | x2, [1, d, d, 1], strides=[1, 1, 1, 1], rates=[1, 1, 1, 1], padding="SAME" 136 | ) 137 | x2_patches = tf.reshape(x2_patches, [-1, H, W, d, d, channel]) 138 | x1_reshape = tf.reshape(x1, [-1, H, W, 1, 1, channel]) 139 | x1_dot_x2 = tf.multiply(x1_reshape, x2_patches) 140 | 141 | cost_volume = tf.reduce_sum(x1_dot_x2, axis=-1) 142 | # cost_volume = tf.reduce_mean(x1_dot_x2, axis=-1) 143 | cost_volume = tf.reshape(cost_volume, [-1, H, W, d * d]) 144 | return cost_volume 145 | 146 | 147 | def estimator( 148 | x0, 149 | x1, 150 | x2, 151 | flow_fw, 152 | flow_bw, 153 | train=True, 154 | trainable=True, 155 | reuse=None, 156 | regularizer=None, 157 | name="estimator", 158 | ): 159 | # warp x2 according to flow 160 | if train: 161 | x_shape = x1.get_shape().as_list() 162 | else: 163 | x_shape = tf.shape(x1) 164 | H = x_shape[1] 165 | W = x_shape[2] 166 | channel = x_shape[3] 167 | x2_warp = tf_warp(x2, flow_fw, H, W) 168 | x0_warp = tf_warp(x0, flow_bw, H, W) 169 | 170 | # ---------------cost volume----------------- 171 | 172 | cost_volume_fw = compute_cost_volume(x1, x2_warp, H, W, channel, d=9) 173 | cost_volume_bw = compute_cost_volume(x1, x0_warp, H, W, channel, d=9) 174 | 175 | cv_concat_fw = tf.concat([cost_volume_fw, cost_volume_bw], -1) 176 | cv_concat_bw = tf.concat([cost_volume_bw, cost_volume_fw], -1) 177 | 178 | flow_concat_fw = tf.concat([flow_fw, -flow_bw], -1) 179 | flow_concat_bw = tf.concat([flow_bw, -flow_fw], -1) 180 | 181 | net_fw = estimator_network( 182 | x1, 183 | cv_concat_fw, 184 | flow_concat_fw, 185 | train=train, 186 | trainable=trainable, 187 | reuse=reuse, 188 | regularizer=regularizer, 189 | name=name, 190 | ) 191 | net_bw = estimator_network( 192 | x1, 193 | cv_concat_bw, 194 | flow_concat_bw, 195 | train=train, 196 | trainable=trainable, 197 | reuse=True, 198 | regularizer=regularizer, 199 | name=name, 200 | ) 201 | 202 | return net_fw, net_bw 203 | 204 | 205 | def pyramid_processing_three_frame( 206 | shape, 207 | src1_features, 208 | tgt_features, 209 | src2_features, 210 | train=True, 211 | trainable=True, 212 | reuse=None, 213 | regularizer=None, 214 | is_scale=True, 215 | ): 216 | x_shape = 
tf.shape(tgt_features["conv6_2"]) 217 | initial_flow_fw = tf.zeros( 218 | [x_shape[0], x_shape[1], x_shape[2], 2], 219 | dtype=tf.float32, 220 | name="initial_flow_fw", 221 | ) 222 | initial_flow_bw = tf.zeros( 223 | [x_shape[0], x_shape[1], x_shape[2], 2], 224 | dtype=tf.float32, 225 | name="initial_flow_bw", 226 | ) 227 | flow_fw = {} 228 | flow_bw = {} 229 | net_fw, net_bw = estimator( 230 | src1_features["conv6_2"], 231 | tgt_features["conv6_2"], 232 | src2_features["conv6_2"], 233 | initial_flow_fw, 234 | initial_flow_bw, 235 | train=train, 236 | trainable=trainable, 237 | reuse=reuse, 238 | regularizer=regularizer, 239 | name="estimator_level_6", 240 | ) 241 | flow_fw["level_6"] = net_fw["conv6"] 242 | flow_bw["level_6"] = net_bw["conv6"] 243 | 244 | for i in range(4): 245 | feature_name = "conv%d_2" % (5 - i) 246 | level = "level_%d" % (5 - i) 247 | feature_size = tf.shape(tgt_features[feature_name])[1:3] 248 | 249 | initial_flow_fw = flow_resize( 250 | flow_fw["level_%d" % (6 - i)], feature_size, is_scale=is_scale 251 | ) 252 | initial_flow_bw = flow_resize( 253 | flow_bw["level_%d" % (6 - i)], feature_size, is_scale=is_scale 254 | ) 255 | 256 | net_fw, net_bw = estimator( 257 | src1_features[feature_name], 258 | tgt_features[feature_name], 259 | src2_features[feature_name], 260 | initial_flow_fw, 261 | initial_flow_bw, 262 | train=train, 263 | trainable=trainable, 264 | reuse=reuse, 265 | regularizer=regularizer, 266 | name="estimator_level_%d" % (5 - i), 267 | ) 268 | flow_fw[level] = net_fw["conv6"] 269 | flow_bw[level] = net_bw["conv6"] 270 | 271 | flow_concat_fw = tf.concat([flow_fw["level_2"], -flow_bw["level_2"]], -1) 272 | flow_concat_bw = tf.concat([flow_bw["level_2"], -flow_fw["level_2"]], -1) 273 | 274 | x_feature = tf.concat([net_fw["conv5"], net_bw["conv5"]], axis=-1) 275 | flow_fw["refined"] = context_network( 276 | x_feature, 277 | flow_concat_fw, 278 | train=train, 279 | trainable=trainable, 280 | reuse=reuse, 281 | regularizer=regularizer, 282 | name="context_network", 283 | ) 284 | flow_size = shape[1:3] 285 | flow_fw["full_res"] = flow_resize(flow_fw["refined"], flow_size, is_scale=is_scale) 286 | 287 | x_feature = tf.concat([net_bw["conv5"], net_fw["conv5"]], axis=-1) 288 | flow_bw["refined"] = context_network( 289 | x_feature, 290 | flow_concat_bw, 291 | train=train, 292 | trainable=trainable, 293 | reuse=True, 294 | regularizer=regularizer, 295 | name="context_network", 296 | ) 297 | flow_bw["full_res"] = flow_resize(flow_bw["refined"], flow_size, is_scale=is_scale) 298 | 299 | return flow_fw, flow_bw 300 | 301 | 302 | def flownet( 303 | shape, 304 | src1, 305 | tgt, 306 | src2, 307 | train=True, 308 | trainable=True, 309 | reuse=None, 310 | regularizer=None, 311 | is_scale=True, 312 | scope="flownet", 313 | ): 314 | """ Get the flow 315 | Returns: 316 | forward flow between tgt and src2, backward flow between tgt and src1 317 | Both flows are tgt aligned 318 | """ 319 | with tf.variable_scope(scope, reuse=reuse): 320 | src1_features = feature_extractor( 321 | src1, 322 | train=train, 323 | trainable=trainable, 324 | reuse=reuse, 325 | regularizer=regularizer, 326 | name="feature_extractor", 327 | ) 328 | tgt_features = feature_extractor( 329 | tgt, 330 | train=train, 331 | trainable=trainable, 332 | reuse=True, 333 | regularizer=regularizer, 334 | name="feature_extractor", 335 | ) 336 | src2_features = feature_extractor( 337 | src2, 338 | train=train, 339 | trainable=trainable, 340 | reuse=True, 341 | regularizer=regularizer, 342 | name="feature_extractor", 
343 | ) 344 | 345 | flow_src2_tgt, flow_src1_tgt = pyramid_processing_three_frame( 346 | shape, 347 | src1_features, 348 | tgt_features, 349 | src2_features, 350 | train=train, 351 | trainable=trainable, 352 | reuse=reuse, 353 | regularizer=regularizer, 354 | is_scale=is_scale, 355 | ) 356 | 357 | return flow_src2_tgt["full_res"], flow_src1_tgt["full_res"] 358 | -------------------------------------------------------------------------------- /networks/selflow/warp.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def get_pixel_value(img, x, y): 5 | """ 6 | Utility function to get pixel value for coordinate 7 | vectors x and y from a 4D tensor image. 8 | Input 9 | ----- 10 | - img: tensor of shape (B, H, W, C) 11 | - x: flattened tensor of shape (B*H*W, ) 12 | - y: flattened tensor of shape (B*H*W, ) 13 | Returns 14 | ------- 15 | - output: tensor of shape (B, H, W, C) 16 | """ 17 | shape = tf.shape(x) 18 | batch_size = shape[0] 19 | height = shape[1] 20 | width = shape[2] 21 | 22 | batch_idx = tf.range(0, batch_size) 23 | batch_idx = tf.reshape(batch_idx, (batch_size, 1, 1)) 24 | b = tf.tile(batch_idx, (1, height, width)) 25 | 26 | indices = tf.stack([b, y, x], 3) 27 | 28 | return tf.gather_nd(img, indices) 29 | 30 | 31 | def tf_warp(img, flow, H, W): 32 | # H = 256 33 | # W = 256 34 | x, y = tf.meshgrid(tf.range(W), tf.range(H)) 35 | x = tf.expand_dims(x, 0) 36 | x = tf.expand_dims(x, -1) 37 | 38 | y = tf.expand_dims(y, 0) 39 | y = tf.expand_dims(y, -1) 40 | 41 | x = tf.cast(x, tf.float32) 42 | y = tf.cast(y, tf.float32) 43 | grid = tf.concat([x, y], axis=-1) 44 | # print grid.shape 45 | flows = grid + flow 46 | # print(flows.shape) 47 | max_y = tf.cast(H - 1, tf.int32) 48 | max_x = tf.cast(W - 1, tf.int32) 49 | zero = tf.zeros([], dtype=tf.int32) 50 | 51 | x = flows[:, :, :, 0] 52 | y = flows[:, :, :, 1] 53 | x0 = x 54 | y0 = y 55 | x0 = tf.cast(x0, tf.int32) 56 | x1 = x0 + 1 57 | y0 = tf.cast(y0, tf.int32) 58 | y1 = y0 + 1 59 | 60 | # clip to range [0, H/W] to not violate img boundaries 61 | x0 = tf.clip_by_value(x0, zero, max_x) 62 | x1 = tf.clip_by_value(x1, zero, max_x) 63 | y0 = tf.clip_by_value(y0, zero, max_y) 64 | y1 = tf.clip_by_value(y1, zero, max_y) 65 | 66 | # get pixel value at corner coords 67 | Ia = get_pixel_value(img, x0, y0) 68 | Ib = get_pixel_value(img, x0, y1) 69 | Ic = get_pixel_value(img, x1, y0) 70 | Id = get_pixel_value(img, x1, y1) 71 | 72 | # recast as float for delta calculation 73 | x0 = tf.cast(x0, tf.float32) 74 | x1 = tf.cast(x1, tf.float32) 75 | y0 = tf.cast(y0, tf.float32) 76 | y1 = tf.cast(y1, tf.float32) 77 | 78 | # calculate deltas 79 | wa = (x1 - x) * (y1 - y) 80 | wb = (x1 - x) * (y - y0) 81 | wc = (x - x0) * (y1 - y) 82 | wd = (x - x0) * (y - y0) 83 | 84 | # add dimension for addition 85 | wa = tf.expand_dims(wa, axis=3) 86 | wb = tf.expand_dims(wb, axis=3) 87 | wc = tf.expand_dims(wc, axis=3) 88 | wd = tf.expand_dims(wd, axis=3) 89 | 90 | # compute output 91 | out = tf.add_n([wa * Ia, wb * Ib, wc * Ic, wd * Id]) 92 | return out 93 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pandas==0.24.2 2 | tqdm==4.36.1 3 | opencv-python==4.2.0.34 4 | matplotlib==3.0.3 5 | numpy==1.16.4 6 | tensorflow-gpu==1.8.0 7 | Pillow==6.1.0 8 | pypng==0.0.20 9 | pfm==0.6.0 10 | scipy==1.1.0 
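# --- Editor's usage sketch (not part of the original sources) -----------------
# Minimal example of the bilinear warping helper defined in networks/selflow/warp.py
# above, under the pinned TensorFlow 1.8 environment listed in requirements.txt.
# The image size and the constant flow value are illustrative assumptions.
#
# import numpy as np
# import tensorflow as tf
# from networks.selflow.warp import tf_warp
#
# H, W = 192, 640
# img = tf.placeholder(tf.float32, (1, H, W, 3))
# flow = tf.fill((1, H, W, 2), 2.0)        # displace every pixel by (+2, +2)
# warped = tf_warp(img, flow, H, W)        # bilinear lookup at meshgrid + flow
# with tf.Session() as sess:
#     out = sess.run(warped, {img: np.random.rand(1, H, W, 3).astype(np.float32)})
# -------------------------------------------------------------------------------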
-------------------------------------------------------------------------------- /single_inference.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Fabio Tosi, Filippo Aleotti, Pierluigi Zama Ramirez, Matteo Poggi, 2 | # Samuele Salti, Luigi Di Stefano, Stefano Mattoccia 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """ 17 | Run OmegaNet in a one-shot way: 18 | Given a single tgt image or three images, we run OmegaNet to get the results 19 | for a set of tasks. 20 | At the end, colored images will be saved in the destinatio folder. 21 | """ 22 | from __future__ import division 23 | import tensorflow as tf 24 | import cv2 25 | import numpy as np 26 | import os 27 | import argparse 28 | import matplotlib.pyplot as plt 29 | from helpers import utilities 30 | from helpers.flow_tool import flowlib 31 | from networks import complete_network 32 | from networks import general_network 33 | from tensorflow.python.util import deprecation 34 | 35 | # disable future warnings and info messages for this demo 36 | deprecation._PRINT_DEPRECATION_WARNINGS = False 37 | os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" 38 | 39 | 40 | parser = argparse.ArgumentParser(description="Single shot estimation") 41 | parser.add_argument("--tgt", type=str, help="path to t0 RGB image", required=True) 42 | parser.add_argument( 43 | "--src1", 44 | type=str, 45 | help="path to src_1 RGB image (required in case of optical flow)", 46 | default=None, 47 | ) 48 | parser.add_argument( 49 | "--src2", 50 | type=str, 51 | help="path to src_2 RGB image (required in case of optical flow)", 52 | default=None, 53 | ) 54 | parser.add_argument( 55 | "--tasks", 56 | nargs="+", 57 | type=str, 58 | help="tasks to perform", 59 | default=["inverse_depth", "flow", "semantic", "motion_mask"], 60 | ) 61 | parser.add_argument( 62 | "--ckpt", type=str, help="path to complete omeganet checkpoint", required=True 63 | ) 64 | parser.add_argument("--height", type=int, help="height of resized image", default=192) 65 | parser.add_argument("--width", type=int, help="width of resized image", default=640) 66 | parser.add_argument( 67 | "--tau", 68 | type=float, 69 | help="tau threshold in the paper. For motion segmentation at testing time", 70 | default=0.5, 71 | ) 72 | 73 | parser.add_argument("--dest", type=str, help="where save results", default="./results") 74 | parser.add_argument("--cpu", action="store_true", help="run on cpu") 75 | 76 | opts = parser.parse_args() 77 | 78 | if opts.cpu: 79 | os.environ["CUDA_VISIBLE_DEVICES"] = "-1" 80 | 81 | 82 | def prepare_input(): 83 | """Prepare input for the network 84 | :return src1: src1 image, resized at opts.height x opts.width 85 | :return src1: tgt image, resized at opts.height x opts.width 86 | :return src1: src2 image, resized at opts.height x opts.width 87 | :return original_tgt: original tgt image, not resize. 
For motion mask blending 88 | :return height: height of original image 89 | :return width: width of the original image 90 | In case of single depth or semantic, src1 and src2 are equal to tgt 91 | """ 92 | 93 | expected_more_images = False 94 | 95 | if not os.path.isfile(opts.tgt): 96 | raise ValueError("Cannot find tgt image:{}".format(opts.tgt)) 97 | 98 | if "flow" in opts.tasks or "motion_mask" in opts.tasks: 99 | if opts.src1 is None or opts.src2 is None: 100 | raise ValueError( 101 | "Expected src1 and src2 for optical flow and motion estimation, but are None" 102 | ) 103 | if not os.path.isfile(opts.src1): 104 | raise ValueError("Image src1 not found") 105 | if not os.path.isfile(opts.src2): 106 | raise ValueError("Image src2 not found") 107 | expected_more_images = True 108 | else: 109 | if not os.path.isfile(opts.tgt): 110 | raise ValueError("Cannot find tgt:{}".format(opts.tgt)) 111 | if opts.dest is not None: 112 | utilities.create_dir(opts.dest) 113 | 114 | tgt = cv2.imread(opts.tgt) 115 | tgt = cv2.cvtColor(tgt, cv2.COLOR_BGR2RGB) 116 | original_tgt = None 117 | if "motion_mask" in opts.tasks: 118 | original_tgt = tgt 119 | 120 | tgt = tgt / 255.0 121 | 122 | if expected_more_images: 123 | src1 = cv2.imread(opts.src1) 124 | src1 = cv2.cvtColor(src1, cv2.COLOR_BGR2RGB) 125 | src1 = src1 / 255.0 126 | 127 | if src1.shape != tgt.shape: 128 | raise ValueError("tgt and src1 have different shapes") 129 | 130 | src2 = cv2.imread(opts.src2) 131 | src2 = cv2.cvtColor(src2, cv2.COLOR_BGR2RGB) 132 | src2 = src2 / 255.0 133 | 134 | if src2.shape != tgt.shape: 135 | raise ValueError("tgt and src2 have different shapes") 136 | 137 | else: 138 | # NOTE: in case of src1 and src2 are useless, 139 | # we feed the tensor_src1 and tensor_src2 placeholders 140 | # with tgt one 141 | src1 = tgt 142 | src2 = tgt 143 | 144 | height, width = tgt.shape[0:2] 145 | 146 | src1 = cv2.resize(src1, (opts.width, opts.height)) 147 | tgt = cv2.resize(tgt, (opts.width, opts.height)) 148 | src2 = cv2.resize(src2, (opts.width, opts.height)) 149 | 150 | src1 = np.expand_dims(src1, 0).astype(np.float32) 151 | tgt = np.expand_dims(tgt, 0).astype(np.float32) 152 | src2 = np.expand_dims(src2, 0).astype(np.float32) 153 | return src1, tgt, src2, original_tgt, height, width 154 | 155 | 156 | def main(_): 157 | """Run the inference 158 | """ 159 | model_exists = utilities.check_model_exists(opts.ckpt) 160 | if not model_exists: 161 | raise ValueError("Model not found") 162 | src1, tgt, src2, original_tgt, height, width = prepare_input() 163 | output_tensors = [] 164 | 165 | print(" [*] Session creation: SUCCESS") 166 | config = tf.ConfigProto(allow_soft_placement=True) 167 | sess = tf.Session(config=config) 168 | 169 | training_flag = tf.placeholder(tf.bool) 170 | 171 | tensor_src1 = tf.placeholder( 172 | tf.float32, shape=(1, opts.height, opts.width, 3), name="src1" 173 | ) 174 | tensor_tgt = tf.placeholder( 175 | tf.float32, shape=(1, opts.height, opts.width, 3), name="tgt" 176 | ) 177 | tensor_src2 = tf.placeholder( 178 | tf.float32, shape=(1, opts.height, opts.width, 3), name="src2" 179 | ) 180 | batch = {"src_img_1": tensor_src1, "tgt_img": tensor_tgt, "src_img_2": tensor_src2} 181 | 182 | network_params = general_network.network_parameters( 183 | height=opts.height, width=opts.width, load_only_baseline=False, tau=opts.tau, 184 | ) 185 | network = complete_network.OmegaNet( 186 | batch, is_training=training_flag, params=network_params 187 | ) 188 | network.build() 189 | var_list = network.get_network_params() 190 | 
saver = tf.train.Saver(var_list=var_list) 191 | 192 | init_op = tf.group( 193 | tf.global_variables_initializer(), tf.local_variables_initializer() 194 | ) 195 | sess.run(init_op) 196 | coordinator = tf.train.Coordinator() 197 | threads = tf.train.start_queue_runners(sess=sess, coord=coordinator) 198 | 199 | saver.restore(sess, opts.ckpt) 200 | print(" [*] Load model: SUCCESS") 201 | 202 | index = 0 203 | output_mapping = {} 204 | 205 | if "inverse_depth" in opts.tasks: 206 | inverse_depth = tf.image.resize_images(network.disp, [height, width]) 207 | output_tensors.append(inverse_depth) 208 | output_mapping[index] = "inverse_depth" 209 | index += 1 210 | 211 | if "semantic" in opts.tasks: 212 | semantic = network.prepare_semantic( 213 | network.semantic_logits, height=height, width=width 214 | ) 215 | output_tensors.append(semantic) 216 | output_mapping[index] = "semantic" 217 | index += 1 218 | 219 | if "flow" in opts.tasks: 220 | optical_flow = tf.image.resize_images(network.optical_flow, [height, width]) 221 | output_tensors.append(optical_flow) 222 | output_mapping[index] = "flow" 223 | index += 1 224 | 225 | if "motion_mask" in opts.tasks: 226 | motion_mask = tf.image.resize_images( 227 | network.motion_mask, 228 | [height, width], 229 | method=tf.image.ResizeMethod.NEAREST_NEIGHBOR, 230 | ) 231 | output_tensors.append(motion_mask) 232 | output_mapping[index] = "motion_mask" 233 | index += 1 234 | 235 | results = sess.run( 236 | output_tensors, 237 | feed_dict={ 238 | training_flag: False, 239 | tensor_src1: src1, 240 | tensor_tgt: tgt, 241 | tensor_src2: src2, 242 | }, 243 | ) 244 | 245 | name = os.path.basename(opts.tgt) 246 | extension = name.split(".")[-1] 247 | name = name.replace(extension, "png") 248 | dest = os.path.join(opts.dest, "{}" + name) 249 | 250 | for index, output in enumerate(results): 251 | output = output.squeeze() 252 | task = output_mapping[index] 253 | 254 | if task == "inverse_depth": 255 | plt.imsave( 256 | dest.format("inverse_depth_"), output, cmap="magma", 257 | ) 258 | 259 | if task == "flow": 260 | scaling_w = width / opts.width 261 | scaling_h = height / opts.height 262 | output *= np.tile( 263 | np.array((scaling_w, scaling_h), dtype=np.float32), (height, width, 1) 264 | ) 265 | flow_as_img = flowlib.flow_to_image(output) 266 | flow_as_img = cv2.cvtColor(flow_as_img, cv2.COLOR_RGB2BGR) 267 | cv2.imwrite(dest.format("flow_"), flow_as_img) 268 | 269 | if task == "semantic": 270 | colored_semantic_map = utilities.color_semantic(output) 271 | colored_semantic = cv2.cvtColor( 272 | colored_semantic_map.astype(np.uint8), cv2.COLOR_RGB2BGR 273 | ) 274 | cv2.imwrite(dest.format("semantic_"), colored_semantic) 275 | 276 | if task == "motion_mask": 277 | colored_motion_mask = utilities.color_motion_mask(output) 278 | blended_image = cv2.addWeighted( 279 | colored_motion_mask, 0.9, original_tgt, 0.8, 0.0, 280 | ) 281 | blended_image = cv2.cvtColor( 282 | blended_image.astype(np.uint8), cv2.COLOR_BGR2RGB 283 | ) 284 | cv2.imwrite(dest.format("moving_objects_"), blended_image) 285 | 286 | print("{} outputs have been produced in {} folder".format(index + 1, opts.dest)) 287 | sess.close() 288 | coordinator.request_stop() 289 | coordinator.join(threads) 290 | 291 | 292 | if __name__ == "__main__": 293 | tf.app.run() 294 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Fabio Tosi, Filippo Aleotti, Pierluigi Zama Ramirez, 
Matteo Poggi, 2 | # Samuele Salti, Luigi Di Stefano, Stefano Mattoccia 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """ 17 | Test your network on a specific task 18 | """ 19 | 20 | import argparse 21 | import tensorflow as tf 22 | import numpy as np 23 | import os 24 | from dataloaders import factory as dataloader_factory 25 | from dataloaders.general_dataloader import dataloader_parameters 26 | from testers import factory as tester_factory 27 | from tensorflow.python.util import deprecation 28 | from networks import general_network 29 | from networks import complete_network 30 | from helpers import utilities 31 | 32 | # disable future warnings and info messages for this demo 33 | deprecation._PRINT_DEPRECATION_WARNINGS = False 34 | os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" 35 | 36 | parser = argparse.ArgumentParser(description="Test your network") 37 | 38 | parser.add_argument( 39 | "--task", 40 | type=str, 41 | default="depth", 42 | help="task to test", 43 | choices=["depth", "semantic", "flow", "mask"], 44 | ) 45 | parser.add_argument("--datapath", type=str, help="path to data", required=True) 46 | parser.add_argument("--ckpt", type=str, help="path to checkpoint", required=True) 47 | parser.add_argument( 48 | "--filenames_file", 49 | type=str, 50 | help="path to filenames file", 51 | default="filenames/eigen_test.txt", 52 | ) 53 | parser.add_argument("--height", type=int, help="height of resized image", default=192) 54 | parser.add_argument("--width", type=int, help="width of resized image", default=640) 55 | parser.add_argument( 56 | "--dest", type=str, help="where save artifacts", default="./artifacts" 57 | ) 58 | parser.add_argument( 59 | "--load_only_baseline", 60 | action="store_true", 61 | help="if set, load only Baseline (CameraNet+DSNet). Otherwise, full OmegaNet will be loaded", 62 | ) 63 | parser.add_argument( 64 | "--cpu", help="the network runs on CPU if enabled", action="store_true" 65 | ) 66 | parser.add_argument( 67 | "--tau", 68 | type=float, 69 | help="tau threshold in the paper. 
For motion segmentation at testing time", 70 | default=0.5, 71 | ) 72 | 73 | args = parser.parse_args() 74 | 75 | 76 | if args.cpu: 77 | os.environ["CUDA_VISIBLE_DEVICES"] = "-1" 78 | 79 | 80 | def configure_parameters(): 81 | """Prepare configurations for Network, Dataloader and Tester 82 | :return network_params: configuration for Network 83 | :return dataloader_params: configuration for Dataloader 84 | :return testing_params: configuration for Tester 85 | """ 86 | network_params = general_network.network_parameters( 87 | height=args.height, 88 | width=args.width, 89 | load_only_baseline=args.load_only_baseline, 90 | tau=args.tau, 91 | ) 92 | 93 | dataloader_params = dataloader_parameters( 94 | height=args.height, width=args.width, task=args.task 95 | ) 96 | 97 | testing_params = tester_factory.tester_parameters( 98 | output_path=args.dest, 99 | checkpoint_path=args.ckpt, 100 | width=args.width, 101 | height=args.height, 102 | filenames_file=args.filenames_file, 103 | datapath=args.datapath, 104 | ) 105 | 106 | return network_params, dataloader_params, testing_params 107 | 108 | 109 | def configure_network(network_params, dataloader_params): 110 | """Build the Dataloader, then build the Network. 111 | :param network_params: configuration for Network 112 | :param dataloader_params: configuration for Dataloader 113 | :return network: built Network 114 | :return dataloader: built Dataloader 115 | :return training_flag: bool placeholder. For Batchnorm 116 | 117 | """ 118 | training_flag = tf.placeholder(tf.bool) 119 | dataloader = dataloader_factory.get_dataloader(args.task)( 120 | datapath=args.datapath, 121 | filenames_file=args.filenames_file, 122 | params=dataloader_params, 123 | ) 124 | batch = dataloader.get_next_batch() 125 | network = complete_network.OmegaNet( 126 | batch, is_training=training_flag, params=network_params 127 | ) 128 | 129 | network.build() 130 | return network, dataloader, training_flag 131 | 132 | 133 | def main(_): 134 | """Create the Dataloader, the Network and the Tester. 135 | Then, run the Tester. 136 | :raise ValueError: if model does not exist 137 | """ 138 | model_exists = utilities.check_model_exists(args.ckpt) 139 | if not model_exists: 140 | raise ValueError("Model not found") 141 | network_params, dataloader_params, testing_params = configure_parameters() 142 | network, dataloader, training_flag = configure_network( 143 | network_params, dataloader_params 144 | ) 145 | 146 | tester = tester_factory.get_tester(args.task)(testing_params) 147 | tester.test(network, dataloader, training_flag) 148 | 149 | 150 | if __name__ == "__main__": 151 | tf.app.run() 152 | -------------------------------------------------------------------------------- /testers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CVLAB-Unibo/omeganet/7e23372923ee53745ba6bbb0c7921d7bb4eea01a/testers/__init__.py -------------------------------------------------------------------------------- /testers/error_tester.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Fabio Tosi, Filippo Aleotti, Pierluigi Zama Ramirez, Matteo Poggi, 2 | # Samuele Salti, Luigi Di Stefano, Stefano Mattoccia 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from testers import general_tester 17 | 18 | 19 | class Tester(general_tester.GeneralTester): 20 | """Error tester. If selected, it means that 21 | no valid Tester exists for that dataset/task 22 | association. 23 | """ 24 | 25 | def test(self, network, dataloader, training_flag): 26 | """This component has to raise ValueError, because 27 | that dataset/task association is not admitted. 28 | :param network: built Network 29 | :param dataloader: built Dataloader 30 | :raise ValueError: no testing for this task is available for the selected dataset 31 | """ 32 | raise ValueError("No testing for this task is available for the selected dataset") 33 | -------------------------------------------------------------------------------- /testers/factory.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Fabio Tosi, Filippo Aleotti, Pierluigi Zama Ramirez, Matteo Poggi, 2 | # Samuele Salti, Luigi Di Stefano, Stefano Mattoccia 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """ 17 | Factory for testers 18 | """ 19 | 20 | import tensorflow as tf 21 | import numpy as np 22 | from testers import kitti_depth, kitti_flow, kitti_semantic, kitti_mask, error_tester 23 | from collections import namedtuple 24 | 25 | tester_parameters = namedtuple( 26 | "tester_parameters", 27 | "output_path, checkpoint_path, width, height, filenames_file, datapath", 28 | ) 29 | 30 | TESTER_KITTI_FACTORY = { 31 | "depth": kitti_depth.Tester, 32 | "flow": kitti_flow.Tester, 33 | "semantic": kitti_semantic.Tester, 34 | "mask": kitti_mask.Tester, 35 | } 36 | 37 | 38 | def get_tester(task): 39 | """Select the best Tester given a task and a dataset. 40 | If no Tester is available for that task on 41 | the selected Dataset (i.e., depth for CS), then 42 | an ErrorTester is returned. 43 | :param task: task to perform 44 | """ 45 | assert task in TESTER_KITTI_FACTORY 46 | return TESTER_KITTI_FACTORY[task] 47 | -------------------------------------------------------------------------------- /testers/general_tester.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Fabio Tosi, Filippo Aleotti, Pierluigi Zama Ramirez, Matteo Poggi, 2 | # Samuele Salti, Luigi Di Stefano, Stefano Mattoccia 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from abc import ABCMeta, abstractmethod 17 | 18 | 19 | class GeneralTester(object): 20 | """Template class for Testers 21 | """ 22 | 23 | __metaclass__ = ABCMeta 24 | 25 | def __init__(self, params): 26 | self.params = params 27 | with open(params.filenames_file, "r") as f: 28 | self.samples = f.readlines() 29 | self.num_test_samples = len(self.samples) 30 | 31 | @abstractmethod 32 | def test(self, network, dataloader, training_flag): 33 | """Principal method of the class. 34 | Start artifact generation. 35 | :param network: neural network to run 36 | :param dataloader: tf.dataloader that loads images from the file system 37 | :param training_flag: training flag bool. For Batchnorm 38 | """ 39 | pass 40 | -------------------------------------------------------------------------------- /testers/kitti_depth.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Fabio Tosi, Filippo Aleotti, Pierluigi Zama Ramirez, Matteo Poggi, 2 | # Samuele Salti, Luigi Di Stefano, Stefano Mattoccia 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """ Tester for KITTI depth 17 | """ 18 | import os 19 | import tensorflow as tf 20 | import cv2 21 | import numpy as np 22 | from testers.general_tester import GeneralTester 23 | from helpers import utilities 24 | from tqdm import tqdm 25 | 26 | 27 | class Tester(GeneralTester): 28 | """KITTI Depth Tester. 29 | It produces depth artifacts for the KITTI dataset 30 | """ 31 | 32 | def prepare(self): 33 | """Create output folders 34 | """ 35 | dest = os.path.join(self.params.output_path, "depth") 36 | utilities.create_dir(dest) 37 | 38 | def test(self, network, dataloader, training_flag): 39 | """Test KITTI depth 40 | It produces in the params.output_path folder the depth 41 | artifacts. 
42 | :param network: network to test 43 | :param dataloader: dataloader for this test 44 | :param training_flag: training_flag for Batchnorm 45 | 46 | 47 | """ 48 | 49 | config = tf.ConfigProto(allow_soft_placement=True) 50 | sess = tf.Session(config=config) 51 | 52 | self.prepare() 53 | 54 | var_list = network.get_network_params() 55 | saver = tf.train.Saver(var_list=var_list) 56 | 57 | init_op = tf.group( 58 | tf.global_variables_initializer(), tf.local_variables_initializer() 59 | ) 60 | sess.run(init_op) 61 | 62 | coordinator = tf.train.Coordinator() 63 | threads = tf.train.start_queue_runners(sess=sess, coord=coordinator) 64 | 65 | saver.restore(sess, self.params.checkpoint_path) 66 | print(" [*] Load model: SUCCESS") 67 | 68 | prediction_disp = tf.image.resize_images( 69 | network.disp, [dataloader.image_h, dataloader.image_w] 70 | ) 71 | 72 | print(" [*] Start depth artifacts generation") 73 | with tqdm(total=self.num_test_samples) as pbar: 74 | for step in range(self.num_test_samples): 75 | ops = [prediction_disp] 76 | outputs = sess.run(ops, feed_dict={training_flag: False}) 77 | name_disp = self.get_name(step) 78 | inverse_depth = outputs[0].squeeze() 79 | np.save( 80 | os.path.join(self.params.output_path, "depth", name_disp + ".npy"), 81 | np.array(inverse_depth), 82 | ) 83 | pbar.update(1) 84 | 85 | coordinator.request_stop() 86 | coordinator.join(threads) 87 | 88 | def get_name(self, step): 89 | """Get right file name 90 | :param step: current step 91 | :return name: name of artifact, based on step 92 | """ 93 | name = str(step) 94 | return name 95 | -------------------------------------------------------------------------------- /testers/kitti_flow.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Fabio Tosi, Filippo Aleotti, Pierluigi Zama Ramirez, Matteo Poggi, 2 | # Samuele Salti, Luigi Di Stefano, Stefano Mattoccia 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """ Tester for KITTI optical flow 17 | """ 18 | 19 | from __future__ import division 20 | import os 21 | import tensorflow as tf 22 | from tqdm import tqdm 23 | import cv2 24 | import numpy as np 25 | from testers.general_tester import GeneralTester 26 | from helpers import utilities 27 | 28 | 29 | class Tester(GeneralTester): 30 | """Tester for optical flow on KITTI 31 | """ 32 | 33 | def prepare(self): 34 | """Create output folders 35 | """ 36 | dest = os.path.join(self.params.output_path, "flow") 37 | utilities.create_dir(dest) 38 | 39 | def test(self, network, dataloader, training_flag): 40 | """Generate optical 41 | It saves optical flow artifacts in the 42 | self.params.output_path/flow folder. 
43 | :param network: network to test 44 | :param dataloader: dataloader for this test 45 | :param training_flag: training_flag for Batchnorm 46 | """ 47 | config = tf.ConfigProto(allow_soft_placement=True) 48 | sess = tf.Session(config=config) 49 | 50 | self.prepare() 51 | var_list = network.get_network_params() 52 | saver = tf.train.Saver(var_list=var_list) 53 | 54 | init_op = tf.group( 55 | tf.global_variables_initializer(), tf.local_variables_initializer() 56 | ) 57 | sess.run(init_op) 58 | 59 | coordinator = tf.train.Coordinator() 60 | threads = tf.train.start_queue_runners(sess=sess, coord=coordinator) 61 | 62 | saver.restore(sess, self.params.checkpoint_path) 63 | print(" [*] Load model: SUCCESS") 64 | 65 | predicted_flow = tf.image.resize_images( 66 | network.optical_flow, [dataloader.image_h, dataloader.image_w] 67 | ) 68 | 69 | print(" [*] Start optical flow artifacts generation") 70 | with tqdm(total=self.num_test_samples) as pbar: 71 | for step in range(self.num_test_samples): 72 | ops = [ 73 | predicted_flow, 74 | dataloader.image_h, 75 | dataloader.image_w, 76 | ] 77 | 78 | outputs = sess.run(ops, feed_dict={training_flag: False}) 79 | name = self.get_name(step) 80 | flow = outputs[0].squeeze() 81 | image_h = outputs[1] 82 | image_w = outputs[2] 83 | 84 | flow = self.scale_flow(flow, image_h, image_w) 85 | 86 | utilities.write_kitti_png_flow( 87 | os.path.join(self.params.output_path, "flow", name + ".png"), flow 88 | ) 89 | pbar.update(1) 90 | 91 | coordinator.request_stop() 92 | coordinator.join(threads) 93 | 94 | def get_name(self, step): 95 | """Get right file name 96 | :param step: current step 97 | :return name: name of artifact, based on step 98 | """ 99 | name = ( 100 | self.samples[step] 101 | .split(" ")[1] 102 | .replace("/", "_") 103 | .replace(".png", "") 104 | .strip() 105 | ) 106 | return name 107 | 108 | def scale_flow(self, flow, image_h, image_w): 109 | """Apply the scale factor to the resized optical flow 110 | :param flow: optical flow. Array with shape (H,W,2) 111 | :param image_h: height of the original image 112 | :param image_w: width of the original image 113 | :return scaled_flow: optical flow rescaled by the scaling factor 114 | """ 115 | scaling_w = image_w / self.params.width 116 | scaling_h = image_h / self.params.height 117 | flow *= np.tile(np.array((scaling_w, scaling_h), dtype=np.float32), (image_h, image_w, 1)) 118 | return flow 119 | -------------------------------------------------------------------------------- /testers/kitti_mask.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Fabio Tosi, Filippo Aleotti, Pierluigi Zama Ramirez, Matteo Poggi, 2 | # Samuele Salti, Luigi Di Stefano, Stefano Mattoccia 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License.
15 | 16 | from __future__ import division 17 | import tensorflow as tf 18 | import os 19 | import cv2 20 | import numpy as np 21 | from testers.general_tester import GeneralTester 22 | from helpers import utilities 23 | from tqdm import tqdm 24 | 25 | 26 | class Tester(GeneralTester): 27 | def prepare(self): 28 | """Create output folders 29 | """ 30 | dest = os.path.join(self.params.output_path, "mask") 31 | utilities.create_dir(dest) 32 | 33 | def test(self, network, dataloader, is_training): 34 | """ Test motion mask 35 | It saves motion mask artifacts in the self.params.output_path/mask folder. 36 | :param network: network to test 37 | :param dataloader: dataloader for this test 38 | :param is_training: training_flag for Batchnorm 39 | """ 40 | # SESSION 41 | config = tf.ConfigProto(allow_soft_placement=True) 42 | sess = tf.Session(config=config) 43 | 44 | self.prepare() 45 | var_list = network.get_network_params() 46 | saver = tf.train.Saver(var_list=var_list) 47 | 48 | init_op = tf.group( 49 | tf.global_variables_initializer(), tf.local_variables_initializer() 50 | ) 51 | sess.run(init_op) 52 | 53 | coordinator = tf.train.Coordinator() 54 | threads = tf.train.start_queue_runners(sess=sess, coord=coordinator) 55 | 56 | saver.restore(sess, self.params.checkpoint_path) 57 | 58 | print(" [*] Load model: SUCCESS") 59 | 60 | segmented_mask = tf.image.resize_images( 61 | network.motion_mask, 62 | [dataloader.image_h, dataloader.image_w], 63 | method=tf.image.ResizeMethod.NEAREST_NEIGHBOR, 64 | ) 65 | with tqdm(total=self.num_test_samples) as pbar: 66 | for step in range(self.num_test_samples): 67 | ops = [segmented_mask] 68 | outputs = sess.run(ops, feed_dict={is_training: False}) 69 | 70 | name = self.get_name(step) 71 | seg_mask = outputs[0].squeeze() 72 | 73 | cv2.imwrite( 74 | os.path.join(self.params.output_path, "mask", name + ".png"), 75 | (seg_mask * 255.0).astype(np.uint8), 76 | ) 77 | pbar.update(1) 78 | 79 | coordinator.request_stop() 80 | coordinator.join(threads) 81 | 82 | def get_name(self, step): 83 | """Get right file name 84 | :param step: current step 85 | :return name: name of artifact, based on step 86 | """ 87 | name = ( 88 | self.samples[step] 89 | .split(" ")[1] 90 | .replace("/", "_") 91 | .replace(".png", "") 92 | .strip() 93 | ) 94 | return name 95 | -------------------------------------------------------------------------------- /testers/kitti_semantic.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Fabio Tosi, Filippo Aleotti, Pierluigi Zama Ramirez, Matteo Poggi, 2 | # Samuele Salti, Luigi Di Stefano, Stefano Mattoccia 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | """Generate semantic artifacts for KITTI 17 | """ 18 | import os 19 | import tensorflow as tf 20 | import cv2 21 | from tqdm import tqdm 22 | from testers import general_tester 23 | from helpers import utilities 24 | 25 | 26 | class Tester(general_tester.GeneralTester): 27 | def prepare(self): 28 | """Create output folders 29 | """ 30 | dest = os.path.join(self.params.output_path, "semantic") 31 | utilities.create_dir(dest) 32 | 33 | def test(self, network, dataloader, is_training): 34 | """Generate semantic artifacts. 35 | It saves semantic artifacts in the 36 | self.params.output_path/semantic folder. 37 | :param network: network to test 38 | :param dataloader: dataloader for this test 39 | :param is_training: training_flag for Batchnorm 40 | """ 41 | 42 | config = tf.ConfigProto(allow_soft_placement=True) 43 | sess = tf.Session(config=config) 44 | 45 | self.prepare() 46 | var_list = network.get_network_params() 47 | saver = tf.train.Saver(var_list=var_list) 48 | 49 | init_op = tf.group( 50 | tf.global_variables_initializer(), tf.local_variables_initializer() 51 | ) 52 | sess.run(init_op) 53 | 54 | coordinator = tf.train.Coordinator() 55 | threads = tf.train.start_queue_runners(sess=sess, coord=coordinator) 56 | 57 | saver.restore(sess, self.params.checkpoint_path) 58 | 59 | print(" [*] Load model: SUCCESS") 60 | 61 | prediction_semantic = tf.image.resize_images( 62 | network.semantic_logits, [dataloader.image_h, dataloader.image_w] 63 | ) 64 | ops = [tf.argmax(prediction_semantic[0], -1)] 65 | 66 | with tqdm(total=self.num_test_samples) as pbar: 67 | for step in range(self.num_test_samples): 68 | outputs = sess.run(ops, feed_dict={is_training: False}) 69 | name = self.get_file_name(step) 70 | semantic_map = outputs[0].squeeze() 71 | dest = os.path.join(self.params.output_path, "semantic", name + ".png") 72 | cv2.imwrite(dest, semantic_map) 73 | pbar.update(1) 74 | 75 | coordinator.request_stop() 76 | coordinator.join(threads) 77 | 78 | def get_file_name(self, step): 79 | """ Get name of nth line of test file 80 | :param step: current step 81 | :return name: name suited for KITTI (eg 000000_10) 82 | """ 83 | name = str(step).zfill(6) + "_10" 84 | return name 85 | --------------------------------------------------------------------------------
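A minimal invocation sketch for the test script above, based on the arguments declared in test.py; the dataset and checkpoint paths are placeholders for a local KITTI copy and a downloaded OmegaNet checkpoint prefix:

    python test.py --task depth --datapath /path/to/kitti --ckpt /path/to/checkpoints/omeganet --filenames_file filenames/eigen_test.txt --height 192 --width 640 --dest ./artifacts

single_inference.py accepts an analogous set of options (a target image, optional src1/src2 images when flow or motion_mask is requested, a checkpoint and an output folder); refer to its own argument parser for the exact flag names.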