├── .gitignore ├── README.md ├── assets ├── gifs │ ├── day.gif │ ├── fog.gif │ ├── night.gif │ └── snow.gif └── imgs │ ├── albedo_ambient_examples.png │ ├── architecture.png │ └── cbar.png ├── chebychev └── cheb_coef_real_degree6.txt ├── depth_flat_world └── depth_flat_world.npz ├── environment.yml ├── example ├── gated0_10bit │ └── 2019-01-09_08-27-29_00200.png ├── gated1_10bit │ └── 2019-01-09_08-27-29_00200.png └── gated2_10bit │ └── 2019-01-09_08-27-29_00200.png ├── scripts ├── eval_g2d.sh ├── eval_stf.sh ├── inference.sh ├── train.sh └── unzip_data.sh └── src ├── dataset ├── __init__.py ├── gated2depth.py └── gated_dataset.py ├── eval.py ├── inference.py ├── layers.py ├── networks ├── UNet.py ├── __init__.py ├── depth │ ├── DepthResNet.py │ ├── PackNet01.py │ ├── PackNetSlim01.py │ ├── PackNetSlim01MultiDecoder.py │ └── depth_decoder.py ├── depth_decoder.py ├── layers │ ├── packnet │ │ └── layers01.py │ ├── resnet │ │ ├── depth_decoder.py │ │ ├── layers.py │ │ └── resnet_encoder.py │ └── resnet_encoder.py ├── layers01.py ├── pose │ ├── pose_cnn.py │ └── pose_decoder.py ├── pose_cnn.py ├── pose_decoder.py ├── resnet_encoder.py └── resnet_encoder2.py ├── options.py ├── splits ├── g2d │ ├── real_test_day.txt │ └── real_test_night.txt ├── gated2gated │ ├── train_files.txt │ └── val_files.txt └── stf │ ├── test_clear_day.txt │ ├── test_clear_night.txt │ ├── test_dense_fog_day.txt │ ├── test_dense_fog_night.txt │ ├── test_light_fog_day.txt │ ├── test_light_fog_night.txt │ ├── test_snow_day.txt │ └── test_snow_night.txt ├── test.py ├── train.py ├── trainer.py ├── utils.py └── visualize2D.py /.gitignore: -------------------------------------------------------------------------------- 1 | */**/__pycache__/ 2 | src/test.ipynb 3 | .vscode/ 4 | weights/initialization/*.pth 5 | weights/final/*.pth 6 | logs/* -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Gated2Gated : Self-Supervised Depth Estimation from Gated Images 2 | 3 | ![architecture](assets/imgs/architecture.png) 4 | 5 | This repository contains code for [Gated2Gated : Self-Supervised Depth Estimation from Gated Images](https://arxiv.org/pdf/2112.02416.pdf). 6 | 7 | 8 | ## Summary 9 | Gated cameras hold promise as an alternative to scanning LiDAR sensors with high-resolution 3D depth that is robust to back-scatter in fog, snow, and rain. Instead of sequentially scanning a scene and directly recording depth via the photon time-of-flight, as in pulsed LiDAR sensors, gated imagers encode depth in the relative intensity of a handful of gated slices, captured at megapixel resolution. Although existing methods have shown that it is possible to decode high-resolution depth from such measurements, these methods require synchronized and calibrated LiDAR to supervise the gated depth decoder -- prohibiting fast adoption across geographies, training on large unpaired datasets, and exploring alternative applications outside of automotive use cases. In this work, we fill this gap and propose an entirely self-supervised depth estimation method that uses gated intensity profiles and temporal consistency as a training signal. The proposed model is trained end-to-end from gated video sequences, does not require LiDAR or RGB data, and learns to estimate absolute depth values. 
We take gated slices as input and disentangle the estimation of the scene albedo, depth, and ambient light, which are then used to learn to reconstruct the input slices through a cyclic loss. We rely on temporal consistency between a given frame and neighboring gated slices to estimate depth in regions with shadows and reflections. We experimentally validate that the proposed approach outperforms existing supervised and self-supervised depth estimation methods based on monocular RGB and stereo images, as well as supervised methods based on gated images.
10 | 
11 | ## Getting started
12 | To get started, first clone this repository into a local directory using:
13 | 
14 | ```
15 | git clone https://github.com/princeton-computational-imaging/Gated2Gated
16 | ```
17 | To install all necessary packages, create the conda environment using:
18 | ```
19 | conda env create -f environment.yml
20 | ```
21 | Activate the environment using:
22 | ```
23 | conda activate gated2gated
24 | ```
25 | 
26 | Download the Gated2Gated dataset and the models from the [DENSE dataset webpage](https://www.uni-ulm.de/en/in/driveu/projects/dense-datasets).
27 | 
28 | Check that all files have been downloaded, then unzip them using:
29 | ```
30 | sh scripts/unzip_data.sh
31 | ```
32 | 
33 | After unzipping the files, your directory should look like this:
34 | ```
35 | gated2gated
36 | ├── data
37 | │   ├── gated0_10bit
38 | │   ├── gated0_10bit_history_1
39 | │   ├── gated0_10bit_history_-1
40 | │   ├── gated0_10bit_history_2
41 | │   ├── gated0_10bit_history_-2
42 | │   ├── gated0_10bit_history_3
43 | │   ├── gated0_10bit_history_-3
44 | │   ├── gated0_10bit_history_4
45 | │   ├── gated0_10bit_history_-4
46 | │   ├── gated0_10bit_history_-5
47 | │   ├── gated0_10bit_history_-6
48 | │   ├── gated0_8bit
49 | │   ├── gated1_10bit
50 | │   ├── gated1_10bit_history_1
51 | │   ├── gated1_10bit_history_-1
52 | │   ├── gated1_10bit_history_2
53 | │   ├── gated1_10bit_history_-2
54 | │   ├── gated1_10bit_history_3
55 | │   ├── gated1_10bit_history_-3
56 | │   ├── gated1_10bit_history_4
57 | │   ├── gated1_10bit_history_-4
58 | │   ├── gated1_10bit_history_-5
59 | │   ├── gated1_10bit_history_-6
60 | │   ├── gated1_8bit
61 | │   ├── gated2_10bit
62 | │   ├── gated2_10bit_history_1
63 | │   ├── gated2_10bit_history_-1
64 | │   ├── gated2_10bit_history_2
65 | │   ├── gated2_10bit_history_-2
66 | │   ├── gated2_10bit_history_3
67 | │   ├── gated2_10bit_history_-3
68 | │   ├── gated2_10bit_history_4
69 | │   ├── gated2_10bit_history_-4
70 | │   ├── gated2_10bit_history_-5
71 | │   ├── gated2_10bit_history_-6
72 | │   ├── gated2_8bit
73 | │   ├── gated_passive_10bit
74 | │   ├── gated_passive_10bit_history_1
75 | │   ├── gated_passive_10bit_history_-1
76 | │   ├── gated_passive_10bit_history_2
77 | │   ├── gated_passive_10bit_history_-2
78 | │   ├── gated_passive_10bit_history_3
79 | │   ├── gated_passive_10bit_history_-3
80 | │   ├── gated_passive_10bit_history_4
81 | │   ├── gated_passive_10bit_history_-4
82 | │   ├── gated_passive_10bit_history_-5
83 | │   ├── gated_passive_10bit_history_-6
84 | │   ├── gated_passive_8bit
85 | │   ├── lidar_hdl64_strongest_filtered_gated
86 | │   └── lidar_hdl64_strongest_gated
87 | └── models
88 |     ├── g2d
89 |     ├── initialization
90 |     └── stf
91 | ```
92 | 
93 | ## Quick Example
94 | Infer depth for a single example using:
95 | ```
96 | sh scripts/inference.sh
97 | ```
98 | ## Training
99 | Train a model, initialized with pre-trained weights from a lower resolution, using:
100 | 
101 | ```
102 | sh scripts/train.sh
103 | ```
104 | 
105 | ## Evaluation
106 | If you have not trained the models 
yourself, make sure that you have downloaded our models into the "models" folder.
107 | 
108 | Evaluation on the [Seeing Through Fog](https://openaccess.thecvf.com/content_CVPR_2020/papers/Bijelic_Seeing_Through_Fog_Without_Seeing_Fog_Deep_Multimodal_Sensor_Fusion_CVPR_2020_paper.pdf) dataset:
109 | ```
110 | sh scripts/eval_stf.sh
111 | ```
112 | Please note that we used filtered LiDAR point clouds for evaluation on the Seeing Through Fog dataset. These point clouds are available in our Gated2Gated dataset.
113 | 
114 | Evaluation on the [Gated2Depth](https://openaccess.thecvf.com/content_ICCV_2019/papers/Gruber_Gated2Depth_Real-Time_Dense_Lidar_From_Gated_Images_ICCV_2019_paper.pdf) dataset:
115 | ```
116 | sh scripts/eval_g2d.sh
117 | ```
118 | 
119 | 
123 | 
124 | 
125 | 
126 | ### Pre-trained Models
127 | Our final model weights for the [Seeing Through Fog](https://openaccess.thecvf.com/content_CVPR_2020/papers/Bijelic_Seeing_Through_Fog_Without_Seeing_Fog_Deep_Multimodal_Sensor_Fusion_CVPR_2020_paper.pdf) and [Gated2Depth](https://openaccess.thecvf.com/content_ICCV_2019/papers/Gruber_Gated2Depth_Real-Time_Dense_Lidar_From_Gated_Images_ICCV_2019_paper.pdf) datasets are available for download at the [DENSE dataset webpage](https://www.uni-ulm.de/en/in/driveu/projects/dense-datasets).
128 | 
129 | ## Examples
130 | 
131 | ![albedo and ambient examples](assets/imgs/albedo_ambient_examples.png)
132 | ![color bar](assets/imgs/cbar.png)
133 | #### Day
134 | ![day](assets/gifs/day.gif)
135 | 
136 | #### Night
137 | ![night](assets/gifs/night.gif)
138 | 
139 | #### Fog
140 | ![fog](assets/gifs/fog.gif)
141 | 
142 | #### Snow
143 | ![snow](assets/gifs/snow.gif)
144 | 
145 | ## Reference
146 | If you find our work on gated depth estimation useful in your research, please consider citing our paper:
147 | 
148 | ```bib
149 | @inproceedings{walia2022gated2gated,
150 |   title={Gated2Gated: Self-Supervised Depth Estimation from Gated Images},
151 |   author={Walia, Amanpreet and Walz, Stefanie and Bijelic, Mario and Mannan, Fahim and Julca-Aguilar, Frank and Langer, Michael and Ritter, Werner and Heide, Felix},
152 |   booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
153 |   pages={2811--2821},
154 |   year={2022}
155 | }
156 | 
157 | ```
158 | 
159 | ## Acknowledgements
160 | 
161 | Parts of this code are inspired by or borrowed from [monodepth2](https://github.com/nianticlabs/monodepth2) and [packnet-sfm](https://github.com/TRI-ML/packnet-sfm). 
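For reference, the shell scripts above are thin wrappers around the Python entry points in `src/`. The snippet below is a minimal sketch of the inference path (loading the three gated slices, running the `PackNetSlim01` depth network, and converting disparity to metric depth with `disp_to_depth`), mirroring `src/eval.py` and `src/inference.py`. It assumes the pretrained STF weights have been downloaded to `models/stf`, uses the example frame shipped in `example/`, and must be run with `src/` on the Python path; `scripts/inference.sh` remains the supported entry point.

```python
import os

import cv2
import numpy as np
import torch
from torchvision.transforms import ToTensor

# Run with src/ on the Python path, e.g.:  PYTHONPATH=src python sketch.py
import networks
from layers import disp_to_depth

HEIGHT, WIDTH, MAX_10BIT = 512, 1024, 2 ** 10 - 1.0


def load_gated(example_dir, img_id):
    """Stack the three gated slices into an HxWx3 float image in [0, 1]."""
    slices = []
    for gate_id in range(3):
        path = os.path.join(example_dir, "gated%d_10bit" % gate_id, img_id + ".png")
        img = cv2.imread(path, cv2.IMREAD_UNCHANGED)
        # Center-crop to the network input size (as in src/eval.py).
        h0, w0 = (img.shape[0] - HEIGHT) // 2, (img.shape[1] - WIDTH) // 2
        img = img[h0:h0 + HEIGHT, w0:w0 + WIDTH]
        slices.append(np.float32(np.clip(img, 0, MAX_10BIT) / MAX_10BIT))
    return np.stack(slices, axis=2)


# Depth network and weights (models/stf is the assumed download location).
depth_net = networks.PackNetSlim01(dropout=0.5, version="1A").cuda().eval()
state = torch.load(os.path.join("models", "stf", "depth.pth"))
depth_net.load_state_dict({k: v for k, v in state.items() if k in depth_net.state_dict()})

gated = load_gated("example", "2019-01-09_08-27-29_00200")
with torch.no_grad():
    x = ToTensor()(gated).unsqueeze(0).cuda()
    disp = depth_net(x)[("disp", 0)]
    _, depth = disp_to_depth(disp, 0.1, 100.0)      # min/max depth as in the scripts
    depth_m = depth[0, 0].cpu().numpy() * 70.0      # depth_normalizer from the scripts
print(depth_m.shape, depth_m.min(), depth_m.max())
```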
162 | 163 | -------------------------------------------------------------------------------- /assets/gifs/day.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-computational-imaging/Gated2Gated/5056123c9ea88402dc5c9c9abbb195ab91ba8f6c/assets/gifs/day.gif -------------------------------------------------------------------------------- /assets/gifs/fog.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-computational-imaging/Gated2Gated/5056123c9ea88402dc5c9c9abbb195ab91ba8f6c/assets/gifs/fog.gif -------------------------------------------------------------------------------- /assets/gifs/night.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-computational-imaging/Gated2Gated/5056123c9ea88402dc5c9c9abbb195ab91ba8f6c/assets/gifs/night.gif -------------------------------------------------------------------------------- /assets/gifs/snow.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-computational-imaging/Gated2Gated/5056123c9ea88402dc5c9c9abbb195ab91ba8f6c/assets/gifs/snow.gif -------------------------------------------------------------------------------- /assets/imgs/albedo_ambient_examples.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-computational-imaging/Gated2Gated/5056123c9ea88402dc5c9c9abbb195ab91ba8f6c/assets/imgs/albedo_ambient_examples.png -------------------------------------------------------------------------------- /assets/imgs/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-computational-imaging/Gated2Gated/5056123c9ea88402dc5c9c9abbb195ab91ba8f6c/assets/imgs/architecture.png -------------------------------------------------------------------------------- /assets/imgs/cbar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-computational-imaging/Gated2Gated/5056123c9ea88402dc5c9c9abbb195ab91ba8f6c/assets/imgs/cbar.png -------------------------------------------------------------------------------- /chebychev/cheb_coef_real_degree6.txt: -------------------------------------------------------------------------------- 1 | 2.659209412079132562e+03 1.236691633033492735e+03 2.812828548292934716e+00 2 | -2.275481873303552334e+02 -9.717205288136297270e+01 -2.331735007256061643e+00 3 | 4.608997630906832121e+00 2.806590349227340297e+00 1.891523404614634807e-01 4 | -5.283966431194774688e-02 -4.158711021465923835e-02 -5.285076242325659202e-03 5 | 3.429758582007581000e-04 3.129857163806640447e-04 6.333752718504534029e-05 6 | -1.163829913802227196e-06 -1.158968363067281679e-06 -3.229803960331233870e-07 7 | 1.597517278468657243e-09 1.680924118421431858e-09 5.863465070137500970e-10 -------------------------------------------------------------------------------- /depth_flat_world/depth_flat_world.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-computational-imaging/Gated2Gated/5056123c9ea88402dc5c9c9abbb195ab91ba8f6c/depth_flat_world/depth_flat_world.npz -------------------------------------------------------------------------------- 
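The Chebyshev coefficient file above (`chebychev/cheb_coef_real_degree6.txt`, passed to training via `--coeff_fpath` in `scripts/train.sh`) stores seven coefficient rows (orders 0-6) in three columns. The sketch below shows one way to load and evaluate such a degree-6 Chebyshev series with NumPy; the assumption that each column corresponds to one gated slice, the example distance range, and the absence of any domain rescaling are illustrative guesses, not behavior confirmed by the repository.

```python
import numpy as np

# Degree-6 Chebyshev coefficients: 7 rows (orders 0..6), 3 columns
# (assumed here to be one column per gated slice).
coefs = np.loadtxt("chebychev/cheb_coef_real_degree6.txt")  # shape (7, 3)

# Illustrative query distances in meters (assumed range, not from the repo).
distances = np.linspace(3.0, 100.0, 200)

# Evaluate one Chebyshev series per column; the training code may rescale
# distances to a normalized domain first, which is not reproduced here.
profiles = np.stack(
    [np.polynomial.chebyshev.chebval(distances, coefs[:, k]) for k in range(coefs.shape[1])],
    axis=-1,
)
print(profiles.shape)  # (200, 3)
```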
/environment.yml: -------------------------------------------------------------------------------- 1 | name: gated2gated 2 | channels: 3 | - pytorch 4 | - defaults 5 | - conda-forge 6 | dependencies: 7 | - _ipyw_jlab_nb_ext_conf=0.1.0=py38_0 8 | - _libgcc_mutex=0.1=main 9 | - alabaster=0.7.12=pyhd3eb1b0_0 10 | - anaconda=2021.05=py38_0 11 | - anaconda-client=1.7.2=py38_0 12 | - anaconda-navigator=2.0.3=py38_0 13 | - anaconda-project=0.9.1=pyhd3eb1b0_1 14 | - anyio=2.2.0=py38h06a4308_1 15 | - appdirs=1.4.4=py_0 16 | - argh=0.26.2=py38_0 17 | - argon2-cffi=20.1.0=py38h27cfd23_1 18 | - asn1crypto=1.4.0=py_0 19 | - astroid=2.5=py38h06a4308_1 20 | - astropy=4.2.1=py38h27cfd23_1 21 | - async_generator=1.10=pyhd3eb1b0_0 22 | - atomicwrites=1.4.0=py_0 23 | - attrs=20.3.0=pyhd3eb1b0_0 24 | - autopep8=1.5.6=pyhd3eb1b0_0 25 | - babel=2.9.0=pyhd3eb1b0_0 26 | - backcall=0.2.0=pyhd3eb1b0_0 27 | - backports=1.0=pyhd3eb1b0_2 28 | - backports.functools_lru_cache=1.6.4=pyhd3eb1b0_0 29 | - backports.shutil_get_terminal_size=1.0.0=pyhd3eb1b0_3 30 | - backports.tempfile=1.0=pyhd3eb1b0_1 31 | - backports.weakref=1.0.post1=py_1 32 | - beautifulsoup4=4.9.3=pyha847dfd_0 33 | - bitarray=2.1.0=py38h27cfd23_1 34 | - bkcharts=0.2=py38_0 35 | - black=19.10b0=py_0 36 | - blas=1.0=mkl 37 | - bleach=3.3.0=pyhd3eb1b0_0 38 | - blosc=1.21.0=h8c45485_0 39 | - bokeh=2.3.2=py38h06a4308_0 40 | - boto=2.49.0=py38_0 41 | - bottleneck=1.3.2=py38heb32a55_1 42 | - brotlipy=0.7.0=py38h27cfd23_1003 43 | - bzip2=1.0.8=h7b6447c_0 44 | - c-ares=1.17.1=h27cfd23_0 45 | - ca-certificates=2021.4.13=h06a4308_1 46 | - cairo=1.16.0=hf32fb01_1 47 | - certifi=2020.12.5=py38h06a4308_0 48 | - cffi=1.14.5=py38h261ae71_0 49 | - chardet=4.0.0=py38h06a4308_1003 50 | - click=7.1.2=pyhd3eb1b0_0 51 | - cloudpickle=1.6.0=py_0 52 | - clyent=1.2.2=py38_1 53 | - colorama=0.4.4=pyhd3eb1b0_0 54 | - conda=4.11.0=py38h578d9bd_0 55 | - conda-build=3.21.4=py38h06a4308_0 56 | - conda-content-trust=0.1.1=pyhd3eb1b0_0 57 | - conda-env=2.6.0=1 58 | - conda-package-handling=1.7.3=py38h27cfd23_1 59 | - conda-repo-cli=1.0.4=pyhd3eb1b0_0 60 | - conda-token=0.3.0=pyhd3eb1b0_0 61 | - conda-verify=3.4.2=py_1 62 | - contextlib2=0.6.0.post1=py_0 63 | - cryptography=3.4.7=py38hd23ed53_0 64 | - cudatoolkit=11.0.221=h6bb024c_0 65 | - curl=7.71.1=hbc83047_1 66 | - cycler=0.10.0=py38_0 67 | - cython=0.29.23=py38h2531618_0 68 | - cytoolz=0.11.0=py38h7b6447c_0 69 | - dask=2021.4.0=pyhd3eb1b0_0 70 | - dask-core=2021.4.0=pyhd3eb1b0_0 71 | - dbus=1.13.18=hb2f20db_0 72 | - decorator=5.0.6=pyhd3eb1b0_0 73 | - defusedxml=0.7.1=pyhd3eb1b0_0 74 | - diff-match-patch=20200713=py_0 75 | - distributed=2021.4.1=py38h06a4308_0 76 | - docutils=0.17.1=py38h06a4308_1 77 | - entrypoints=0.3=py38_0 78 | - et_xmlfile=1.0.1=py_1001 79 | - expat=2.3.0=h2531618_2 80 | - fastcache=1.1.0=py38h7b6447c_0 81 | - ffmpeg=4.3=hf484d3e_0 82 | - filelock=3.0.12=pyhd3eb1b0_1 83 | - flake8=3.9.0=pyhd3eb1b0_0 84 | - flask=1.1.2=pyhd3eb1b0_0 85 | - fontconfig=2.13.1=h6c09931_0 86 | - freetype=2.10.4=h5ab3b9f_0 87 | - fribidi=1.0.10=h7b6447c_0 88 | - fsspec=0.9.0=pyhd3eb1b0_0 89 | - future=0.18.2=py38_1 90 | - get_terminal_size=1.0.0=haa9412d_0 91 | - gevent=21.1.2=py38h27cfd23_1 92 | - glib=2.68.1=h36276a3_0 93 | - glob2=0.7=pyhd3eb1b0_0 94 | - gmp=6.2.1=h2531618_2 95 | - gmpy2=2.0.8=py38hd5f6e3b_3 96 | - gnutls=3.6.15=he1e5248_0 97 | - graphite2=1.3.14=h23475e2_0 98 | - greenlet=1.0.0=py38h2531618_2 99 | - gst-plugins-base=1.14.0=h8213a91_2 100 | - gstreamer=1.14.0=h28cd5cc_2 101 | - h5py=2.10.0=py38h7918eee_0 102 | - 
harfbuzz=2.8.0=h6f93f22_0 103 | - hdf5=1.10.4=hb1b8bf9_0 104 | - heapdict=1.0.1=py_0 105 | - html5lib=1.1=py_0 106 | - icu=58.2=he6710b0_3 107 | - idna=2.10=pyhd3eb1b0_0 108 | - imageio=2.9.0=pyhd3eb1b0_0 109 | - imagesize=1.2.0=pyhd3eb1b0_0 110 | - importlib_metadata=3.10.0=hd3eb1b0_0 111 | - iniconfig=1.1.1=pyhd3eb1b0_0 112 | - intel-openmp=2021.2.0=h06a4308_610 113 | - intervaltree=3.1.0=py_0 114 | - ipykernel=5.3.4=py38h5ca1d4c_0 115 | - ipython=7.22.0=py38hb070fc8_0 116 | - ipython_genutils=0.2.0=pyhd3eb1b0_1 117 | - ipywidgets=7.6.3=pyhd3eb1b0_1 118 | - isort=5.8.0=pyhd3eb1b0_0 119 | - itsdangerous=1.1.0=pyhd3eb1b0_0 120 | - jbig=2.1=hdba287a_0 121 | - jdcal=1.4.1=py_0 122 | - jedi=0.17.2=py38h06a4308_1 123 | - jeepney=0.6.0=pyhd3eb1b0_0 124 | - jinja2=2.11.3=pyhd3eb1b0_0 125 | - joblib=1.0.1=pyhd3eb1b0_0 126 | - jpeg=9b=h024ee3a_2 127 | - json5=0.9.5=py_0 128 | - jsonschema=3.2.0=py_2 129 | - jupyter=1.0.0=py38_7 130 | - jupyter-packaging=0.7.12=pyhd3eb1b0_0 131 | - jupyter_client=6.1.12=pyhd3eb1b0_0 132 | - jupyter_console=6.4.0=pyhd3eb1b0_0 133 | - jupyter_core=4.7.1=py38h06a4308_0 134 | - jupyter_server=1.4.1=py38h06a4308_0 135 | - jupyterlab=3.0.14=pyhd3eb1b0_1 136 | - jupyterlab_pygments=0.1.2=py_0 137 | - jupyterlab_server=2.4.0=pyhd3eb1b0_0 138 | - jupyterlab_widgets=1.0.0=pyhd3eb1b0_1 139 | - keyring=22.3.0=py38h06a4308_0 140 | - kiwisolver=1.3.1=py38h2531618_0 141 | - krb5=1.18.2=h173b8e3_0 142 | - lame=3.100=h7b6447c_0 143 | - lazy-object-proxy=1.6.0=py38h27cfd23_0 144 | - lcms2=2.12=h3be6417_0 145 | - ld_impl_linux-64=2.33.1=h53a641e_7 146 | - libarchive=3.4.2=h62408e4_0 147 | - libcurl=7.71.1=h20c2e04_1 148 | - libedit=3.1.20210216=h27cfd23_1 149 | - libev=4.33=h7b6447c_0 150 | - libffi=3.3=he6710b0_2 151 | - libgcc-ng=9.1.0=hdf63c60_0 152 | - libgfortran-ng=7.3.0=hdf63c60_0 153 | - libiconv=1.15=h63c8f33_5 154 | - libidn2=2.3.2=h7f8727e_0 155 | - liblief=0.10.1=he6710b0_0 156 | - libllvm10=10.0.1=hbcb73fb_5 157 | - libpng=1.6.37=hbc83047_0 158 | - libsodium=1.0.18=h7b6447c_0 159 | - libspatialindex=1.9.3=h2531618_0 160 | - libssh2=1.9.0=h1ba5d50_1 161 | - libstdcxx-ng=9.1.0=hdf63c60_0 162 | - libtasn1=4.16.0=h27cfd23_0 163 | - libtiff=4.2.0=h85742a9_0 164 | - libtool=2.4.6=h7b6447c_1005 165 | - libunistring=0.9.10=h27cfd23_0 166 | - libuuid=1.0.3=h1bed415_2 167 | - libuv=1.40.0=h7b6447c_0 168 | - libwebp-base=1.2.0=h27cfd23_0 169 | - libxcb=1.14=h7b6447c_0 170 | - libxml2=2.9.10=hb55368b_3 171 | - libxslt=1.1.34=hc22bd24_0 172 | - llvmlite=0.36.0=py38h612dafd_4 173 | - locket=0.2.1=py38h06a4308_1 174 | - lxml=4.6.3=py38h9120a33_0 175 | - lz4-c=1.9.3=h2531618_0 176 | - lzo=2.10=h7b6447c_2 177 | - markupsafe=1.1.1=py38h7b6447c_0 178 | - matplotlib=3.3.4=py38h06a4308_0 179 | - matplotlib-base=3.3.4=py38h62a2d02_0 180 | - mccabe=0.6.1=py38_1 181 | - mistune=0.8.4=py38h7b6447c_1000 182 | - mkl=2021.2.0=h06a4308_296 183 | - mkl-service=2.3.0=py38h27cfd23_1 184 | - mkl_fft=1.3.0=py38h42c9631_2 185 | - mkl_random=1.2.1=py38ha9443f7_2 186 | - mock=4.0.3=pyhd3eb1b0_0 187 | - more-itertools=8.7.0=pyhd3eb1b0_0 188 | - mpc=1.1.0=h10f8cd9_1 189 | - mpfr=4.0.2=hb69a4c5_1 190 | - mpmath=1.2.1=py38h06a4308_0 191 | - msgpack-python=1.0.2=py38hff7bd54_1 192 | - multipledispatch=0.6.0=py38_0 193 | - mypy_extensions=0.4.3=py38_0 194 | - navigator-updater=0.2.1=py38_0 195 | - nbclassic=0.2.6=pyhd3eb1b0_0 196 | - nbclient=0.5.3=pyhd3eb1b0_0 197 | - nbconvert=6.0.7=py38_0 198 | - nbformat=5.1.3=pyhd3eb1b0_0 199 | - ncurses=6.2=he6710b0_1 200 | - nest-asyncio=1.5.1=pyhd3eb1b0_0 201 | - 
nettle=3.7.3=hbbd107a_1 202 | - networkx=2.5=py_0 203 | - ninja=1.10.2=py38hd09550d_3 204 | - nltk=3.6.1=pyhd3eb1b0_0 205 | - nose=1.3.7=pyhd3eb1b0_1006 206 | - notebook=6.3.0=py38h06a4308_0 207 | - numba=0.53.1=py38ha9443f7_0 208 | - numexpr=2.7.3=py38h22e1b3c_1 209 | - numpy=1.20.1=py38h93e21f0_0 210 | - numpy-base=1.20.1=py38h7d8b39e_0 211 | - numpydoc=1.1.0=pyhd3eb1b0_1 212 | - olefile=0.46=py_0 213 | - openh264=2.1.0=hd408876_0 214 | - openpyxl=3.0.7=pyhd3eb1b0_0 215 | - openssl=1.1.1k=h27cfd23_0 216 | - packaging=20.9=pyhd3eb1b0_0 217 | - pandas=1.2.4=py38h2531618_0 218 | - pandoc=2.12=h06a4308_0 219 | - pandocfilters=1.4.3=py38h06a4308_1 220 | - pango=1.45.3=hd140c19_0 221 | - parso=0.7.0=py_0 222 | - partd=1.2.0=pyhd3eb1b0_0 223 | - patchelf=0.12=h2531618_1 224 | - path=15.1.2=py38h06a4308_0 225 | - path.py=12.5.0=0 226 | - pathlib2=2.3.5=py38h06a4308_2 227 | - pathspec=0.7.0=py_0 228 | - patsy=0.5.1=py38_0 229 | - pcre=8.44=he6710b0_0 230 | - pep8=1.7.1=py38_0 231 | - pexpect=4.8.0=pyhd3eb1b0_3 232 | - pickleshare=0.7.5=pyhd3eb1b0_1003 233 | - pillow=8.2.0=py38he98fc37_0 234 | - pip=21.0.1=py38h06a4308_0 235 | - pixman=0.40.0=h7b6447c_0 236 | - pkginfo=1.7.0=py38h06a4308_0 237 | - pluggy=0.13.1=py38h06a4308_0 238 | - ply=3.11=py38_0 239 | - prometheus_client=0.10.1=pyhd3eb1b0_0 240 | - prompt-toolkit=3.0.17=pyh06a4308_0 241 | - prompt_toolkit=3.0.17=hd3eb1b0_0 242 | - psutil=5.8.0=py38h27cfd23_1 243 | - ptyprocess=0.7.0=pyhd3eb1b0_2 244 | - py=1.10.0=pyhd3eb1b0_0 245 | - py-lief=0.10.1=py38h403a769_0 246 | - pycodestyle=2.6.0=pyhd3eb1b0_0 247 | - pycosat=0.6.3=py38h7b6447c_1 248 | - pycparser=2.20=py_2 249 | - pycurl=7.43.0.6=py38h1ba5d50_0 250 | - pydocstyle=6.0.0=pyhd3eb1b0_0 251 | - pyerfa=1.7.3=py38h27cfd23_0 252 | - pyflakes=2.2.0=pyhd3eb1b0_0 253 | - pygments=2.8.1=pyhd3eb1b0_0 254 | - pylint=2.7.4=py38h06a4308_1 255 | - pyls-black=0.4.6=hd3eb1b0_0 256 | - pyls-spyder=0.3.2=pyhd3eb1b0_0 257 | - pyodbc=4.0.30=py38he6710b0_0 258 | - pyopenssl=20.0.1=pyhd3eb1b0_1 259 | - pyparsing=2.4.7=pyhd3eb1b0_0 260 | - pyqt=5.9.2=py38h05f1152_4 261 | - pyrsistent=0.17.3=py38h7b6447c_0 262 | - pysocks=1.7.1=py38h06a4308_0 263 | - pytables=3.6.1=py38h9fd0a39_0 264 | - pytest=6.2.3=py38h06a4308_2 265 | - python=3.8.8=hdb3f193_5 266 | - python-dateutil=2.8.1=pyhd3eb1b0_0 267 | - python-jsonrpc-server=0.4.0=py_0 268 | - python-language-server=0.36.2=pyhd3eb1b0_0 269 | - python-libarchive-c=2.9=pyhd3eb1b0_1 270 | - python_abi=3.8=2_cp38 271 | - pytorch=1.7.1=py3.8_cuda11.0.221_cudnn8.0.5_0 272 | - pytorch-mutex=1.0=cuda 273 | - pytz=2021.1=pyhd3eb1b0_0 274 | - pywavelets=1.1.1=py38h7b6447c_2 275 | - pyxdg=0.27=pyhd3eb1b0_0 276 | - pyyaml=5.4.1=py38h27cfd23_1 277 | - pyzmq=20.0.0=py38h2531618_1 278 | - qdarkstyle=2.8.1=py_0 279 | - qt=5.9.7=h5867ecd_1 280 | - qtawesome=1.0.2=pyhd3eb1b0_0 281 | - qtconsole=5.0.3=pyhd3eb1b0_0 282 | - qtpy=1.9.0=py_0 283 | - readline=8.1=h27cfd23_0 284 | - regex=2021.4.4=py38h27cfd23_0 285 | - requests=2.25.1=pyhd3eb1b0_0 286 | - ripgrep=12.1.1=0 287 | - rope=0.18.0=py_0 288 | - rtree=0.9.7=py38h06a4308_1 289 | - ruamel_yaml=0.15.100=py38h27cfd23_0 290 | - scikit-image=0.18.1=py38ha9443f7_0 291 | - scikit-learn=0.24.1=py38ha9443f7_0 292 | - scipy=1.6.2=py38had2a1c9_1 293 | - seaborn=0.11.1=pyhd3eb1b0_0 294 | - secretstorage=3.3.1=py38h06a4308_0 295 | - send2trash=1.5.0=pyhd3eb1b0_1 296 | - setuptools=52.0.0=py38h06a4308_0 297 | - simplegeneric=0.8.1=py38_2 298 | - singledispatch=3.6.1=pyhd3eb1b0_1001 299 | - sip=4.19.13=py38he6710b0_0 300 | - 
six=1.15.0=py38h06a4308_0 301 | - sniffio=1.2.0=py38h06a4308_1 302 | - snowballstemmer=2.1.0=pyhd3eb1b0_0 303 | - sortedcollections=2.1.0=pyhd3eb1b0_0 304 | - sortedcontainers=2.3.0=pyhd3eb1b0_0 305 | - soupsieve=2.2.1=pyhd3eb1b0_0 306 | - sphinx=4.0.1=pyhd3eb1b0_0 307 | - sphinxcontrib=1.0=py38_1 308 | - sphinxcontrib-applehelp=1.0.2=pyhd3eb1b0_0 309 | - sphinxcontrib-devhelp=1.0.2=pyhd3eb1b0_0 310 | - sphinxcontrib-htmlhelp=1.0.3=pyhd3eb1b0_0 311 | - sphinxcontrib-jsmath=1.0.1=pyhd3eb1b0_0 312 | - sphinxcontrib-qthelp=1.0.3=pyhd3eb1b0_0 313 | - sphinxcontrib-serializinghtml=1.1.4=pyhd3eb1b0_0 314 | - sphinxcontrib-websupport=1.2.4=py_0 315 | - spyder=4.2.5=py38h06a4308_0 316 | - spyder-kernels=1.10.2=py38h06a4308_0 317 | - sqlalchemy=1.4.15=py38h27cfd23_0 318 | - sqlite=3.35.4=hdfb4753_0 319 | - statsmodels=0.12.2=py38h27cfd23_0 320 | - sympy=1.8=py38h06a4308_0 321 | - tbb=2020.3=hfd86e86_0 322 | - tblib=1.7.0=py_0 323 | - terminado=0.9.4=py38h06a4308_0 324 | - testpath=0.4.4=pyhd3eb1b0_0 325 | - textdistance=4.2.1=pyhd3eb1b0_0 326 | - threadpoolctl=2.1.0=pyh5ca1d4c_0 327 | - three-merge=0.1.1=pyhd3eb1b0_0 328 | - tifffile=2020.10.1=py38hdd07704_2 329 | - tk=8.6.10=hbc83047_0 330 | - toml=0.10.2=pyhd3eb1b0_0 331 | - toolz=0.11.1=pyhd3eb1b0_0 332 | - torchaudio=0.7.2=py38 333 | - torchinfo=1.5.3=pyhd8ed1ab_0 334 | - torchvision=0.8.2=py38_cu110 335 | - tornado=6.1=py38h27cfd23_0 336 | - tqdm=4.59.0=pyhd3eb1b0_1 337 | - traitlets=5.0.5=pyhd3eb1b0_0 338 | - typed-ast=1.4.2=py38h27cfd23_1 339 | - typing_extensions=3.7.4.3=pyha847dfd_0 340 | - ujson=4.0.2=py38h2531618_0 341 | - unicodecsv=0.14.1=py38_0 342 | - unixodbc=2.3.9=h7b6447c_0 343 | - urllib3=1.26.4=pyhd3eb1b0_0 344 | - watchdog=1.0.2=py38h06a4308_1 345 | - wcwidth=0.2.5=py_0 346 | - webencodings=0.5.1=py38_1 347 | - werkzeug=1.0.1=pyhd3eb1b0_0 348 | - wheel=0.36.2=pyhd3eb1b0_0 349 | - widgetsnbextension=3.5.1=py38_0 350 | - wrapt=1.12.1=py38h7b6447c_1 351 | - wurlitzer=2.1.0=py38h06a4308_0 352 | - xlrd=2.0.1=pyhd3eb1b0_0 353 | - xlsxwriter=1.3.8=pyhd3eb1b0_0 354 | - xlwt=1.3.0=py38_0 355 | - xmltodict=0.12.0=py_0 356 | - xz=5.2.5=h7b6447c_0 357 | - yaml=0.2.5=h7b6447c_0 358 | - yapf=0.31.0=pyhd3eb1b0_0 359 | - zeromq=4.3.4=h2531618_0 360 | - zict=2.0.0=pyhd3eb1b0_0 361 | - zipp=3.4.1=pyhd3eb1b0_0 362 | - zlib=1.2.11=h7b6447c_3 363 | - zope=1.0=py38_1 364 | - zope.event=4.5.0=py38_0 365 | - zope.interface=5.3.0=py38h27cfd23_0 366 | - zstd=1.4.5=h9ceee32_0 367 | - pip: 368 | - absl-py==1.0.0 369 | - cachetools==4.2.4 370 | - google-auth==2.3.3 371 | - google-auth-oauthlib==0.4.6 372 | - grpcio==1.42.0 373 | - importlib-metadata==4.8.2 374 | - markdown==3.3.6 375 | - matplotlib2tikz==0.7.6 376 | - oauthlib==3.1.1 377 | - opencv-python==4.5.4.58 378 | - protobuf==3.19.1 379 | - pyasn1==0.4.8 380 | - pyasn1-modules==0.2.8 381 | - requests-oauthlib==1.3.0 382 | - rsa==4.7.2 383 | - tensorboard==2.7.0 384 | - tensorboard-data-server==0.6.1 385 | - tensorboard-plugin-wit==1.8.0 386 | - tensorboardx==2.4 387 | - tikzplotlib==0.9.15 388 | prefix: /home/amanpreet.walia/anaconda3 389 | -------------------------------------------------------------------------------- /example/gated0_10bit/2019-01-09_08-27-29_00200.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-computational-imaging/Gated2Gated/5056123c9ea88402dc5c9c9abbb195ab91ba8f6c/example/gated0_10bit/2019-01-09_08-27-29_00200.png -------------------------------------------------------------------------------- 
/example/gated1_10bit/2019-01-09_08-27-29_00200.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-computational-imaging/Gated2Gated/5056123c9ea88402dc5c9c9abbb195ab91ba8f6c/example/gated1_10bit/2019-01-09_08-27-29_00200.png -------------------------------------------------------------------------------- /example/gated2_10bit/2019-01-09_08-27-29_00200.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-computational-imaging/Gated2Gated/5056123c9ea88402dc5c9c9abbb195ab91ba8f6c/example/gated2_10bit/2019-01-09_08-27-29_00200.png -------------------------------------------------------------------------------- /scripts/eval_g2d.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | export CUDA_VISIBLE_DEVICES=0 3 | daytimes=( "day" "night") 4 | 5 | 6 | for daytime in "${daytimes[@]}" 7 | do 8 | 9 | echo "daytime: $daytime" 10 | eval_files="./src/splits/g2d/real_test_${daytime}.txt" 11 | python src/eval.py \ 12 | --data_dir data \ 13 | --min_depth 0.1 \ 14 | --max_depth 100.0 \ 15 | --height 512 \ 16 | --width 1024 \ 17 | --load_weights_folder models/g2d \ 18 | --results_dir results/g2d \ 19 | --eval_files_path $eval_files \ 20 | --dataset g2d \ 21 | --g2d_crop \ 22 | --gen_figs 23 | 24 | done -------------------------------------------------------------------------------- /scripts/eval_stf.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | export CUDA_VISIBLE_DEVICES=0 3 | weathers=( "clear" "light_fog" "dense_fog" "snow" ) 4 | daytimes=( "day" "night") 5 | 6 | 7 | for daytime in "${daytimes[@]}" 8 | do 9 | for weather in "${weathers[@]}" 10 | do 11 | echo "daytime: $daytime, weather: $weather" 12 | eval_files="./src/splits/stf/test_${weather}_${daytime}.txt" 13 | python src/eval.py \ 14 | --data_dir data \ 15 | --min_depth 0.1 \ 16 | --max_depth 100.0 \ 17 | --height 512 \ 18 | --width 1024 \ 19 | --load_weights_folder models/stf \ 20 | --results_dir results/stf \ 21 | --eval_files_path $eval_files \ 22 | --dataset stf \ 23 | --g2d_crop \ 24 | --gen_figs \ 25 | --binned_metrics 26 | done 27 | done -------------------------------------------------------------------------------- /scripts/inference.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | export CUDA_VISIBLE_DEVICES=0 3 | python src/inference.py \ 4 | --data_dir ./example \ 5 | --height 512 \ 6 | --width 1024 \ 7 | --min_depth 0.1 \ 8 | --max_depth 100.0 \ 9 | --depth_normalizer 70.0 \ 10 | --results_dir ./results \ 11 | --weights_dir ./models/stf -------------------------------------------------------------------------------- /scripts/train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | export CUDA_VISIBLE_DEVICES=0 3 | python src/train.py \ 4 | --data_dir ./data \ 5 | --log_dir ./logs \ 6 | --coeff_fpath chebychev \ 7 | --depth_flat_world_fpath depth_flat_world \ 8 | --model_name multinetwork \ 9 | --model_type multinetwork \ 10 | --exp_name multinetwork \ 11 | --models_to_load depth ambient encoder albedo pose pose_encoder \ 12 | --load_weights_folder ./models/initialization \ 13 | --exp_num 0 \ 14 | --height 512 \ 15 | --width 1024 \ 16 | --num_bits 10 \ 17 | --scales 0 \ 18 | --frame_ids 0 -1 1 \ 19 | --pose_model_type separate_resnet \ 20 | --num_layers 18 \ 21 | 
--weights_init pretrained \ 22 | --pose_model_input pairs \ 23 | --min_depth 0.1 \ 24 | --max_depth 100.0 \ 25 | --dataset gated \ 26 | --split gated2gated \ 27 | --batch_size 4 \ 28 | --num_workers 4 \ 29 | --learning_rate 2e-4 \ 30 | --num_epochs 20 \ 31 | --scheduler_step_size 15 \ 32 | --disparity_smoothness 0.001 \ 33 | --log_frequency 200 \ 34 | --save_frequency 1 \ 35 | --cycle_weight 0.05 \ 36 | --depth_normalizer 70.0 \ 37 | --passive_weight 0.01 \ 38 | --cycle_loss \ 39 | --temporal_loss \ 40 | --sim_gated \ 41 | --v1_multiscale \ 42 | --infty_hole_mask \ 43 | --snr_mask \ 44 | --intensity_mask \ 45 | --passive_supervision \ 46 | -------------------------------------------------------------------------------- /scripts/unzip_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | download_root=$1 4 | dst=$2 5 | 6 | files=( 7 | $download_root/gated2gated.z01 8 | $download_root/gated2gated.z02 9 | $download_root/gated2gated.z03 10 | $download_root/gated2gated.z04 11 | $download_root/gated2gated.z05 12 | $download_root/gated2gated.z06 13 | $download_root/gated2gated.z07 14 | $download_root/gated2gated.z08 15 | $download_root/gated2gated.z09 16 | $download_root/gated2gated.z10 17 | $download_root/gated2gated.z11 18 | $download_root/gated2gated.zip 19 | ) 20 | mkdir -p $dst 21 | all_exists=true 22 | for item in ${files[*]} 23 | do 24 | if [[ ! -f "$item" ]]; then 25 | echo "$item is missing" 26 | all_exists=false 27 | fi 28 | done 29 | 30 | if $all_exists; then 31 | zip -s- $download_root/gated2gated.zip -O $dst/gated2gated_full.zip 32 | unzip $dst/gated2gated_full.zip 33 | fi 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /src/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from .gated2depth import Gated2DepthDataset 2 | from .gated_dataset import GatedDataset 3 | -------------------------------------------------------------------------------- /src/dataset/gated2depth.py: -------------------------------------------------------------------------------- 1 | from . import gated_dataset 2 | import numpy as np 3 | import os 4 | import cv2 5 | import random 6 | import torch 7 | 8 | def read_gt_image(base_dir, img_id, data_type, depth_normalizer = 150.0, min_distance=0.1, max_distance=100.0, scale_images=False, 9 | scaled_img_width=None, 10 | crop_size_h= 104,crop_size_w = 128, 11 | scaled_img_height=None, raw_values_only=False): 12 | 13 | if data_type == 'real': 14 | depth_lidar1 = np.load(os.path.join(base_dir, "depth_hdl64_gated_compressed", img_id + '.npz'))['arr_0'] 15 | depth_lidar1 = depth_lidar1[crop_size_h:(depth_lidar1.shape[0] - crop_size_h), 16 | crop_size_w:(depth_lidar1.shape[1] - crop_size_w)] 17 | if raw_values_only: 18 | return depth_lidar1, None 19 | 20 | gt_mask = (depth_lidar1 > 0.) 
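        # gt_mask marks pixels with a valid LiDAR return (depth > 0); below, the
        # depth map is clipped to [min_distance, max_distance] and divided by
        # depth_normalizer before being returned.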
21 | 22 | depth_lidar1 = np.float32(np.clip(depth_lidar1, min_distance, max_distance) / depth_normalizer) 23 | 24 | return depth_lidar1, gt_mask 25 | 26 | img = np.load(os.path.join(base_dir, 'depth_compressed', img_id + '.npz'))['arr_0'] 27 | 28 | if raw_values_only: 29 | return img, None 30 | 31 | img = np.clip(img, min_distance, max_distance) / max_distance 32 | 33 | if scale_images: 34 | img = cv2.resize(img, dsize=(scaled_img_width, scaled_img_height), interpolation=cv2.INTER_AREA) 35 | 36 | return np.expand_dims(np.expand_dims(img, axis=2), axis=0), None 37 | 38 | def read_gated_image(base_dir, img_id, num_bits=10, data_type='real', 39 | scale_images=False, scaled_img_width=None,crop_size_h= 104,crop_size_w = 128, scaled_img_height=None): 40 | gated_imgs = [] 41 | normalizer = 2 ** num_bits - 1. 42 | 43 | for gate_id in range(3): 44 | gate_dir = os.path.join(base_dir,'gated%d_10bit' % gate_id) 45 | path = os.path.join(gate_dir, img_id + '.png') 46 | assert os.path.exists(path),"No such file : %s"%path 47 | img = cv2.imread(os.path.join(gate_dir, img_id + '.png'), cv2.IMREAD_UNCHANGED) 48 | if data_type == 'real': 49 | img = img[crop_size_h:(img.shape[0] - crop_size_h), 50 | crop_size_w:(img.shape[1] - crop_size_w)] 51 | img = img.copy() 52 | img[img > 2 ** 10 - 1] = normalizer 53 | 54 | img = np.float32(img / normalizer) 55 | gated_imgs.append(np.expand_dims(img, axis=2)) 56 | img = np.concatenate(gated_imgs, axis=2) 57 | if scale_images: 58 | img = cv2.resize(img, dsize=(scaled_img_width, scaled_img_height), interpolation=cv2.INTER_AREA) 59 | return img 60 | 61 | class Gated2DepthDataset(gated_dataset.GatedDataset): 62 | 63 | def __init__(self, gated_dir, filenames, 64 | height, width, num_scales, depth_normalizer = 150.0, 65 | frame_idxs = [0], 66 | is_train=False): 67 | super().__init__(gated_dir, filenames, height, width, frame_idxs, 68 | num_scales, is_train=is_train) 69 | assert frame_idxs == [0], "Gated2depth dataset has no temporal frames" 70 | self.depth_normalizer = depth_normalizer 71 | self.load_depth = self.check_depth() 72 | self.depth_loader = read_gt_image 73 | self.loader = read_gated_image 74 | 75 | def __getitem__(self, index): 76 | 77 | inputs = {} 78 | do_flip = self.is_train and random.random() > 0.5 79 | 80 | # line = self.filenames[index].split() 81 | line = self.filenames[index].split(',') 82 | frame_index = line[0] 83 | 84 | # there is no temporal data for gated2depth dataset 85 | inputs[("gated", 0, -1)] = self.get_gated(frame_index,do_flip) 86 | inputs["depth_gt"] = self.get_depth(frame_index,do_flip) 87 | 88 | # adjusting intrinsics to match each scale in the pyramid 89 | for scale in range(self.num_scales): 90 | K = self.K.copy() 91 | 92 | K[0, :] *= self.width // (2 ** scale) 93 | K[1, :] *= self.height // (2 ** scale) 94 | 95 | inv_K = np.linalg.pinv(K) 96 | 97 | inputs[("K", scale)] = torch.from_numpy(K) 98 | inputs[("inv_K", scale)] = torch.from_numpy(inv_K) 99 | 100 | color_aug = (lambda x: x) 101 | self.preprocess(inputs, color_aug) 102 | 103 | for i in self.frame_idxs: 104 | del inputs[("gated", i, -1)] 105 | del inputs[("gated_aug", i, -1)] 106 | 107 | 108 | return inputs 109 | 110 | def preprocess(self, inputs, color_aug): 111 | 112 | for k in list(inputs): 113 | frame = inputs[k] 114 | if "gated" in k : 115 | n, im, i = k 116 | for i in range(self.num_scales): 117 | # inputs[(n, im, i)] = self.resize[i](inputs[(n, im, i - 1)]) 118 | s = 2 ** i 119 | scaled_img_width, scaled_img_height = self.width // s, self.height // s 120 | inputs[(n, im, i)] 
= cv2.resize(inputs[(n, im, i - 1)], dsize=(scaled_img_width, scaled_img_height), interpolation=cv2.INTER_AREA) 121 | 122 | for k in list(inputs): 123 | f = inputs[k] 124 | if "gated" in k: 125 | n, im, i = k 126 | inputs[(n, im, i)] = self.to_tensor(f) 127 | inputs[(n + "_aug", im, i)] = self.to_tensor(color_aug(f)) 128 | 129 | def get_depth(self,frame_index,do_flip): 130 | depth_gt,_ = self.depth_loader(self.root_dir, frame_index, 'real', depth_normalizer=self.depth_normalizer) 131 | if do_flip: 132 | depth_gt = np.fliplr(depth_gt).copy() 133 | return depth_gt 134 | 135 | def get_gated(self, frame_index, do_flip): 136 | gated = self.loader(self.root_dir,frame_index) 137 | 138 | if do_flip: 139 | gated = np.fliplr(gated).copy() 140 | 141 | return gated 142 | 143 | def check_depth(self): 144 | return True # Gated2Depth dataset has lidar data -------------------------------------------------------------------------------- /src/dataset/gated_dataset.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import os 4 | import random 5 | import numpy as np 6 | import copy 7 | from PIL import Image # using pillow-simd for increased speed 8 | 9 | import torch 10 | import torch.utils.data as data 11 | from torchvision import transforms 12 | import cv2 13 | 14 | import json 15 | 16 | 17 | def passive_loader(base_dir, img_id, crop_size_h, crop_size_w, cent_fnum, 18 | img_ext='png', 19 | num_bits=10, data_type='real', 20 | scale_images=False, 21 | scaled_img_width=None, scaled_img_height=None): 22 | normalizer = 2 ** num_bits - 1. 23 | 24 | if cent_fnum == 0: 25 | dir = os.path.join(base_dir, 'gated_passive_10bit') 26 | else: 27 | dir = os.path.join(base_dir, 'gated_passive_10bit_history_%d' % (cent_fnum)) 28 | path = os.path.join(dir, img_id + f'.{img_ext}') 29 | assert os.path.exists(path), "No such file : %s" % path 30 | img = cv2.imread(os.path.join(dir, img_id + f'.{img_ext}'), cv2.IMREAD_UNCHANGED) 31 | if data_type == 'real': 32 | img = img[crop_size_h:(img.shape[0] - crop_size_h), 33 | crop_size_w:(img.shape[1] - crop_size_w) 34 | ] 35 | 36 | img = img.copy() 37 | img[img > 2 ** 10 - 1] = normalizer 38 | 39 | img = np.float32(img / normalizer) 40 | if scale_images: 41 | img = cv2.resize(img, dsize=(scaled_img_width, scaled_img_height), interpolation=cv2.INTER_AREA) 42 | return img 43 | 44 | 45 | def gated_loader(base_dir, img_id, crop_size_h, crop_size_w, history=None, 46 | img_ext='png', 47 | num_bits=10, data_type='real', 48 | scale_images=False, 49 | scaled_img_width=None, scaled_img_height=None): 50 | gated_imgs = [] 51 | normalizer = 2 ** num_bits - 1. 
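    # The three gated slices (gated0/1/2, optionally from a temporal "history"
    # frame) are read below, center-cropped via crop_size_h/crop_size_w, clipped
    # to the 10-bit range, normalized to [0, 1], and stacked into an HxWx3 image.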
52 | 53 | 54 | 55 | for gate_id in range(3): 56 | if history is None: 57 | gate_dir = os.path.join(base_dir,'gated%d_10bit' % gate_id) 58 | else: 59 | gate_dir = os.path.join(base_dir,'gated%d_10bit_history_%d'%(gate_id,history)) 60 | path = os.path.join(gate_dir, img_id + f'.{img_ext}') 61 | assert os.path.exists(path),"No such file : %s"%path 62 | img = cv2.imread(os.path.join(gate_dir, img_id + f'.{img_ext}'), cv2.IMREAD_UNCHANGED) 63 | if data_type == 'real': 64 | img = img[ crop_size_h:(img.shape[0] - crop_size_h), 65 | crop_size_w:(img.shape[1] - crop_size_w) 66 | ] 67 | 68 | img = img.copy() 69 | img[img > 2 ** 10 - 1] = normalizer 70 | 71 | img = np.float32(img / normalizer) 72 | gated_imgs.append(np.expand_dims(img, axis=2)) 73 | img = np.concatenate(gated_imgs, axis=2) 74 | if scale_images: 75 | img = cv2.resize(img, dsize=(scaled_img_width, scaled_img_height), interpolation=cv2.INTER_AREA) 76 | return img 77 | 78 | class GatedDataset(data.Dataset): 79 | 80 | def __init__(self, 81 | gated_dir, 82 | filenames, 83 | height, 84 | width, 85 | frame_idxs, 86 | num_scales, 87 | is_train=False, 88 | img_ext='png', 89 | load_passive = False): 90 | super(GatedDataset, self).__init__() 91 | 92 | self.root_dir = gated_dir 93 | self.filenames = filenames 94 | self.height = height 95 | self.width = width 96 | self.num_scales = num_scales 97 | self.img_ext = img_ext 98 | 99 | self.full_res_shape = (1280, 720) 100 | self.crop_size_h, self.crop_size_w = int((self.full_res_shape[1]-self.height)/2), int((self.full_res_shape[0]-self.width)/2), 101 | 102 | self.frame_idxs = frame_idxs 103 | 104 | self.is_train = is_train 105 | 106 | self.loader = gated_loader 107 | self.interp = Image.ANTIALIAS 108 | self.load_passive = load_passive 109 | if self.load_passive: 110 | self.passive_loader = passive_loader 111 | 112 | 113 | self.to_tensor = transforms.ToTensor() 114 | 115 | self.resize = {} 116 | 117 | for i in range(self.num_scales): 118 | s = 2 ** i 119 | self.resize[i] = transforms.Resize((self.height // s, self.width // s), 120 | interpolation=self.interp) 121 | 122 | self.K = np.array([[1.81,0.0, 0.52, 0.0 ], 123 | [0.0, 3.23, 0.36, 0.0 ], 124 | [0.0, 0.0, 1.0, 0.0 ], 125 | [0.0, 0.0, 0.0, 1.0 ]], dtype=np.float32) 126 | 127 | 128 | self.load_depth = self.check_depth() 129 | 130 | def __getitem__(self, index): 131 | 132 | inputs = {} 133 | do_flip = self.is_train and random.random() > 0.5 134 | 135 | # line = self.filenames[index].split() 136 | line = self.filenames[index].split(',') 137 | frame_index = line[0] 138 | cent_fnum = int(line[1]) 139 | 140 | inputs['frame_info'] = "{}-{}".format(frame_index,cent_fnum) 141 | 142 | for i in self.frame_idxs: 143 | history = i + cent_fnum # Get temporal next or previous frame depending on frame_indx 144 | history = None if history == 0 else history 145 | inputs[("gated", i, -1)] = self.get_gated(frame_index,history,do_flip) 146 | 147 | 148 | # adjusting intrinsics to match each scale in the pyramid 149 | for scale in range(self.num_scales): 150 | K = self.K.copy() 151 | 152 | K[0, :] *= self.width // (2 ** scale) 153 | K[1, :] *= self.height // (2 ** scale) 154 | 155 | inv_K = np.linalg.pinv(K) 156 | 157 | inputs[("K", scale)] = torch.from_numpy(K) 158 | inputs[("inv_K", scale)] = torch.from_numpy(inv_K) 159 | 160 | gated_aug = (lambda x: x) 161 | self.preprocess(inputs, gated_aug) 162 | 163 | for i in self.frame_idxs: 164 | del inputs[("gated", i, -1)] 165 | del inputs[("gated_aug", i, -1)] 166 | 167 | if self.load_depth: 168 | depth_gt = 
self.get_depth(frame_index, cent_fnum, do_flip) 169 | inputs["depth_gt"] = torch.from_numpy(depth_gt) 170 | 171 | if self.load_passive: 172 | passive = self.get_passive(frame_index, cent_fnum, do_flip) 173 | inputs["passive"] = torch.from_numpy(passive) 174 | 175 | 176 | 177 | return inputs 178 | 179 | def preprocess(self, inputs, gated_aug): 180 | """ 181 | Resize gated images to the required scales and augment if required 182 | 183 | We create the gated_aug object in advance and apply the same augmentation to all 184 | images in this item. This ensures that all images input to the pose network receive the 185 | same augmentation. 186 | """ 187 | for k in list(inputs): 188 | frame = inputs[k] 189 | if "gated" in k: 190 | n, im, i = k 191 | for i in range(self.num_scales): 192 | # inputs[(n, im, i)] = self.resize[i](inputs[(n, im, i - 1)]) 193 | s = 2 ** i 194 | scaled_img_width, scaled_img_height = self.width // s, self.height // s 195 | inputs[(n, im, i)] = cv2.resize(inputs[(n, im, i - 1)], dsize=(scaled_img_width, scaled_img_height), interpolation=cv2.INTER_AREA) 196 | 197 | for k in list(inputs): 198 | f = inputs[k] 199 | if "gated" in k: 200 | n, im, i = k 201 | inputs[(n, im, i)] = self.to_tensor(f) 202 | inputs[(n + "_aug", im, i)] = self.to_tensor(gated_aug(f)) 203 | 204 | def __len__(self): 205 | return len(self.filenames) 206 | 207 | def get_gated(self, frame_index, history, do_flip): 208 | gated = self.loader(self.root_dir, frame_index, self.crop_size_h, self.crop_size_w, history=history, img_ext=self.img_ext) 209 | if do_flip: 210 | gated = np.fliplr(gated).copy() 211 | return gated 212 | 213 | def get_passive(self, frame_index, cent_fnum, do_flip): 214 | passive = self.passive_loader(self.root_dir, frame_index, self.crop_size_h, self.crop_size_w, cent_fnum=cent_fnum, img_ext=self.img_ext) 215 | if do_flip: 216 | passive = np.fliplr(passive).copy() 217 | passive = np.expand_dims(passive, 0).astype(np.float32) 218 | return passive 219 | 220 | def get_depth(self, frame_index, cent_fnum, do_flip): 221 | if cent_fnum == 0: 222 | lidar_filename = os.path.join(self.root_dir, 'lidar_hdl64_strongest_filtered_gated', frame_index + '.npz') 223 | depth_gt = np.load(lidar_filename)['arr_0'] 224 | depth_gt = depth_gt[self.crop_size_h:self.full_res_shape[1] - self.crop_size_h, self.crop_size_w:self.full_res_shape[0] - self.crop_size_w] 225 | else: 226 | depth_gt = np.zeros((self.height, self.width)) 227 | 228 | if do_flip: 229 | depth_gt = np.fliplr(depth_gt).copy() 230 | 231 | depth_gt = np.expand_dims(depth_gt, 0).astype(np.float32) 232 | return depth_gt 233 | 234 | 235 | 236 | def check_depth(self): 237 | sample = self.filenames[0].split(',')[0] 238 | lidar_filename = os.path.join(self.root_dir, 'lidar_hdl64_strongest_filtered_gated', '{}.npz'.format(sample)) 239 | return os.path.isfile(lidar_filename) 240 | 241 | 242 | 243 | 244 | 245 | -------------------------------------------------------------------------------- /src/eval.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import os 4 | import cv2 5 | import numpy as np 6 | import matplotlib.cm as cm 7 | 8 | import torch 9 | from torch.utils.data import DataLoader 10 | 11 | from layers import disp_to_depth 12 | # from utils import readlines 13 | # from options import MonodepthOptions 14 | import networks 15 | import argparse 16 | 17 | from torchvision.transforms import ToTensor 18 | 19 | gated_transform = ToTensor() 20 | 
from tqdm.contrib import tzip 21 | 22 | import matplotlib.pyplot as plt 23 | 24 | import visualize2D 25 | import math 26 | import PIL.Image as pil 27 | 28 | 29 | cv2.setNumThreads(0) # This speeds up evaluation 5x on our unix systems (OpenCV 3.3.1) 30 | 31 | 32 | def read_sample_files(train_samples_files): 33 | samples = [] 34 | with open(train_samples_files, 'r') as f: 35 | samples += f.read().splitlines() 36 | samples = [sample.replace(',', '_') for sample in samples] 37 | return samples 38 | 39 | 40 | def threshold(y1, y2, thr=1.25): 41 | max_ratio = np.maximum(y1 / y2, y2 / y1) 42 | return np.mean(max_ratio < thr, dtype=np.float64) * 100. 43 | 44 | 45 | def rmse(y1, y2): 46 | diff = y1 - y2 47 | return math.sqrt(np.mean(diff * diff, dtype=np.float64)) 48 | 49 | 50 | def ard(y1, y2): 51 | return np.mean(np.abs(y1 - y2) / y2, dtype=np.float64) 52 | 53 | 54 | def mae(y1, y2): 55 | return np.mean(np.abs(y1 - y2), dtype=np.float64) 56 | 57 | 58 | def compute_errors(groundtruth, output, min_distance=3., max_distance=150.): 59 | output = output[groundtruth > 0] 60 | groundtruth = groundtruth[groundtruth > 0] 61 | output = np.clip(output, min_distance, max_distance) 62 | groundtruth = np.clip(groundtruth, min_distance, max_distance) 63 | 64 | return rmse(output, groundtruth), \ 65 | mae(output, groundtruth), ard(output, groundtruth), \ 66 | threshold(output, groundtruth, thr=1.25), \ 67 | threshold(output, groundtruth, thr=1.25 ** 2), threshold(output, groundtruth, thr=1.25 ** 3) 68 | 69 | 70 | def calc_bins(clip_min, clip_max, nb_bins): 71 | bins = np.linspace(clip_min, clip_max, num=nb_bins + 1) 72 | mean_bins = np.array([0.5 * (bins[i + 1] + bins[i]) for i in range(0, nb_bins)]) 73 | return bins, mean_bins 74 | 75 | 76 | def read_img(img_path, 77 | num_bits=10, 78 | crop_height=512, crop_width=1024, dataset='g2d'): 79 | gated_imgs = [] 80 | normalizer = 2 ** num_bits - 1. 
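    # img_path contains a "{}" placeholder that is filled with the gate index
    # (0-2) below; each slice is center-cropped to crop_height x crop_width,
    # clipped to the 10-bit range, and normalized to [0, 1] before stacking.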
81 | 82 | for gate_id in range(3): 83 | path = img_path.format(gate_id) 84 | assert os.path.exists(path), "No such file : %s" % path 85 | img = cv2.imread(path, cv2.IMREAD_UNCHANGED) 86 | img = img[((img.shape[0] - crop_height) // 2):((img.shape[0] + crop_height) // 2), 87 | ((img.shape[1] - crop_width) // 2):((img.shape[1] + crop_width) // 2)] 88 | img = img.copy() 89 | img[img > 2 ** 10 - 1] = normalizer 90 | img = np.float32(img / normalizer) 91 | gated_imgs.append(np.expand_dims(img, axis=2)) 92 | img = np.concatenate(gated_imgs, axis=2) 93 | return img 94 | 95 | 96 | def evaluate(opt): 97 | """Evaluates a pretrained model using a specified test set 98 | """ 99 | MIN_DEPTH = 3.0 100 | MAX_DEPTH = 80.0 101 | 102 | # Load dataset items 103 | dataset_dir = opt.data_dir 104 | eval_files_name = os.path.basename(opt.eval_files_path).replace('.txt', '') 105 | 106 | val_ids = sorted(read_sample_files(opt.eval_files_path)) 107 | if opt.dataset == 'g2d': 108 | lidar_paths = [os.path.join(dataset_dir, "depth_hdl64_gated_compressed", "{}.npz".format(_id)) for _id in 109 | val_ids] 110 | gated_paths = [os.path.join(dataset_dir, "gated{}_10bit", "{}.{}".format(_id,opt.img_ext)) for _id in val_ids] 111 | elif opt.dataset == 'stf': 112 | lidar_paths = [os.path.join(dataset_dir, "lidar_hdl64_strongest_filtered_gated", "{}.npz".format(_id)) for _id 113 | in val_ids] 114 | gated_paths = [os.path.join(dataset_dir, "gated{}_10bit", "{}.{}".format(_id,opt.img_ext)) for _id in val_ids] 115 | 116 | # Load weights 117 | assert os.path.isdir(opt.load_weights_folder), "Cannot find a folder at {}".format(opt.load_weights_folder) 118 | print("-> Loading weights from {}".format(opt.load_weights_folder)) 119 | depth_path = os.path.join(opt.load_weights_folder, "depth.pth") 120 | depth_dict = torch.load(depth_path) 121 | 122 | depth_net = networks.PackNetSlim01(dropout=0.5, version="1A") 123 | model_dict = depth_net.state_dict() 124 | depth_net.load_state_dict({k: v for k, v in depth_dict.items() if k in model_dict}) 125 | depth_net.cuda() 126 | depth_net.eval() 127 | 128 | print("-> Computing predictions with size {}x{}".format(opt.height, opt.width)) 129 | if opt.g2d_crop: 130 | g2d_width = 980 131 | g2d_height = 420 132 | assert opt.width >= g2d_width and opt.height >= g2d_height, 'Gated2Depth Crop can only be applied for width >= {} and height >= {}'.format( 133 | g2d_height, g2d_height) 134 | print("-> Computing metrics for Gated2Depth crop 420x980".format(opt.height, opt.width)) 135 | 136 | if not os.path.exists(os.path.join(opt.results_dir)): 137 | os.makedirs(os.path.join(opt.results_dir)) 138 | 139 | errors = [] 140 | 141 | if opt.binned_metrics: 142 | average_points = 15000 143 | results_counter = 0 144 | results = np.zeros((average_points * len(lidar_paths), 2), dtype=np.float32) 145 | 146 | with torch.no_grad(): 147 | for lidar_path, gated_path in tzip(lidar_paths, gated_paths): 148 | 149 | img_id = os.path.basename(gated_path).split('.')[0] 150 | 151 | gated_img = read_img(gated_path, crop_height=opt.height, crop_width=opt.width, dataset=opt.dataset) 152 | 153 | lidar = np.load(lidar_path)['arr_0'] 154 | gt_depth = lidar[((lidar.shape[0] - opt.height) // 2):((lidar.shape[0] + opt.height) // 2), 155 | ((lidar.shape[1] - opt.width) // 2):((lidar.shape[1] + opt.width) // 2)] 156 | 157 | input_patch = gated_transform(gated_img).unsqueeze(0).cuda() 158 | output = depth_net(input_patch) 159 | 160 | _, pred_depth = disp_to_depth(output[("disp", 0)], opt.min_depth, opt.max_depth) 161 | pred_depth = pred_depth[0, 
0].cpu().numpy() * opt.depth_normalizer 162 | 163 | ### Generate graphics for results ### 164 | if opt.gen_figs: 165 | # Making directory for storing results 166 | result_dirs = ['gated2gated_imgs', 'all', 'gated2gated'] 167 | for result_folder in result_dirs: 168 | if not os.path.exists(os.path.join(opt.results_dir, result_folder)): 169 | os.makedirs(os.path.join(opt.results_dir, result_folder)) 170 | input_patch = input_patch.permute(0, 2, 3, 1).cpu().numpy() 171 | 172 | # Generate colorized pointcloud from Lidar 173 | depth_lidar1_color = visualize2D.colorize_pointcloud(gt_depth, min_distance=MIN_DEPTH, 174 | max_distance=MAX_DEPTH, radius=3, cmap=cm.plasma) 175 | 176 | # Generate colorized depth map 177 | depth_map_color = visualize2D.colorize_depth(pred_depth, min_distance=MIN_DEPTH, max_distance=MAX_DEPTH, 178 | cmap=cm.plasma) 179 | 180 | in_out_shape = (int(depth_map_color.shape[0] + depth_map_color.shape[0] / 3. + gt_depth.shape[0]), 181 | depth_map_color.shape[1], 3) 182 | 183 | input_output = np.zeros(shape=in_out_shape) 184 | scaled_input = cv2.resize(input_patch[0, :, :, :], 185 | dsize=(int(input_patch.shape[2] / 3), int(input_patch.shape[1] / 3)), 186 | interpolation=cv2.INTER_AREA) * 255 187 | 188 | for i in range(3): 189 | input_output[:scaled_input.shape[0], :scaled_input.shape[1], i] = scaled_input[:, :, 0] 190 | input_output[:scaled_input.shape[0], scaled_input.shape[1]: 2 * scaled_input.shape[1], 191 | i] = scaled_input[:, :, 1] 192 | input_output[:scaled_input.shape[0], scaled_input.shape[1] * 2:scaled_input.shape[1] * 3, 193 | i] = scaled_input[:, :, 2] 194 | 195 | input_output[scaled_input.shape[0]: scaled_input.shape[0] + depth_map_color.shape[0], :, 196 | :] = depth_map_color 197 | input_output[scaled_input.shape[0] + depth_map_color.shape[0]:, :, :] = depth_lidar1_color 198 | depth_map_color = pil.fromarray(depth_map_color.astype(np.uint8)) 199 | input_output = pil.fromarray(input_output.astype(np.uint8)) 200 | depth_map_color.save(os.path.join(opt.results_dir, 'gated2gated_imgs', '{}.jpg'.format(img_id))) 201 | input_output.save(os.path.join(opt.results_dir, 'all', '{}.jpg'.format(img_id))) 202 | 203 | np.savez_compressed(os.path.join(opt.results_dir, 'gated2gated', '{}'.format(img_id)), pred_depth) 204 | 205 | # check whether groundtruth depthmap contains any lidar point 206 | 207 | 208 | if opt.g2d_crop: 209 | gt_depth = gt_depth[((gt_depth.shape[0] - g2d_height) // 2):((gt_depth.shape[0] + g2d_height) // 2), 210 | ((gt_depth.shape[1] - g2d_width) // 2):((gt_depth.shape[1] + g2d_width) // 2)] 211 | pred_depth = pred_depth[ 212 | ((pred_depth.shape[0] - g2d_height) // 2):((pred_depth.shape[0] + g2d_height) // 2), 213 | ((pred_depth.shape[1] - g2d_width) // 2):((pred_depth.shape[1] + g2d_width) // 2)] 214 | 215 | if np.sum(gt_depth > 0.0) > 0.: 216 | 217 | error = compute_errors(gt_depth, pred_depth, min_distance=MIN_DEPTH, max_distance=MAX_DEPTH) 218 | errors.append(error) 219 | 220 | if opt.binned_metrics: 221 | pred_depth = pred_depth[gt_depth > 0] 222 | gt_depth = gt_depth[gt_depth > 0] 223 | 224 | if results_counter + len(gt_depth) > results.shape[0]: 225 | print('Overflow') 226 | break 227 | 228 | results[results_counter:results_counter + len(gt_depth), 0] = gt_depth 229 | results[results_counter:results_counter + len(gt_depth), 1] = pred_depth 230 | 231 | results_counter += len(gt_depth) 232 | 233 | # Print and save metrics 234 | print('### Metrics ###') 235 | res = np.array(errors).mean(0) 236 | metric_str = ['rmse', 'mae', 'ard', 'delta1', 'delta2', 
'delta3'] 237 | res_str = '' 238 | for i in range(res.shape[0]): 239 | res_str += '{}={:.2f} \n'.format(metric_str[i], res[i]) 240 | print(res_str) 241 | with open(os.path.join(opt.results_dir, '{}_results.txt'.format(eval_files_name)), 'w') as f: 242 | f.write(res_str) 243 | with open(os.path.join(opt.results_dir, '{}_results.tex'.format(eval_files_name)), 'w') as f: 244 | f.write(' & '.join(metric_str) + '\n') 245 | f.write(' & '.join(['{:.2f}'.format(r) for r in res])) 246 | 247 | # Print and save binned metrics 248 | if opt.binned_metrics: 249 | print('### Binned Metrics ###') 250 | results = results[results[:, 0] != 0] 251 | 252 | bins = np.linspace(MIN_DEPTH, MAX_DEPTH, num=12) 253 | inds = np.digitize(results[:, 0], bins) 254 | 255 | binned_results = np.zeros((len(bins), 6 + 1)) 256 | for i, bin in enumerate(bins): 257 | metrics = compute_errors(results[inds == i + 1, 0], results[inds == i + 1, 1], min_distance=MIN_DEPTH, 258 | max_distance=MAX_DEPTH) 259 | binned_results[i, 0] = bin 260 | binned_results[i, 1:] = metrics 261 | 262 | with open(os.path.join(opt.results_dir, '{}_binned_distance_results.txt'.format(eval_files_name)), 263 | 'w') as f: 264 | np.savetxt(f, binned_results, delimiter=' ') 265 | 266 | mean_error_binned = np.zeros((6, 1)) 267 | for i in range(0, 6): 268 | mean_error_binned[i] = np.mean(binned_results[~np.isnan(binned_results[:, i + 1]), i + 1]) 269 | res_str = '' 270 | for i in range(res.shape[0]): 271 | res_str += '{}={:.2f} \n'.format(metric_str[i], float(mean_error_binned[i])) 272 | print(res_str) 273 | with open(os.path.join(opt.results_dir, '{}_binned_results.txt'.format(eval_files_name)), 'w') as f: 274 | f.write(res_str) 275 | with open(os.path.join(opt.results_dir, '{}_binned_results.tex'.format(eval_files_name)), 'w') as f: 276 | f.write(' & '.join(metric_str) + '\n') 277 | np.savetxt(f, np.transpose(mean_error_binned), delimiter=' & ', fmt='%.2f') 278 | 279 | 280 | if __name__ == "__main__": 281 | options = argparse.ArgumentParser() 282 | options.add_argument("--data_dir", required=True, 283 | help="Path to the dataset directory") 284 | options.add_argument("--min_depth", default=0.1, 285 | type=float, 286 | help="Minimum depth value to evaluate") 287 | options.add_argument("--max_depth", default=100.0, 288 | type=float, 289 | help="Max depth value to evaluate") 290 | options.add_argument("--height", default=512, 291 | type=int, 292 | help="height of crop for gated image") 293 | options.add_argument("--width", default=1024, 294 | type=int, 295 | help="width of crop for gated image") 296 | options.add_argument("--img_ext", default='png', 297 | help="image extension (without .)") 298 | options.add_argument("--depth_normalizer", default=70.0, 299 | type=float, 300 | help="depth normalizer to multiply predicted depth with") 301 | options.add_argument("--load_weights_folder", required=True, 302 | help="Path where weights are stored") 303 | options.add_argument("--results_dir", required=True, 304 | help="Path where results are stored") 305 | options.add_argument("--gen_figs", action='store_true', 306 | help="Whether to generate figures or not") 307 | options.add_argument("--eval_files_path", 308 | help="Path to file with validation/evaluation file names.", 309 | required=True) 310 | options.add_argument("--dataset", default='stf', 311 | choices=['stf', 'g2d'], 312 | help="Which dataset is used for evaluation.") 313 | options.add_argument('--g2d_crop', help='Use same crop as used for Evaluation in Gated2Depth Paper.', 314 | action='store_true', 
required=False) 315 | options.add_argument('--binned_metrics', help='Calculate additional binned metrics', 316 | action='store_true', required=False) 317 | 318 | options = options.parse_args() 319 | evaluate(options) -------------------------------------------------------------------------------- /src/inference.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | import visualize2D 3 | import networks 4 | 5 | import argparse 6 | import os 7 | import cv2 8 | import numpy as np 9 | import matplotlib.pyplot as plt 10 | from matplotlib.cm import get_cmap 11 | import PIL.Image as pil 12 | import matplotlib.cm as cm 13 | import numpy as np 14 | from layers import disp_to_depth 15 | 16 | 17 | 18 | import torch 19 | from torchvision import transforms 20 | to_tensor = transforms.ToTensor() 21 | 22 | cmap_dict = { 23 | 'jet': cm.jet, 24 | 'jet_r': cm.jet_r, 25 | 'plasma': cm.plasma, 26 | 'plasma_r': cm.plasma_r, 27 | 'magma': cm.magma, 28 | 'magma_r': cm.magma_r, 29 | 'inferno': cm.inferno, 30 | 'inferno_r': cm.inferno_r 31 | } 32 | 33 | def read_gated_image(base_dir, img_id, num_bits=10, data_type='real', 34 | scale_images=False, scaled_img_width=None, crop_size_h=104, crop_size_w=128, scaled_img_height=None): 35 | 36 | gated_imgs = [] 37 | normalizer = 2 ** num_bits - 1. 38 | 39 | for gate_id in range(3): 40 | gate_dir = os.path.join(base_dir, 'gated%d_10bit' % gate_id) 41 | path = os.path.join(gate_dir, img_id + '.png') 42 | assert os.path.exists(path), "No such file : %s" % path 43 | img = cv2.imread(os.path.join( 44 | gate_dir, img_id + '.png'), cv2.IMREAD_UNCHANGED) 45 | if data_type == 'real': 46 | img = img[crop_size_h:(img.shape[0] - crop_size_h), 47 | crop_size_w:(img.shape[1] - crop_size_w)] 48 | img = img.copy() 49 | img[img > 2 ** 10 - 1] = normalizer 50 | 51 | img = np.float32(img / normalizer) 52 | gated_imgs.append(np.expand_dims(img, axis=2)) 53 | img = np.concatenate(gated_imgs, axis=2) 54 | if scale_images: 55 | img = cv2.resize(img, dsize=(scaled_img_width, 56 | scaled_img_height), interpolation=cv2.INTER_AREA) 57 | return img 58 | 59 | 60 | def load_weights(model, pretrained_weights_path): 61 | model_dict = model.state_dict() 62 | assert os.path.isfile(pretrained_weights_path), "{} not found in the location".format( 63 | os.path.basename(pretrained_weights_path)) 64 | pretrained_dict = torch.load(pretrained_weights_path) 65 | pretrained_dict = {k: v for k, 66 | v in pretrained_dict.items() if k in model_dict} 67 | model_dict.update(pretrained_dict) 68 | model.load_state_dict(model_dict) 69 | return model 70 | 71 | 72 | def save_depth_viz(depthmap, save_path, min_depth, max_depth, colormap): 73 | # Generate colorized depth map 74 | depth_map_color = visualize2D.colorize_depth( 75 | depthmap, min_distance=min_depth, max_distance=max_depth, cmap=colormap) 76 | depth_map_color = pil.fromarray(depth_map_color.astype(np.uint8)) 77 | depth_map_color.save(save_path) 78 | 79 | 80 | def inference(options): 81 | 82 | models = {} 83 | 84 | models["depth"] = networks.PackNetSlim01( 85 | dropout=0.5, version="{}{}".format(1, 'A')) 86 | models["depth"].to('cuda') 87 | 88 | models["encoder"] = networks.Encoder(num_convs=4) 89 | models["encoder"].to('cuda') 90 | 91 | models["albedo"] = networks.Decoder( 92 | name="albedo", scales=range(1), out_channels=1) 93 | models["albedo"].to('cuda') 94 | 95 | models["ambient"] = networks.Decoder( 96 | name="ambient", scales=range(1), out_channels=1) 97 | 
models["ambient"].to('cuda') 98 | 99 | # Load model weights 100 | models["depth"] = load_weights( 101 | models["depth"], os.path.join(options.weights_dir, "depth.pth")) 102 | models["encoder"] = load_weights( 103 | models["encoder"], os.path.join(options.weights_dir, "encoder.pth")) 104 | models["albedo"] = load_weights( 105 | models["albedo"], os.path.join(options.weights_dir, "albedo.pth")) 106 | models["ambient"] = load_weights( 107 | models["ambient"], os.path.join(options.weights_dir, "ambient.pth")) 108 | 109 | # Eval Mode 110 | for model in models.values(): 111 | model.eval() 112 | 113 | results_dirs = ["depth", "ambient", "albedo"] 114 | for _dir in results_dirs: 115 | os.makedirs(os.path.join(options.results_dir, _dir), exist_ok=True) 116 | 117 | imgs_names = [sample for sample in os.listdir(os.path.join(options.data_dir, "gated0_10bit")) if '.png' in sample] 118 | img_ids = list(map(lambda x: x.split('.')[0], imgs_names)) 119 | 120 | with torch.no_grad(): 121 | for img_id in img_ids: 122 | gated_img = to_tensor(read_gated_image( 123 | options.data_dir, img_id)).unsqueeze(0).to('cuda') 124 | 125 | # Getting depth 126 | disp = models['depth'](gated_img)[('disp', 0)] 127 | _, pred_depth = disp_to_depth( 128 | disp, options.min_depth, options.max_depth) 129 | pred_depth = pred_depth[0, 0].cpu( 130 | ).numpy() * options.depth_normalizer 131 | pred_depth = np.clip(pred_depth, 0.0, options.clip_depth) 132 | np.savez(os.path.join(options.results_dir, "depth", 133 | "{}.npz".format(img_id)), pred_depth) 134 | save_depth_viz(pred_depth,os.path.join(options.results_dir, "depth", 135 | "{}.png".format(img_id)), 0.0, options.clip_depth, 136 | cmap_dict["inferno_r"]) 137 | 138 | feats = models['encoder'](gated_img) 139 | 140 | # Getting ambient 141 | _ambient = models['ambient'](feats)[('ambient', 0)] 142 | ambient = _ambient[0, 0].cpu().numpy() 143 | ambient = np.clip(ambient, 0.0, 1.0) * 255. 144 | ambient = pil.fromarray(ambient.astype(np.uint8)) 145 | ambient.save(os.path.join(options.results_dir, "ambient", 146 | "{}.png".format(img_id))) 147 | 148 | # Getting albedo 149 | _albedo = models['albedo'](feats)[('albedo', 0)] 150 | albedo = _albedo[0, 0].cpu().numpy() 151 | albedo = np.clip(albedo, 0.0, 1.0) * 255. 
152 | albedo = pil.fromarray(albedo.astype(np.uint8)) 153 | albedo.save(os.path.join(options.results_dir, "albedo", 154 | "{}.png".format(img_id))) 155 | 156 | 157 | if __name__ == "__main__": 158 | options = argparse.ArgumentParser() 159 | options.add_argument("--data_dir", required=True, 160 | help="Path to the dataset directory") 161 | options.add_argument("--min_depth", default=0.1, 162 | type=float, 163 | help="Minimum depth value to evaluate") 164 | options.add_argument("--max_depth", default=100.0, 165 | type=float, 166 | help="Max depth value to evaluate") 167 | options.add_argument("--clip_depth", default=80.0, 168 | type=float, 169 | help="clip depth to this value") 170 | options.add_argument("--height", default=512, 171 | type=int, 172 | help="height of crop for gated image") 173 | options.add_argument("--width", default=1024, 174 | type=int, 175 | help="width of crop for gated image") 176 | options.add_argument("--depth_normalizer", default=70.0, 177 | type=float, 178 | help="depth normalizer to multiply predicted depth with") 179 | options.add_argument("--weights_dir", required=True, 180 | help="Path where weights are stored") 181 | options.add_argument("--results_dir", required=True, 182 | help="Path where results are stored") 183 | options.add_argument("--cmap", default='inferno_r', 184 | choices=['jet', 'jet_r', 'plasma', 'plasma_r', 185 | 'magma', 'magma_r', 'inferno', 'inferno_r'], 186 | help="Which colormap to use for generating results") 187 | 188 | options = options.parse_args() 189 | inference(options) 190 | -------------------------------------------------------------------------------- /src/layers.py: -------------------------------------------------------------------------------- 1 | # Copyright Niantic 2019. Patent Pending. All rights reserved. 2 | # 3 | # This software is licensed under the terms of the Monodepth2 licence 4 | # which allows for non-commercial use only, the full terms of which are made 5 | # available in the LICENSE file. 6 | 7 | from __future__ import absolute_import, division, print_function 8 | 9 | import os 10 | import numpy as np 11 | 12 | 13 | import torch 14 | import torch.nn as nn 15 | import torch.nn.functional as F 16 | 17 | 18 | def disp_to_depth(disp, min_depth, max_depth): 19 | """Convert network's sigmoid output into depth prediction 20 | The formula for this conversion is given in the 'additional considerations' 21 | section of the paper. 
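    Concretely (mirroring the code below): scaled_disp = 1/max_depth + (1/min_depth - 1/max_depth) * disp
    and depth = 1 / scaled_disp, so disp = 0 maps to max_depth and disp = 1 maps to min_depth.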
22 | """ 23 | min_disp = 1 / max_depth 24 | max_disp = 1 / min_depth 25 | scaled_disp = min_disp + (max_disp - min_disp) * disp 26 | depth = 1 / scaled_disp 27 | return scaled_disp, depth 28 | 29 | 30 | def transformation_from_parameters(axisangle, translation, invert=False): 31 | """Convert the network's (axisangle, translation) output into a 4x4 matrix 32 | """ 33 | R = rot_from_axisangle(axisangle) 34 | t = translation.clone() 35 | 36 | if invert: 37 | R = R.transpose(1, 2) 38 | t *= -1 39 | 40 | T = get_translation_matrix(t) 41 | 42 | if invert: 43 | M = torch.matmul(R, T) 44 | else: 45 | M = torch.matmul(T, R) 46 | 47 | return M 48 | 49 | 50 | def get_translation_matrix(translation_vector): 51 | """Convert a translation vector into a 4x4 transformation matrix 52 | """ 53 | T = torch.zeros(translation_vector.shape[0], 4, 4).to(device=translation_vector.device) 54 | 55 | t = translation_vector.contiguous().view(-1, 3, 1) 56 | 57 | T[:, 0, 0] = 1 58 | T[:, 1, 1] = 1 59 | T[:, 2, 2] = 1 60 | T[:, 3, 3] = 1 61 | T[:, :3, 3, None] = t 62 | 63 | return T 64 | 65 | 66 | def rot_from_axisangle(vec): 67 | """Convert an axisangle rotation into a 4x4 transformation matrix 68 | (adapted from https://github.com/Wallacoloo/printipi) 69 | Input 'vec' has to be Bx1x3 70 | """ 71 | angle = torch.norm(vec, 2, 2, True) 72 | axis = vec / (angle + 1e-7) 73 | 74 | ca = torch.cos(angle) 75 | sa = torch.sin(angle) 76 | C = 1 - ca 77 | 78 | x = axis[..., 0].unsqueeze(1) 79 | y = axis[..., 1].unsqueeze(1) 80 | z = axis[..., 2].unsqueeze(1) 81 | 82 | xs = x * sa 83 | ys = y * sa 84 | zs = z * sa 85 | xC = x * C 86 | yC = y * C 87 | zC = z * C 88 | xyC = x * yC 89 | yzC = y * zC 90 | zxC = z * xC 91 | 92 | rot = torch.zeros((vec.shape[0], 4, 4)).to(device=vec.device) 93 | 94 | rot[:, 0, 0] = torch.squeeze(x * xC + ca) 95 | rot[:, 0, 1] = torch.squeeze(xyC - zs) 96 | rot[:, 0, 2] = torch.squeeze(zxC + ys) 97 | rot[:, 1, 0] = torch.squeeze(xyC + zs) 98 | rot[:, 1, 1] = torch.squeeze(y * yC + ca) 99 | rot[:, 1, 2] = torch.squeeze(yzC - xs) 100 | rot[:, 2, 0] = torch.squeeze(zxC - ys) 101 | rot[:, 2, 1] = torch.squeeze(yzC + xs) 102 | rot[:, 2, 2] = torch.squeeze(z * zC + ca) 103 | rot[:, 3, 3] = 1 104 | 105 | return rot 106 | 107 | 108 | class ConvBlock(nn.Module): 109 | """Layer to perform a convolution followed by ELU 110 | """ 111 | def __init__(self, in_channels, out_channels): 112 | super(ConvBlock, self).__init__() 113 | 114 | self.conv = Conv3x3(in_channels, out_channels) 115 | self.nonlin = nn.ELU(inplace=True) 116 | 117 | def forward(self, x): 118 | out = self.conv(x) 119 | out = self.nonlin(out) 120 | return out 121 | 122 | 123 | class Conv3x3(nn.Module): 124 | """Layer to pad and convolve input 125 | """ 126 | def __init__(self, in_channels, out_channels, use_refl=True): 127 | super(Conv3x3, self).__init__() 128 | 129 | if use_refl: 130 | self.pad = nn.ReflectionPad2d(1) 131 | else: 132 | self.pad = nn.ZeroPad2d(1) 133 | self.conv = nn.Conv2d(int(in_channels), int(out_channels), 3) 134 | 135 | def forward(self, x): 136 | out = self.pad(x) 137 | out = self.conv(out) 138 | return out 139 | 140 | 141 | class BackprojectDepth(nn.Module): 142 | """Layer to transform a depth image into a point cloud 143 | """ 144 | def __init__(self, batch_size, height, width): 145 | super(BackprojectDepth, self).__init__() 146 | 147 | self.batch_size = batch_size 148 | self.height = height 149 | self.width = width 150 | 151 | meshgrid = np.meshgrid(range(self.width), range(self.height), indexing='xy') 152 | self.id_coords = 
np.stack(meshgrid, axis=0).astype(np.float32) 153 | self.id_coords = nn.Parameter(torch.from_numpy(self.id_coords), 154 | requires_grad=False) 155 | 156 | self.ones = nn.Parameter(torch.ones(self.batch_size, 1, self.height * self.width), 157 | requires_grad=False) 158 | 159 | self.pix_coords = torch.unsqueeze(torch.stack( 160 | [self.id_coords[0].view(-1), self.id_coords[1].view(-1)], 0), 0) 161 | self.pix_coords = self.pix_coords.repeat(batch_size, 1, 1) 162 | self.pix_coords = nn.Parameter(torch.cat([self.pix_coords, self.ones], 1), 163 | requires_grad=False) 164 | 165 | def forward(self, depth, inv_K): 166 | cam_points = torch.matmul(inv_K[:, :3, :3], self.pix_coords) 167 | cam_points = depth.view(self.batch_size, 1, -1) * cam_points 168 | cam_points = torch.cat([cam_points, self.ones], 1) 169 | 170 | return cam_points 171 | 172 | 173 | class Project3D(nn.Module): 174 | """Layer which projects 3D points into a camera with intrinsics K and at position T 175 | """ 176 | def __init__(self, batch_size, height, width, eps=1e-7): 177 | super(Project3D, self).__init__() 178 | 179 | self.batch_size = batch_size 180 | self.height = height 181 | self.width = width 182 | self.eps = eps 183 | 184 | def forward(self, points, K, T): 185 | P = torch.matmul(K, T)[:, :3, :] 186 | 187 | cam_points = torch.matmul(P, points) 188 | 189 | pix_coords = cam_points[:, :2, :] / (cam_points[:, 2, :].unsqueeze(1) + self.eps) 190 | pix_coords = pix_coords.view(self.batch_size, 2, self.height, self.width) 191 | pix_coords = pix_coords.permute(0, 2, 3, 1) 192 | pix_coords[..., 0] /= self.width - 1 193 | pix_coords[..., 1] /= self.height - 1 194 | pix_coords = (pix_coords - 0.5) * 2 195 | return pix_coords 196 | 197 | 198 | def upsample(x): 199 | """Upsample input tensor by a factor of 2 200 | """ 201 | return F.interpolate(x, scale_factor=2, mode="nearest") 202 | 203 | 204 | def get_smooth_loss(disp, img): 205 | """Computes the smoothness loss for a disparity image 206 | The color image is used for edge-aware smoothness 207 | """ 208 | grad_disp_x = torch.abs(disp[:, :, :, :-1] - disp[:, :, :, 1:]) 209 | grad_disp_y = torch.abs(disp[:, :, :-1, :] - disp[:, :, 1:, :]) 210 | 211 | grad_img_x = torch.mean(torch.abs(img[:, :, :, :-1] - img[:, :, :, 1:]), 1, keepdim=True) 212 | grad_img_y = torch.mean(torch.abs(img[:, :, :-1, :] - img[:, :, 1:, :]), 1, keepdim=True) 213 | 214 | grad_disp_x *= torch.exp(-grad_img_x) 215 | grad_disp_y *= torch.exp(-grad_img_y) 216 | 217 | return grad_disp_x.mean() + grad_disp_y.mean() 218 | 219 | 220 | class SSIM(nn.Module): 221 | """Layer to compute the SSIM loss between a pair of images 222 | """ 223 | def __init__(self): 224 | super(SSIM, self).__init__() 225 | self.mu_x_pool = nn.AvgPool2d(3, 1) 226 | self.mu_y_pool = nn.AvgPool2d(3, 1) 227 | self.sig_x_pool = nn.AvgPool2d(3, 1) 228 | self.sig_y_pool = nn.AvgPool2d(3, 1) 229 | self.sig_xy_pool = nn.AvgPool2d(3, 1) 230 | 231 | self.refl = nn.ReflectionPad2d(1) 232 | 233 | self.C1 = 0.01 ** 2 234 | self.C2 = 0.03 ** 2 235 | 236 | def forward(self, x, y): 237 | x = self.refl(x) 238 | y = self.refl(y) 239 | 240 | mu_x = self.mu_x_pool(x) 241 | mu_y = self.mu_y_pool(y) 242 | 243 | sigma_x = self.sig_x_pool(x ** 2) - mu_x ** 2 244 | sigma_y = self.sig_y_pool(y ** 2) - mu_y ** 2 245 | sigma_xy = self.sig_xy_pool(x * y) - mu_x * mu_y 246 | 247 | SSIM_n = (2 * mu_x * mu_y + self.C1) * (2 * sigma_xy + self.C2) 248 | SSIM_d = (mu_x ** 2 + mu_y ** 2 + self.C1) * (sigma_x + sigma_y + self.C2) 249 | 250 | return torch.clamp((1 - SSIM_n / SSIM_d) / 2, 
0, 1) 251 | 252 | 253 | def compute_depth_errors(gt, pred): 254 | """Computation of error metrics between predicted and ground truth depths 255 | """ 256 | thresh = torch.max((gt / pred), (pred / gt)) 257 | a1 = (thresh < 1.25 ).float().mean() 258 | a2 = (thresh < 1.25 ** 2).float().mean() 259 | a3 = (thresh < 1.25 ** 3).float().mean() 260 | 261 | rmse = (gt - pred) ** 2 262 | rmse = torch.sqrt(rmse.mean()) 263 | 264 | rmse_log = (torch.log(gt) - torch.log(pred)) ** 2 265 | rmse_log = torch.sqrt(rmse_log.mean()) 266 | 267 | abs_rel = torch.mean(torch.abs(gt - pred) / gt) 268 | abs_diff = torch.mean(torch.abs(gt - pred)) 269 | 270 | sq_rel = torch.mean((gt - pred) ** 2 / gt) 271 | 272 | return abs_diff, abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3 273 | 274 | class SimulateGated(nn.Module): 275 | 276 | def __init__(self, cheb_path, dark_levels, depth_normalizer = 150.0, num_bits = 10, min_depth = 0.1, max_depth = 100.0) -> None: 277 | super(SimulateGated,self).__init__() 278 | 279 | self.cheb_path = cheb_path 280 | self.depth_normalizer = depth_normalizer 281 | self.intensity_normalizer = 2.**num_bits -1. 282 | self.min_depth = min_depth 283 | self.max_depth = max_depth 284 | self.c = nn.Parameter(torch.Tensor(np.loadtxt(os.path.join(cheb_path, 'cheb_coef_real_degree6.txt')).reshape(7,1,3,1,1)), 285 | requires_grad=False) 286 | self.dark_levels = nn.Parameter(torch.Tensor(dark_levels),requires_grad=False) 287 | 288 | 289 | def chebval(self, x): 290 | """ 291 | Evaluates a degree-6 Chebyshev polynomial of the first kind on 2D data. 292 | x : tensor of dimension B x 1 x H x W 293 | """ 294 | t0 = torch.ones_like(x) 295 | t1 = x 296 | t2 = 2 * x ** 2 - 1 297 | t3 = 4 * x ** 3 - 3 * x 298 | t4 = 8 * x ** 4 - 8 * x ** 2 + 1 299 | t5 = 16 * x ** 5 - 20 * x ** 3 + 5 * x 300 | t6 = 32 * x ** 6 - 48 * x ** 4 + 18 * x ** 2 - 1 301 | return self.c[0] * t0 + self.c[1] * t1 + self.c[2] * t2 + self.c[3] * t3 + self.c[4] * t4 + self.c[5] * t5 + self.c[6] * t6 302 | 303 | def forward(self, depth, albedo, ambient): 304 | 305 | # depth = torch.clamp(depth * self.depth_normalizer, self.min_depth, self.max_depth) 306 | 307 | albedo = torch.clamp(albedo, 0., 1.) 308 | sim_gated = albedo * self.chebval(depth) * 2 + self.dark_levels 309 | sim_gated = torch.clamp(sim_gated , 0., self.intensity_normalizer)/self.intensity_normalizer + ambient 310 | sim_gated = torch.clamp(sim_gated, 0., 1.)
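        # Formation model used for the cyclic reconstruction: each simulated slice is
        # (up to a fixed gain) albedo times its depth-dependent range-intensity profile,
        # evaluated above via the degree-6 Chebyshev fit, plus the per-slice dark level;
        # the result is clamped to the 10-bit range, normalized to [0, 1], offset by the
        # estimated ambient light, and clamped again before being compared against the
        # captured gated slices.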
311 | return sim_gated,depth,albedo -------------------------------------------------------------------------------- /src/networks/UNet.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import numpy as np 4 | 5 | import torch 6 | import torch.nn as nn 7 | from torch.nn.modules.batchnorm import BatchNorm2d 8 | import torchvision.models as models 9 | import torch.utils.model_zoo as model_zoo 10 | from collections import OrderedDict 11 | from layers import Conv3x3 12 | import torch.nn.functional as F 13 | import math 14 | 15 | 16 | class Conv1x1(nn.Module): 17 | """Layer to pad and convolve input 18 | """ 19 | def __init__(self, in_channels, out_channels): 20 | super(Conv1x1, self).__init__() 21 | self.conv = nn.Conv2d(int(in_channels), int(out_channels), 1) 22 | 23 | def forward(self, x): 24 | out = self.conv(x) 25 | return out 26 | 27 | class ConvBlock(nn.Module): 28 | def __init__(self,in_channels, out_channels): 29 | super(ConvBlock,self).__init__() 30 | self.conv1 = Conv3x3(in_channels,out_channels) 31 | self.conv2 = Conv3x3(out_channels,out_channels) 32 | self.nonlin = nn.LeakyReLU(negative_slope=0.2, inplace=True) 33 | self.batchnorm1 = nn.BatchNorm2d(out_channels) 34 | self.batchnorm2 = nn.BatchNorm2d(out_channels) 35 | 36 | def forward(self,x): 37 | out = self.conv1(x) 38 | out = self.nonlin(out) 39 | out = self.batchnorm1(out) 40 | 41 | out = self.conv2(out) 42 | out = self.nonlin(out) 43 | out = self.batchnorm2(out) 44 | 45 | return out 46 | 47 | class MultiChannelG2D(nn.Module): 48 | def __init__(self, num_convs = 4, scales = range(1), out_channels = 1, 49 | init_channels = 32, height = 512, width = 1024, 50 | use_depth = True): 51 | super(MultiChannelG2D,self).__init__() 52 | 53 | self.scales = scales 54 | self.height = height 55 | self.width = width 56 | 57 | self.encoder = Encoder(num_convs, init_channels) 58 | self.use_depth = use_depth # Whether to use UNet for depth output or not 59 | 60 | if self.use_depth: 61 | self.depth_decoder = Decoder(scales, num_convs, init_channels, out_channels, 'depth') 62 | 63 | self.albedo_decoder = Decoder(scales, num_convs, init_channels, out_channels, 'albedo') 64 | self.ambient_decoder = Decoder(scales, num_convs, init_channels, out_channels, 'ambient') 65 | 66 | 67 | def forward(self,x): 68 | output = {} 69 | 70 | enc_feats = self.encoder(x) 71 | if self.use_depth: 72 | output.update(self.depth_decoder(enc_feats)) 73 | output.update(self.albedo_decoder(enc_feats)) 74 | output.update(self.ambient_decoder(enc_feats)) 75 | 76 | # for scale in self.scales: 77 | # if self.use_depth: 78 | # output[('depth',scale)] = F.interpolate(output[('depth',scale)], [self.height, self.width], mode="bilinear", align_corners=False) 79 | 80 | # output[('albedo',scale)] = F.interpolate(output[('albedo',scale)], [self.height, self.width], mode="bilinear", align_corners=False) 81 | # output[('ambient',scale)] = F.interpolate(output[('ambient',scale)], [self.height, self.width], mode="bilinear", align_corners=False) 82 | return output 83 | 84 | 85 | class Encoder(nn.Module): 86 | 87 | def __init__(self, num_convs = 4, init_channels=32): 88 | """[UNet Encoder for gated images] 89 | 90 | Args: 91 | num_convs (int, optional): [number of up/down levels]. Defaults to 4. 92 | init_channels (int, optional): [initial number of encoding channels]. Defaults to 32. 
93 | """ 94 | super(Encoder, self).__init__() 95 | self.channels = [init_channels*2**(i) for i in range(0,num_convs+1)] 96 | self.channels = [3] + self.channels # number of channels in gated image appended in the beginning 97 | self.enc_blocks = nn.ModuleList([ConvBlock(self.channels[i], self.channels[i+1]) for i in range(len(self.channels)-1)]) 98 | self.maxpool = nn.MaxPool2d(2) 99 | 100 | def forward(self, x): 101 | 102 | skips = [] 103 | for i,enc_block in enumerate(self.enc_blocks): 104 | # print("input shape {} = {}".format(i,x.shape)) 105 | x = enc_block(x) 106 | # print("conv block {} = {}".format(i,x.shape)) 107 | skips.append(x) 108 | x = self.maxpool(x) 109 | # print("maxpool block {} = {}".format(i,x.shape)) 110 | 111 | return skips 112 | 113 | class Decoder(nn.Module): 114 | 115 | def __init__(self, name = "output", scales = range(1), num_convs = 4, init_channels=32, out_channels = 1): 116 | """[UNet Decoder for multi-headed output] 117 | 118 | Args: 119 | scales (list(int), optional): [scales to get output]. Defaults to [0]. 120 | num_convs (int, optional): [number of up/down levels]. Defaults to 4. 121 | init_channels (int, optional): [initial number of encoding channels]. Defaults to 32. 122 | out_channels (int, optional): [number of channels in the output]. Defaults to 1. 123 | name (str, optional): [name of the output]. Defaults to "output". 124 | """ 125 | super(Decoder,self).__init__() 126 | self.channels = [init_channels*2**(i) for i in range(0,num_convs+1)] # [32,64,128,256,512] 127 | self.channels = self.channels[::-1] # Reverse the list to up sample in opposite way # [512,256,128,64,32] 128 | self.scales = scales 129 | self.num_convs = num_convs 130 | self.name = name 131 | self.up_convs = nn.ModuleList([nn.Sequential(nn.ConvTranspose2d(in_channels=self.channels[i], out_channels=self.channels[i+1], kernel_size=2, 132 | stride=2), nn.BatchNorm2d(self.channels[i+1])) for i in range(len(self.channels)-1)]) # [(512->256),(256->128),(128->64),(64->32)] 133 | 134 | # [(256+256 -> 256),(128+128 -> 128),(64+64 -> 64),(32+32 -> 32)] = [(512 -> 256),(256 -> 128),(128 -> 64),(64 -> 32)] 135 | self.conv_blocks = nn.ModuleList([ConvBlock( 136 | self.channels[i], self.channels[i+1]) for i in range(len(self.channels)-1)]) 137 | self.out_convs = nn.ModuleList([Conv1x1(in_channels=self.channels[-( 138 | s+1)], out_channels=out_channels) for s in self.scales]) # in_channels = [32,64,128,256] 139 | 140 | def forward(self,encoder_feats): 141 | output = {} 142 | x = encoder_feats[-1] 143 | 144 | for i in range(len(self.channels)-1): 145 | # print("input shape = {}".format(x.shape)) 146 | x = self.up_convs[i](x) 147 | # print("upsample shape = {}".format(x.shape)) 148 | enc_ftrs = encoder_feats[-(i+2)] 149 | x = torch.cat([x,enc_ftrs],dim=1) 150 | # print("concat shape = {}".format(x.shape)) 151 | x = self.conv_blocks[i](x) 152 | # print("up conv shape = {}".format(x.shape)) 153 | curr_scale = self.num_convs-i-1 154 | if curr_scale in self.scales: 155 | output[(self.name,curr_scale)] = self.out_convs[curr_scale](x) 156 | # print("output shape = {}".format(output[(self.name,curr_scale)].shape)) 157 | return output 158 | 159 | 160 | -------------------------------------------------------------------------------- /src/networks/__init__.py: -------------------------------------------------------------------------------- 1 | from .resnet_encoder import ResnetEncoder 2 | from .depth_decoder import DepthDecoder 3 | from .pose_decoder import PoseDecoder 4 | from .pose_cnn import PoseCNN 5 | 6 | 
from .depth.DepthResNet import DepthResNet 7 | from .depth.PackNet01 import PackNet01 8 | from .depth.PackNetSlim01 import PackNetSlim01 9 | from .depth.PackNetSlim01MultiDecoder import PackNetSlim01MultiDecoder 10 | from .UNet import Encoder, Decoder 11 | # from .depth.PackNetSlim01MultiOutput import PackNetSlim01MultiOutput 12 | 13 | 14 | -------------------------------------------------------------------------------- /src/networks/depth/DepthResNet.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 2 | 3 | import torch.nn as nn 4 | from functools import partial 5 | 6 | from networks.layers.resnet.resnet_encoder import ResnetEncoder 7 | from networks.layers.resnet.depth_decoder import DepthDecoder 8 | from networks.layers.resnet.layers import disp_to_depth 9 | 10 | ######################################################################################################################## 11 | 12 | class DepthResNet(nn.Module): 13 | """ 14 | Inverse depth network based on the ResNet architecture. 15 | 16 | Parameters 17 | ---------- 18 | version : str 19 | Has a XY format, where: 20 | X is the number of residual layers [18, 34, 50] and 21 | Y is an optional ImageNet pretrained flag added by the "pt" suffix 22 | Example: "18pt" initializes a pretrained ResNet18, and "34" initializes a ResNet34 from scratch 23 | kwargs : dict 24 | Extra parameters 25 | """ 26 | def __init__(self, version=None, **kwargs): 27 | super().__init__() 28 | assert version is not None, "DispResNet needs a version" 29 | 30 | num_layers = int(version[:2]) # First two characters are the number of layers 31 | pretrained = version[2:] == 'pt' # If the last characters are "pt", use ImageNet pretraining 32 | assert num_layers in [18, 34, 50], 'ResNet version {} not available'.format(num_layers) 33 | 34 | self.encoder = ResnetEncoder(num_layers=num_layers, pretrained=pretrained) 35 | self.decoder = DepthDecoder(num_ch_enc=self.encoder.num_ch_enc) 36 | self.scale_inv_depth = partial(disp_to_depth, min_depth=0.1, max_depth=100.0) 37 | 38 | def forward(self, x): 39 | """ 40 | Runs the network and returns inverse depth maps 41 | (4 scales if training and 1 if not). 42 | """ 43 | x = self.encoder(x) 44 | x = self.decoder(x) 45 | disps = [x[('disp', i)] for i in range(4)] 46 | outputs = {} 47 | for i in range(4): 48 | outputs[("disp",i)] = self.scale_inv_depth(disps[i])[0] 49 | return outputs 50 | # if self.training: 51 | # return [self.scale_inv_depth(d)[0] for d in disps] 52 | # else: 53 | # return self.scale_inv_depth(disps[0])[0] 54 | 55 | ######################################################################################################################## 56 | -------------------------------------------------------------------------------- /src/networks/depth/PackNet01.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 2 | 3 | import torch 4 | import torch.nn as nn 5 | from networks.layers.packnet.layers01 import \ 6 | PackLayerConv3d, UnpackLayerConv3d, Conv2D, ResidualBlock, InvDepth 7 | 8 | class PackNet01(nn.Module): 9 | """ 10 | PackNet network with 3d convolutions (version 01, from the CVPR paper). 
11 | 12 | https://arxiv.org/abs/1905.02693 13 | 14 | Parameters 15 | ---------- 16 | dropout : float 17 | Dropout value to use 18 | version : str 19 | Has a XY format, where: 20 | X controls upsampling variations (not used at the moment). 21 | Y controls feature stacking (A for concatenation and B for addition) 22 | kwargs : dict 23 | Extra parameters 24 | """ 25 | def __init__(self, dropout=None, version=None, **kwargs): 26 | super().__init__() 27 | self.version = version[1:] 28 | # Input/output channels 29 | in_channels = 3 30 | out_channels = 1 31 | # Hyper-parameters 32 | ni, no = 64, out_channels 33 | n1, n2, n3, n4, n5 = 64, 64, 128, 256, 512 34 | num_blocks = [2, 2, 3, 3] 35 | pack_kernel = [5, 3, 3, 3, 3] 36 | unpack_kernel = [3, 3, 3, 3, 3] 37 | iconv_kernel = [3, 3, 3, 3, 3] 38 | # Initial convolutional layer 39 | self.pre_calc = Conv2D(in_channels, ni, 5, 1) 40 | # Support for different versions 41 | if self.version == 'A': # Channel concatenation 42 | n1o, n1i = n1, n1 + ni + no 43 | n2o, n2i = n2, n2 + n1 + no 44 | n3o, n3i = n3, n3 + n2 + no 45 | n4o, n4i = n4, n4 + n3 46 | n5o, n5i = n5, n5 + n4 47 | elif self.version == 'B': # Channel addition 48 | n1o, n1i = n1, n1 + no 49 | n2o, n2i = n2, n2 + no 50 | n3o, n3i = n3//2, n3//2 + no 51 | n4o, n4i = n4//2, n4//2 52 | n5o, n5i = n5//2, n5//2 53 | else: 54 | raise ValueError('Unknown PackNet version {}'.format(version)) 55 | 56 | # Encoder 57 | 58 | self.pack1 = PackLayerConv3d(n1, pack_kernel[0]) 59 | self.pack2 = PackLayerConv3d(n2, pack_kernel[1]) 60 | self.pack3 = PackLayerConv3d(n3, pack_kernel[2]) 61 | self.pack4 = PackLayerConv3d(n4, pack_kernel[3]) 62 | self.pack5 = PackLayerConv3d(n5, pack_kernel[4]) 63 | 64 | self.conv1 = Conv2D(ni, n1, 7, 1) 65 | self.conv2 = ResidualBlock(n1, n2, num_blocks[0], 1, dropout=dropout) 66 | self.conv3 = ResidualBlock(n2, n3, num_blocks[1], 1, dropout=dropout) 67 | self.conv4 = ResidualBlock(n3, n4, num_blocks[2], 1, dropout=dropout) 68 | self.conv5 = ResidualBlock(n4, n5, num_blocks[3], 1, dropout=dropout) 69 | 70 | # Decoder 71 | 72 | self.unpack5 = UnpackLayerConv3d(n5, n5o, unpack_kernel[0]) 73 | self.unpack4 = UnpackLayerConv3d(n5, n4o, unpack_kernel[1]) 74 | self.unpack3 = UnpackLayerConv3d(n4, n3o, unpack_kernel[2]) 75 | self.unpack2 = UnpackLayerConv3d(n3, n2o, unpack_kernel[3]) 76 | self.unpack1 = UnpackLayerConv3d(n2, n1o, unpack_kernel[4]) 77 | 78 | self.iconv5 = Conv2D(n5i, n5, iconv_kernel[0], 1) 79 | self.iconv4 = Conv2D(n4i, n4, iconv_kernel[1], 1) 80 | self.iconv3 = Conv2D(n3i, n3, iconv_kernel[2], 1) 81 | self.iconv2 = Conv2D(n2i, n2, iconv_kernel[3], 1) 82 | self.iconv1 = Conv2D(n1i, n1, iconv_kernel[4], 1) 83 | 84 | # Depth Layers 85 | 86 | self.unpack_disps = nn.PixelShuffle(2) 87 | self.unpack_disp4 = nn.Upsample(scale_factor=2, mode='nearest', align_corners=None) 88 | self.unpack_disp3 = nn.Upsample(scale_factor=2, mode='nearest', align_corners=None) 89 | self.unpack_disp2 = nn.Upsample(scale_factor=2, mode='nearest', align_corners=None) 90 | 91 | self.disp4_layer = InvDepth(n4, out_channels=out_channels) 92 | self.disp3_layer = InvDepth(n3, out_channels=out_channels) 93 | self.disp2_layer = InvDepth(n2, out_channels=out_channels) 94 | self.disp1_layer = InvDepth(n1, out_channels=out_channels) 95 | 96 | self.init_weights() 97 | 98 | def init_weights(self): 99 | """Initializes network weights.""" 100 | for m in self.modules(): 101 | if isinstance(m, (nn.Conv2d, nn.Conv3d)): 102 | nn.init.xavier_uniform_(m.weight) 103 | if m.bias is not None: 104 | m.bias.data.zero_() 
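    # Illustrative usage sketch (not part of the original file): with version='1A' the
    # decoder concatenates skip connections ('B' would add them), and forward() returns
    # a dict of inverse-depth maps keyed by ('disp', i) for scales i = 0..3, with
    # ('disp', 0) at full input resolution. Input height and width must be divisible by 32, e.g.
    #   net = PackNet01(dropout=0.5, version='1A')
    #   out = net(torch.rand(1, 3, 64, 128))
    #   out[('disp', 0)].shape  # torch.Size([1, 1, 64, 128])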
105 | 106 | def forward(self, x): 107 | """ 108 | Runs the network and returns inverse depth maps 109 | (4 scales if training and 1 if not). 110 | """ 111 | x = self.pre_calc(x) 112 | 113 | # Encoder 114 | 115 | x1 = self.conv1(x) 116 | x1p = self.pack1(x1) 117 | x2 = self.conv2(x1p) 118 | x2p = self.pack2(x2) 119 | x3 = self.conv3(x2p) 120 | x3p = self.pack3(x3) 121 | x4 = self.conv4(x3p) 122 | x4p = self.pack4(x4) 123 | x5 = self.conv5(x4p) 124 | x5p = self.pack5(x5) 125 | 126 | # Skips 127 | 128 | skip1 = x 129 | skip2 = x1p 130 | skip3 = x2p 131 | skip4 = x3p 132 | skip5 = x4p 133 | 134 | # Decoder 135 | 136 | unpack5 = self.unpack5(x5p) 137 | if self.version == 'A': 138 | concat5 = torch.cat((unpack5, skip5), 1) 139 | else: 140 | concat5 = unpack5 + skip5 141 | iconv5 = self.iconv5(concat5) 142 | 143 | unpack4 = self.unpack4(iconv5) 144 | if self.version == 'A': 145 | concat4 = torch.cat((unpack4, skip4), 1) 146 | else: 147 | concat4 = unpack4 + skip4 148 | iconv4 = self.iconv4(concat4) 149 | disp4 = self.disp4_layer(iconv4) 150 | udisp4 = self.unpack_disp4(disp4) 151 | 152 | unpack3 = self.unpack3(iconv4) 153 | if self.version == 'A': 154 | concat3 = torch.cat((unpack3, skip3, udisp4), 1) 155 | else: 156 | concat3 = torch.cat((unpack3 + skip3, udisp4), 1) 157 | iconv3 = self.iconv3(concat3) 158 | disp3 = self.disp3_layer(iconv3) 159 | udisp3 = self.unpack_disp3(disp3) 160 | 161 | unpack2 = self.unpack2(iconv3) 162 | if self.version == 'A': 163 | concat2 = torch.cat((unpack2, skip2, udisp3), 1) 164 | else: 165 | concat2 = torch.cat((unpack2 + skip2, udisp3), 1) 166 | iconv2 = self.iconv2(concat2) 167 | disp2 = self.disp2_layer(iconv2) 168 | udisp2 = self.unpack_disp2(disp2) 169 | 170 | unpack1 = self.unpack1(iconv2) 171 | if self.version == 'A': 172 | concat1 = torch.cat((unpack1, skip1, udisp2), 1) 173 | else: 174 | concat1 = torch.cat((unpack1 + skip1, udisp2), 1) 175 | iconv1 = self.iconv1(concat1) 176 | disp1 = self.disp1_layer(iconv1) 177 | disps = [disp1, disp2, disp3, disp4] 178 | outputs = {} 179 | for i,disp in enumerate(disps): 180 | outputs[("disp",i)] = disp 181 | return outputs 182 | # if self.training: 183 | # return [disp1, disp2, disp3, disp4] 184 | # else: 185 | # return disp1 186 | -------------------------------------------------------------------------------- /src/networks/depth/PackNetSlim01.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 2 | 3 | import torch 4 | import torch.nn as nn 5 | from networks.layers.packnet.layers01 import \ 6 | PackLayerConv3d, UnpackLayerConv3d, Conv2D, ResidualBlock, InvDepth 7 | 8 | class PackNetSlim01(nn.Module): 9 | """ 10 | PackNet network with 3d convolutions (version 01, from the CVPR paper). 11 | Slimmer version, with fewer feature channels 12 | 13 | https://arxiv.org/abs/1905.02693 14 | 15 | Parameters 16 | ---------- 17 | dropout : float 18 | Dropout value to use 19 | version : str 20 | Has a XY format, where: 21 | X controls upsampling variations (not used at the moment). 
22 | Y controls feature stacking (A for concatenation and B for addition) 23 | kwargs : dict 24 | Extra parameters 25 | """ 26 | def __init__(self, dropout=None, version=None, **kwargs): 27 | super().__init__() 28 | self.version = version[1:] 29 | # Input/output channels 30 | in_channels = 3 31 | out_channels = 1 32 | # Hyper-parameters 33 | ni, no = 32, out_channels 34 | n1, n2, n3, n4, n5 = 32, 64, 128, 256, 512 35 | num_blocks = [2, 2, 3, 3] 36 | pack_kernel = [5, 3, 3, 3, 3] 37 | unpack_kernel = [3, 3, 3, 3, 3] 38 | iconv_kernel = [3, 3, 3, 3, 3] 39 | num_3d_feat = 4 40 | # Initial convolutional layer 41 | self.pre_calc = Conv2D(in_channels, ni, 5, 1) 42 | # Support for different versions 43 | if self.version == 'A': # Channel concatenation 44 | n1o, n1i = n1, n1 + ni + no 45 | n2o, n2i = n2, n2 + n1 + no 46 | n3o, n3i = n3, n3 + n2 + no 47 | n4o, n4i = n4, n4 + n3 48 | n5o, n5i = n5, n5 + n4 49 | elif self.version == 'B': # Channel addition 50 | n1o, n1i = n1, n1 + no 51 | n2o, n2i = n2, n2 + no 52 | n3o, n3i = n3//2, n3//2 + no 53 | n4o, n4i = n4//2, n4//2 54 | n5o, n5i = n5//2, n5//2 55 | else: 56 | raise ValueError('Unknown PackNet version {}'.format(version)) 57 | 58 | # Encoder 59 | 60 | self.pack1 = PackLayerConv3d(n1, pack_kernel[0], d=num_3d_feat) 61 | self.pack2 = PackLayerConv3d(n2, pack_kernel[1], d=num_3d_feat) 62 | self.pack3 = PackLayerConv3d(n3, pack_kernel[2], d=num_3d_feat) 63 | self.pack4 = PackLayerConv3d(n4, pack_kernel[3], d=num_3d_feat) 64 | self.pack5 = PackLayerConv3d(n5, pack_kernel[4], d=num_3d_feat) 65 | 66 | self.conv1 = Conv2D(ni, n1, 7, 1) 67 | self.conv2 = ResidualBlock(n1, n2, num_blocks[0], 1, dropout=dropout) 68 | self.conv3 = ResidualBlock(n2, n3, num_blocks[1], 1, dropout=dropout) 69 | self.conv4 = ResidualBlock(n3, n4, num_blocks[2], 1, dropout=dropout) 70 | self.conv5 = ResidualBlock(n4, n5, num_blocks[3], 1, dropout=dropout) 71 | 72 | # Decoder 73 | 74 | self.unpack5 = UnpackLayerConv3d(n5, n5o, unpack_kernel[0], d=num_3d_feat) 75 | self.unpack4 = UnpackLayerConv3d(n5, n4o, unpack_kernel[1], d=num_3d_feat) 76 | self.unpack3 = UnpackLayerConv3d(n4, n3o, unpack_kernel[2], d=num_3d_feat) 77 | self.unpack2 = UnpackLayerConv3d(n3, n2o, unpack_kernel[3], d=num_3d_feat) 78 | self.unpack1 = UnpackLayerConv3d(n2, n1o, unpack_kernel[4], d=num_3d_feat) 79 | 80 | self.iconv5 = Conv2D(n5i, n5, iconv_kernel[0], 1) 81 | self.iconv4 = Conv2D(n4i, n4, iconv_kernel[1], 1) 82 | self.iconv3 = Conv2D(n3i, n3, iconv_kernel[2], 1) 83 | self.iconv2 = Conv2D(n2i, n2, iconv_kernel[3], 1) 84 | self.iconv1 = Conv2D(n1i, n1, iconv_kernel[4], 1) 85 | 86 | # Depth Layers 87 | 88 | self.unpack_disps = nn.PixelShuffle(2) 89 | self.unpack_disp4 = nn.Upsample(scale_factor=2, mode='nearest', align_corners=None) 90 | self.unpack_disp3 = nn.Upsample(scale_factor=2, mode='nearest', align_corners=None) 91 | self.unpack_disp2 = nn.Upsample(scale_factor=2, mode='nearest', align_corners=None) 92 | 93 | self.disp4_layer = InvDepth(n4, out_channels=out_channels) 94 | self.disp3_layer = InvDepth(n3, out_channels=out_channels) 95 | self.disp2_layer = InvDepth(n2, out_channels=out_channels) 96 | self.disp1_layer = InvDepth(n1, out_channels=out_channels) 97 | 98 | self.init_weights() 99 | 100 | def init_weights(self): 101 | """Initializes network weights.""" 102 | for m in self.modules(): 103 | if isinstance(m, (nn.Conv2d, nn.Conv3d)): 104 | nn.init.xavier_uniform_(m.weight) 105 | if m.bias is not None: 106 | m.bias.data.zero_() 107 | 108 | def forward(self, x): 109 | """ 110 | Runs the network 
and returns inverse depth maps 111 | (4 scales if training and 1 if not). 112 | """ 113 | x = self.pre_calc(x) 114 | 115 | # Encoder 116 | 117 | x1 = self.conv1(x) 118 | x1p = self.pack1(x1) 119 | x2 = self.conv2(x1p) 120 | x2p = self.pack2(x2) 121 | x3 = self.conv3(x2p) 122 | x3p = self.pack3(x3) 123 | x4 = self.conv4(x3p) 124 | x4p = self.pack4(x4) 125 | x5 = self.conv5(x4p) 126 | x5p = self.pack5(x5) 127 | 128 | # Skips 129 | 130 | skip1 = x 131 | skip2 = x1p 132 | skip3 = x2p 133 | skip4 = x3p 134 | skip5 = x4p 135 | 136 | # Decoder 137 | 138 | unpack5 = self.unpack5(x5p) 139 | if self.version == 'A': 140 | concat5 = torch.cat((unpack5, skip5), 1) 141 | else: 142 | concat5 = unpack5 + skip5 143 | iconv5 = self.iconv5(concat5) 144 | 145 | unpack4 = self.unpack4(iconv5) 146 | if self.version == 'A': 147 | concat4 = torch.cat((unpack4, skip4), 1) 148 | else: 149 | concat4 = unpack4 + skip4 150 | iconv4 = self.iconv4(concat4) 151 | disp4 = self.disp4_layer(iconv4) 152 | udisp4 = self.unpack_disp4(disp4) 153 | 154 | unpack3 = self.unpack3(iconv4) 155 | if self.version == 'A': 156 | concat3 = torch.cat((unpack3, skip3, udisp4), 1) 157 | else: 158 | concat3 = torch.cat((unpack3 + skip3, udisp4), 1) 159 | iconv3 = self.iconv3(concat3) 160 | disp3 = self.disp3_layer(iconv3) 161 | udisp3 = self.unpack_disp3(disp3) 162 | 163 | unpack2 = self.unpack2(iconv3) 164 | if self.version == 'A': 165 | concat2 = torch.cat((unpack2, skip2, udisp3), 1) 166 | else: 167 | concat2 = torch.cat((unpack2 + skip2, udisp3), 1) 168 | iconv2 = self.iconv2(concat2) 169 | disp2 = self.disp2_layer(iconv2) 170 | udisp2 = self.unpack_disp2(disp2) 171 | 172 | unpack1 = self.unpack1(iconv2) 173 | if self.version == 'A': 174 | concat1 = torch.cat((unpack1, skip1, udisp2), 1) 175 | else: 176 | concat1 = torch.cat((unpack1 + skip1, udisp2), 1) 177 | iconv1 = self.iconv1(concat1) 178 | disp1 = self.disp1_layer(iconv1) 179 | 180 | disps = [disp1, disp2, disp3, disp4] 181 | outputs = {} 182 | for i,disp in enumerate(disps): 183 | outputs[("disp",i)] = disp 184 | return outputs 185 | # if self.training: 186 | # return [disp1, disp2, disp3, disp4] 187 | # else: 188 | # return disp1 189 | -------------------------------------------------------------------------------- /src/networks/depth/PackNetSlim01MultiDecoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from networks.layers.packnet.layers01 import PackLayerConv3d, UnpackLayerConv3d, Conv2D, ResidualBlock, InvDepth, activations 4 | 5 | class Decoder(nn.Module): 6 | def __init__(self, name, version, ni, no, n1, n2, n3, n4, n5, unpack_kernel, iconv_kernel, num_3d_feat, out_channels, activation = 'sigmoid', use_batchnorm=False): 7 | super().__init__() 8 | self.name = name 9 | self.version = version 10 | 11 | # Support for different versions 12 | if self.version == 'A': # Channel concatenation 13 | n1o, n1i = n1, n1 + ni + no 14 | n2o, n2i = n2, n2 + n1 + no 15 | n3o, n3i = n3, n3 + n2 + no 16 | n4o, n4i = n4, n4 + n3 17 | n5o, n5i = n5, n5 + n4 18 | elif self.version == 'B': # Channel addition 19 | n1o, n1i = n1, n1 + no 20 | n2o, n2i = n2, n2 + no 21 | n3o, n3i = n3//2, n3//2 + no 22 | n4o, n4i = n4//2, n4//2 23 | n5o, n5i = n5//2, n5//2 24 | else: 25 | raise ValueError('Unknown PackNet version {}'.format(self.version)) 26 | 27 | # Decoder 28 | self.unpack5 = UnpackLayerConv3d(n5, n5o, unpack_kernel[0], d=num_3d_feat) 29 | self.unpack4 = UnpackLayerConv3d(n5, n4o, unpack_kernel[1], d=num_3d_feat) 
30 | self.unpack3 = UnpackLayerConv3d(n4, n3o, unpack_kernel[2], d=num_3d_feat) 31 | self.unpack2 = UnpackLayerConv3d(n3, n2o, unpack_kernel[3], d=num_3d_feat) 32 | self.unpack1 = UnpackLayerConv3d(n2, n1o, unpack_kernel[4], d=num_3d_feat) 33 | 34 | self.iconv5 = Conv2D(n5i, n5, iconv_kernel[0], 1, use_batchnorm=use_batchnorm) 35 | self.iconv4 = Conv2D(n4i, n4, iconv_kernel[1], 1, use_batchnorm=use_batchnorm) 36 | self.iconv3 = Conv2D(n3i, n3, iconv_kernel[2], 1, use_batchnorm=use_batchnorm) 37 | self.iconv2 = Conv2D(n2i, n2, iconv_kernel[3], 1, use_batchnorm=use_batchnorm) 38 | self.iconv1 = Conv2D(n1i, n1, iconv_kernel[4], 1, use_batchnorm=use_batchnorm) 39 | 40 | # Depth Layers 41 | 42 | self.unpack_out4 = nn.Upsample(scale_factor=2, mode='nearest', align_corners=None) 43 | self.unpack_out3 = nn.Upsample(scale_factor=2, mode='nearest', align_corners=None) 44 | self.unpack_out2 = nn.Upsample(scale_factor=2, mode='nearest', align_corners=None) 45 | 46 | self.out4_layer = InvDepth(n4, out_channels=out_channels, activation=activation) 47 | self.out3_layer = InvDepth(n3, out_channels=out_channels, activation=activation) 48 | self.out2_layer = InvDepth(n2, out_channels=out_channels, activation=activation) 49 | self.out1_layer = InvDepth(n1, out_channels=out_channels, activation=activation) 50 | 51 | 52 | def forward(self, x5p, skip1, skip2, skip3, skip4, skip5): 53 | # Decoder 54 | 55 | unpack5 = self.unpack5(x5p) 56 | if self.version == 'A': 57 | concat5 = torch.cat((unpack5, skip5), 1) 58 | else: 59 | concat5 = unpack5 + skip5 60 | iconv5 = self.iconv5(concat5) 61 | 62 | unpack4 = self.unpack4(iconv5) 63 | if self.version == 'A': 64 | concat4 = torch.cat((unpack4, skip4), 1) 65 | else: 66 | concat4 = unpack4 + skip4 67 | iconv4 = self.iconv4(concat4) 68 | out4 = self.out4_layer(iconv4) 69 | uout4 = self.unpack_out4(out4) 70 | 71 | unpack3 = self.unpack3(iconv4) 72 | if self.version == 'A': 73 | concat3 = torch.cat((unpack3, skip3, uout4), 1) 74 | else: 75 | concat3 = torch.cat((unpack3 + skip3, uout4), 1) 76 | iconv3 = self.iconv3(concat3) 77 | out3 = self.out3_layer(iconv3) 78 | uout3 = self.unpack_out3(out3) 79 | 80 | unpack2 = self.unpack2(iconv3) 81 | if self.version == 'A': 82 | concat2 = torch.cat((unpack2, skip2, uout3), 1) 83 | else: 84 | concat2 = torch.cat((unpack2 + skip2, uout3), 1) 85 | iconv2 = self.iconv2(concat2) 86 | out2 = self.out2_layer(iconv2) 87 | uout2 = self.unpack_out2(out2) 88 | 89 | unpack1 = self.unpack1(iconv2) 90 | if self.version == 'A': 91 | concat1 = torch.cat((unpack1, skip1, uout2), 1) 92 | else: 93 | concat1 = torch.cat((unpack1 + skip1, uout2), 1) 94 | iconv1 = self.iconv1(concat1) 95 | out1 = self.out1_layer(iconv1) 96 | 97 | outs = [out1, out2, out3, out4] 98 | outputs = {} 99 | for i,out in enumerate(outs): 100 | outputs[(self.name,i)] = out 101 | return outputs 102 | 103 | class PackNetSlim01MultiDecoder(nn.Module): 104 | """ 105 | PackNet network with 3d convolutions (version 01, from the CVPR paper). 106 | Slimmer version, with fewer feature channels 107 | https://arxiv.org/abs/1905.02693 108 | Parameters 109 | ---------- 110 | dropout : float 111 | Dropout value to use 112 | version : str 113 | Has a XY format, where: 114 | X controls upsampling variations (not used at the moment). 
115 | Y controls feature stacking (A for concatenation and B for addition) 116 | kwargs : dict 117 | Extra parameters 118 | """ 119 | def __init__(self, dropout=None, version=None, cycle_loss=False, use_batchnorm = False, **kwargs): 120 | super().__init__() 121 | self.version = version[1:] 122 | name = 'depth' if cycle_loss else 'disp' 123 | 124 | # Input/output channels 125 | in_channels = 3 126 | out_channels = 1 127 | # Hyper-parameters 128 | ni, no = 32, out_channels 129 | n1, n2, n3, n4, n5 = 32, 64, 128, 256, 512 130 | num_blocks = [2, 2, 3, 3] 131 | pack_kernel = [5, 3, 3, 3, 3] 132 | unpack_kernel = [3, 3, 3, 3, 3] 133 | iconv_kernel = [3, 3, 3, 3, 3] 134 | num_3d_feat = 4 135 | # Initial convolutional layer 136 | self.pre_calc = Conv2D(in_channels, ni, 5, 1,use_batchnorm=use_batchnorm) 137 | 138 | 139 | # Encoder 140 | 141 | self.pack1 = PackLayerConv3d(n1, pack_kernel[0], d=num_3d_feat) 142 | self.pack2 = PackLayerConv3d(n2, pack_kernel[1], d=num_3d_feat) 143 | self.pack3 = PackLayerConv3d(n3, pack_kernel[2], d=num_3d_feat) 144 | self.pack4 = PackLayerConv3d(n4, pack_kernel[3], d=num_3d_feat) 145 | self.pack5 = PackLayerConv3d(n5, pack_kernel[4], d=num_3d_feat) 146 | 147 | self.conv1 = Conv2D(ni, n1, 7, 1, use_batchnorm=use_batchnorm) 148 | self.conv2 = ResidualBlock(n1, n2, num_blocks[0], 1, dropout=dropout, use_batchnorm=use_batchnorm) 149 | self.conv3 = ResidualBlock(n2, n3, num_blocks[1], 1, dropout=dropout, use_batchnorm=use_batchnorm) 150 | self.conv4 = ResidualBlock(n3, n4, num_blocks[2], 1, dropout=dropout, use_batchnorm=use_batchnorm) 151 | self.conv5 = ResidualBlock(n4, n5, num_blocks[3], 1, dropout=dropout, use_batchnorm=use_batchnorm) 152 | 153 | # Decoder 154 | 155 | self.depth_decoder = Decoder(name, self.version, ni, no, n1, n2, n3, n4, n5, unpack_kernel, iconv_kernel, num_3d_feat, out_channels, activation='sigmoid', use_batchnorm=use_batchnorm) 156 | self.albedo_decoder = Decoder('albedo', self.version, ni, no, n1, n2, n3, n4, n5, unpack_kernel, iconv_kernel, num_3d_feat, out_channels, activation='sigmoid', use_batchnorm=use_batchnorm) 157 | self.ambient_decoder = Decoder('ambient', self.version, ni, no, n1, n2, n3, n4, n5, unpack_kernel, iconv_kernel, num_3d_feat, out_channels, activation='sigmoid', use_batchnorm=use_batchnorm) 158 | 159 | self.init_weights() 160 | 161 | def init_weights(self): 162 | """Initializes network weights.""" 163 | for m in self.modules(): 164 | if isinstance(m, (nn.Conv2d, nn.Conv3d)): 165 | nn.init.xavier_uniform_(m.weight) 166 | if m.bias is not None: 167 | m.bias.data.zero_() 168 | 169 | def forward(self, x): 170 | """ 171 | Runs the network and returns inverse depth maps 172 | (4 scales if training and 1 if not). 
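        In this multi-decoder variant the returned dict holds all four scales of every
        head, keyed by (name, i) with name in {'depth' or 'disp', 'albedo', 'ambient'}.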
173 | """ 174 | x = self.pre_calc(x) 175 | 176 | # Encoder 177 | 178 | x1 = self.conv1(x) 179 | x1p = self.pack1(x1) 180 | x2 = self.conv2(x1p) 181 | x2p = self.pack2(x2) 182 | x3 = self.conv3(x2p) 183 | x3p = self.pack3(x3) 184 | x4 = self.conv4(x3p) 185 | x4p = self.pack4(x4) 186 | x5 = self.conv5(x4p) 187 | x5p = self.pack5(x5) 188 | 189 | # Skips 190 | 191 | skip1 = x 192 | skip2 = x1p 193 | skip3 = x2p 194 | skip4 = x3p 195 | skip5 = x4p 196 | 197 | # Decoder 198 | outputs = {} 199 | outputs.update(self.depth_decoder(x5p, skip1, skip2, skip3, skip4, skip5)) 200 | outputs.update(self.albedo_decoder(x5p, skip1, skip2, skip3, skip4, skip5)) 201 | outputs.update(self.ambient_decoder(x5p, skip1, skip2, skip3, skip4, skip5)) 202 | 203 | return outputs 204 | 205 | 206 | 207 | if __name__ == '__main__': 208 | import sys 209 | import os 210 | sys.path.append("..") 211 | from layers.packnet.layers01 import PackLayerConv3d, UnpackLayerConv3d, Conv2D, ResidualBlock, InvDepth 212 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" 213 | 214 | dropout = 0.5 215 | version = '1A' 216 | model = PackNetSlim01MultiDecoder(dropout,version) 217 | img = torch.rand(1,3, 512, 1024) 218 | outputs = model(img) 219 | print(outputs) -------------------------------------------------------------------------------- /src/networks/depth/depth_decoder.py: -------------------------------------------------------------------------------- 1 | # Copyright Niantic 2019. Patent Pending. All rights reserved. 2 | # 3 | # This software is licensed under the terms of the Monodepth2 licence 4 | # which allows for non-commercial use only, the full terms of which are made 5 | # available in the LICENSE file. 6 | 7 | from __future__ import absolute_import, division, print_function 8 | 9 | import numpy as np 10 | import torch 11 | import torch.nn as nn 12 | 13 | from collections import OrderedDict 14 | from layers import * 15 | 16 | 17 | class DepthDecoder(nn.Module): 18 | def __init__(self, num_ch_enc, scales=range(4), num_output_channels=1, use_skips=True): 19 | super(DepthDecoder, self).__init__() 20 | 21 | self.num_output_channels = num_output_channels 22 | self.use_skips = use_skips 23 | self.upsample_mode = 'nearest' 24 | self.scales = scales 25 | 26 | self.num_ch_enc = num_ch_enc 27 | self.num_ch_dec = np.array([16, 32, 64, 128, 256]) 28 | 29 | # decoder 30 | self.convs = OrderedDict() 31 | for i in range(4, -1, -1): 32 | # upconv_0 33 | num_ch_in = self.num_ch_enc[-1] if i == 4 else self.num_ch_dec[i + 1] 34 | num_ch_out = self.num_ch_dec[i] 35 | self.convs[("upconv", i, 0)] = ConvBlock(num_ch_in, num_ch_out) 36 | 37 | # upconv_1 38 | num_ch_in = self.num_ch_dec[i] 39 | if self.use_skips and i > 0: 40 | num_ch_in += self.num_ch_enc[i - 1] 41 | num_ch_out = self.num_ch_dec[i] 42 | self.convs[("upconv", i, 1)] = ConvBlock(num_ch_in, num_ch_out) 43 | 44 | for s in self.scales: 45 | self.convs[("dispconv", s)] = Conv3x3(self.num_ch_dec[s], self.num_output_channels) 46 | 47 | self.decoder = nn.ModuleList(list(self.convs.values())) 48 | self.sigmoid = nn.Sigmoid() 49 | 50 | def forward(self, input_features): 51 | self.outputs = {} 52 | 53 | # decoder 54 | x = input_features[-1] 55 | for i in range(4, -1, -1): 56 | x = self.convs[("upconv", i, 0)](x) 57 | x = [upsample(x)] 58 | if self.use_skips and i > 0: 59 | x += [input_features[i - 1]] 60 | x = torch.cat(x, 1) 61 | x = self.convs[("upconv", i, 1)](x) 62 | if i in self.scales: 63 | self.outputs[("disp", i)] = self.sigmoid(self.convs[("dispconv", i)](x)) 64 | 65 | return self.outputs 66 | 
-------------------------------------------------------------------------------- /src/networks/depth_decoder.py: -------------------------------------------------------------------------------- 1 | # Copyright Niantic 2019. Patent Pending. All rights reserved. 2 | # 3 | # This software is licensed under the terms of the Monodepth2 licence 4 | # which allows for non-commercial use only, the full terms of which are made 5 | # available in the LICENSE file. 6 | 7 | from __future__ import absolute_import, division, print_function 8 | 9 | import numpy as np 10 | import torch 11 | import torch.nn as nn 12 | 13 | from collections import OrderedDict 14 | from layers import * 15 | 16 | 17 | class DepthDecoder(nn.Module): 18 | def __init__(self, num_ch_enc, scales=range(4), num_output_channels=1, use_skips=True): 19 | super(DepthDecoder, self).__init__() 20 | 21 | self.num_output_channels = num_output_channels 22 | self.use_skips = use_skips 23 | self.upsample_mode = 'nearest' 24 | self.scales = scales 25 | 26 | self.num_ch_enc = num_ch_enc 27 | self.num_ch_dec = np.array([16, 32, 64, 128, 256]) 28 | 29 | # decoder 30 | self.convs = OrderedDict() 31 | for i in range(4, -1, -1): 32 | # upconv_0 33 | num_ch_in = self.num_ch_enc[-1] if i == 4 else self.num_ch_dec[i + 1] 34 | num_ch_out = self.num_ch_dec[i] 35 | self.convs[("upconv", i, 0)] = ConvBlock(num_ch_in, num_ch_out) 36 | 37 | # upconv_1 38 | num_ch_in = self.num_ch_dec[i] 39 | if self.use_skips and i > 0: 40 | num_ch_in += self.num_ch_enc[i - 1] 41 | num_ch_out = self.num_ch_dec[i] 42 | self.convs[("upconv", i, 1)] = ConvBlock(num_ch_in, num_ch_out) 43 | 44 | for s in self.scales: 45 | self.convs[("dispconv", s)] = Conv3x3(self.num_ch_dec[s], self.num_output_channels) 46 | 47 | self.decoder = nn.ModuleList(list(self.convs.values())) 48 | self.sigmoid = nn.Sigmoid() 49 | 50 | def forward(self, input_features): 51 | self.outputs = {} 52 | 53 | # decoder 54 | x = input_features[-1] 55 | for i in range(4, -1, -1): 56 | x = self.convs[("upconv", i, 0)](x) 57 | x = [upsample(x)] 58 | if self.use_skips and i > 0: 59 | x += [input_features[i - 1]] 60 | x = torch.cat(x, 1) 61 | x = self.convs[("upconv", i, 1)](x) 62 | if i in self.scales: 63 | self.outputs[("disp", i)] = self.sigmoid(self.convs[("dispconv", i)](x)) 64 | 65 | return self.outputs -------------------------------------------------------------------------------- /src/networks/layers/packnet/layers01.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 
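# This module collects the building blocks shared by the PackNet depth networks above:
# Conv2D / ResidualConv blocks (GroupNorm or BatchNorm + ELU), the InvDepth output head,
# and the packing / unpacking layers, which trade spatial resolution for channels (the
# space-to-depth inverse of nn.PixelShuffle) so that down- and up-sampling lose as little
# detail as possible.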
2 | 3 | import torch 4 | import torch.nn as nn 5 | from functools import partial 6 | import torch.nn.functional as F 7 | 8 | ######################################################################################################################## 9 | activations = { 10 | 'tanh' : nn.Tanh(), 11 | 'sigmoid': nn.Sigmoid(), 12 | 'relu' : nn.ReLU(inplace=True), 13 | 'linear' : lambda x : x 14 | } 15 | 16 | ######################################################################################################################## 17 | 18 | class Conv2D(nn.Module): 19 | """ 20 | 2D convolution with GroupNorm and ELU 21 | Parameters 22 | ---------- 23 | in_channels : int 24 | Number of input channels 25 | out_channels : int 26 | Number of output channels 27 | kernel_size : int 28 | Kernel size 29 | stride : int 30 | Stride 31 | """ 32 | def __init__(self, in_channels, out_channels, kernel_size, stride, use_batchnorm=False): 33 | super().__init__() 34 | self.kernel_size = kernel_size 35 | self.conv_base = nn.Conv2d( 36 | in_channels, out_channels, kernel_size=kernel_size, stride=stride) 37 | self.pad = nn.ConstantPad2d([kernel_size // 2] * 4, value=0) 38 | if not use_batchnorm: 39 | self.normalize = torch.nn.GroupNorm(16, out_channels) 40 | else: 41 | self.normalize = torch.nn.BatchNorm2d(out_channels) 42 | self.activ = nn.ELU(inplace=True) 43 | 44 | def forward(self, x): 45 | """Runs the Conv2D layer.""" 46 | x = self.conv_base(self.pad(x)) 47 | return self.activ(self.normalize(x)) 48 | 49 | 50 | class ResidualConv(nn.Module): 51 | """2D Convolutional residual block with GroupNorm and ELU""" 52 | def __init__(self, in_channels, out_channels, stride, dropout=None, use_batchnorm=False): 53 | """ 54 | Initializes a ResidualConv object. 55 | Parameters 56 | ---------- 57 | in_channels : int 58 | Number of input channels 59 | out_channels : int 60 | Number of output channels 61 | stride : int 62 | Stride 63 | dropout : float 64 | Dropout value 65 | """ 66 | super().__init__() 67 | self.conv1 = Conv2D(in_channels, out_channels, 3, stride) 68 | self.conv2 = Conv2D(out_channels, out_channels, 3, 1) 69 | self.conv3 = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride) 70 | if not use_batchnorm: 71 | self.normalize = torch.nn.GroupNorm(16, out_channels) 72 | else: 73 | self.normalize = torch.nn.BatchNorm2d(out_channels) 74 | self.activ = nn.ELU(inplace=True) 75 | 76 | if dropout: 77 | self.conv3 = nn.Sequential(self.conv3, nn.Dropout2d(dropout)) 78 | 79 | def forward(self, x): 80 | """Runs the ResidualConv layer.""" 81 | x_out = self.conv1(x) 82 | x_out = self.conv2(x_out) 83 | shortcut = self.conv3(x) 84 | return self.activ(self.normalize(x_out + shortcut)) 85 | 86 | 87 | def ResidualBlock(in_channels, out_channels, num_blocks, stride, dropout=None, use_batchnorm=False): 88 | """ 89 | Returns a ResidualBlock with various ResidualConv layers. 
90 | Parameters 91 | ---------- 92 | in_channels : int 93 | Number of input channels 94 | out_channels : int 95 | Number of output channels 96 | num_blocks : int 97 | Number of residual blocks 98 | stride : int 99 | Stride 100 | dropout : float 101 | Dropout value 102 | """ 103 | layers = [ResidualConv(in_channels, out_channels, stride, dropout=dropout,use_batchnorm=use_batchnorm)] 104 | for i in range(1, num_blocks): 105 | layers.append(ResidualConv(out_channels, out_channels, 1, dropout=dropout)) 106 | return nn.Sequential(*layers) 107 | 108 | 109 | class InvDepth(nn.Module): 110 | """Inverse depth layer""" 111 | def __init__(self, in_channels, out_channels=1, min_depth=0.5, activation = 'sigmoid'): 112 | """ 113 | Initializes an InvDepth object. 114 | Parameters 115 | ---------- 116 | in_channels : int 117 | Number of input channels 118 | out_channels : int 119 | Number of output channels 120 | min_depth : float 121 | Minimum depth value to calculate 122 | """ 123 | super().__init__() 124 | self.min_depth = min_depth 125 | self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1) 126 | self.pad = nn.ConstantPad2d([1] * 4, value=0) 127 | # self.activ = nn.Sigmoid() 128 | self.activ = activations[activation] # changed it to dict because for albedo and ambient should use linear activations 129 | 130 | def forward(self, x): 131 | """Runs the InvDepth layer.""" 132 | x = self.conv1(self.pad(x)) 133 | return self.activ(x) / self.min_depth 134 | 135 | ######################################################################################################################## 136 | 137 | def packing(x, r=2): 138 | """ 139 | Takes a [B,C,H,W] tensor and returns a [B,(r^2)C,H/r,W/r] tensor, by concatenating 140 | neighbor spatial pixels as extra channels. It is the inverse of nn.PixelShuffle 141 | (if you apply both sequentially you should get the same tensor) 142 | Parameters 143 | ---------- 144 | x : torch.Tensor [B,C,H,W] 145 | Input tensor 146 | r : int 147 | Packing ratio 148 | Returns 149 | ------- 150 | out : torch.Tensor [B,(r^2)C,H/r,W/r] 151 | Packed tensor 152 | """ 153 | b, c, h, w = x.shape 154 | out_channel = c * (r ** 2) 155 | out_h, out_w = h // r, w // r 156 | x = x.contiguous().view(b, c, out_h, r, out_w, r) 157 | return x.permute(0, 1, 3, 5, 2, 4).contiguous().view(b, out_channel, out_h, out_w) 158 | 159 | ######################################################################################################################## 160 | 161 | class PackLayerConv2d(nn.Module): 162 | """ 163 | Packing layer with 2d convolutions. Takes a [B,C,H,W] tensor, packs it 164 | into [B,(r^2)C,H/r,W/r] and then convolves it to produce [B,C,H/r,W/r]. 165 | """ 166 | def __init__(self, in_channels, kernel_size, r=2): 167 | """ 168 | Initializes a PackLayerConv2d object. 169 | Parameters 170 | ---------- 171 | in_channels : int 172 | Number of input channels 173 | kernel_size : int 174 | Kernel size 175 | r : int 176 | Packing ratio 177 | """ 178 | super().__init__() 179 | self.conv = Conv2D(in_channels * (r ** 2), in_channels, kernel_size, 1) 180 | self.pack = partial(packing, r=r) 181 | 182 | def forward(self, x): 183 | """Runs the PackLayerConv2d layer.""" 184 | x = self.pack(x) 185 | x = self.conv(x) 186 | return x 187 | 188 | 189 | class UnpackLayerConv2d(nn.Module): 190 | """ 191 | Unpacking layer with 2d convolutions. Takes a [B,C,H,W] tensor, convolves it 192 | to produce [B,(r^2)C,H,W] and then unpacks it to produce [B,C,rH,rW]. 
193 | """ 194 | def __init__(self, in_channels, out_channels, kernel_size, r=2): 195 | """ 196 | Initializes a UnpackLayerConv2d object. 197 | Parameters 198 | ---------- 199 | in_channels : int 200 | Number of input channels 201 | out_channels : int 202 | Number of output channels 203 | kernel_size : int 204 | Kernel size 205 | r : int 206 | Packing ratio 207 | """ 208 | super().__init__() 209 | self.conv = Conv2D(in_channels, out_channels * (r ** 2), kernel_size, 1) 210 | self.unpack = nn.PixelShuffle(r) 211 | 212 | def forward(self, x): 213 | """Runs the UnpackLayerConv2d layer.""" 214 | x = self.conv(x) 215 | x = self.unpack(x) 216 | return x 217 | 218 | ######################################################################################################################## 219 | 220 | class PackLayerConv3d(nn.Module): 221 | """ 222 | Packing layer with 3d convolutions. Takes a [B,C,H,W] tensor, packs it 223 | into [B,(r^2)C,H/r,W/r] and then convolves it to produce [B,C,H/r,W/r]. 224 | """ 225 | def __init__(self, in_channels, kernel_size, r=2, d=8): 226 | """ 227 | Initializes a PackLayerConv3d object. 228 | Parameters 229 | ---------- 230 | in_channels : int 231 | Number of input channels 232 | kernel_size : int 233 | Kernel size 234 | r : int 235 | Packing ratio 236 | d : int 237 | Number of 3D features 238 | """ 239 | super().__init__() 240 | self.conv = Conv2D(in_channels * (r ** 2) * d, in_channels, kernel_size, 1) 241 | self.pack = partial(packing, r=r) 242 | self.conv3d = nn.Conv3d(1, d, kernel_size=(3, 3, 3), 243 | stride=(1, 1, 1), padding=(1, 1, 1)) 244 | 245 | def forward(self, x): 246 | """Runs the PackLayerConv3d layer.""" 247 | x = self.pack(x) 248 | x = x.unsqueeze(1) 249 | x = self.conv3d(x) 250 | b, c, d, h, w = x.shape 251 | x = x.view(b, c * d, h, w) 252 | x = self.conv(x) 253 | return x 254 | 255 | 256 | class UnpackLayerConv3d(nn.Module): 257 | """ 258 | Unpacking layer with 3d convolutions. Takes a [B,C,H,W] tensor, convolves it 259 | to produce [B,(r^2)C,H,W] and then unpacks it to produce [B,C,rH,rW]. 260 | """ 261 | def __init__(self, in_channels, out_channels, kernel_size, r=2, d=8): 262 | """ 263 | Initializes a UnpackLayerConv3d object. 264 | Parameters 265 | ---------- 266 | in_channels : int 267 | Number of input channels 268 | out_channels : int 269 | Number of output channels 270 | kernel_size : int 271 | Kernel size 272 | r : int 273 | Packing ratio 274 | d : int 275 | Number of 3D features 276 | """ 277 | super().__init__() 278 | self.conv = Conv2D(in_channels, out_channels * (r ** 2) // d, kernel_size, 1) 279 | self.unpack = nn.PixelShuffle(r) 280 | self.conv3d = nn.Conv3d(1, d, kernel_size=(3, 3, 3), 281 | stride=(1, 1, 1), padding=(1, 1, 1)) 282 | 283 | def forward(self, x): 284 | """Runs the UnpackLayerConv3d layer.""" 285 | x = self.conv(x) 286 | x = x.unsqueeze(1) 287 | x = self.conv3d(x) 288 | b, c, d, h, w = x.shape 289 | x = x.view(b, c * d, h, w) 290 | x = self.unpack(x) 291 | return x 292 | 293 | ######################################################################################################################## -------------------------------------------------------------------------------- /src/networks/layers/resnet/depth_decoder.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 
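The packing/unpacking layers above are the core of PackNet: `packing` folds every r x r spatial neighbourhood into channels, and `nn.PixelShuffle` undoes it exactly, which lets the network change resolution without the information loss of pooling. The round-trip claim in the docstring can be checked directly; the snippet below copies `packing` from the file above (docstring shortened) and verifies it against `nn.PixelShuffle` on a random tensor whose sizes are arbitrary example values.

```
import torch
import torch.nn as nn

def packing(x, r=2):
    """Fold r x r spatial neighbourhoods into channels (inverse of nn.PixelShuffle)."""
    b, c, h, w = x.shape
    out_channel = c * (r ** 2)
    out_h, out_w = h // r, w // r
    x = x.contiguous().view(b, c, out_h, r, out_w, r)
    return x.permute(0, 1, 3, 5, 2, 4).contiguous().view(b, out_channel, out_h, out_w)

x = torch.randn(2, 4, 16, 16)
packed = packing(x, r=2)
print(packed.shape)                    # torch.Size([2, 16, 8, 8])

unpacked = nn.PixelShuffle(2)(packed)  # exact inverse: only reshapes and permutes
print(torch.equal(unpacked, x))        # True
```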
2 | 3 | # Adapted from monodepth2 4 | # https://github.com/nianticlabs/monodepth2/blob/master/networks/depth_decoder.py 5 | 6 | from __future__ import absolute_import, division, print_function 7 | 8 | import numpy as np 9 | import torch 10 | import torch.nn as nn 11 | 12 | from collections import OrderedDict 13 | from .layers import ConvBlock, Conv3x3, upsample 14 | 15 | 16 | class DepthDecoder(nn.Module): 17 | def __init__(self, num_ch_enc, scales=range(4), num_output_channels=1, use_skips=True): 18 | super(DepthDecoder, self).__init__() 19 | 20 | self.num_output_channels = num_output_channels 21 | self.use_skips = use_skips 22 | self.upsample_mode = 'nearest' 23 | self.scales = scales 24 | 25 | self.num_ch_enc = num_ch_enc 26 | self.num_ch_dec = np.array([16, 32, 64, 128, 256]) 27 | 28 | # decoder 29 | self.convs = OrderedDict() 30 | for i in range(4, -1, -1): 31 | # upconv_0 32 | num_ch_in = self.num_ch_enc[-1] if i == 4 else self.num_ch_dec[i + 1] 33 | num_ch_out = self.num_ch_dec[i] 34 | self.convs[("upconv", i, 0)] = ConvBlock(num_ch_in, num_ch_out) 35 | 36 | # upconv_1 37 | num_ch_in = self.num_ch_dec[i] 38 | if self.use_skips and i > 0: 39 | num_ch_in += self.num_ch_enc[i - 1] 40 | num_ch_out = self.num_ch_dec[i] 41 | self.convs[("upconv", i, 1)] = ConvBlock(num_ch_in, num_ch_out) 42 | 43 | for s in self.scales: 44 | self.convs[("dispconv", s)] = Conv3x3(self.num_ch_dec[s], self.num_output_channels) 45 | 46 | self.decoder = nn.ModuleList(list(self.convs.values())) 47 | self.sigmoid = nn.Sigmoid() 48 | 49 | def forward(self, input_features): 50 | self.outputs = {} 51 | 52 | # decoder 53 | x = input_features[-1] 54 | for i in range(4, -1, -1): 55 | x = self.convs[("upconv", i, 0)](x) 56 | x = [upsample(x)] 57 | if self.use_skips and i > 0: 58 | x += [input_features[i - 1]] 59 | x = torch.cat(x, 1) 60 | x = self.convs[("upconv", i, 1)](x) 61 | if i in self.scales: 62 | self.outputs[("disp", i)] = self.sigmoid(self.convs[("dispconv", i)](x)) 63 | 64 | return self.outputs -------------------------------------------------------------------------------- /src/networks/layers/resnet/layers.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 2 | 3 | # Adapted from monodepth2 4 | # https://github.com/nianticlabs/monodepth2/blob/master/layers.py 5 | 6 | from __future__ import absolute_import, division, print_function 7 | 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | 11 | 12 | def disp_to_depth(disp, min_depth, max_depth): 13 | """Convert network's sigmoid output into depth prediction 14 | The formula for this conversion is given in the 'additional considerations' 15 | section of the paper. 
16 | """ 17 | min_disp = 1 / max_depth 18 | max_disp = 1 / min_depth 19 | scaled_disp = min_disp + (max_disp - min_disp) * disp 20 | depth = 1 / scaled_disp 21 | return scaled_disp, depth 22 | 23 | 24 | class ConvBlock(nn.Module): 25 | """Layer to perform a convolution followed by ELU 26 | """ 27 | def __init__(self, in_channels, out_channels): 28 | super(ConvBlock, self).__init__() 29 | 30 | self.conv = Conv3x3(in_channels, out_channels) 31 | self.nonlin = nn.ELU(inplace=True) 32 | 33 | def forward(self, x): 34 | out = self.conv(x) 35 | out = self.nonlin(out) 36 | return out 37 | 38 | 39 | class Conv3x3(nn.Module): 40 | """Layer to pad and convolve input 41 | """ 42 | def __init__(self, in_channels, out_channels, use_refl=True): 43 | super(Conv3x3, self).__init__() 44 | 45 | if use_refl: 46 | self.pad = nn.ReflectionPad2d(1) 47 | else: 48 | self.pad = nn.ZeroPad2d(1) 49 | self.conv = nn.Conv2d(int(in_channels), int(out_channels), 3) 50 | 51 | def forward(self, x): 52 | out = self.pad(x) 53 | out = self.conv(out) 54 | return out 55 | 56 | 57 | def upsample(x): 58 | """Upsample input tensor by a factor of 2 59 | """ 60 | return F.interpolate(x, scale_factor=2, mode="nearest") -------------------------------------------------------------------------------- /src/networks/layers/resnet/resnet_encoder.py: -------------------------------------------------------------------------------- 1 | # Copyright Niantic 2019. Patent Pending. All rights reserved. 2 | # 3 | # This software is licensed under the terms of the Monodepth2 licence 4 | # which allows for non-commercial use only, the full terms of which are made 5 | # available in the LICENSE file. 6 | 7 | from __future__ import absolute_import, division, print_function 8 | 9 | import numpy as np 10 | 11 | import torch 12 | import torch.nn as nn 13 | import torchvision.models as models 14 | import torch.utils.model_zoo as model_zoo 15 | 16 | 17 | class ResNetMultiImageInput(models.ResNet): 18 | """Constructs a resnet model with varying number of input images. 19 | Adapted from https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py 20 | """ 21 | def __init__(self, block, layers, num_classes=1000, num_input_images=1): 22 | super(ResNetMultiImageInput, self).__init__(block, layers) 23 | self.inplanes = 64 24 | self.conv1 = nn.Conv2d( 25 | num_input_images * 3, 64, kernel_size=7, stride=2, padding=3, bias=False) 26 | self.bn1 = nn.BatchNorm2d(64) 27 | self.relu = nn.ReLU(inplace=True) 28 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 29 | self.layer1 = self._make_layer(block, 64, layers[0]) 30 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 31 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 32 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 33 | 34 | for m in self.modules(): 35 | if isinstance(m, nn.Conv2d): 36 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 37 | elif isinstance(m, nn.BatchNorm2d): 38 | nn.init.constant_(m.weight, 1) 39 | nn.init.constant_(m.bias, 0) 40 | 41 | 42 | def resnet_multiimage_input(num_layers, pretrained=False, num_input_images=1): 43 | """Constructs a ResNet model. 44 | Args: 45 | num_layers (int): Number of resnet layers. 
Must be 18 or 50 46 | pretrained (bool): If True, returns a model pre-trained on ImageNet 47 | num_input_images (int): Number of frames stacked as input 48 | """ 49 | assert num_layers in [18, 50], "Can only run with 18 or 50 layer resnet" 50 | blocks = {18: [2, 2, 2, 2], 50: [3, 4, 6, 3]}[num_layers] 51 | block_type = {18: models.resnet.BasicBlock, 50: models.resnet.Bottleneck}[num_layers] 52 | model = ResNetMultiImageInput(block_type, blocks, num_input_images=num_input_images) 53 | 54 | if pretrained: 55 | loaded = model_zoo.load_url(models.resnet.model_urls['resnet{}'.format(num_layers)]) 56 | loaded['conv1.weight'] = torch.cat( 57 | [loaded['conv1.weight']] * num_input_images, 1) / num_input_images 58 | model.load_state_dict(loaded) 59 | return model 60 | 61 | 62 | class ResnetEncoder(nn.Module): 63 | """Pytorch module for a resnet encoder 64 | """ 65 | def __init__(self, num_layers, pretrained, num_input_images=1): 66 | super(ResnetEncoder, self).__init__() 67 | 68 | self.num_ch_enc = np.array([64, 64, 128, 256, 512]) 69 | 70 | resnets = {18: models.resnet18, 71 | 34: models.resnet34, 72 | 50: models.resnet50, 73 | 101: models.resnet101, 74 | 152: models.resnet152} 75 | 76 | if num_layers not in resnets: 77 | raise ValueError("{} is not a valid number of resnet layers".format(num_layers)) 78 | 79 | if num_input_images > 1: 80 | self.encoder = resnet_multiimage_input(num_layers, pretrained, num_input_images) 81 | else: 82 | self.encoder = resnets[num_layers](pretrained) 83 | 84 | if num_layers > 34: 85 | self.num_ch_enc[1:] *= 4 86 | 87 | def forward(self, input_image): 88 | self.features = [] 89 | x = (input_image - 0.45) / 0.225 90 | x = self.encoder.conv1(x) 91 | x = self.encoder.bn1(x) 92 | self.features.append(self.encoder.relu(x)) 93 | self.features.append(self.encoder.layer1(self.encoder.maxpool(self.features[-1]))) 94 | self.features.append(self.encoder.layer2(self.features[-1])) 95 | self.features.append(self.encoder.layer3(self.features[-1])) 96 | self.features.append(self.encoder.layer4(self.features[-1])) 97 | 98 | return self.features -------------------------------------------------------------------------------- /src/networks/layers/resnet_encoder.py: -------------------------------------------------------------------------------- 1 | # Copyright Niantic 2019. Patent Pending. All rights reserved. 2 | # 3 | # This software is licensed under the terms of the Monodepth2 licence 4 | # which allows for non-commercial use only, the full terms of which are made 5 | # available in the LICENSE file. 6 | 7 | from __future__ import absolute_import, division, print_function 8 | 9 | import numpy as np 10 | 11 | import torch 12 | import torch.nn as nn 13 | import torchvision.models as models 14 | import torch.utils.model_zoo as model_zoo 15 | 16 | 17 | class ResNetMultiImageInput(models.ResNet): 18 | """Constructs a resnet model with varying number of input images. 
19 | Adapted from https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py 20 | """ 21 | def __init__(self, block, layers, num_classes=1000, num_input_images=1): 22 | super(ResNetMultiImageInput, self).__init__(block, layers) 23 | self.inplanes = 64 24 | self.conv1 = nn.Conv2d( 25 | num_input_images * 3, 64, kernel_size=7, stride=2, padding=3, bias=False) 26 | self.bn1 = nn.BatchNorm2d(64) 27 | self.relu = nn.ReLU(inplace=True) 28 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 29 | self.layer1 = self._make_layer(block, 64, layers[0]) 30 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 31 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 32 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 33 | 34 | for m in self.modules(): 35 | if isinstance(m, nn.Conv2d): 36 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 37 | elif isinstance(m, nn.BatchNorm2d): 38 | nn.init.constant_(m.weight, 1) 39 | nn.init.constant_(m.bias, 0) 40 | 41 | 42 | def resnet_multiimage_input(num_layers, pretrained=False, num_input_images=1): 43 | """Constructs a ResNet model. 44 | Args: 45 | num_layers (int): Number of resnet layers. Must be 18 or 50 46 | pretrained (bool): If True, returns a model pre-trained on ImageNet 47 | num_input_images (int): Number of frames stacked as input 48 | """ 49 | assert num_layers in [18, 50], "Can only run with 18 or 50 layer resnet" 50 | blocks = {18: [2, 2, 2, 2], 50: [3, 4, 6, 3]}[num_layers] 51 | block_type = {18: models.resnet.BasicBlock, 50: models.resnet.Bottleneck}[num_layers] 52 | model = ResNetMultiImageInput(block_type, blocks, num_input_images=num_input_images) 53 | 54 | if pretrained: 55 | loaded = model_zoo.load_url(models.resnet.model_urls['resnet{}'.format(num_layers)]) 56 | loaded['conv1.weight'] = torch.cat( 57 | [loaded['conv1.weight']] * num_input_images, 1) / num_input_images 58 | model.load_state_dict(loaded) 59 | return model 60 | 61 | 62 | class ResnetEncoder(nn.Module): 63 | """Pytorch module for a resnet encoder 64 | """ 65 | def __init__(self, num_layers, pretrained, num_input_images=1): 66 | super(ResnetEncoder, self).__init__() 67 | 68 | self.num_ch_enc = np.array([64, 64, 128, 256, 512]) 69 | 70 | resnets = {18: models.resnet18, 71 | 34: models.resnet34, 72 | 50: models.resnet50, 73 | 101: models.resnet101, 74 | 152: models.resnet152} 75 | 76 | if num_layers not in resnets: 77 | raise ValueError("{} is not a valid number of resnet layers".format(num_layers)) 78 | 79 | if num_input_images > 1: 80 | self.encoder = resnet_multiimage_input(num_layers, pretrained, num_input_images) 81 | else: 82 | self.encoder = resnets[num_layers](pretrained) 83 | 84 | if num_layers > 34: 85 | self.num_ch_enc[1:] *= 4 86 | 87 | def forward(self, input_image): 88 | self.features = [] 89 | x = (input_image - 0.45) / 0.225 90 | x = self.encoder.conv1(x) 91 | x = self.encoder.bn1(x) 92 | self.features.append(self.encoder.relu(x)) 93 | self.features.append(self.encoder.layer1(self.encoder.maxpool(self.features[-1]))) 94 | self.features.append(self.encoder.layer2(self.features[-1])) 95 | self.features.append(self.encoder.layer3(self.features[-1])) 96 | self.features.append(self.encoder.layer4(self.features[-1])) 97 | 98 | return self.features 99 | -------------------------------------------------------------------------------- /src/networks/layers01.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research 
Institute. All rights reserved. 2 | 3 | import torch 4 | import torch.nn as nn 5 | from functools import partial 6 | import torch.nn.functional as F 7 | 8 | ######################################################################################################################## 9 | activations = { 10 | 'tanh' : nn.Tanh(), 11 | 'sigmoid': nn.Sigmoid(), 12 | 'relu' : nn.ReLU(inplace=True), 13 | 'linear' : lambda x : x 14 | } 15 | 16 | class Conv2D(nn.Module): 17 | """ 18 | 2D convolution with GroupNorm and ELU 19 | Parameters 20 | ---------- 21 | in_channels : int 22 | Number of input channels 23 | out_channels : int 24 | Number of output channels 25 | kernel_size : int 26 | Kernel size 27 | stride : int 28 | Stride 29 | """ 30 | def __init__(self, in_channels, out_channels, kernel_size, stride): 31 | super().__init__() 32 | self.kernel_size = kernel_size 33 | self.conv_base = nn.Conv2d( 34 | in_channels, out_channels, kernel_size=kernel_size, stride=stride) 35 | self.pad = nn.ConstantPad2d([kernel_size // 2] * 4, value=0) 36 | self.normalize = torch.nn.GroupNorm(16, out_channels) 37 | self.activ = nn.ELU(inplace=True) 38 | 39 | def forward(self, x): 40 | """Runs the Conv2D layer.""" 41 | x = self.conv_base(self.pad(x)) 42 | return self.activ(self.normalize(x)) 43 | 44 | 45 | class ResidualConv(nn.Module): 46 | """2D Convolutional residual block with GroupNorm and ELU""" 47 | def __init__(self, in_channels, out_channels, stride, dropout=None): 48 | """ 49 | Initializes a ResidualConv object. 50 | Parameters 51 | ---------- 52 | in_channels : int 53 | Number of input channels 54 | out_channels : int 55 | Number of output channels 56 | stride : int 57 | Stride 58 | dropout : float 59 | Dropout value 60 | """ 61 | super().__init__() 62 | self.conv1 = Conv2D(in_channels, out_channels, 3, stride) 63 | self.conv2 = Conv2D(out_channels, out_channels, 3, 1) 64 | self.conv3 = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride) 65 | self.normalize = torch.nn.GroupNorm(16, out_channels) 66 | self.activ = nn.ELU(inplace=True) 67 | 68 | if dropout: 69 | self.conv3 = nn.Sequential(self.conv3, nn.Dropout2d(dropout)) 70 | 71 | def forward(self, x): 72 | """Runs the ResidualConv layer.""" 73 | x_out = self.conv1(x) 74 | x_out = self.conv2(x_out) 75 | shortcut = self.conv3(x) 76 | return self.activ(self.normalize(x_out + shortcut)) 77 | 78 | 79 | def ResidualBlock(in_channels, out_channels, num_blocks, stride, dropout=None): 80 | """ 81 | Returns a ResidualBlock with various ResidualConv layers. 82 | Parameters 83 | ---------- 84 | in_channels : int 85 | Number of input channels 86 | out_channels : int 87 | Number of output channels 88 | num_blocks : int 89 | Number of residual blocks 90 | stride : int 91 | Stride 92 | dropout : float 93 | Dropout value 94 | """ 95 | layers = [ResidualConv(in_channels, out_channels, stride, dropout=dropout)] 96 | for i in range(1, num_blocks): 97 | layers.append(ResidualConv(out_channels, out_channels, 1, dropout=dropout)) 98 | return nn.Sequential(*layers) 99 | 100 | 101 | class InvDepth(nn.Module): 102 | """Inverse depth layer""" 103 | def __init__(self, in_channels, out_channels=1, min_depth=0.5, activation = 'sigmoid'): 104 | """ 105 | Initializes an InvDepth object. 
106 | Parameters 107 | ---------- 108 | in_channels : int 109 | Number of input channels 110 | out_channels : int 111 | Number of output channels 112 | min_depth : float 113 | Minimum depth value to calculate 114 | """ 115 | super().__init__() 116 | self.min_depth = min_depth 117 | self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1) 118 | self.pad = nn.ConstantPad2d([1] * 4, value=0) 119 | # self.activ = nn.Sigmoid() 120 | self.activ = activations[activation] # changed it to dict because for albedo and ambient should use linear activations 121 | 122 | def forward(self, x): 123 | """Runs the InvDepth layer.""" 124 | x = self.conv1(self.pad(x)) 125 | return self.activ(x) / self.min_depth 126 | 127 | ######################################################################################################################## 128 | 129 | def packing(x, r=2): 130 | """ 131 | Takes a [B,C,H,W] tensor and returns a [B,(r^2)C,H/r,W/r] tensor, by concatenating 132 | neighbor spatial pixels as extra channels. It is the inverse of nn.PixelShuffle 133 | (if you apply both sequentially you should get the same tensor) 134 | Parameters 135 | ---------- 136 | x : torch.Tensor [B,C,H,W] 137 | Input tensor 138 | r : int 139 | Packing ratio 140 | Returns 141 | ------- 142 | out : torch.Tensor [B,(r^2)C,H/r,W/r] 143 | Packed tensor 144 | """ 145 | b, c, h, w = x.shape 146 | out_channel = c * (r ** 2) 147 | out_h, out_w = h // r, w // r 148 | x = x.contiguous().view(b, c, out_h, r, out_w, r) 149 | return x.permute(0, 1, 3, 5, 2, 4).contiguous().view(b, out_channel, out_h, out_w) 150 | 151 | ######################################################################################################################## 152 | 153 | class PackLayerConv2d(nn.Module): 154 | """ 155 | Packing layer with 2d convolutions. Takes a [B,C,H,W] tensor, packs it 156 | into [B,(r^2)C,H/r,W/r] and then convolves it to produce [B,C,H/r,W/r]. 157 | """ 158 | def __init__(self, in_channels, kernel_size, r=2): 159 | """ 160 | Initializes a PackLayerConv2d object. 161 | Parameters 162 | ---------- 163 | in_channels : int 164 | Number of input channels 165 | kernel_size : int 166 | Kernel size 167 | r : int 168 | Packing ratio 169 | """ 170 | super().__init__() 171 | self.conv = Conv2D(in_channels * (r ** 2), in_channels, kernel_size, 1) 172 | self.pack = partial(packing, r=r) 173 | 174 | def forward(self, x): 175 | """Runs the PackLayerConv2d layer.""" 176 | x = self.pack(x) 177 | x = self.conv(x) 178 | return x 179 | 180 | 181 | class UnpackLayerConv2d(nn.Module): 182 | """ 183 | Unpacking layer with 2d convolutions. Takes a [B,C,H,W] tensor, convolves it 184 | to produce [B,(r^2)C,H,W] and then unpacks it to produce [B,C,rH,rW]. 185 | """ 186 | def __init__(self, in_channels, out_channels, kernel_size, r=2): 187 | """ 188 | Initializes a UnpackLayerConv2d object. 
189 | Parameters 190 | ---------- 191 | in_channels : int 192 | Number of input channels 193 | out_channels : int 194 | Number of output channels 195 | kernel_size : int 196 | Kernel size 197 | r : int 198 | Packing ratio 199 | """ 200 | super().__init__() 201 | self.conv = Conv2D(in_channels, out_channels * (r ** 2), kernel_size, 1) 202 | self.unpack = nn.PixelShuffle(r) 203 | 204 | def forward(self, x): 205 | """Runs the UnpackLayerConv2d layer.""" 206 | x = self.conv(x) 207 | x = self.unpack(x) 208 | return x 209 | 210 | ######################################################################################################################## 211 | 212 | class PackLayerConv3d(nn.Module): 213 | """ 214 | Packing layer with 3d convolutions. Takes a [B,C,H,W] tensor, packs it 215 | into [B,(r^2)C,H/r,W/r] and then convolves it to produce [B,C,H/r,W/r]. 216 | """ 217 | def __init__(self, in_channels, kernel_size, r=2, d=8): 218 | """ 219 | Initializes a PackLayerConv3d object. 220 | Parameters 221 | ---------- 222 | in_channels : int 223 | Number of input channels 224 | kernel_size : int 225 | Kernel size 226 | r : int 227 | Packing ratio 228 | d : int 229 | Number of 3D features 230 | """ 231 | super().__init__() 232 | self.conv = Conv2D(in_channels * (r ** 2) * d, in_channels, kernel_size, 1) 233 | self.pack = partial(packing, r=r) 234 | self.conv3d = nn.Conv3d(1, d, kernel_size=(3, 3, 3), 235 | stride=(1, 1, 1), padding=(1, 1, 1)) 236 | 237 | def forward(self, x): 238 | """Runs the PackLayerConv3d layer.""" 239 | x = self.pack(x) 240 | x = x.unsqueeze(1) 241 | x = self.conv3d(x) 242 | b, c, d, h, w = x.shape 243 | x = x.view(b, c * d, h, w) 244 | x = self.conv(x) 245 | return x 246 | 247 | 248 | class UnpackLayerConv3d(nn.Module): 249 | """ 250 | Unpacking layer with 3d convolutions. Takes a [B,C,H,W] tensor, convolves it 251 | to produce [B,(r^2)C,H,W] and then unpacks it to produce [B,C,rH,rW]. 252 | """ 253 | def __init__(self, in_channels, out_channels, kernel_size, r=2, d=8): 254 | """ 255 | Initializes a UnpackLayerConv3d object. 256 | Parameters 257 | ---------- 258 | in_channels : int 259 | Number of input channels 260 | out_channels : int 261 | Number of output channels 262 | kernel_size : int 263 | Kernel size 264 | r : int 265 | Packing ratio 266 | d : int 267 | Number of 3D features 268 | """ 269 | super().__init__() 270 | self.conv = Conv2D(in_channels, out_channels * (r ** 2) // d, kernel_size, 1) 271 | self.unpack = nn.PixelShuffle(r) 272 | self.conv3d = nn.Conv3d(1, d, kernel_size=(3, 3, 3), 273 | stride=(1, 1, 1), padding=(1, 1, 1)) 274 | 275 | def forward(self, x): 276 | """Runs the UnpackLayerConv3d layer.""" 277 | x = self.conv(x) 278 | x = x.unsqueeze(1) 279 | x = self.conv3d(x) 280 | b, c, d, h, w = x.shape 281 | x = x.view(b, c * d, h, w) 282 | x = self.unpack(x) 283 | return x -------------------------------------------------------------------------------- /src/networks/pose/pose_cnn.py: -------------------------------------------------------------------------------- 1 | # Copyright Niantic 2019. Patent Pending. All rights reserved. 2 | # 3 | # This software is licensed under the terms of the Monodepth2 licence 4 | # which allows for non-commercial use only, the full terms of which are made 5 | # available in the LICENSE file. 
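The InvDepth head defined earlier in this file predicts inverse depth: a 3x3 convolution, an activation picked from the `activations` dict, and a division by `min_depth`. With the default sigmoid the output stays in (0, 1/min_depth), so the corresponding depth never falls below `min_depth`; the 'linear' entry leaves the albedo and ambient heads unconstrained. Below is a small self-contained check of that range; the class is a trimmed copy of the source (only the sigmoid and linear activations are kept) and the input sizes are arbitrary example values.

```
import torch
import torch.nn as nn

activations = {'sigmoid': nn.Sigmoid(), 'linear': lambda x: x}

class InvDepth(nn.Module):
    """Trimmed copy of the InvDepth head: 3x3 conv -> activation -> divide by min_depth."""
    def __init__(self, in_channels, out_channels=1, min_depth=0.5, activation='sigmoid'):
        super().__init__()
        self.min_depth = min_depth
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1)
        self.pad = nn.ConstantPad2d([1] * 4, value=0)
        self.activ = activations[activation]

    def forward(self, x):
        return self.activ(self.conv1(self.pad(x))) / self.min_depth

head = InvDepth(in_channels=32, min_depth=0.5)           # sigmoid head -> bounded inverse depth
inv_depth = head(torch.randn(1, 32, 48, 160))
depth = 1.0 / inv_depth
print(inv_depth.max() <= 1.0 / 0.5, depth.min() >= 0.5)  # both True: depth >= min_depth
```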
6 | 7 | from __future__ import absolute_import, division, print_function 8 | 9 | import torch 10 | import torch.nn as nn 11 | 12 | 13 | class PoseCNN(nn.Module): 14 | def __init__(self, num_input_frames): 15 | super(PoseCNN, self).__init__() 16 | 17 | self.num_input_frames = num_input_frames 18 | 19 | self.convs = {} 20 | self.convs[0] = nn.Conv2d(3 * num_input_frames, 16, 7, 2, 3) 21 | self.convs[1] = nn.Conv2d(16, 32, 5, 2, 2) 22 | self.convs[2] = nn.Conv2d(32, 64, 3, 2, 1) 23 | self.convs[3] = nn.Conv2d(64, 128, 3, 2, 1) 24 | self.convs[4] = nn.Conv2d(128, 256, 3, 2, 1) 25 | self.convs[5] = nn.Conv2d(256, 256, 3, 2, 1) 26 | self.convs[6] = nn.Conv2d(256, 256, 3, 2, 1) 27 | 28 | self.pose_conv = nn.Conv2d(256, 6 * (num_input_frames - 1), 1) 29 | 30 | self.num_convs = len(self.convs) 31 | 32 | self.relu = nn.ReLU(True) 33 | 34 | self.net = nn.ModuleList(list(self.convs.values())) 35 | 36 | def forward(self, out): 37 | 38 | for i in range(self.num_convs): 39 | out = self.convs[i](out) 40 | out = self.relu(out) 41 | 42 | out = self.pose_conv(out) 43 | out = out.mean(3).mean(2) 44 | 45 | out = 0.01 * out.view(-1, self.num_input_frames - 1, 1, 6) 46 | 47 | axisangle = out[..., :3] 48 | translation = out[..., 3:] 49 | 50 | return axisangle, translation 51 | -------------------------------------------------------------------------------- /src/networks/pose/pose_decoder.py: -------------------------------------------------------------------------------- 1 | # Copyright Niantic 2019. Patent Pending. All rights reserved. 2 | # 3 | # This software is licensed under the terms of the Monodepth2 licence 4 | # which allows for non-commercial use only, the full terms of which are made 5 | # available in the LICENSE file. 6 | 7 | from __future__ import absolute_import, division, print_function 8 | 9 | import torch 10 | import torch.nn as nn 11 | from collections import OrderedDict 12 | 13 | 14 | class PoseDecoder(nn.Module): 15 | def __init__(self, num_ch_enc, num_input_features, num_frames_to_predict_for=None, stride=1): 16 | super(PoseDecoder, self).__init__() 17 | 18 | self.num_ch_enc = num_ch_enc 19 | self.num_input_features = num_input_features 20 | 21 | if num_frames_to_predict_for is None: 22 | num_frames_to_predict_for = num_input_features - 1 23 | self.num_frames_to_predict_for = num_frames_to_predict_for 24 | 25 | self.convs = OrderedDict() 26 | self.convs[("squeeze")] = nn.Conv2d(self.num_ch_enc[-1], 256, 1) 27 | self.convs[("pose", 0)] = nn.Conv2d(num_input_features * 256, 256, 3, stride, 1) 28 | self.convs[("pose", 1)] = nn.Conv2d(256, 256, 3, stride, 1) 29 | self.convs[("pose", 2)] = nn.Conv2d(256, 6 * num_frames_to_predict_for, 1) 30 | 31 | self.relu = nn.ReLU() 32 | 33 | self.net = nn.ModuleList(list(self.convs.values())) 34 | 35 | def forward(self, input_features): 36 | last_features = [f[-1] for f in input_features] 37 | 38 | cat_features = [self.relu(self.convs["squeeze"](f)) for f in last_features] 39 | cat_features = torch.cat(cat_features, 1) 40 | 41 | out = cat_features 42 | for i in range(3): 43 | out = self.convs[("pose", i)](out) 44 | if i != 2: 45 | out = self.relu(out) 46 | 47 | out = out.mean(3).mean(2) 48 | 49 | out = 0.01 * out.view(-1, self.num_frames_to_predict_for, 1, 6) 50 | 51 | axisangle = out[..., :3] 52 | translation = out[..., 3:] 53 | 54 | return axisangle, translation 55 | -------------------------------------------------------------------------------- /src/networks/pose_cnn.py: -------------------------------------------------------------------------------- 1 | 
# Copyright Niantic 2019. Patent Pending. All rights reserved. 2 | # 3 | # This software is licensed under the terms of the Monodepth2 licence 4 | # which allows for non-commercial use only, the full terms of which are made 5 | # available in the LICENSE file. 6 | 7 | from __future__ import absolute_import, division, print_function 8 | 9 | import torch 10 | import torch.nn as nn 11 | 12 | 13 | class PoseCNN(nn.Module): 14 | def __init__(self, num_input_frames): 15 | super(PoseCNN, self).__init__() 16 | 17 | self.num_input_frames = num_input_frames 18 | 19 | self.convs = {} 20 | self.convs[0] = nn.Conv2d(3 * num_input_frames, 16, 7, 2, 3) 21 | self.convs[1] = nn.Conv2d(16, 32, 5, 2, 2) 22 | self.convs[2] = nn.Conv2d(32, 64, 3, 2, 1) 23 | self.convs[3] = nn.Conv2d(64, 128, 3, 2, 1) 24 | self.convs[4] = nn.Conv2d(128, 256, 3, 2, 1) 25 | self.convs[5] = nn.Conv2d(256, 256, 3, 2, 1) 26 | self.convs[6] = nn.Conv2d(256, 256, 3, 2, 1) 27 | 28 | self.pose_conv = nn.Conv2d(256, 6 * (num_input_frames - 1), 1) 29 | 30 | self.num_convs = len(self.convs) 31 | 32 | self.relu = nn.ReLU(True) 33 | 34 | self.net = nn.ModuleList(list(self.convs.values())) 35 | 36 | def forward(self, out): 37 | 38 | for i in range(self.num_convs): 39 | out = self.convs[i](out) 40 | out = self.relu(out) 41 | 42 | out = self.pose_conv(out) 43 | out = out.mean(3).mean(2) 44 | 45 | out = 0.01 * out.view(-1, self.num_input_frames - 1, 1, 6) 46 | 47 | axisangle = out[..., :3] 48 | translation = out[..., 3:] 49 | 50 | return axisangle, translation 51 | -------------------------------------------------------------------------------- /src/networks/pose_decoder.py: -------------------------------------------------------------------------------- 1 | # Copyright Niantic 2019. Patent Pending. All rights reserved. 2 | # 3 | # This software is licensed under the terms of the Monodepth2 licence 4 | # which allows for non-commercial use only, the full terms of which are made 5 | # available in the LICENSE file. 
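PoseCNN above stacks the input frames along the channel dimension, reduces them with seven stride-2 convolutions, and regresses six values per frame pair: an axis-angle rotation and a translation, both multiplied by 0.01 so the network starts near the identity motion. The tail of that computation is sketched below on a dummy feature map; the batch size, spatial size, and frame count are arbitrary example values standing in for the output of `pose_conv`.

```
import torch

batch, num_input_frames = 2, 2        # e.g. the current frame plus one adjacent gated frame
# Dummy pose_conv output: 6 * (num_input_frames - 1) channels on a coarse grid.
out = torch.randn(batch, 6 * (num_input_frames - 1), 3, 10)

out = out.mean(3).mean(2)                              # global average pool -> [B, 6*(N-1)]
out = 0.01 * out.view(-1, num_input_frames - 1, 1, 6)  # scale down for small initial motions

axisangle = out[..., :3]                               # [B, N-1, 1, 3] axis-angle rotations
translation = out[..., 3:]                             # [B, N-1, 1, 3] translations
print(axisangle.shape, translation.shape)
```

The ResNet-based PoseDecoder that follows ends with the same reshape and split, so both pose backbones hand the trainer an identical [B, N-1, 1, 6] parameterisation.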
6 | 7 | from __future__ import absolute_import, division, print_function 8 | 9 | import torch 10 | import torch.nn as nn 11 | from collections import OrderedDict 12 | 13 | 14 | class PoseDecoder(nn.Module): 15 | def __init__(self, num_ch_enc, num_input_features, num_frames_to_predict_for=None, stride=1): 16 | super(PoseDecoder, self).__init__() 17 | 18 | self.num_ch_enc = num_ch_enc 19 | self.num_input_features = num_input_features 20 | 21 | if num_frames_to_predict_for is None: 22 | num_frames_to_predict_for = num_input_features - 1 23 | self.num_frames_to_predict_for = num_frames_to_predict_for 24 | 25 | self.convs = OrderedDict() 26 | self.convs[("squeeze")] = nn.Conv2d(self.num_ch_enc[-1], 256, 1) 27 | self.convs[("pose", 0)] = nn.Conv2d(num_input_features * 256, 256, 3, stride, 1) 28 | self.convs[("pose", 1)] = nn.Conv2d(256, 256, 3, stride, 1) 29 | self.convs[("pose", 2)] = nn.Conv2d(256, 6 * num_frames_to_predict_for, 1) 30 | 31 | self.relu = nn.ReLU() 32 | 33 | self.net = nn.ModuleList(list(self.convs.values())) 34 | 35 | def forward(self, input_features): 36 | last_features = [f[-1] for f in input_features] 37 | 38 | cat_features = [self.relu(self.convs["squeeze"](f)) for f in last_features] 39 | cat_features = torch.cat(cat_features, 1) 40 | 41 | out = cat_features 42 | for i in range(3): 43 | out = self.convs[("pose", i)](out) 44 | if i != 2: 45 | out = self.relu(out) 46 | 47 | out = out.mean(3).mean(2) 48 | 49 | out = 0.01 * out.view(-1, self.num_frames_to_predict_for, 1, 6) 50 | 51 | axisangle = out[..., :3] 52 | translation = out[..., 3:] 53 | 54 | return axisangle, translation 55 | -------------------------------------------------------------------------------- /src/networks/resnet_encoder.py: -------------------------------------------------------------------------------- 1 | # Copyright Niantic 2019. Patent Pending. All rights reserved. 2 | # 3 | # This software is licensed under the terms of the Monodepth2 licence 4 | # which allows for non-commercial use only, the full terms of which are made 5 | # available in the LICENSE file. 6 | 7 | from __future__ import absolute_import, division, print_function 8 | 9 | import numpy as np 10 | 11 | import torch 12 | import torch.nn as nn 13 | import torchvision.models as models 14 | import torch.utils.model_zoo as model_zoo 15 | 16 | 17 | class ResNetMultiImageInput(models.ResNet): 18 | """Constructs a resnet model with varying number of input images. 
19 | Adapted from https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py 20 | """ 21 | def __init__(self, block, layers, num_classes=1000, num_input_images=1): 22 | super(ResNetMultiImageInput, self).__init__(block, layers) 23 | self.inplanes = 64 24 | self.conv1 = nn.Conv2d( 25 | num_input_images * 3, 64, kernel_size=7, stride=2, padding=3, bias=False) 26 | self.bn1 = nn.BatchNorm2d(64) 27 | self.relu = nn.ReLU(inplace=True) 28 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 29 | self.layer1 = self._make_layer(block, 64, layers[0]) 30 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 31 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 32 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 33 | 34 | for m in self.modules(): 35 | if isinstance(m, nn.Conv2d): 36 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 37 | elif isinstance(m, nn.BatchNorm2d): 38 | nn.init.constant_(m.weight, 1) 39 | nn.init.constant_(m.bias, 0) 40 | 41 | 42 | def resnet_multiimage_input(num_layers, pretrained=False, num_input_images=1): 43 | """Constructs a ResNet model. 44 | Args: 45 | num_layers (int): Number of resnet layers. Must be 18 or 50 46 | pretrained (bool): If True, returns a model pre-trained on ImageNet 47 | num_input_images (int): Number of frames stacked as input 48 | """ 49 | assert num_layers in [18, 50], "Can only run with 18 or 50 layer resnet" 50 | blocks = {18: [2, 2, 2, 2], 50: [3, 4, 6, 3]}[num_layers] 51 | block_type = {18: models.resnet.BasicBlock, 50: models.resnet.Bottleneck}[num_layers] 52 | model = ResNetMultiImageInput(block_type, blocks, num_input_images=num_input_images) 53 | 54 | if pretrained: 55 | loaded = model_zoo.load_url(models.resnet.model_urls['resnet{}'.format(num_layers)]) 56 | loaded['conv1.weight'] = torch.cat( 57 | [loaded['conv1.weight']] * num_input_images, 1) / num_input_images 58 | model.load_state_dict(loaded) 59 | return model 60 | 61 | 62 | class ResnetEncoder(nn.Module): 63 | """Pytorch module for a resnet encoder 64 | """ 65 | def __init__(self, num_layers, pretrained, num_input_images=1): 66 | super(ResnetEncoder, self).__init__() 67 | 68 | self.num_ch_enc = np.array([64, 64, 128, 256, 512]) 69 | 70 | resnets = {18: models.resnet18, 71 | 34: models.resnet34, 72 | 50: models.resnet50, 73 | 101: models.resnet101, 74 | 152: models.resnet152} 75 | 76 | if num_layers not in resnets: 77 | raise ValueError("{} is not a valid number of resnet layers".format(num_layers)) 78 | 79 | if num_input_images > 1: 80 | self.encoder = resnet_multiimage_input(num_layers, pretrained, num_input_images) 81 | else: 82 | self.encoder = resnets[num_layers](pretrained) 83 | 84 | if num_layers > 34: 85 | self.num_ch_enc[1:] *= 4 86 | 87 | def forward(self, input_image): 88 | self.features = [] 89 | x = (input_image - 0.45) / 0.225 90 | x = self.encoder.conv1(x) 91 | x = self.encoder.bn1(x) 92 | self.features.append(self.encoder.relu(x)) 93 | self.features.append(self.encoder.layer1(self.encoder.maxpool(self.features[-1]))) 94 | self.features.append(self.encoder.layer2(self.features[-1])) 95 | self.features.append(self.encoder.layer3(self.features[-1])) 96 | self.features.append(self.encoder.layer4(self.features[-1])) 97 | 98 | return self.features 99 | -------------------------------------------------------------------------------- /src/networks/resnet_encoder2.py: -------------------------------------------------------------------------------- 1 | # Copyright Niantic 2019. 
Patent Pending. All rights reserved. 2 | # 3 | # This software is licensed under the terms of the Monodepth2 licence 4 | # which allows for non-commercial use only, the full terms of which are made 5 | # available in the LICENSE file. 6 | 7 | from __future__ import absolute_import, division, print_function 8 | 9 | import numpy as np 10 | 11 | import torch 12 | import torch.nn as nn 13 | import torchvision.models as models 14 | import torch.utils.model_zoo as model_zoo 15 | 16 | 17 | class ResNetMultiImageInput(models.ResNet): 18 | """Constructs a resnet model with varying number of input images. 19 | Adapted from https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py 20 | """ 21 | def __init__(self, block, layers, num_classes=1000, num_input_images=1): 22 | super(ResNetMultiImageInput, self).__init__(block, layers) 23 | self.inplanes = 64 24 | self.conv1 = nn.Conv2d( 25 | num_input_images * 3, 64, kernel_size=7, stride=2, padding=3, bias=False) 26 | self.bn1 = nn.BatchNorm2d(64) 27 | self.relu = nn.ReLU(inplace=True) 28 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 29 | self.layer1 = self._make_layer(block, 64, layers[0]) 30 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 31 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 32 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 33 | 34 | for m in self.modules(): 35 | if isinstance(m, nn.Conv2d): 36 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 37 | elif isinstance(m, nn.BatchNorm2d): 38 | nn.init.constant_(m.weight, 1) 39 | nn.init.constant_(m.bias, 0) 40 | 41 | 42 | def resnet_multiimage_input(num_layers, pretrained=False, num_input_images=1): 43 | """Constructs a ResNet model. 44 | Args: 45 | num_layers (int): Number of resnet layers. 
Must be 18 or 50 46 | pretrained (bool): If True, returns a model pre-trained on ImageNet 47 | num_input_images (int): Number of frames stacked as input 48 | """ 49 | assert num_layers in [18, 50], "Can only run with 18 or 50 layer resnet" 50 | blocks = {18: [2, 2, 2, 2], 50: [3, 4, 6, 3]}[num_layers] 51 | block_type = {18: models.resnet.BasicBlock, 50: models.resnet.Bottleneck}[num_layers] 52 | model = ResNetMultiImageInput(block_type, blocks, num_input_images=num_input_images) 53 | 54 | if pretrained: 55 | loaded = model_zoo.load_url(models.resnet.model_urls['resnet{}'.format(num_layers)]) 56 | loaded['conv1.weight'] = torch.cat( 57 | [loaded['conv1.weight']] * num_input_images, 1) / num_input_images 58 | model.load_state_dict(loaded) 59 | return model 60 | 61 | 62 | class ResnetEncoder(nn.Module): 63 | """Pytorch module for a resnet encoder 64 | """ 65 | def __init__(self, num_layers, pretrained, num_input_images=1): 66 | super(ResnetEncoder, self).__init__() 67 | 68 | self.num_ch_enc = np.array([64, 64, 128, 256, 512]) 69 | 70 | resnets = {18: models.resnet18, 71 | 34: models.resnet34, 72 | 50: models.resnet50, 73 | 101: models.resnet101, 74 | 152: models.resnet152} 75 | 76 | if num_layers not in resnets: 77 | raise ValueError("{} is not a valid number of resnet layers".format(num_layers)) 78 | 79 | if num_input_images > 1: 80 | self.encoder = resnet_multiimage_input(num_layers, pretrained, num_input_images) 81 | else: 82 | self.encoder = resnets[num_layers](pretrained) 83 | 84 | if num_layers > 34: 85 | self.num_ch_enc[1:] *= 4 86 | 87 | def forward(self, input_image): 88 | self.features = [] 89 | x = (input_image - 0.45) / 0.225 90 | x = self.encoder.conv1(x) 91 | x = self.encoder.bn1(x) 92 | self.features.append(self.encoder.relu(x)) 93 | self.features.append(self.encoder.layer1(self.encoder.maxpool(self.features[-1]))) 94 | self.features.append(self.encoder.layer2(self.features[-1])) 95 | self.features.append(self.encoder.layer3(self.features[-1])) 96 | self.features.append(self.encoder.layer4(self.features[-1])) 97 | 98 | return self.features -------------------------------------------------------------------------------- /src/options.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import os 4 | import argparse 5 | 6 | class GatedOptions: 7 | 8 | def __init__(self) -> None: 9 | self.parser = argparse.ArgumentParser(description="Depth From Gated Profile Options") 10 | 11 | # PATH options 12 | self.parser.add_argument("--data_dir", 13 | type=str, 14 | required=True, 15 | help="directory gated dataset") 16 | self.parser.add_argument("--log_dir", 17 | type=str, 18 | required=True, 19 | help="directory to store logs") 20 | self.parser.add_argument("--coeff_fpath", 21 | type=str, 22 | required=True, 23 | help="file with stored chebychev coefficients") 24 | self.parser.add_argument("--depth_flat_world_fpath", 25 | type=str, 26 | required=False, 27 | help="path to flat world npz file") 28 | 29 | # TRAINING options 30 | self.parser.add_argument("--model_name", 31 | type=str, 32 | help="the name of the folder to save the model in", 33 | default="gated2gated") 34 | self.parser.add_argument("--model_type", 35 | type=str, 36 | help="model structure to use", 37 | default="multinetwork", 38 | choices=["multinetwork","multioutput"]) 39 | self.parser.add_argument("--depth_model", 40 | type=str, 41 | help="depth model to use", 42 | default="packnet", 43 | 
choices=["packnet","resnet","packnet_full"]) 44 | self.parser.add_argument("--img_ext", 45 | type=str, 46 | help="image extension to use", 47 | default="png", 48 | choices=["png","tiff"]) 49 | self.parser.add_argument("--exp_num", 50 | type=int, 51 | help="experiment number", 52 | default=-1) 53 | self.parser.add_argument("--exp_name", 54 | type=str, 55 | help="the name of the folder to save the model in", 56 | default="gated2gated") 57 | self.parser.add_argument("--exp_metainfo", 58 | type=str, 59 | default="Main Experiment", 60 | help="additional info regarding experiment") 61 | self.parser.add_argument("--height", 62 | type=int, 63 | default=512, 64 | help="crop height of the image") 65 | self.parser.add_argument("--width", 66 | type=int, 67 | default=1024, 68 | help="crop width of the image") 69 | self.parser.add_argument("--num_bits", 70 | type=int, 71 | help="number of bits for gated image intensity", 72 | default=10) 73 | self.parser.add_argument("--scales", 74 | nargs="+", 75 | type=int, 76 | help="scales used in the loss", 77 | default=[0,1,2,3]) 78 | self.parser.add_argument("--frame_ids", 79 | nargs="+", 80 | type=int, 81 | help="frames to load", 82 | default=[0, -1, 1]) 83 | self.parser.add_argument("--pose_model_type", 84 | type=str, 85 | help="normal or shared", 86 | default="separate_resnet", 87 | choices=["posecnn", "separate_resnet"]) 88 | self.parser.add_argument("--num_layers", 89 | type=int, 90 | help="number of resnet layers", 91 | default=18, 92 | choices=[18, 34, 50, 101, 152]) 93 | self.parser.add_argument("--weights_init", 94 | type=str, 95 | help="pretrained or scratch", 96 | default="pretrained", 97 | choices=["pretrained", "scratch"]) 98 | self.parser.add_argument("--pose_model_input", 99 | type=str, 100 | help="how many images the pose network gets", 101 | default="pairs", 102 | choices=["pairs", "all"]) 103 | self.parser.add_argument("--depth_normalizer", 104 | type=float, 105 | help="constant to normalize depth", 106 | default=150.0) 107 | self.parser.add_argument("--train_depth_normalizer", 108 | action='store_true', 109 | help="train only a single scalar constant,\ 110 | while freezing depth, pose, ambient, and albedo head") 111 | self.parser.add_argument("--min_depth", 112 | type=float, 113 | help="minimum depth", 114 | default=0.1) 115 | self.parser.add_argument("--max_depth", 116 | type=float, 117 | help="maximum depth", 118 | default=100.0) 119 | self.parser.add_argument("--snr_mask", 120 | action='store_true', 121 | help="whether to use SNR based mask for reprojection loss") 122 | self.parser.add_argument("--intensity_mask", 123 | action='store_true', 124 | help="whether to use Intensity based mask for reprojection loss") 125 | self.parser.add_argument("--min_snr_val", 126 | type=float, 127 | default=0.04, 128 | help="Minimum SNR value for SNR mask") 129 | self.parser.add_argument("--dataset", 130 | type=str, 131 | help="dataset to train on", 132 | default="gated", 133 | choices=["gated"]) 134 | self.parser.add_argument("--split", 135 | type=str, 136 | help="which training split to use", 137 | choices=["gated2gated"], 138 | default="gated2gated") 139 | self.parser.add_argument("--dropout", 140 | type=float, 141 | help="dropout rate for packnet", 142 | default=0.5) 143 | self.parser.add_argument("--feat_stack", 144 | type=str, 145 | help="whether to use concatenation(A) or Addition (B)", 146 | default="A", 147 | choices=["A", "B"]) 148 | self.parser.add_argument("--num_convs", 149 | type=int, 150 | help="number of up/down levels in UNet", 151 | 
default=4) 152 | 153 | # OPTIMIZATION OPTION 154 | self.parser.add_argument("--batch_size", 155 | type=int, 156 | help="batch size", 157 | default=1) 158 | self.parser.add_argument("--learning_rate", 159 | type=float, 160 | help="learning rate", 161 | default=1e-4) 162 | self.parser.add_argument("--start_epoch", 163 | type=int, 164 | help="start epoch to have non-zero starting option for continuing training", 165 | default=0) 166 | self.parser.add_argument("--num_epochs", 167 | type=int, 168 | help="number of epochs", 169 | default=20) 170 | self.parser.add_argument("--scheduler_step_size", 171 | type=int, 172 | help="step size of the scheduler", 173 | default=15) 174 | 175 | # LOADING options 176 | self.parser.add_argument("--load_weights_folder", 177 | type=str, 178 | help="name of model to load") 179 | self.parser.add_argument("--models_to_load", 180 | nargs="+", 181 | type=str, 182 | help="models to load", 183 | default=["depth", "pose_encoder", "pose"]) 184 | 185 | # ABLATION options 186 | self.parser.add_argument("--no_ssim", 187 | action="store_true", 188 | help="if not to use SSIM loss") 189 | self.parser.add_argument("--cycle_loss", 190 | help="if set, cycle loss is used", 191 | action="store_true") 192 | self.parser.add_argument("--cycle_weight", 193 | type=float, 194 | default=0.1, 195 | help="cycle loss weight") 196 | self.parser.add_argument("--temporal_loss", 197 | help="if set, temporal reprojection loss is used", 198 | action="store_true") 199 | self.parser.add_argument("--temporal_weight", 200 | type=float, 201 | default=1.0, 202 | help="temporal loss weight") 203 | self.parser.add_argument("--sim_gated", 204 | action="store_true", 205 | help="whether to generate gated simulation image") 206 | self.parser.add_argument("--disparity_smoothness", 207 | type=float, 208 | default=1e-3, 209 | help="disparity smoothnes weight") 210 | self.parser.add_argument("--v1_multiscale", 211 | help="if set, uses monodepth v1 multiscale", 212 | action="store_true") 213 | self.parser.add_argument("--disable_automasking", 214 | help="if set, doesn't do auto-masking", 215 | action="store_true") 216 | self.parser.add_argument("--avg_reprojection", 217 | help="if set, uses average reprojection loss", 218 | action="store_true") 219 | self.parser.add_argument("--infty_hole_mask", 220 | help="if set, uses a masking scheme to filter out points with infinite depth close to camera", 221 | action="store_true") 222 | self.parser.add_argument("--infty_epoch_start", 223 | type=int, 224 | help="start epoch to use infinity masks", 225 | default=0) 226 | self.parser.add_argument("--close_px_fact", 227 | type=float, 228 | help="factor to select close pixels to the image", 229 | default=0.995) 230 | self.parser.add_argument("--infty_hole_thresh", 231 | type=float, 232 | help="threshold to consider infinity points", 233 | default=0.01) 234 | self.parser.add_argument("--use_batchnorm", 235 | action="store_true", 236 | help="whether to use batchnorm2D in packnet module or not") 237 | self.parser.add_argument("--albedo_offset", 238 | type=float, 239 | default=0.0, 240 | help="constant factor to add to albedo to avoid gradient cutoff") 241 | self.parser.add_argument("--freeze_pose_net", 242 | action="store_true", 243 | help="whether to freeze the training for pose network") 244 | self.parser.add_argument("--clip_depth_grad", 245 | type=float, 246 | default=-1.0, 247 | help="clip depth gradient to a certain value if value > 0") 248 | self.parser.add_argument("--passive_supervision", 249 | action="store_true", 250 | 
help="supervise learning of passive image with real one") 251 | self.parser.add_argument("--passive_weight", 252 | type=float, 253 | default=0.1, 254 | help="passive supervision loss weight") 255 | 256 | 257 | 258 | # LOGGING options 259 | self.parser.add_argument("--log_frequency", 260 | type=int, 261 | help="number of batches between each tensorboard log", 262 | default=250) 263 | self.parser.add_argument("--chkpt_frequency", 264 | type=int, 265 | help="number of batches between each checkpoint", 266 | default=250) 267 | self.parser.add_argument("--save_frequency", 268 | type=int, 269 | help="number of epochs between each save", 270 | default=1) 271 | 272 | # SYSTEM options 273 | self.parser.add_argument("--no_cuda", 274 | action="store_true", 275 | help="whether to train on cpu") 276 | self.parser.add_argument("--num_workers", 277 | type=int, 278 | help="number of dataloader workers", 279 | default=12) 280 | 281 | def parse(self): 282 | self.options = self.parser.parse_args() 283 | return self.options -------------------------------------------------------------------------------- /src/splits/g2d/real_test_night.txt: -------------------------------------------------------------------------------- 1 | 03644 2 | 13755 3 | 04165 4 | 13006 5 | 09623 6 | 03742 7 | 09270 8 | 00874 9 | 05966 10 | 13029 11 | 12142 12 | 00138 13 | 09779 14 | 07951 15 | 12413 16 | 06752 17 | 11152 18 | 11024 19 | 04942 20 | 00964 21 | 12826 22 | 05791 23 | 09303 24 | 01799 25 | 10436 26 | 06197 27 | 09090 28 | 13733 29 | 11777 30 | 10449 31 | 11209 32 | 03295 33 | 02852 34 | 06895 35 | 06575 36 | 01253 37 | 12354 38 | 04187 39 | 01204 40 | 06426 41 | 00945 42 | 00288 43 | 08479 44 | 00413 45 | 08438 46 | 07191 47 | 09894 48 | 11819 49 | 04479 50 | 02473 51 | 08333 52 | 05552 53 | 03305 54 | 08476 55 | 12835 56 | 07141 57 | 08959 58 | 08752 59 | 12602 60 | 13369 61 | 10802 62 | 08617 63 | 10324 64 | 09828 65 | 07534 66 | 05064 67 | 06681 68 | 06976 69 | 01064 70 | 07021 71 | 08254 72 | 03530 73 | 07456 74 | 07022 75 | 02907 76 | 07417 77 | 00202 78 | 09022 79 | 08031 80 | 00592 81 | 13282 82 | 03480 83 | 05289 84 | 14034 85 | 00443 86 | 07800 87 | 06006 88 | 01195 89 | 09731 90 | 09072 91 | 11721 92 | 06015 93 | 03574 94 | 01188 95 | 00855 96 | 11030 97 | 05426 98 | 03502 99 | 05755 100 | 06326 101 | 04955 102 | 08156 103 | 03622 104 | 12140 105 | 07580 106 | 05063 107 | 08492 108 | 10209 109 | 07182 110 | 10817 111 | 11186 112 | 07814 113 | 01001 114 | 13637 115 | 01040 116 | 01049 117 | 09269 118 | 05045 119 | 04138 120 | 01862 121 | 01813 122 | 10329 123 | 07442 124 | 01137 125 | 01770 126 | 08023 127 | 10462 128 | 13049 129 | 06518 130 | 09079 131 | 02287 132 | 10700 133 | 09373 134 | 00556 135 | 01238 136 | 06025 137 | 02027 138 | 00223 139 | 12565 140 | 08494 141 | 11643 142 | 03613 143 | 07175 144 | 00391 145 | 11109 146 | 09506 147 | 08771 148 | 06085 149 | 06279 150 | 04855 151 | 02216 152 | 03833 153 | 09377 154 | 10346 155 | 12553 156 | 00463 157 | 11377 158 | 12235 159 | 05652 160 | 09985 161 | 00757 162 | 12369 163 | 07703 164 | 03786 165 | 13726 166 | 08196 167 | 01093 168 | 09749 169 | 01155 170 | 06659 171 | 09581 172 | 11536 173 | 02253 174 | 05728 175 | 12657 176 | 05614 177 | 09493 178 | 11420 179 | 04264 180 | 12035 181 | 05004 182 | 03885 183 | 06563 184 | 08454 185 | 01996 186 | 00245 187 | 07184 188 | 11153 189 | 05486 190 | 09535 191 | 01035 192 | 13365 193 | 01812 194 | 09965 195 | 01473 196 | 04584 197 | 10787 198 | 12899 199 | 02646 200 | 03605 201 | 08045 202 | 06802 203 | 00346 204 | 
09748 205 | 12089 206 | 07352 207 | 03694 208 | 11317 209 | 11895 210 | 03256 211 | 04938 212 | 09005 213 | 08224 214 | 11114 215 | 13349 216 | 06488 217 | 00049 218 | 07223 219 | 09375 220 | 09781 221 | 01841 222 | 03221 223 | 13339 224 | 06169 225 | 05385 226 | 11710 227 | 10720 228 | 06728 229 | 06259 230 | 05205 231 | 04364 232 | 01741 233 | 01756 234 | 12526 235 | 04836 236 | 11018 237 | 00835 238 | 13400 239 | 09097 240 | 09230 241 | 03873 242 | 09808 243 | 04723 244 | 05113 245 | 08985 246 | 07053 247 | 01157 248 | 08638 249 | 09637 250 | 10099 251 | 13140 252 | 13940 253 | 10851 254 | 09410 255 | 03033 256 | 02863 257 | 06655 258 | 05896 259 | 13051 260 | 12838 261 | 09147 262 | 05663 263 | 07894 264 | 04806 265 | 01976 266 | 06213 267 | 06859 268 | 06520 269 | 13245 270 | 12610 271 | 02681 272 | 02924 273 | 11519 274 | 00038 275 | 01233 276 | 13915 277 | 05760 278 | 07812 279 | 13933 280 | 13898 281 | 06186 282 | 10225 283 | 01207 284 | 03707 285 | 14166 286 | 07843 287 | 08891 288 | 04380 289 | 04746 290 | 01376 291 | 12389 292 | 07981 293 | 07162 294 | 04915 295 | 12400 296 | 10579 297 | 07711 298 | 06458 299 | 11282 300 | 13265 301 | 10913 302 | 02034 303 | 02416 304 | 13126 305 | 00424 306 | 12079 307 | 13845 308 | 13852 309 | 07611 310 | 09666 311 | 04845 312 | 01142 313 | 02259 314 | 07001 315 | 03900 316 | 10155 317 | 09793 318 | 05066 319 | 03593 320 | 07623 321 | 13936 322 | 06072 323 | 10159 324 | 00317 325 | 11598 326 | 00981 327 | 14009 328 | 05003 329 | 11920 330 | 02752 331 | 01790 332 | 05986 333 | 04818 334 | 01044 335 | 08341 336 | 09981 337 | 08839 338 | 03050 339 | 10786 340 | 11028 341 | 00531 342 | 02675 343 | 00262 344 | 07775 345 | 11336 346 | 10316 347 | 12299 348 | 06244 349 | 00931 350 | 13685 351 | 10219 352 | 05023 353 | 02339 354 | 13641 355 | 04106 356 | 00330 357 | 12941 358 | 06863 359 | 10166 360 | 10130 361 | 13743 362 | 05420 363 | 02405 364 | 10707 365 | 00126 366 | 11216 367 | 03405 368 | 08376 369 | 10789 370 | 03532 371 | 05001 372 | 07140 373 | 12867 374 | 11411 375 | 04856 376 | 00411 377 | 06667 378 | 03752 379 | 05255 380 | 03814 381 | 02791 382 | 01763 383 | 12467 384 | 03273 385 | 13213 386 | 03382 387 | 11113 388 | 00515 389 | 12108 390 | 11971 391 | 01472 392 | 02387 393 | 05079 394 | 07507 395 | 08313 396 | 01348 397 | 10147 398 | 03025 399 | 06246 400 | 12932 401 | 11250 402 | 01469 403 | 07201 404 | 08437 405 | 00745 406 | 12272 407 | 00289 408 | 03965 409 | 05790 410 | 05703 411 | 02871 412 | 10342 413 | 04210 414 | 03421 415 | 08741 416 | 09654 417 | 02459 418 | 09372 419 | 13554 420 | 01997 421 | 08838 422 | 02468 423 | 08511 424 | 03464 425 | 04733 426 | 08026 427 | 04554 428 | 13035 429 | 06555 430 | 10526 431 | 14074 432 | 02593 433 | 07626 434 | 05019 435 | 01899 436 | 07226 437 | 01562 438 | 09058 439 | 05425 440 | 05596 441 | 04766 442 | 00769 443 | 03048 444 | 00046 445 | 13899 446 | 01941 447 | 11001 448 | 08750 449 | 00846 450 | 07654 451 | 07503 452 | 06523 453 | 08330 454 | 00956 455 | 09611 456 | 05938 457 | 10542 458 | 04424 459 | 02637 460 | 13355 461 | 11363 462 | 10274 463 | 06691 464 | 00260 465 | 13267 466 | 03330 467 | 13358 468 | 05491 469 | 02081 470 | 08798 471 | 07249 472 | 04778 473 | 05439 474 | 07434 475 | 08169 476 | 12273 477 | 09776 478 | 11376 479 | 11832 480 | 00573 481 | 06249 482 | 03544 483 | 09877 484 | 03261 485 | 08610 486 | 07440 487 | 12300 488 | 04224 489 | 04136 490 | 13048 491 | 07419 492 | 04830 493 | 04058 494 | 09547 495 | 02773 496 | 06528 497 | 04833 498 | 08337 499 | 05917 500 | 
13600 501 | 08822 502 | 04320 503 | 12953 504 | 05695 505 | 03385 506 | 14002 507 | 05964 508 | 08911 509 | 00339 510 | 08325 511 | 07567 512 | 03817 513 | 04100 514 | 05237 515 | 04085 516 | 02330 517 | 02554 518 | 13458 519 | 07104 520 | 06208 521 | 03245 522 | 05612 523 | 06032 524 | 07562 525 | 05139 526 | 11664 527 | 13077 528 | 09105 529 | 10463 530 | 02234 531 | 01944 532 | 11882 533 | 06394 534 | 10759 535 | 04756 536 | 12840 537 | 01696 538 | 10265 539 | 12327 540 | 06041 541 | 07121 542 | 00595 543 | 03040 544 | 00553 545 | 10616 546 | 04172 547 | 05397 548 | 05307 549 | 04227 550 | 02621 551 | -------------------------------------------------------------------------------- /src/splits/stf/test_dense_fog_day.txt: -------------------------------------------------------------------------------- 1 | 2018-10-29_16-12-02,01200 2 | 2018-10-29_14-30-29,02460 3 | 2018-10-29_14-30-29,00590 4 | 2018-10-29_14-30-29,00460 5 | 2018-10-29_14-30-29,00910 6 | 2018-10-08_08-10-40,03020 7 | 2018-10-29_14-30-29,00500 8 | 2018-10-08_08-10-40,00650 9 | 2018-10-29_14-30-29,01680 10 | 2018-10-08_08-10-40,03590 11 | 2018-10-08_08-10-40,03050 12 | 2018-10-08_08-10-40,03500 13 | 2018-10-29_14-30-29,02490 14 | 2018-10-29_14-30-29,00600 15 | 2018-10-08_08-18-59,00130 16 | 2018-10-08_08-10-40,03110 17 | 2018-10-29_14-30-29,01560 18 | 2018-10-08_08-10-40,03680 19 | 2018-10-29_14-30-29,00660 20 | 2018-10-08_08-10-40,03370 21 | 2018-10-08_08-10-40,02900 22 | 2018-10-29_14-30-29,02640 23 | 2018-10-08_08-10-40,03200 24 | 2018-10-29_14-30-29,01200 25 | 2018-10-08_08-10-40,02400 26 | 2018-10-08_08-10-40,04200 27 | 2018-10-29_14-30-29,02420 28 | 2018-10-29_14-30-29,01210 29 | 2018-10-29_14-30-29,01030 30 | 2018-10-29_14-30-29,01740 31 | 2018-10-29_14-30-29,02510 32 | 2018-10-29_14-30-29,01730 33 | 2018-10-29_14-30-29,00550 34 | 2018-10-29_14-30-29,01320 35 | 2018-10-29_14-30-29,02520 36 | 2018-10-29_14-30-29,02450 37 | 2018-10-29_14-30-29,02100 38 | 2018-10-08_08-10-40,03150 39 | 2018-10-29_14-30-29,01630 40 | 2018-10-29_15-15-15,01210 41 | 2018-10-08_08-10-40,03230 42 | 2018-10-08_08-18-59,00300 43 | 2018-10-08_08-10-40,03120 44 | 2018-10-29_14-30-29,00630 45 | 2018-10-29_14-30-29,01550 46 | 2018-10-29_14-30-29,01250 47 | 2018-10-29_14-30-29,02190 48 | 2018-10-08_08-10-40,03160 49 | 2018-10-29_14-30-29,02600 50 | 2018-10-29_14-30-29,02350 51 | 2018-10-08_08-10-40,03420 52 | 2018-10-08_08-10-40,02630 53 | 2018-10-29_14-30-29,02070 54 | 2018-10-29_15-15-15,01500 55 | 2018-10-29_14-30-29,01460 56 | 2018-10-08_08-10-40,04700 57 | 2018-10-29_15-37-43,01800 58 | 2018-10-29_15-15-15,01900 59 | 2018-10-29_14-30-29,00450 60 | 2018-10-29_16-12-02,01300 61 | 2018-10-29_14-30-29,00920 62 | 2018-10-29_14-30-29,01770 63 | 2018-10-29_14-30-29,00510 64 | 2018-10-29_14-30-29,01910 65 | 2018-10-29_14-30-29,01690 66 | 2018-10-29_14-30-29,01810 67 | 2018-10-08_08-10-40,03040 68 | 2018-10-29_14-30-29,02480 69 | 2018-10-08_08-18-59,00120 70 | 2018-10-29_14-30-29,01350 71 | 2018-10-29_14-30-29,02250 72 | 2018-10-29_15-15-15,00900 73 | 2018-10-29_15-15-15,01520 74 | 2018-10-08_08-10-40,03300 75 | 2018-10-08_08-10-40,02640 76 | 2018-10-29_14-30-29,01820 77 | 2018-10-08_08-10-40,03260 78 | 2018-10-08_08-18-59,00370 79 | 2018-10-29_14-30-29,02630 80 | 2018-10-08_08-10-40,03600 81 | 2018-10-29_14-30-29,00700 82 | 2018-10-08_08-10-40,03410 83 | 2018-10-08_08-10-40,03460 84 | 2018-10-29_14-30-29,02580 85 | 2018-10-29_14-30-29,00800 86 | 2018-10-29_14-30-29,01920 87 | 2018-10-29_15-37-43,01200 88 | 2018-10-08_08-10-40,02730 89 | 
2018-10-29_14-30-29,01000 90 | 2018-10-29_15-15-15,01800 91 | 2018-10-29_14-30-29,01790 92 | 2018-10-29_14-30-29,01720 93 | 2018-10-29_14-30-29,00560 94 | 2018-10-29_14-30-29,00480 95 | 2018-10-29_14-30-29,02440 96 | 2018-10-29_14-30-29,01310 97 | 2018-10-29_14-30-29,01640 98 | 2018-10-08_08-10-40,03000 99 | 2018-10-29_15-15-15,01200 100 | 2018-10-08_08-10-40,03640 101 | 2018-10-08_08-10-40,03520 102 | 2018-10-08_08-10-40,03290 103 | 2018-10-08_08-10-40,02320 104 | 2018-10-29_14-30-29,02200 105 | 2018-10-08_08-10-40,02600 106 | 2018-10-08_08-10-40,03730 107 | 2018-10-08_08-10-40,00520 108 | 2018-10-08_08-10-40,02520 109 | 2018-10-29_15-15-15,01510 110 | 2018-10-08_08-10-40,03100 111 | 2018-10-08_08-10-40,02440 112 | 2018-10-29_15-37-43,01400 113 | 2018-10-29_14-30-29,00930 114 | 2018-10-29_14-30-29,02400 115 | 2018-10-29_15-15-15,01560 116 | 2018-10-29_14-30-29,00520 117 | 2018-10-08_08-10-40,04600 118 | 2018-10-29_14-30-29,01760 119 | 2018-10-08_08-10-40,03330 120 | 2018-10-08_08-10-40,03570 121 | 2018-10-29_14-30-29,01900 122 | 2018-10-29_14-30-29,01340 123 | 2018-10-08_08-18-59,00110 124 | 2018-10-29_14-30-29,01220 125 | 2018-10-29_14-30-29,00620 126 | 2018-10-08_08-10-40,03170 127 | 2018-10-08_08-10-40,04500 128 | 2018-10-08_08-10-40,00510 129 | 2018-10-29_15-15-15,00890 130 | 2018-10-29_14-30-29,00710 131 | 2018-10-08_08-10-40,00670 132 | 2018-10-29_14-30-29,00650 133 | 2018-10-08_08-10-40,03670 134 | 2018-10-08_08-10-40,02950 135 | 2018-10-08_08-10-40,04000 136 | 2018-10-08_08-10-40,03400 137 | 2018-10-08_08-10-40,02550 138 | 2018-10-29_14-30-29,01010 139 | 2018-10-08_08-10-40,03900 140 | 2018-10-29_14-30-29,01400 141 | 2018-10-29_14-30-29,02570 142 | 2018-10-29_14-30-29,01780 143 | 2018-10-29_14-30-29,00580 144 | 2018-10-29_14-30-29,00780 145 | 2018-10-29_14-30-29,01710 146 | 2018-10-29_14-30-29,00900 147 | 2018-10-08_08-10-40,03030 148 | 2018-10-29_14-30-29,02470 149 | 2018-10-29_14-30-29,00570 150 | 2018-10-29_14-30-29,01300 151 | 2018-10-29_14-30-29,01650 152 | 2018-10-29_15-15-15,01220 153 | 2018-10-08_08-10-40,00630 154 | 2018-10-29_14-30-29,00610 155 | 2018-10-08_08-10-40,03530 156 | 2018-10-08_08-18-59,00140 157 | 2018-10-29_15-37-43,02800 158 | 2018-10-29_14-30-29,02390 159 | 2018-10-08_08-10-40,03360 160 | 2018-10-29_14-30-29,02300 161 | 2018-10-08_08-10-40,02610 162 | 2018-10-08_08-10-40,03800 163 | 2018-10-29_14-30-29,00720 164 | 2018-10-08_08-18-59,00320 165 | 2018-10-29_14-30-29,02090 166 | 2018-10-29_15-15-15,01190 167 | 2018-10-29_14-30-29,00670 168 | 2018-10-29_14-30-29,02000 169 | 2018-10-29_14-30-29,02430 170 | 2018-10-29_14-30-29,00940 171 | 2018-10-29_16-12-02,01360 172 | 2018-10-29_14-30-29,02530 173 | 2018-10-29_14-30-29,01750 174 | 2018-10-29_14-30-29,00530 175 | 2018-10-29_14-30-29,01450 176 | 2018-10-08_08-10-40,04900 177 | 2018-10-29_14-30-29,00790 178 | 2018-10-29_14-30-29,00540 179 | 2018-10-08_08-18-59,00100 180 | 2018-10-29_14-30-29,01330 181 | 2018-10-29_14-30-29,01620 182 | 2018-10-29_14-30-29,02500 183 | 2018-10-08_08-10-40,03710 184 | 2018-10-08_08-18-59,00310 185 | 2018-10-29_14-30-29,01800 186 | 2018-10-29_14-30-29,02080 187 | 2018-10-29_14-30-29,00640 188 | 2018-10-29_15-37-43,02200 189 | 2018-10-08_08-10-40,02300 190 | 2018-10-29_15-15-15,01910 191 | 2018-10-08_08-10-40,04400 192 | 2018-10-29_16-12-02,01290 193 | 2018-10-08_08-10-40,02530 194 | 2018-10-08_08-18-59,03300 195 | 2018-10-08_08-10-40,03700 196 | 2018-10-08_08-10-40,02620 -------------------------------------------------------------------------------- 
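Note: the STF split files above and below list one sample per line in the form `<recording>,<frame_id>` (e.g. `2018-10-29_14-30-29,01200`), whereas the g2d splits use bare five-digit frame indices. Below is a minimal parsing sketch built on the `readlines` helper from `src/utils.py`; the path layout `<data_root>/<slice_dir>/<recording>_<frame_id>.png` is an assumption for illustration only and may differ from the actual loaders in `src/dataset/`.

```
import os


def readlines(filename):
    """Read all the lines in a text file and return them as a list
    (same helper as in src/utils.py)."""
    with open(filename, 'r') as f:
        return f.read().splitlines()


def parse_stf_split(split_file, data_root,
                    slice_dirs=("gated0_10bit", "gated1_10bit", "gated2_10bit")):
    """Turn an STF split file into a list of samples with candidate slice paths.

    Each split line is '<recording>,<frame_id>'. The filename pattern
    '<recording>_<frame_id>.png' is assumed here for illustration only.
    """
    samples = []
    for line in readlines(split_file):
        recording, frame_id = line.strip().split(",")
        slice_paths = [os.path.join(data_root, d, "{}_{}.png".format(recording, frame_id))
                       for d in slice_dirs]
        samples.append({"recording": recording, "frame": frame_id, "slices": slice_paths})
    return samples


# Usage (placeholder paths):
# samples = parse_stf_split("src/splits/stf/test_dense_fog_day.txt", "/path/to/stf_data")
# print(len(samples), samples[0]["slices"][0])
```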
/src/splits/stf/test_dense_fog_night.txt: -------------------------------------------------------------------------------- 1 | 2018-02-07_18-39-52,00300 2 | 2018-12-12_15-21-22,00300 3 | 2018-12-12_15-21-22,02700 4 | 2018-12-12_15-21-22,00900 5 | 2018-12-12_15-32-51,01000 6 | 2018-12-14_15-36-31,02800 7 | 2018-02-07_18-20-02,00320 8 | 2018-02-07_18-06-31,00150 9 | 2018-12-12_15-32-51,01300 10 | 2019-01-09_08-07-00,00800 11 | 2018-02-07_18-39-52,00380 12 | 2018-12-12_15-30-45,00200 13 | 2018-12-12_15-30-45,00100 14 | 2018-02-07_18-39-52,00200 15 | 2018-12-12_15-21-22,01800 16 | 2018-12-12_15-21-22,01600 17 | 2018-12-12_15-30-45,00000 18 | 2018-12-12_15-30-45,00400 19 | 2018-02-07_18-39-52,00220 20 | 2018-12-14_15-36-31,00900 21 | 2018-12-12_15-21-22,01400 22 | 2018-02-07_18-20-02,00310 23 | 2018-12-12_15-30-45,00300 24 | 2018-12-12_15-21-22,01500 25 | 2018-02-07_18-25-17,00100 26 | 2018-02-07_18-39-52,00390 27 | 2018-02-07_18-06-31,00120 28 | 2018-02-07_18-39-52,00280 29 | 2018-12-14_15-36-31,00600 30 | 2018-02-07_17-56-35,00130 31 | 2018-12-12_15-21-22,01200 32 | 2018-12-14_15-36-31,03300 33 | 2018-02-07_18-25-17,00060 34 | 2018-12-12_15-32-51,00700 35 | 2018-12-14_15-36-31,00300 36 | 2018-12-12_15-21-22,02100 37 | 2018-12-12_15-32-51,00900 38 | 2018-12-12_15-21-22,00000 39 | 2018-02-07_18-39-52,00370 40 | 2018-12-14_15-36-31,02900 41 | 2018-02-07_18-39-52,00250 42 | 2018-02-07_18-39-52,00320 43 | 2018-12-12_15-32-51,00400 44 | 2018-12-12_15-30-45,00800 45 | 2018-12-12_15-21-22,00400 46 | 2018-02-07_18-20-02,00000 47 | 2018-12-12_15-32-51,01200 48 | 2018-12-12_15-21-22,01100 49 | 2018-12-14_15-36-31,01100 50 | 2018-12-12_15-30-45,00700 51 | 2018-02-07_18-20-02,00300 52 | 2018-12-12_15-21-22,03300 53 | 2018-02-07_18-25-17,00170 54 | 2018-02-07_17-56-35,00120 55 | 2018-12-12_15-32-51,00300 56 | 2018-12-12_15-21-22,01000 57 | 2018-12-14_15-36-31,03100 58 | 2018-12-12_15-21-22,02500 59 | 2018-12-14_15-36-31,00400 60 | 2018-12-12_15-32-51,00000 61 | 2018-12-12_15-21-22,02400 62 | 2018-02-07_18-06-31,00130 63 | 2018-12-12_15-21-22,03400 64 | 2018-12-12_15-21-22,02800 65 | 2018-12-12_15-21-22,02200 66 | 2019-01-09_08-09-20,04600 67 | 2018-02-07_18-39-52,00340 68 | 2018-12-14_15-36-31,03400 69 | 2018-02-07_18-39-52,00310 70 | 2018-02-07_18-20-02,00010 71 | 2018-12-12_15-21-22,00200 72 | 2018-12-12_15-21-22,00800 73 | 2018-12-12_15-32-51,01100 74 | 2018-12-12_15-21-22,00500 75 | 2018-12-12_15-30-45,00600 76 | 2018-12-14_15-36-31,01000 77 | 2018-12-12_15-21-22,03200 78 | 2018-12-12_15-30-45,00500 79 | 2018-12-12_15-21-22,01700 80 | 2018-12-14_15-36-31,00100 81 | 2018-12-12_15-21-22,01900 82 | 2018-02-07_17-56-35,00110 83 | 2018-12-14_15-36-31,00800 84 | 2018-12-12_15-21-22,02600 85 | 2018-12-12_15-21-22,01300 86 | 2018-12-14_15-13-08,00000 87 | 2018-12-12_15-21-22,00100 88 | 2018-12-14_15-36-31,00500 89 | 2018-02-07_18-39-52,00270 90 | 2018-12-12_15-21-22,02900 91 | 2018-02-07_18-39-52,00350 92 | 2018-12-12_15-32-51,00100 93 | 2018-02-07_18-39-52,00290 94 | 2018-12-14_15-36-31,03000 95 | 2018-12-12_15-21-22,00600 96 | 2018-12-12_15-21-22,02300 97 | 2018-12-14_15-36-31,00200 98 | 2018-02-07_18-20-02,00020 99 | 2018-12-12_15-32-51,00600 100 | 2018-12-12_15-21-22,02000 101 | 2018-12-12_15-32-51,00800 102 | 2018-02-07_17-56-35,00010 103 | 2018-02-07_17-56-35,00160 104 | 2018-02-07_17-56-35,00050 105 | 2018-02-07_18-06-31,00210 -------------------------------------------------------------------------------- /src/splits/stf/test_light_fog_day.txt: 
-------------------------------------------------------------------------------- 1 | 2018-12-12_09-22-59,06600 2 | 2018-12-12_09-22-59,03400 3 | 2018-12-12_11-27-36,00000 4 | 2018-10-08_08-27-03,03600 5 | 2018-10-29_15-46-53,01200 6 | 2018-12-16_12-31-35,00100 7 | 2018-10-08_08-27-03,05900 8 | 2018-10-29_15-46-53,00500 9 | 2018-12-12_09-22-59,03700 10 | 2018-10-29_16-12-02,00250 11 | 2019-01-09_08-23-49,00200 12 | 2018-10-29_14-35-02,01080 13 | 2018-10-29_15-46-53,00200 14 | 2018-10-29_16-12-02,01000 15 | 2019-01-08_11-12-54,01000 16 | 2019-01-09_08-23-49,00700 17 | 2018-10-29_15-46-53,00800 18 | 2018-10-08_08-27-03,05300 19 | 2018-12-12_09-22-59,04600 20 | 2019-01-09_08-23-49,00900 21 | 2018-12-12_09-22-59,06300 22 | 2018-10-08_08-27-03,01100 23 | 2018-10-08_08-27-03,01250 24 | 2019-01-09_08-27-29,00500 25 | 2018-10-29_16-12-02,01150 26 | 2018-12-12_09-22-59,06000 27 | 2018-12-12_10-21-21,00100 28 | 2019-01-09_09-28-07,00300 29 | 2018-12-12_13-43-01,00100 30 | 2018-12-12_09-22-59,04300 31 | 2018-03-15_09-39-45,00100 32 | 2018-12-16_12-05-56,00700 33 | 2018-10-08_08-27-03,04200 34 | 2019-01-09_09-28-07,00200 35 | 2018-10-08_08-27-03,05200 36 | 2018-12-12_09-22-59,04900 37 | 2018-10-29_16-00-52,05400 38 | 2018-10-29_15-46-53,01000 39 | 2018-03-15_09-39-45,00200 40 | 2018-12-16_12-05-56,00000 41 | 2019-01-09_08-23-49,00400 42 | 2018-12-12_09-22-59,05300 43 | 2018-10-08_08-27-03,05110 44 | 2018-10-29_16-12-02,00450 45 | 2018-10-08_08-27-03,03840 46 | 2019-01-09_08-23-49,01200 47 | 2018-10-08_08-27-03,01700 48 | 2018-03-15_09-28-05,00200 49 | 2018-10-29_16-12-02,00350 50 | 2018-10-08_08-27-03,00000 51 | 2018-10-29_16-12-02,00950 52 | 2018-10-08_08-27-03,01380 53 | 2019-01-09_08-23-49,00000 54 | 2018-10-29_16-12-02,01050 55 | 2018-12-12_09-22-59,05800 56 | 2019-01-09_08-27-29,00300 57 | 2018-12-12_09-22-59,05600 58 | 2018-03-15_09-28-05,00100 59 | 2019-01-08_11-12-54,00700 60 | 2018-10-08_08-27-03,02100 61 | 2018-10-08_08-27-03,04100 62 | 2018-10-08_08-27-03,04000 63 | 2018-10-29_15-46-53,01400 64 | 2019-01-09_09-26-35,00100 65 | 2018-12-12_09-22-59,05500 66 | 2018-10-08_08-27-03,03400 67 | 2018-12-12_09-22-59,03300 68 | 2018-10-29_15-46-53,01100 69 | 2018-10-29_16-12-02,00300 70 | 2018-12-12_11-27-36,00900 71 | 2018-10-08_08-27-03,00850 72 | 2018-12-16_12-31-35,00200 73 | 2018-10-08_08-27-03,05800 74 | 2018-12-12_09-22-59,05200 75 | 2018-03-15_09-40-42,00000 76 | 2018-10-08_08-27-03,01850 77 | 2018-10-29_15-46-53,00400 78 | 2019-01-09_08-23-49,00300 79 | 2018-10-08_08-27-03,01770 80 | 2018-12-12_09-22-59,03600 81 | 2018-10-29_16-12-02,00900 82 | 2018-10-29_14-35-02,00850 83 | 2018-12-12_11-27-36,01200 84 | 2018-12-12_09-55-58,00000 85 | 2018-10-29_15-46-53,01300 86 | 2018-10-08_08-27-03,00750 87 | 2018-10-08_08-27-03,02250 88 | 2018-12-12_09-59-21,00100 89 | 2018-10-08_08-27-03,05400 90 | 2018-10-08_08-27-03,01000 91 | 2018-10-08_08-27-03,01400 92 | 2018-12-12_09-59-21,00000 93 | 2018-10-08_08-27-03,01150 94 | 2018-12-12_09-22-59,06100 95 | 2018-10-29_15-46-53,00000 96 | 2018-10-29_16-12-02,00600 97 | 2018-12-12_09-22-59,03900 98 | 2018-12-12_09-22-59,04000 99 | 2018-12-16_12-05-56,00100 100 | 2019-01-09_08-23-49,00500 101 | 2018-10-29_16-12-02,00500 102 | 2018-10-08_08-27-03,01950 103 | 2018-12-12_10-21-21,00200 104 | 2018-12-21_11-24-17,00000 105 | 2018-10-29_16-12-02,00550 106 | 2018-10-08_08-27-03,05100 107 | 2018-10-08_08-27-03,00150 108 | 2018-10-29_15-46-53,00700 109 | 2018-10-08_08-27-03,00800 110 | 2018-12-12_11-27-36,00300 111 | 2018-10-08_08-27-03,06000 112 | 
2018-02-12_08-56-38,00080 113 | 2018-10-29_15-46-53,00900 114 | 2018-10-08_08-27-03,01780 115 | 2018-10-29_16-12-02,00800 116 | 2018-12-12_09-22-59,06400 117 | 2019-01-09_08-23-49,01100 118 | 2018-10-08_08-27-03,01600 119 | 2018-03-15_09-39-45,00300 120 | 2019-01-08_11-12-54,00300 121 | 2018-12-12_09-55-58,00100 122 | 2018-10-08_08-27-03,03500 123 | 2018-02-04_12-42-39,00000 124 | 2018-10-08_08-27-03,02200 125 | 2019-01-08_11-12-54,00400 126 | 2018-12-12_09-22-59,05100 127 | 2018-03-15_09-39-45,00400 128 | 2018-02-12_08-56-38,00090 129 | 2019-01-08_11-12-54,00600 130 | 2019-01-09_09-26-35,00000 131 | 2018-10-29_15-46-53,01500 132 | 2019-01-09_08-27-29,00000 133 | 2018-10-29_16-12-02,00150 134 | 2018-12-12_09-22-59,05400 135 | 2018-10-08_08-27-03,05130 136 | 2018-10-08_08-27-03,01900 137 | 2018-10-08_08-27-03,01760 138 | 2018-10-08_08-27-03,05500 139 | 2018-12-16_12-31-35,00000 140 | 2018-10-29_15-46-53,01600 141 | 2018-12-12_09-22-59,03200 142 | 2018-02-12_08-56-38,00060 143 | 2018-10-08_08-27-03,05700 144 | 2018-10-29_16-12-02,00850 145 | 2018-12-12_11-27-36,01000 146 | 2018-12-12_09-22-59,03800 147 | 2018-12-12_09-22-59,03500 148 | 2018-12-12_09-22-59,05000 149 | 2018-12-16_12-05-56,00800 150 | 2018-10-29_16-12-02,00750 151 | 2018-12-12_09-22-59,06500 152 | 2019-01-08_11-12-54,00500 153 | 2018-02-04_12-43-00,00000 154 | 2018-12-12_11-27-36,00400 155 | 2018-12-16_12-05-56,00500 156 | 2019-01-09_08-23-49,00100 157 | 2018-10-29_15-46-53,00300 158 | 2018-10-29_16-12-02,00100 159 | 2018-10-08_08-27-03,00700 160 | 2018-10-08_08-27-03,05600 161 | 2019-01-09_08-23-49,01400 162 | 2018-10-08_08-27-03,00050 163 | 2019-01-09_08-23-49,00600 164 | 2019-01-09_08-23-49,00800 165 | 2018-12-12_09-22-59,06200 166 | 2018-02-12_08-56-38,00130 167 | 2018-10-29_14-35-02,01070 168 | 2018-10-08_08-27-03,03200 169 | 2019-01-09_08-27-29,00400 170 | 2018-12-12_09-22-59,02300 171 | 2018-12-12_10-21-21,00000 172 | 2019-01-09_09-28-07,00000 173 | 2019-01-09_08-23-49,01000 174 | 2018-10-08_08-27-03,01050 175 | 2019-01-09_09-28-07,00100 176 | 2018-12-12_09-22-59,06700 177 | 2018-12-12_09-22-59,04200 178 | 2018-10-08_08-27-03,00900 179 | 2018-03-15_09-39-45,00000 180 | 2019-01-09_08-23-49,01300 181 | 2019-01-08_11-12-54,00200 182 | 2019-01-09_14-54-03,01000 183 | 2018-10-29_15-46-53,00100 184 | 2018-03-15_09-30-35,00000 185 | 2018-12-12_09-22-59,04500 186 | 2018-10-29_15-46-53,00600 187 | 2018-10-08_08-27-03,02700 188 | 2018-10-08_08-27-03,03300 189 | 2018-10-08_08-27-03,05120 190 | 2018-12-12_11-27-36,01100 191 | 2019-01-09_08-27-29,00100 192 | 2018-10-08_08-27-03,01750 193 | 2018-10-08_08-27-03,01800 194 | 2018-12-12_09-22-59,05900 195 | 2018-10-08_08-27-03,01200 196 | 2019-01-09_08-27-29,00200 197 | 2018-10-29_16-12-02,00200 198 | 2018-10-08_08-27-03,02000 199 | 2018-10-29_16-12-02,01100 200 | 2018-03-15_09-30-35,00150 201 | 2018-02-04_12-47-32,00000 202 | 2018-02-04_12-47-32,00500 203 | 2018-03-15_10-18-07,00200 204 | 2018-12-19_10-03-38,00300 -------------------------------------------------------------------------------- /src/splits/stf/test_light_fog_night.txt: -------------------------------------------------------------------------------- 1 | 2018-10-29_16-32-57,02900 2 | 2018-02-07_18-33-04,00000 3 | 2018-12-20_15-23-28,00500 4 | 2018-02-07_18-36-39,00300 5 | 2018-10-29_16-32-57,00000 6 | 2018-12-20_15-04-24,00600 7 | 2018-12-17_09-12-03,00500 8 | 2018-02-07_18-21-20,00110 9 | 2018-12-20_15-04-24,00300 10 | 2018-12-17_09-12-03,01100 11 | 2018-02-07_18-21-20,00010 12 | 2018-12-20_16-04-42,00500 13 | 
2018-02-07_18-21-20,00060 14 | 2018-12-20_14-41-07,00100 15 | 2018-10-29_16-32-57,00200 16 | 2018-12-20_15-39-16,00200 17 | 2018-12-20_17-08-31,00300 18 | 2018-12-20_15-55-01,00200 19 | 2018-12-20_15-16-30,00800 20 | 2018-02-07_18-30-24,00200 21 | 2018-12-20_15-04-24,02700 22 | 2018-10-29_16-12-02,01270 23 | 2018-12-20_15-04-24,01600 24 | 2018-12-20_15-04-24,02900 25 | 2018-12-20_16-04-42,00300 26 | 2018-12-20_14-41-07,01700 27 | 2018-10-29_16-32-57,00100 28 | 2018-12-20_15-46-51,00700 29 | 2018-02-07_18-21-20,00050 30 | 2018-12-20_15-46-51,00000 31 | 2018-12-17_09-12-03,01000 32 | 2018-12-20_15-55-01,00100 33 | 2018-02-07_18-21-20,00070 34 | 2018-12-20_15-39-16,00600 35 | 2018-12-20_15-16-30,00200 36 | 2018-12-20_15-39-16,00800 37 | 2018-12-17_21-05-50,00000 38 | 2018-12-20_15-23-28,00400 39 | 2018-12-20_16-04-42,00700 40 | 2018-10-29_16-32-57,02600 41 | 2018-12-17_09-12-03,00600 42 | 2018-02-07_18-33-04,00300 43 | 2018-10-29_16-32-57,02800 44 | 2018-02-07_18-21-20,00270 45 | 2018-10-29_16-32-57,00900 46 | 2018-12-20_15-04-24,00400 47 | 2018-10-29_16-32-57,00300 48 | 2018-12-20_14-41-07,00000 49 | 2018-12-17_09-12-03,01600 50 | 2018-02-07_18-21-20,00000 51 | 2018-12-20_16-04-42,00400 52 | 2018-12-17_09-12-03,00700 53 | 2018-12-20_15-11-27,01600 54 | 2018-12-20_16-04-42,00000 55 | 2018-12-20_16-04-42,00200 56 | 2018-12-20_15-04-24,01500 57 | 2018-10-29_16-32-57,02100 58 | 2018-12-20_15-04-24,01000 59 | 2018-12-20_17-08-31,00100 60 | 2018-12-20_15-04-24,00900 61 | 2018-02-07_18-21-20,00040 62 | 2018-12-20_15-46-51,00100 63 | 2018-12-20_15-46-51,00400 64 | 2018-02-07_18-21-20,00290 65 | 2018-12-20_15-39-16,00500 66 | 2018-12-20_15-16-30,00300 67 | 2018-12-17_09-12-03,01800 68 | 2018-12-20_15-46-51,00600 69 | 2018-10-29_16-32-57,00500 70 | 2018-10-29_16-32-57,01000 71 | 2019-01-09_08-09-20,04300 72 | 2018-02-07_18-36-39,00100 73 | 2018-12-14_15-27-11,01500 74 | 2018-12-20_15-55-01,00000 75 | 2018-12-17_21-05-50,00300 76 | 2018-12-17_09-12-03,00900 77 | 2018-02-07_18-36-39,00200 78 | 2018-02-07_18-21-20,00260 79 | 2018-10-29_16-32-57,02700 80 | 2018-12-20_15-04-24,03000 81 | 2018-02-07_18-33-04,00200 82 | 2018-12-21_16-19-18,00100 83 | 2018-02-07_18-33-04,00100 84 | 2018-12-20_16-00-05,00100 85 | 2018-02-07_18-21-20,00030 86 | 2018-12-20_15-39-16,00300 87 | 2018-12-20_15-04-24,00500 88 | 2018-02-07_18-33-04,00400 89 | 2018-12-20_15-04-24,02500 90 | 2018-12-20_16-04-42,00600 91 | 2018-12-20_16-04-42,00100 92 | 2018-10-29_16-32-57,02000 93 | 2018-02-07_18-21-20,00280 94 | 2018-02-07_18-30-24,00000 95 | 2018-12-20_15-46-51,00200 96 | 2018-12-20_15-39-16,00900 97 | 2018-12-20_15-04-24,02800 98 | 2018-12-20_15-16-30,00700 99 | 2018-12-20_16-04-42,00800 100 | 2018-12-20_15-18-12,00200 101 | 2018-12-20_14-19-53,00200 102 | 2018-12-20_15-04-24,02600 103 | 2018-12-20_15-46-51,00500 104 | 2018-12-20_15-18-12,00100 105 | 2018-12-17_21-05-50,00200 106 | 2018-12-20_15-16-30,00400 107 | 2018-10-29_16-32-57,01600 108 | 2018-02-07_18-21-20,00100 109 | 2018-02-07_18-21-20,00250 110 | 2018-12-20_15-46-51,00800 111 | 2018-02-07_18-30-24,00100 112 | 2019-01-09_08-09-20,04200 113 | 2018-12-17_09-12-03,00800 114 | 2018-12-17_09-12-03,01400 115 | 2018-12-20_15-26-45,00000 116 | 2018-12-20_18-03-59,01600 117 | 2018-02-07_18-21-20,00020 118 | 2018-12-20_15-39-16,00700 119 | 2018-12-20_14-41-07,00400 120 | 2018-12-20_14-41-07,00300 121 | 2018-12-16_14-58-58,04000 122 | 2018-02-07_18-06-31,00320 123 | 2018-02-07_18-06-31,00300 124 | 2018-02-07_18-06-31,00100 125 | 2018-02-07_18-06-31,00450 126 | 
2018-12-19_16-36-54,04200 -------------------------------------------------------------------------------- /src/test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-computational-imaging/Gated2Gated/5056123c9ea88402dc5c9c9abbb195ab91ba8f6c/src/test.py -------------------------------------------------------------------------------- /src/train.py: -------------------------------------------------------------------------------- 1 | # Copyright Niantic 2019. Patent Pending. All rights reserved. 2 | # 3 | # This software is licensed under the terms of the Monodepth2 licence 4 | # which allows for non-commercial use only, the full terms of which are made 5 | # available in the LICENSE file. 6 | 7 | from __future__ import absolute_import, division, print_function 8 | 9 | from trainer import Trainer 10 | from options import GatedOptions 11 | 12 | options = GatedOptions() 13 | opts = options.parse() 14 | 15 | 16 | if __name__ == "__main__": 17 | trainer = Trainer(opts) 18 | trainer.train() -------------------------------------------------------------------------------- /src/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright Niantic 2019. Patent Pending. All rights reserved. 2 | # 3 | # This software is licensed under the terms of the Monodepth2 licence 4 | # which allows for non-commercial use only, the full terms of which are made 5 | # available in the LICENSE file. 6 | 7 | from __future__ import absolute_import, division, print_function 8 | import os 9 | import hashlib 10 | import zipfile 11 | from six.moves import urllib 12 | import torch 13 | import cv2 14 | import io 15 | import matplotlib.pyplot as plt 16 | import numpy as np 17 | from matplotlib.lines import Line2D 18 | 19 | 20 | def readlines(filename): 21 | """Read all the lines in a text file and return as a list 22 | """ 23 | with open(filename, 'r') as f: 24 | lines = f.read().splitlines() 25 | return lines 26 | 27 | 28 | def normalize_image(x): 29 | """Rescale image pixels to span range [0, 1] 30 | """ 31 | ma = float(x.max().cpu().data) 32 | mi = float(x.min().cpu().data) 33 | d = ma - mi if ma != mi else 1e5 34 | return (x - mi) / d 35 | 36 | 37 | def sec_to_hm(t): 38 | """Convert time in seconds to time in hours, minutes and seconds 39 | e.g. 10239 -> (2, 50, 39) 40 | """ 41 | t = int(t) 42 | s = t % 60 43 | t //= 60 44 | m = t % 60 45 | t //= 60 46 | return t, m, s 47 | 48 | 49 | def sec_to_hm_str(t): 50 | """Convert time in seconds to a nice string 51 | e.g. 
10239 -> '02h50m39s' 52 | """ 53 | h, m, s = sec_to_hm(t) 54 | return "{:02d}h{:02d}m{:02d}s".format(h, m, s) 55 | 56 | 57 | def download_model_if_doesnt_exist(model_name): 58 | """If pretrained kitti model doesn't exist, download and unzip it 59 | """ 60 | # values are tuples of (, ) 61 | download_paths = { 62 | "mono_640x192": 63 | ("https://storage.googleapis.com/niantic-lon-static/research/monodepth2/mono_640x192.zip", 64 | "a964b8356e08a02d009609d9e3928f7c"), 65 | "stereo_640x192": 66 | ("https://storage.googleapis.com/niantic-lon-static/research/monodepth2/stereo_640x192.zip", 67 | "3dfb76bcff0786e4ec07ac00f658dd07"), 68 | "mono+stereo_640x192": 69 | ("https://storage.googleapis.com/niantic-lon-static/research/monodepth2/mono%2Bstereo_640x192.zip", 70 | "c024d69012485ed05d7eaa9617a96b81"), 71 | "mono_no_pt_640x192": 72 | ("https://storage.googleapis.com/niantic-lon-static/research/monodepth2/mono_no_pt_640x192.zip", 73 | "9c2f071e35027c895a4728358ffc913a"), 74 | "stereo_no_pt_640x192": 75 | ("https://storage.googleapis.com/niantic-lon-static/research/monodepth2/stereo_no_pt_640x192.zip", 76 | "41ec2de112905f85541ac33a854742d1"), 77 | "mono+stereo_no_pt_640x192": 78 | ("https://storage.googleapis.com/niantic-lon-static/research/monodepth2/mono%2Bstereo_no_pt_640x192.zip", 79 | "46c3b824f541d143a45c37df65fbab0a"), 80 | "mono_1024x320": 81 | ("https://storage.googleapis.com/niantic-lon-static/research/monodepth2/mono_1024x320.zip", 82 | "0ab0766efdfeea89a0d9ea8ba90e1e63"), 83 | "stereo_1024x320": 84 | ("https://storage.googleapis.com/niantic-lon-static/research/monodepth2/stereo_1024x320.zip", 85 | "afc2f2126d70cf3fdf26b550898b501a"), 86 | "mono+stereo_1024x320": 87 | ("https://storage.googleapis.com/niantic-lon-static/research/monodepth2/mono%2Bstereo_1024x320.zip", 88 | "cdc5fc9b23513c07d5b19235d9ef08f7"), 89 | } 90 | 91 | if not os.path.exists("models"): 92 | os.makedirs("models") 93 | 94 | model_path = os.path.join("models", model_name) 95 | 96 | def check_file_matches_md5(checksum, fpath): 97 | if not os.path.exists(fpath): 98 | return False 99 | with open(fpath, 'rb') as f: 100 | current_md5checksum = hashlib.md5(f.read()).hexdigest() 101 | return current_md5checksum == checksum 102 | 103 | # see if we have the model already downloaded... 
104 | if not os.path.exists(os.path.join(model_path, "encoder.pth")): 105 | 106 | model_url, required_md5checksum = download_paths[model_name] 107 | 108 | if not check_file_matches_md5(required_md5checksum, model_path + ".zip"): 109 | print("-> Downloading pretrained model to {}".format(model_path + ".zip")) 110 | urllib.request.urlretrieve(model_url, model_path + ".zip") 111 | 112 | if not check_file_matches_md5(required_md5checksum, model_path + ".zip"): 113 | print(" Failed to download a file which matches the checksum - quitting") 114 | quit() 115 | 116 | print(" Unzipping model...") 117 | with zipfile.ZipFile(model_path + ".zip", 'r') as f: 118 | f.extractall(model_path) 119 | 120 | print(" Model unzipped to {}".format(model_path)) 121 | 122 | def fig2img(fig_buffer): 123 | buf = io.BytesIO() 124 | plt.axis('off') 125 | plt.savefig(buf, format="png",transparent = True, bbox_inches = 'tight', pad_inches = 0,dpi=100, facecolor=(0, 0, 0)) 126 | buf.seek(0) 127 | img_arr = np.frombuffer(buf.getvalue(), dtype=np.uint8) 128 | buf.close() 129 | img = cv2.imdecode(img_arr, 1) 130 | return img 131 | 132 | def disp_to_mpimg(disp,colormap='jet_r'): 133 | fig = plt.figure(figsize=(30,20),dpi=100) 134 | plt.subplots_adjust(wspace=0.00,hspace=0.00) 135 | 136 | fig.add_subplot(111) 137 | plt.imshow(disp,cmap=colormap) 138 | plt.axis('off') 139 | 140 | img = fig2img(fig) 141 | plt.close() 142 | # im_pil = Image.fromarray(img) 143 | # cv2.imwrite("debug.png",img) 144 | return img 145 | 146 | def snr_binary_mask(gated_img, min_intns = 0.04, max_intns = 0.98): 147 | """[snr_mask_binary calculates a binary mask based on the SNR and the maximum intensity of the input gated image] 148 | 149 | Args: 150 | gated_img ([torch.Tensor]): [gated image of dimension B x 3 x H x W] 151 | Returns: 152 | [torch.Tensor]: [Mask with dimension B x 1 x H x W] 153 | """ 154 | max_intensity,_ = torch.max(gated_img, dim=1, keepdims=True) 155 | min_intensity,_ = torch.min(gated_img, dim=1, keepdims=True) 156 | snr = max_intensity - min_intensity 157 | snr_binary_mask = torch.logical_and(snr > min_intns, max_intensity < max_intns).float() 158 | return snr_binary_mask 159 | 160 | def intensity_mask(gated_img, depth): 161 | """[intensity_mask calculates a mask based on the intensities of the input gated image and the utilized range intensity profiles and the depth of the flat world] 162 | Args: 163 | gated_img ([torch.Tensor]): [gated image of dimension B x 3 x H x W] 164 | Returns: 165 | [torch.Tensor]: [Mask with dimension B x 1 x H x W] 166 | """ 167 | max_intensity,_ = torch.max(gated_img, dim=1, keepdims=True) 168 | mask1 = max_intensity == gated_img[:,0:1,:,:] 169 | mask2 = torch.logical_and(max_intensity == gated_img[:,1:2,:,:], depth > 30. * torch.normal(1., 0.1, size=(depth.size())).to(device=depth.device)) 170 | mask3 = torch.logical_and(max_intensity == gated_img[:,2:3,:,:], depth > 73. * torch.normal(1., 0.1, size=(depth.size())).to(device=depth.device)) 171 | intensity_mask = mask1 + mask2 + mask3 172 | intensity_mask = (intensity_mask > 0.0).float() 173 | return intensity_mask 174 | 175 | def plot_grad_flow(named_parameters): 176 | '''Plots the gradients flowing through different layers in the net during training. 177 | Can be used for checking for possible gradient vanishing / exploding problems. 
178 | 179 | Usage: Plug this function in Trainer class after loss.backwards() as 180 | "plot_grad_flow(self.model.named_parameters())" to visualize the gradient flow''' 181 | ave_grads = [] 182 | max_grads= [] 183 | layers = [] 184 | for n, p in named_parameters: 185 | if(p.requires_grad) and ("bias" not in n) and (p.grad is not None): 186 | layers.append(n) 187 | ave_grads.append(p.grad.abs().detach().cpu().numpy().mean()) 188 | max_grads.append(p.grad.abs().detach().cpu().numpy().max()) 189 | 190 | fig = plt.figure(figsize=(30,20),dpi=100) 191 | plt.subplots_adjust(wspace=0.00,hspace=0.00) 192 | 193 | fig.add_subplot(111) 194 | plt.bar(np.arange(len(max_grads)), max_grads, alpha=0.1, lw=1, color="c") 195 | plt.bar(np.arange(len(max_grads)), ave_grads, alpha=0.1, lw=1, color="b") 196 | plt.hlines(0, 0, len(ave_grads)+1, lw=2, color="k" ) 197 | plt.xticks(range(0,len(ave_grads), 1), layers, rotation="vertical") 198 | plt.xlim(left=0, right=len(ave_grads)) 199 | plt.ylim(bottom = -0.001, top=0.02) # zoom in on the lower gradient regions 200 | plt.xlabel("Layers") 201 | plt.ylabel("average gradient") 202 | plt.title("Gradient flow") 203 | plt.grid(True) 204 | plt.legend([Line2D([0], [0], color="c", lw=4), 205 | Line2D([0], [0], color="b", lw=4), 206 | Line2D([0], [0], color="k", lw=4)], ['max-gradient', 'mean-gradient', 'zero-gradient']) 207 | # plt.savefig('grad.png') 208 | buf = io.BytesIO() 209 | plt.savefig(buf, format="png", bbox_inches = 'tight', pad_inches = 0) 210 | buf.seek(0) 211 | img_arr = np.frombuffer(buf.getvalue(), dtype=np.uint8) 212 | buf.close() 213 | img = cv2.imdecode(img_arr, 1) 214 | # cv2.imwrite('grad_img.png',img) 215 | plt.close() 216 | return img 217 | 218 | def depth_image(depth, min_depth=0.1, max_depth = 100.0, colormap='jet_r'): 219 | fig = plt.figure(figsize=(20,10),dpi=100) 220 | plt.subplots_adjust(wspace=0.00,hspace=0.00) 221 | 222 | depth = np.clip(depth, min_depth, max_depth) 223 | depth[0,0] = min_depth 224 | depth[-1,-1] = max_depth 225 | 226 | 227 | 228 | fig.add_subplot(111) 229 | plt.imshow(depth,cmap=colormap) 230 | plt.axis('off') 231 | plt.colorbar(aspect=80,orientation='horizontal',pad=0.01) 232 | 233 | img = fig2img(fig) 234 | plt.close() 235 | return img -------------------------------------------------------------------------------- /src/visualize2D.py: -------------------------------------------------------------------------------- 1 | import matplotlib as mpl 2 | import matplotlib.cm as cm 3 | import numpy as np 4 | import cv2 5 | 6 | 7 | def colorize_pointcloud(depth, min_distance=3, max_distance=80, radius=3, cmap = cm.jet): 8 | norm = mpl.colors.Normalize(vmin=min_distance, vmax=max_distance) 9 | 10 | m = cm.ScalarMappable(norm=norm, cmap=cmap) 11 | pos = np.argwhere(depth > 0) 12 | 13 | pointcloud_color = np.zeros((depth.shape[0], depth.shape[1], 3), dtype=np.uint8) 14 | for i in range(pos.shape[0]): 15 | color = tuple([int(255 * value) for value in m.to_rgba(depth[pos[i, 0], pos[i, 1]])[0:3]]) 16 | cv2.circle(pointcloud_color, (pos[i, 1], pos[i, 0]), radius, (color[0], color[1], color[2]), -1) 17 | 18 | return pointcloud_color 19 | 20 | 21 | def colorize_depth(depth, min_distance=3, max_distance=80,cmap = cm.jet): 22 | norm = mpl.colors.Normalize(vmin=min_distance, vmax=max_distance) 23 | 24 | m = cm.ScalarMappable(norm=norm, cmap=cmap) 25 | 26 | depth_color = (255 * m.to_rgba(depth)[:, :, 0:3]).astype(np.uint8) 27 | depth_color[depth <= 0] = [0, 0, 0] 28 | depth_color[np.isnan(depth)] = [0, 0, 0] 29 | depth_color[depth == np.inf] = 
[0, 0, 0] 30 | 31 | return depth_color --------------------------------------------------------------------------------
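For reference, here is a minimal usage sketch of the helpers defined above: `snr_binary_mask` and `intensity_mask` from `src/utils.py` take a B x 3 x H x W tensor of gated slices (plus a B x 1 x H x W depth map for the intensity mask), and `colorize_depth` from `src/visualize2D.py` maps an H x W metric depth array to a color image. The random tensors, the image resolution, and the 3-80 m visualization range below are placeholders for illustration, not values taken from the training configuration.

```
import cv2
import torch

# Assumes the interpreter is started from the src/ directory so that
# utils.py and visualize2D.py are importable as top-level modules.
from utils import snr_binary_mask, intensity_mask
from visualize2D import colorize_depth

# Dummy inputs for illustration: one batch of three gated slices in [0, 1]
# and a matching depth map in meters.
gated = torch.rand(1, 3, 512, 1024)
depth = torch.rand(1, 1, 512, 1024) * 80.0

# Pixels with sufficient gated signal and no saturation (B x 1 x H x W, values in {0, 1}).
snr_mask = snr_binary_mask(gated, min_intns=0.04, max_intns=0.98)

# Pixels whose brightest slice is plausible given the (noise-perturbed) depth.
int_mask = intensity_mask(gated, depth)

# Colorize the masked depth for visual inspection; colorize_depth expects an H x W array
# and returns an RGB uint8 image, so convert to BGR before writing with OpenCV.
masked_depth = (depth * snr_mask * int_mask)[0, 0].numpy()
depth_color = colorize_depth(masked_depth, min_distance=3, max_distance=80)
cv2.imwrite("masked_depth_vis.png", cv2.cvtColor(depth_color, cv2.COLOR_RGB2BGR))
```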