├── .gitignore ├── README.md ├── assets ├── gifs │ ├── day.gif │ ├── fog.gif │ ├── night.gif │ └── snow.gif └── imgs │ ├── albedo_ambient_examples.png │ ├── architecture.png │ └── cbar.png ├── chebychev └── cheb_coef_real_degree6.txt ├── depth_flat_world └── depth_flat_world.npz ├── environment.yml ├── example ├── gated0_10bit │ └── 2019-01-09_08-27-29_00200.png ├── gated1_10bit │ └── 2019-01-09_08-27-29_00200.png └── gated2_10bit │ └── 2019-01-09_08-27-29_00200.png ├── scripts ├── eval_g2d.sh ├── eval_stf.sh ├── inference.sh ├── train.sh └── unzip_data.sh └── src ├── dataset ├── __init__.py ├── gated2depth.py └── gated_dataset.py ├── eval.py ├── inference.py ├── layers.py ├── networks ├── UNet.py ├── __init__.py ├── depth │ ├── DepthResNet.py │ ├── PackNet01.py │ ├── PackNetSlim01.py │ ├── PackNetSlim01MultiDecoder.py │ └── depth_decoder.py ├── depth_decoder.py ├── layers │ ├── packnet │ │ └── layers01.py │ ├── resnet │ │ ├── depth_decoder.py │ │ ├── layers.py │ │ └── resnet_encoder.py │ └── resnet_encoder.py ├── layers01.py ├── pose │ ├── pose_cnn.py │ └── pose_decoder.py ├── pose_cnn.py ├── pose_decoder.py ├── resnet_encoder.py └── resnet_encoder2.py ├── options.py ├── splits ├── g2d │ ├── real_test_day.txt │ └── real_test_night.txt ├── gated2gated │ ├── train_files.txt │ └── val_files.txt └── stf │ ├── test_clear_day.txt │ ├── test_clear_night.txt │ ├── test_dense_fog_day.txt │ ├── test_dense_fog_night.txt │ ├── test_light_fog_day.txt │ ├── test_light_fog_night.txt │ ├── test_snow_day.txt │ └── test_snow_night.txt ├── test.py ├── train.py ├── trainer.py ├── utils.py └── visualize2D.py /.gitignore: -------------------------------------------------------------------------------- 1 | */**/__pycache__/ 2 | src/test.ipynb 3 | .vscode/ 4 | weights/initialization/*.pth 5 | weights/final/*.pth 6 | logs/* -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Gated2Gated : Self-Supervised Depth Estimation from Gated Images 2 | 3 | ![architecture](assets/imgs/architecture.png) 4 | 5 | This repository contains code for [Gated2Gated : Self-Supervised Depth Estimation from Gated Images](https://arxiv.org/pdf/2112.02416.pdf). 6 | 7 | 8 | ## Summary 9 | Gated cameras hold promise as an alternative to scanning LiDAR sensors with high-resolution 3D depth that is robust to back-scatter in fog, snow, and rain. Instead of sequentially scanning a scene and directly recording depth via the photon time-of-flight, as in pulsed LiDAR sensors, gated imagers encode depth in the relative intensity of a handful of gated slices, captured at megapixel resolution. Although existing methods have shown that it is possible to decode high-resolution depth from such measurements, these methods require synchronized and calibrated LiDAR to supervise the gated depth decoder -- prohibiting fast adoption across geographies, training on large unpaired datasets, and exploring alternative applications outside of automotive use cases. In this work, we fill this gap and propose an entirely self-supervised depth estimation method that uses gated intensity profiles and temporal consistency as a training signal. The proposed model is trained end-to-end from gated video sequences, does not require LiDAR or RGB data, and learns to estimate absolute depth values. 
We take gated slices as input and disentangle the estimation of the scene albedo, depth, and ambient light, which are then used to learn to reconstruct the input slices through a cyclic loss. We rely on temporal consistency between a given frame and neighboring gated slices to estimate depth in regions with shadows and reflections. We experimentally validate that the proposed approach outperforms existing supervised and self-supervised depth estimation methods based on monocular RGB and stereo images, as well as supervised methods based on gated images.
10 | 
11 | ## Getting started
12 | To get started, first clone this repository into a local directory using:
13 | 
14 | ```
15 | git clone https://github.com/princeton-computational-imaging/Gated2Gated
16 | ```
17 | To install all necessary packages, create the conda environment using:
18 | ```
19 | conda env create -f environment.yml
20 | ```
21 | Activate the environment using:
22 | ```
23 | conda activate gated2gated
24 | ```
25 | 
26 | Download the Gated2Gated dataset and the models from the [DENSE dataset webpage](https://www.uni-ulm.de/en/in/driveu/projects/dense-datasets).
27 | 
28 | Check that all files have been downloaded, then unzip them using:
29 | ```
30 | sh scripts/unzip_data.sh
31 | ```
32 | 
33 | After unzipping the files, your directory should look like this:
34 | ```
35 | gated2gated
36 | ├── data
37 | │   ├── gated0_10bit
38 | │   ├── gated0_10bit_history_1
39 | │   ├── gated0_10bit_history_-1
40 | │   ├── gated0_10bit_history_2
41 | │   ├── gated0_10bit_history_-2
42 | │   ├── gated0_10bit_history_3
43 | │   ├── gated0_10bit_history_-3
44 | │   ├── gated0_10bit_history_4
45 | │   ├── gated0_10bit_history_-4
46 | │   ├── gated0_10bit_history_-5
47 | │   ├── gated0_10bit_history_-6
48 | │   ├── gated0_8bit
49 | │   ├── gated1_10bit
50 | │   ├── gated1_10bit_history_1
51 | │   ├── gated1_10bit_history_-1
52 | │   ├── gated1_10bit_history_2
53 | │   ├── gated1_10bit_history_-2
54 | │   ├── gated1_10bit_history_3
55 | │   ├── gated1_10bit_history_-3
56 | │   ├── gated1_10bit_history_4
57 | │   ├── gated1_10bit_history_-4
58 | │   ├── gated1_10bit_history_-5
59 | │   ├── gated1_10bit_history_-6
60 | │   ├── gated1_8bit
61 | │   ├── gated2_10bit
62 | │   ├── gated2_10bit_history_1
63 | │   ├── gated2_10bit_history_-1
64 | │   ├── gated2_10bit_history_2
65 | │   ├── gated2_10bit_history_-2
66 | │   ├── gated2_10bit_history_3
67 | │   ├── gated2_10bit_history_-3
68 | │   ├── gated2_10bit_history_4
69 | │   ├── gated2_10bit_history_-4
70 | │   ├── gated2_10bit_history_-5
71 | │   ├── gated2_10bit_history_-6
72 | │   ├── gated2_8bit
73 | │   ├── gated_passive_10bit
74 | │   ├── gated_passive_10bit_history_1
75 | │   ├── gated_passive_10bit_history_-1
76 | │   ├── gated_passive_10bit_history_2
77 | │   ├── gated_passive_10bit_history_-2
78 | │   ├── gated_passive_10bit_history_3
79 | │   ├── gated_passive_10bit_history_-3
80 | │   ├── gated_passive_10bit_history_4
81 | │   ├── gated_passive_10bit_history_-4
82 | │   ├── gated_passive_10bit_history_-5
83 | │   ├── gated_passive_10bit_history_-6
84 | │   ├── gated_passive_8bit
85 | │   ├── lidar_hdl64_strongest_filtered_gated
86 | │   └── lidar_hdl64_strongest_gated
87 | └── models
88 |     ├── g2d
89 |     ├── initialization
90 |     └── stf
91 | ```
92 | 
93 | ## Quick Example
94 | Infer depth for a single example using:
95 | ```
96 | sh scripts/inference.sh
97 | ```
98 | ## Training
99 | Train a model, initialized with pre-trained weights from a lower resolution, using:
100 | 
101 | ```
102 | sh scripts/train.sh
103 | ```
104 | 
105 | ## Evaluation
106 | If you have not trained the models 
yourself, make sure that you have downloaded our models into the "models" folder.
107 | 
108 | Evaluation on the [Seeing Through Fog](https://openaccess.thecvf.com/content_CVPR_2020/papers/Bijelic_Seeing_Through_Fog_Without_Seeing_Fog_Deep_Multimodal_Sensor_Fusion_CVPR_2020_paper.pdf) dataset:
109 | ```
110 | sh scripts/eval_stf.sh
111 | ```
112 | Please note that we used filtered LiDAR point clouds for evaluation on the Seeing Through Fog dataset. These point clouds are available in our Gated2Gated dataset.
113 | 
114 | Evaluation on the [Gated2Depth](https://openaccess.thecvf.com/content_ICCV_2019/papers/Gruber_Gated2Depth_Real-Time_Dense_Lidar_From_Gated_Images_ICCV_2019_paper.pdf) dataset:
115 | ```
116 | sh scripts/eval_g2d.sh
117 | ```
118 | 
119 | 
123 | 
124 | 
125 | 
126 | ### Pre-trained Models
127 | Our final model weights for the [Seeing Through Fog](https://openaccess.thecvf.com/content_CVPR_2020/papers/Bijelic_Seeing_Through_Fog_Without_Seeing_Fog_Deep_Multimodal_Sensor_Fusion_CVPR_2020_paper.pdf) and [Gated2Depth](https://openaccess.thecvf.com/content_ICCV_2019/papers/Gruber_Gated2Depth_Real-Time_Dense_Lidar_From_Gated_Images_ICCV_2019_paper.pdf) datasets are available for download at the [DENSE dataset webpage](https://www.uni-ulm.de/en/in/driveu/projects/dense-datasets).
128 | 
129 | ## Examples
130 | 
131 | ![albedo and ambient examples](assets/imgs/albedo_ambient_examples.png)
132 | ![color bar](assets/imgs/cbar.png)
133 | #### Day
134 | ![day](assets/gifs/day.gif)
135 | 
136 | #### Night
137 | ![night](assets/gifs/night.gif)
138 | 
139 | #### Fog
140 | ![fog](assets/gifs/fog.gif)
141 | 
142 | #### Snow
143 | ![snow](assets/gifs/snow.gif)
144 | 
145 | ## Reference
146 | If you find our work on gated depth estimation useful in your research, please consider citing our paper:
147 | 
148 | ```bib
149 | @inproceedings{walia2022gated2gated,
150 |   title={Gated2Gated: Self-Supervised Depth Estimation from Gated Images},
151 |   author={Walia, Amanpreet and Walz, Stefanie and Bijelic, Mario and Mannan, Fahim and Julca-Aguilar, Frank and Langer, Michael and Ritter, Werner and Heide, Felix},
152 |   booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
153 |   pages={2811--2821},
154 |   year={2022}
155 | }
156 | 
157 | ```
158 | 
159 | ## Acknowledgements
160 | 
161 | Parts of this code are inspired by or borrowed from [monodepth2](https://github.com/nianticlabs/monodepth2) and [packnet-sfm](https://github.com/TRI-ML/packnet-sfm). 
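For reference, the shell scripts above are thin wrappers around the Python entry points in `src/`. The snippet below is a minimal sketch of the inference path (loading the three gated slices, running the `PackNetSlim01` depth network, and converting disparity to metric depth with `disp_to_depth`), mirroring `src/eval.py` and `src/inference.py`. It assumes the pretrained STF weights have been downloaded to `models/stf`, uses the example frame shipped in `example/`, and must be run with `src/` on the Python path; `scripts/inference.sh` remains the supported entry point.

```python
import os

import cv2
import numpy as np
import torch
from torchvision.transforms import ToTensor

# Run with src/ on the Python path, e.g.:  PYTHONPATH=src python sketch.py
import networks
from layers import disp_to_depth

HEIGHT, WIDTH, MAX_10BIT = 512, 1024, 2 ** 10 - 1.0


def load_gated(example_dir, img_id):
    """Stack the three gated slices into an HxWx3 float image in [0, 1]."""
    slices = []
    for gate_id in range(3):
        path = os.path.join(example_dir, "gated%d_10bit" % gate_id, img_id + ".png")
        img = cv2.imread(path, cv2.IMREAD_UNCHANGED)
        # Center-crop to the network input size (as in src/eval.py).
        h0, w0 = (img.shape[0] - HEIGHT) // 2, (img.shape[1] - WIDTH) // 2
        img = img[h0:h0 + HEIGHT, w0:w0 + WIDTH]
        slices.append(np.float32(np.clip(img, 0, MAX_10BIT) / MAX_10BIT))
    return np.stack(slices, axis=2)


# Depth network and weights (models/stf is the assumed download location).
depth_net = networks.PackNetSlim01(dropout=0.5, version="1A").cuda().eval()
state = torch.load(os.path.join("models", "stf", "depth.pth"))
depth_net.load_state_dict({k: v for k, v in state.items() if k in depth_net.state_dict()})

gated = load_gated("example", "2019-01-09_08-27-29_00200")
with torch.no_grad():
    x = ToTensor()(gated).unsqueeze(0).cuda()
    disp = depth_net(x)[("disp", 0)]
    _, depth = disp_to_depth(disp, 0.1, 100.0)      # min/max depth as in the scripts
    depth_m = depth[0, 0].cpu().numpy() * 70.0      # depth_normalizer from the scripts
print(depth_m.shape, depth_m.min(), depth_m.max())
```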
162 | 163 | -------------------------------------------------------------------------------- /assets/gifs/day.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-computational-imaging/Gated2Gated/5056123c9ea88402dc5c9c9abbb195ab91ba8f6c/assets/gifs/day.gif -------------------------------------------------------------------------------- /assets/gifs/fog.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-computational-imaging/Gated2Gated/5056123c9ea88402dc5c9c9abbb195ab91ba8f6c/assets/gifs/fog.gif -------------------------------------------------------------------------------- /assets/gifs/night.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-computational-imaging/Gated2Gated/5056123c9ea88402dc5c9c9abbb195ab91ba8f6c/assets/gifs/night.gif -------------------------------------------------------------------------------- /assets/gifs/snow.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-computational-imaging/Gated2Gated/5056123c9ea88402dc5c9c9abbb195ab91ba8f6c/assets/gifs/snow.gif -------------------------------------------------------------------------------- /assets/imgs/albedo_ambient_examples.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-computational-imaging/Gated2Gated/5056123c9ea88402dc5c9c9abbb195ab91ba8f6c/assets/imgs/albedo_ambient_examples.png -------------------------------------------------------------------------------- /assets/imgs/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-computational-imaging/Gated2Gated/5056123c9ea88402dc5c9c9abbb195ab91ba8f6c/assets/imgs/architecture.png -------------------------------------------------------------------------------- /assets/imgs/cbar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-computational-imaging/Gated2Gated/5056123c9ea88402dc5c9c9abbb195ab91ba8f6c/assets/imgs/cbar.png -------------------------------------------------------------------------------- /chebychev/cheb_coef_real_degree6.txt: -------------------------------------------------------------------------------- 1 | 2.659209412079132562e+03 1.236691633033492735e+03 2.812828548292934716e+00 2 | -2.275481873303552334e+02 -9.717205288136297270e+01 -2.331735007256061643e+00 3 | 4.608997630906832121e+00 2.806590349227340297e+00 1.891523404614634807e-01 4 | -5.283966431194774688e-02 -4.158711021465923835e-02 -5.285076242325659202e-03 5 | 3.429758582007581000e-04 3.129857163806640447e-04 6.333752718504534029e-05 6 | -1.163829913802227196e-06 -1.158968363067281679e-06 -3.229803960331233870e-07 7 | 1.597517278468657243e-09 1.680924118421431858e-09 5.863465070137500970e-10 -------------------------------------------------------------------------------- /depth_flat_world/depth_flat_world.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-computational-imaging/Gated2Gated/5056123c9ea88402dc5c9c9abbb195ab91ba8f6c/depth_flat_world/depth_flat_world.npz -------------------------------------------------------------------------------- 
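The Chebyshev coefficient file above (`chebychev/cheb_coef_real_degree6.txt`, passed to training via `--coeff_fpath` in `scripts/train.sh`) stores seven coefficient rows (orders 0-6) in three columns. The sketch below shows one way to load and evaluate such a degree-6 Chebyshev series with NumPy; the assumption that each column corresponds to one gated slice, the example distance range, and the absence of any domain rescaling are illustrative guesses, not behavior confirmed by the repository.

```python
import numpy as np

# Degree-6 Chebyshev coefficients: 7 rows (orders 0..6), 3 columns
# (assumed here to be one column per gated slice).
coefs = np.loadtxt("chebychev/cheb_coef_real_degree6.txt")  # shape (7, 3)

# Illustrative query distances in meters (assumed range, not from the repo).
distances = np.linspace(3.0, 100.0, 200)

# Evaluate one Chebyshev series per column; the training code may rescale
# distances to a normalized domain first, which is not reproduced here.
profiles = np.stack(
    [np.polynomial.chebyshev.chebval(distances, coefs[:, k]) for k in range(coefs.shape[1])],
    axis=-1,
)
print(profiles.shape)  # (200, 3)
```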
/environment.yml: -------------------------------------------------------------------------------- 1 | name: gated2gated 2 | channels: 3 | - pytorch 4 | - defaults 5 | - conda-forge 6 | dependencies: 7 | - _ipyw_jlab_nb_ext_conf=0.1.0=py38_0 8 | - _libgcc_mutex=0.1=main 9 | - alabaster=0.7.12=pyhd3eb1b0_0 10 | - anaconda=2021.05=py38_0 11 | - anaconda-client=1.7.2=py38_0 12 | - anaconda-navigator=2.0.3=py38_0 13 | - anaconda-project=0.9.1=pyhd3eb1b0_1 14 | - anyio=2.2.0=py38h06a4308_1 15 | - appdirs=1.4.4=py_0 16 | - argh=0.26.2=py38_0 17 | - argon2-cffi=20.1.0=py38h27cfd23_1 18 | - asn1crypto=1.4.0=py_0 19 | - astroid=2.5=py38h06a4308_1 20 | - astropy=4.2.1=py38h27cfd23_1 21 | - async_generator=1.10=pyhd3eb1b0_0 22 | - atomicwrites=1.4.0=py_0 23 | - attrs=20.3.0=pyhd3eb1b0_0 24 | - autopep8=1.5.6=pyhd3eb1b0_0 25 | - babel=2.9.0=pyhd3eb1b0_0 26 | - backcall=0.2.0=pyhd3eb1b0_0 27 | - backports=1.0=pyhd3eb1b0_2 28 | - backports.functools_lru_cache=1.6.4=pyhd3eb1b0_0 29 | - backports.shutil_get_terminal_size=1.0.0=pyhd3eb1b0_3 30 | - backports.tempfile=1.0=pyhd3eb1b0_1 31 | - backports.weakref=1.0.post1=py_1 32 | - beautifulsoup4=4.9.3=pyha847dfd_0 33 | - bitarray=2.1.0=py38h27cfd23_1 34 | - bkcharts=0.2=py38_0 35 | - black=19.10b0=py_0 36 | - blas=1.0=mkl 37 | - bleach=3.3.0=pyhd3eb1b0_0 38 | - blosc=1.21.0=h8c45485_0 39 | - bokeh=2.3.2=py38h06a4308_0 40 | - boto=2.49.0=py38_0 41 | - bottleneck=1.3.2=py38heb32a55_1 42 | - brotlipy=0.7.0=py38h27cfd23_1003 43 | - bzip2=1.0.8=h7b6447c_0 44 | - c-ares=1.17.1=h27cfd23_0 45 | - ca-certificates=2021.4.13=h06a4308_1 46 | - cairo=1.16.0=hf32fb01_1 47 | - certifi=2020.12.5=py38h06a4308_0 48 | - cffi=1.14.5=py38h261ae71_0 49 | - chardet=4.0.0=py38h06a4308_1003 50 | - click=7.1.2=pyhd3eb1b0_0 51 | - cloudpickle=1.6.0=py_0 52 | - clyent=1.2.2=py38_1 53 | - colorama=0.4.4=pyhd3eb1b0_0 54 | - conda=4.11.0=py38h578d9bd_0 55 | - conda-build=3.21.4=py38h06a4308_0 56 | - conda-content-trust=0.1.1=pyhd3eb1b0_0 57 | - conda-env=2.6.0=1 58 | - conda-package-handling=1.7.3=py38h27cfd23_1 59 | - conda-repo-cli=1.0.4=pyhd3eb1b0_0 60 | - conda-token=0.3.0=pyhd3eb1b0_0 61 | - conda-verify=3.4.2=py_1 62 | - contextlib2=0.6.0.post1=py_0 63 | - cryptography=3.4.7=py38hd23ed53_0 64 | - cudatoolkit=11.0.221=h6bb024c_0 65 | - curl=7.71.1=hbc83047_1 66 | - cycler=0.10.0=py38_0 67 | - cython=0.29.23=py38h2531618_0 68 | - cytoolz=0.11.0=py38h7b6447c_0 69 | - dask=2021.4.0=pyhd3eb1b0_0 70 | - dask-core=2021.4.0=pyhd3eb1b0_0 71 | - dbus=1.13.18=hb2f20db_0 72 | - decorator=5.0.6=pyhd3eb1b0_0 73 | - defusedxml=0.7.1=pyhd3eb1b0_0 74 | - diff-match-patch=20200713=py_0 75 | - distributed=2021.4.1=py38h06a4308_0 76 | - docutils=0.17.1=py38h06a4308_1 77 | - entrypoints=0.3=py38_0 78 | - et_xmlfile=1.0.1=py_1001 79 | - expat=2.3.0=h2531618_2 80 | - fastcache=1.1.0=py38h7b6447c_0 81 | - ffmpeg=4.3=hf484d3e_0 82 | - filelock=3.0.12=pyhd3eb1b0_1 83 | - flake8=3.9.0=pyhd3eb1b0_0 84 | - flask=1.1.2=pyhd3eb1b0_0 85 | - fontconfig=2.13.1=h6c09931_0 86 | - freetype=2.10.4=h5ab3b9f_0 87 | - fribidi=1.0.10=h7b6447c_0 88 | - fsspec=0.9.0=pyhd3eb1b0_0 89 | - future=0.18.2=py38_1 90 | - get_terminal_size=1.0.0=haa9412d_0 91 | - gevent=21.1.2=py38h27cfd23_1 92 | - glib=2.68.1=h36276a3_0 93 | - glob2=0.7=pyhd3eb1b0_0 94 | - gmp=6.2.1=h2531618_2 95 | - gmpy2=2.0.8=py38hd5f6e3b_3 96 | - gnutls=3.6.15=he1e5248_0 97 | - graphite2=1.3.14=h23475e2_0 98 | - greenlet=1.0.0=py38h2531618_2 99 | - gst-plugins-base=1.14.0=h8213a91_2 100 | - gstreamer=1.14.0=h28cd5cc_2 101 | - h5py=2.10.0=py38h7918eee_0 102 | - 
harfbuzz=2.8.0=h6f93f22_0 103 | - hdf5=1.10.4=hb1b8bf9_0 104 | - heapdict=1.0.1=py_0 105 | - html5lib=1.1=py_0 106 | - icu=58.2=he6710b0_3 107 | - idna=2.10=pyhd3eb1b0_0 108 | - imageio=2.9.0=pyhd3eb1b0_0 109 | - imagesize=1.2.0=pyhd3eb1b0_0 110 | - importlib_metadata=3.10.0=hd3eb1b0_0 111 | - iniconfig=1.1.1=pyhd3eb1b0_0 112 | - intel-openmp=2021.2.0=h06a4308_610 113 | - intervaltree=3.1.0=py_0 114 | - ipykernel=5.3.4=py38h5ca1d4c_0 115 | - ipython=7.22.0=py38hb070fc8_0 116 | - ipython_genutils=0.2.0=pyhd3eb1b0_1 117 | - ipywidgets=7.6.3=pyhd3eb1b0_1 118 | - isort=5.8.0=pyhd3eb1b0_0 119 | - itsdangerous=1.1.0=pyhd3eb1b0_0 120 | - jbig=2.1=hdba287a_0 121 | - jdcal=1.4.1=py_0 122 | - jedi=0.17.2=py38h06a4308_1 123 | - jeepney=0.6.0=pyhd3eb1b0_0 124 | - jinja2=2.11.3=pyhd3eb1b0_0 125 | - joblib=1.0.1=pyhd3eb1b0_0 126 | - jpeg=9b=h024ee3a_2 127 | - json5=0.9.5=py_0 128 | - jsonschema=3.2.0=py_2 129 | - jupyter=1.0.0=py38_7 130 | - jupyter-packaging=0.7.12=pyhd3eb1b0_0 131 | - jupyter_client=6.1.12=pyhd3eb1b0_0 132 | - jupyter_console=6.4.0=pyhd3eb1b0_0 133 | - jupyter_core=4.7.1=py38h06a4308_0 134 | - jupyter_server=1.4.1=py38h06a4308_0 135 | - jupyterlab=3.0.14=pyhd3eb1b0_1 136 | - jupyterlab_pygments=0.1.2=py_0 137 | - jupyterlab_server=2.4.0=pyhd3eb1b0_0 138 | - jupyterlab_widgets=1.0.0=pyhd3eb1b0_1 139 | - keyring=22.3.0=py38h06a4308_0 140 | - kiwisolver=1.3.1=py38h2531618_0 141 | - krb5=1.18.2=h173b8e3_0 142 | - lame=3.100=h7b6447c_0 143 | - lazy-object-proxy=1.6.0=py38h27cfd23_0 144 | - lcms2=2.12=h3be6417_0 145 | - ld_impl_linux-64=2.33.1=h53a641e_7 146 | - libarchive=3.4.2=h62408e4_0 147 | - libcurl=7.71.1=h20c2e04_1 148 | - libedit=3.1.20210216=h27cfd23_1 149 | - libev=4.33=h7b6447c_0 150 | - libffi=3.3=he6710b0_2 151 | - libgcc-ng=9.1.0=hdf63c60_0 152 | - libgfortran-ng=7.3.0=hdf63c60_0 153 | - libiconv=1.15=h63c8f33_5 154 | - libidn2=2.3.2=h7f8727e_0 155 | - liblief=0.10.1=he6710b0_0 156 | - libllvm10=10.0.1=hbcb73fb_5 157 | - libpng=1.6.37=hbc83047_0 158 | - libsodium=1.0.18=h7b6447c_0 159 | - libspatialindex=1.9.3=h2531618_0 160 | - libssh2=1.9.0=h1ba5d50_1 161 | - libstdcxx-ng=9.1.0=hdf63c60_0 162 | - libtasn1=4.16.0=h27cfd23_0 163 | - libtiff=4.2.0=h85742a9_0 164 | - libtool=2.4.6=h7b6447c_1005 165 | - libunistring=0.9.10=h27cfd23_0 166 | - libuuid=1.0.3=h1bed415_2 167 | - libuv=1.40.0=h7b6447c_0 168 | - libwebp-base=1.2.0=h27cfd23_0 169 | - libxcb=1.14=h7b6447c_0 170 | - libxml2=2.9.10=hb55368b_3 171 | - libxslt=1.1.34=hc22bd24_0 172 | - llvmlite=0.36.0=py38h612dafd_4 173 | - locket=0.2.1=py38h06a4308_1 174 | - lxml=4.6.3=py38h9120a33_0 175 | - lz4-c=1.9.3=h2531618_0 176 | - lzo=2.10=h7b6447c_2 177 | - markupsafe=1.1.1=py38h7b6447c_0 178 | - matplotlib=3.3.4=py38h06a4308_0 179 | - matplotlib-base=3.3.4=py38h62a2d02_0 180 | - mccabe=0.6.1=py38_1 181 | - mistune=0.8.4=py38h7b6447c_1000 182 | - mkl=2021.2.0=h06a4308_296 183 | - mkl-service=2.3.0=py38h27cfd23_1 184 | - mkl_fft=1.3.0=py38h42c9631_2 185 | - mkl_random=1.2.1=py38ha9443f7_2 186 | - mock=4.0.3=pyhd3eb1b0_0 187 | - more-itertools=8.7.0=pyhd3eb1b0_0 188 | - mpc=1.1.0=h10f8cd9_1 189 | - mpfr=4.0.2=hb69a4c5_1 190 | - mpmath=1.2.1=py38h06a4308_0 191 | - msgpack-python=1.0.2=py38hff7bd54_1 192 | - multipledispatch=0.6.0=py38_0 193 | - mypy_extensions=0.4.3=py38_0 194 | - navigator-updater=0.2.1=py38_0 195 | - nbclassic=0.2.6=pyhd3eb1b0_0 196 | - nbclient=0.5.3=pyhd3eb1b0_0 197 | - nbconvert=6.0.7=py38_0 198 | - nbformat=5.1.3=pyhd3eb1b0_0 199 | - ncurses=6.2=he6710b0_1 200 | - nest-asyncio=1.5.1=pyhd3eb1b0_0 201 | - 
nettle=3.7.3=hbbd107a_1 202 | - networkx=2.5=py_0 203 | - ninja=1.10.2=py38hd09550d_3 204 | - nltk=3.6.1=pyhd3eb1b0_0 205 | - nose=1.3.7=pyhd3eb1b0_1006 206 | - notebook=6.3.0=py38h06a4308_0 207 | - numba=0.53.1=py38ha9443f7_0 208 | - numexpr=2.7.3=py38h22e1b3c_1 209 | - numpy=1.20.1=py38h93e21f0_0 210 | - numpy-base=1.20.1=py38h7d8b39e_0 211 | - numpydoc=1.1.0=pyhd3eb1b0_1 212 | - olefile=0.46=py_0 213 | - openh264=2.1.0=hd408876_0 214 | - openpyxl=3.0.7=pyhd3eb1b0_0 215 | - openssl=1.1.1k=h27cfd23_0 216 | - packaging=20.9=pyhd3eb1b0_0 217 | - pandas=1.2.4=py38h2531618_0 218 | - pandoc=2.12=h06a4308_0 219 | - pandocfilters=1.4.3=py38h06a4308_1 220 | - pango=1.45.3=hd140c19_0 221 | - parso=0.7.0=py_0 222 | - partd=1.2.0=pyhd3eb1b0_0 223 | - patchelf=0.12=h2531618_1 224 | - path=15.1.2=py38h06a4308_0 225 | - path.py=12.5.0=0 226 | - pathlib2=2.3.5=py38h06a4308_2 227 | - pathspec=0.7.0=py_0 228 | - patsy=0.5.1=py38_0 229 | - pcre=8.44=he6710b0_0 230 | - pep8=1.7.1=py38_0 231 | - pexpect=4.8.0=pyhd3eb1b0_3 232 | - pickleshare=0.7.5=pyhd3eb1b0_1003 233 | - pillow=8.2.0=py38he98fc37_0 234 | - pip=21.0.1=py38h06a4308_0 235 | - pixman=0.40.0=h7b6447c_0 236 | - pkginfo=1.7.0=py38h06a4308_0 237 | - pluggy=0.13.1=py38h06a4308_0 238 | - ply=3.11=py38_0 239 | - prometheus_client=0.10.1=pyhd3eb1b0_0 240 | - prompt-toolkit=3.0.17=pyh06a4308_0 241 | - prompt_toolkit=3.0.17=hd3eb1b0_0 242 | - psutil=5.8.0=py38h27cfd23_1 243 | - ptyprocess=0.7.0=pyhd3eb1b0_2 244 | - py=1.10.0=pyhd3eb1b0_0 245 | - py-lief=0.10.1=py38h403a769_0 246 | - pycodestyle=2.6.0=pyhd3eb1b0_0 247 | - pycosat=0.6.3=py38h7b6447c_1 248 | - pycparser=2.20=py_2 249 | - pycurl=7.43.0.6=py38h1ba5d50_0 250 | - pydocstyle=6.0.0=pyhd3eb1b0_0 251 | - pyerfa=1.7.3=py38h27cfd23_0 252 | - pyflakes=2.2.0=pyhd3eb1b0_0 253 | - pygments=2.8.1=pyhd3eb1b0_0 254 | - pylint=2.7.4=py38h06a4308_1 255 | - pyls-black=0.4.6=hd3eb1b0_0 256 | - pyls-spyder=0.3.2=pyhd3eb1b0_0 257 | - pyodbc=4.0.30=py38he6710b0_0 258 | - pyopenssl=20.0.1=pyhd3eb1b0_1 259 | - pyparsing=2.4.7=pyhd3eb1b0_0 260 | - pyqt=5.9.2=py38h05f1152_4 261 | - pyrsistent=0.17.3=py38h7b6447c_0 262 | - pysocks=1.7.1=py38h06a4308_0 263 | - pytables=3.6.1=py38h9fd0a39_0 264 | - pytest=6.2.3=py38h06a4308_2 265 | - python=3.8.8=hdb3f193_5 266 | - python-dateutil=2.8.1=pyhd3eb1b0_0 267 | - python-jsonrpc-server=0.4.0=py_0 268 | - python-language-server=0.36.2=pyhd3eb1b0_0 269 | - python-libarchive-c=2.9=pyhd3eb1b0_1 270 | - python_abi=3.8=2_cp38 271 | - pytorch=1.7.1=py3.8_cuda11.0.221_cudnn8.0.5_0 272 | - pytorch-mutex=1.0=cuda 273 | - pytz=2021.1=pyhd3eb1b0_0 274 | - pywavelets=1.1.1=py38h7b6447c_2 275 | - pyxdg=0.27=pyhd3eb1b0_0 276 | - pyyaml=5.4.1=py38h27cfd23_1 277 | - pyzmq=20.0.0=py38h2531618_1 278 | - qdarkstyle=2.8.1=py_0 279 | - qt=5.9.7=h5867ecd_1 280 | - qtawesome=1.0.2=pyhd3eb1b0_0 281 | - qtconsole=5.0.3=pyhd3eb1b0_0 282 | - qtpy=1.9.0=py_0 283 | - readline=8.1=h27cfd23_0 284 | - regex=2021.4.4=py38h27cfd23_0 285 | - requests=2.25.1=pyhd3eb1b0_0 286 | - ripgrep=12.1.1=0 287 | - rope=0.18.0=py_0 288 | - rtree=0.9.7=py38h06a4308_1 289 | - ruamel_yaml=0.15.100=py38h27cfd23_0 290 | - scikit-image=0.18.1=py38ha9443f7_0 291 | - scikit-learn=0.24.1=py38ha9443f7_0 292 | - scipy=1.6.2=py38had2a1c9_1 293 | - seaborn=0.11.1=pyhd3eb1b0_0 294 | - secretstorage=3.3.1=py38h06a4308_0 295 | - send2trash=1.5.0=pyhd3eb1b0_1 296 | - setuptools=52.0.0=py38h06a4308_0 297 | - simplegeneric=0.8.1=py38_2 298 | - singledispatch=3.6.1=pyhd3eb1b0_1001 299 | - sip=4.19.13=py38he6710b0_0 300 | - 
six=1.15.0=py38h06a4308_0 301 | - sniffio=1.2.0=py38h06a4308_1 302 | - snowballstemmer=2.1.0=pyhd3eb1b0_0 303 | - sortedcollections=2.1.0=pyhd3eb1b0_0 304 | - sortedcontainers=2.3.0=pyhd3eb1b0_0 305 | - soupsieve=2.2.1=pyhd3eb1b0_0 306 | - sphinx=4.0.1=pyhd3eb1b0_0 307 | - sphinxcontrib=1.0=py38_1 308 | - sphinxcontrib-applehelp=1.0.2=pyhd3eb1b0_0 309 | - sphinxcontrib-devhelp=1.0.2=pyhd3eb1b0_0 310 | - sphinxcontrib-htmlhelp=1.0.3=pyhd3eb1b0_0 311 | - sphinxcontrib-jsmath=1.0.1=pyhd3eb1b0_0 312 | - sphinxcontrib-qthelp=1.0.3=pyhd3eb1b0_0 313 | - sphinxcontrib-serializinghtml=1.1.4=pyhd3eb1b0_0 314 | - sphinxcontrib-websupport=1.2.4=py_0 315 | - spyder=4.2.5=py38h06a4308_0 316 | - spyder-kernels=1.10.2=py38h06a4308_0 317 | - sqlalchemy=1.4.15=py38h27cfd23_0 318 | - sqlite=3.35.4=hdfb4753_0 319 | - statsmodels=0.12.2=py38h27cfd23_0 320 | - sympy=1.8=py38h06a4308_0 321 | - tbb=2020.3=hfd86e86_0 322 | - tblib=1.7.0=py_0 323 | - terminado=0.9.4=py38h06a4308_0 324 | - testpath=0.4.4=pyhd3eb1b0_0 325 | - textdistance=4.2.1=pyhd3eb1b0_0 326 | - threadpoolctl=2.1.0=pyh5ca1d4c_0 327 | - three-merge=0.1.1=pyhd3eb1b0_0 328 | - tifffile=2020.10.1=py38hdd07704_2 329 | - tk=8.6.10=hbc83047_0 330 | - toml=0.10.2=pyhd3eb1b0_0 331 | - toolz=0.11.1=pyhd3eb1b0_0 332 | - torchaudio=0.7.2=py38 333 | - torchinfo=1.5.3=pyhd8ed1ab_0 334 | - torchvision=0.8.2=py38_cu110 335 | - tornado=6.1=py38h27cfd23_0 336 | - tqdm=4.59.0=pyhd3eb1b0_1 337 | - traitlets=5.0.5=pyhd3eb1b0_0 338 | - typed-ast=1.4.2=py38h27cfd23_1 339 | - typing_extensions=3.7.4.3=pyha847dfd_0 340 | - ujson=4.0.2=py38h2531618_0 341 | - unicodecsv=0.14.1=py38_0 342 | - unixodbc=2.3.9=h7b6447c_0 343 | - urllib3=1.26.4=pyhd3eb1b0_0 344 | - watchdog=1.0.2=py38h06a4308_1 345 | - wcwidth=0.2.5=py_0 346 | - webencodings=0.5.1=py38_1 347 | - werkzeug=1.0.1=pyhd3eb1b0_0 348 | - wheel=0.36.2=pyhd3eb1b0_0 349 | - widgetsnbextension=3.5.1=py38_0 350 | - wrapt=1.12.1=py38h7b6447c_1 351 | - wurlitzer=2.1.0=py38h06a4308_0 352 | - xlrd=2.0.1=pyhd3eb1b0_0 353 | - xlsxwriter=1.3.8=pyhd3eb1b0_0 354 | - xlwt=1.3.0=py38_0 355 | - xmltodict=0.12.0=py_0 356 | - xz=5.2.5=h7b6447c_0 357 | - yaml=0.2.5=h7b6447c_0 358 | - yapf=0.31.0=pyhd3eb1b0_0 359 | - zeromq=4.3.4=h2531618_0 360 | - zict=2.0.0=pyhd3eb1b0_0 361 | - zipp=3.4.1=pyhd3eb1b0_0 362 | - zlib=1.2.11=h7b6447c_3 363 | - zope=1.0=py38_1 364 | - zope.event=4.5.0=py38_0 365 | - zope.interface=5.3.0=py38h27cfd23_0 366 | - zstd=1.4.5=h9ceee32_0 367 | - pip: 368 | - absl-py==1.0.0 369 | - cachetools==4.2.4 370 | - google-auth==2.3.3 371 | - google-auth-oauthlib==0.4.6 372 | - grpcio==1.42.0 373 | - importlib-metadata==4.8.2 374 | - markdown==3.3.6 375 | - matplotlib2tikz==0.7.6 376 | - oauthlib==3.1.1 377 | - opencv-python==4.5.4.58 378 | - protobuf==3.19.1 379 | - pyasn1==0.4.8 380 | - pyasn1-modules==0.2.8 381 | - requests-oauthlib==1.3.0 382 | - rsa==4.7.2 383 | - tensorboard==2.7.0 384 | - tensorboard-data-server==0.6.1 385 | - tensorboard-plugin-wit==1.8.0 386 | - tensorboardx==2.4 387 | - tikzplotlib==0.9.15 388 | prefix: /home/amanpreet.walia/anaconda3 389 | -------------------------------------------------------------------------------- /example/gated0_10bit/2019-01-09_08-27-29_00200.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-computational-imaging/Gated2Gated/5056123c9ea88402dc5c9c9abbb195ab91ba8f6c/example/gated0_10bit/2019-01-09_08-27-29_00200.png -------------------------------------------------------------------------------- 
/example/gated1_10bit/2019-01-09_08-27-29_00200.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-computational-imaging/Gated2Gated/5056123c9ea88402dc5c9c9abbb195ab91ba8f6c/example/gated1_10bit/2019-01-09_08-27-29_00200.png -------------------------------------------------------------------------------- /example/gated2_10bit/2019-01-09_08-27-29_00200.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-computational-imaging/Gated2Gated/5056123c9ea88402dc5c9c9abbb195ab91ba8f6c/example/gated2_10bit/2019-01-09_08-27-29_00200.png -------------------------------------------------------------------------------- /scripts/eval_g2d.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | export CUDA_VISIBLE_DEVICES=0 3 | daytimes=( "day" "night") 4 | 5 | 6 | for daytime in "${daytimes[@]}" 7 | do 8 | 9 | echo "daytime: $daytime" 10 | eval_files="./src/splits/g2d/real_test_${daytime}.txt" 11 | python src/eval.py \ 12 | --data_dir data \ 13 | --min_depth 0.1 \ 14 | --max_depth 100.0 \ 15 | --height 512 \ 16 | --width 1024 \ 17 | --load_weights_folder models/g2d \ 18 | --results_dir results/g2d \ 19 | --eval_files_path $eval_files \ 20 | --dataset g2d \ 21 | --g2d_crop \ 22 | --gen_figs 23 | 24 | done -------------------------------------------------------------------------------- /scripts/eval_stf.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | export CUDA_VISIBLE_DEVICES=0 3 | weathers=( "clear" "light_fog" "dense_fog" "snow" ) 4 | daytimes=( "day" "night") 5 | 6 | 7 | for daytime in "${daytimes[@]}" 8 | do 9 | for weather in "${weathers[@]}" 10 | do 11 | echo "daytime: $daytime, weather: $weather" 12 | eval_files="./src/splits/stf/test_${weather}_${daytime}.txt" 13 | python src/eval.py \ 14 | --data_dir data \ 15 | --min_depth 0.1 \ 16 | --max_depth 100.0 \ 17 | --height 512 \ 18 | --width 1024 \ 19 | --load_weights_folder models/stf \ 20 | --results_dir results/stf \ 21 | --eval_files_path $eval_files \ 22 | --dataset stf \ 23 | --g2d_crop \ 24 | --gen_figs \ 25 | --binned_metrics 26 | done 27 | done -------------------------------------------------------------------------------- /scripts/inference.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | export CUDA_VISIBLE_DEVICES=0 3 | python src/inference.py \ 4 | --data_dir ./example \ 5 | --height 512 \ 6 | --width 1024 \ 7 | --min_depth 0.1 \ 8 | --max_depth 100.0 \ 9 | --depth_normalizer 70.0 \ 10 | --results_dir ./results \ 11 | --weights_dir ./models/stf -------------------------------------------------------------------------------- /scripts/train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | export CUDA_VISIBLE_DEVICES=0 3 | python src/train.py \ 4 | --data_dir ./data \ 5 | --log_dir ./logs \ 6 | --coeff_fpath chebychev \ 7 | --depth_flat_world_fpath depth_flat_world \ 8 | --model_name multinetwork \ 9 | --model_type multinetwork \ 10 | --exp_name multinetwork \ 11 | --models_to_load depth ambient encoder albedo pose pose_encoder \ 12 | --load_weights_folder ./models/initialization \ 13 | --exp_num 0 \ 14 | --height 512 \ 15 | --width 1024 \ 16 | --num_bits 10 \ 17 | --scales 0 \ 18 | --frame_ids 0 -1 1 \ 19 | --pose_model_type separate_resnet \ 20 | --num_layers 18 \ 21 | 
--weights_init pretrained \ 22 | --pose_model_input pairs \ 23 | --min_depth 0.1 \ 24 | --max_depth 100.0 \ 25 | --dataset gated \ 26 | --split gated2gated \ 27 | --batch_size 4 \ 28 | --num_workers 4 \ 29 | --learning_rate 2e-4 \ 30 | --num_epochs 20 \ 31 | --scheduler_step_size 15 \ 32 | --disparity_smoothness 0.001 \ 33 | --log_frequency 200 \ 34 | --save_frequency 1 \ 35 | --cycle_weight 0.05 \ 36 | --depth_normalizer 70.0 \ 37 | --passive_weight 0.01 \ 38 | --cycle_loss \ 39 | --temporal_loss \ 40 | --sim_gated \ 41 | --v1_multiscale \ 42 | --infty_hole_mask \ 43 | --snr_mask \ 44 | --intensity_mask \ 45 | --passive_supervision \ 46 | -------------------------------------------------------------------------------- /scripts/unzip_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | download_root=$1 4 | dst=$2 5 | 6 | files=( 7 | $download_root/gated2gated.z01 8 | $download_root/gated2gated.z02 9 | $download_root/gated2gated.z03 10 | $download_root/gated2gated.z04 11 | $download_root/gated2gated.z05 12 | $download_root/gated2gated.z06 13 | $download_root/gated2gated.z07 14 | $download_root/gated2gated.z08 15 | $download_root/gated2gated.z09 16 | $download_root/gated2gated.z10 17 | $download_root/gated2gated.z11 18 | $download_root/gated2gated.zip 19 | ) 20 | mkdir -p $dst 21 | all_exists=true 22 | for item in ${files[*]} 23 | do 24 | if [[ ! -f "$item" ]]; then 25 | echo "$item is missing" 26 | all_exists=false 27 | fi 28 | done 29 | 30 | if $all_exists; then 31 | zip -s- $download_root/gated2gated.zip -O $dst/gated2gated_full.zip 32 | unzip $dst/gated2gated_full.zip 33 | fi 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /src/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from .gated2depth import Gated2DepthDataset 2 | from .gated_dataset import GatedDataset 3 | -------------------------------------------------------------------------------- /src/dataset/gated2depth.py: -------------------------------------------------------------------------------- 1 | from . import gated_dataset 2 | import numpy as np 3 | import os 4 | import cv2 5 | import random 6 | import torch 7 | 8 | def read_gt_image(base_dir, img_id, data_type, depth_normalizer = 150.0, min_distance=0.1, max_distance=100.0, scale_images=False, 9 | scaled_img_width=None, 10 | crop_size_h= 104,crop_size_w = 128, 11 | scaled_img_height=None, raw_values_only=False): 12 | 13 | if data_type == 'real': 14 | depth_lidar1 = np.load(os.path.join(base_dir, "depth_hdl64_gated_compressed", img_id + '.npz'))['arr_0'] 15 | depth_lidar1 = depth_lidar1[crop_size_h:(depth_lidar1.shape[0] - crop_size_h), 16 | crop_size_w:(depth_lidar1.shape[1] - crop_size_w)] 17 | if raw_values_only: 18 | return depth_lidar1, None 19 | 20 | gt_mask = (depth_lidar1 > 0.) 
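        # gt_mask marks pixels with a valid LiDAR return (depth > 0); below, the
        # depth map is clipped to [min_distance, max_distance] and divided by
        # depth_normalizer before being returned.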
21 | 22 | depth_lidar1 = np.float32(np.clip(depth_lidar1, min_distance, max_distance) / depth_normalizer) 23 | 24 | return depth_lidar1, gt_mask 25 | 26 | img = np.load(os.path.join(base_dir, 'depth_compressed', img_id + '.npz'))['arr_0'] 27 | 28 | if raw_values_only: 29 | return img, None 30 | 31 | img = np.clip(img, min_distance, max_distance) / max_distance 32 | 33 | if scale_images: 34 | img = cv2.resize(img, dsize=(scaled_img_width, scaled_img_height), interpolation=cv2.INTER_AREA) 35 | 36 | return np.expand_dims(np.expand_dims(img, axis=2), axis=0), None 37 | 38 | def read_gated_image(base_dir, img_id, num_bits=10, data_type='real', 39 | scale_images=False, scaled_img_width=None,crop_size_h= 104,crop_size_w = 128, scaled_img_height=None): 40 | gated_imgs = [] 41 | normalizer = 2 ** num_bits - 1. 42 | 43 | for gate_id in range(3): 44 | gate_dir = os.path.join(base_dir,'gated%d_10bit' % gate_id) 45 | path = os.path.join(gate_dir, img_id + '.png') 46 | assert os.path.exists(path),"No such file : %s"%path 47 | img = cv2.imread(os.path.join(gate_dir, img_id + '.png'), cv2.IMREAD_UNCHANGED) 48 | if data_type == 'real': 49 | img = img[crop_size_h:(img.shape[0] - crop_size_h), 50 | crop_size_w:(img.shape[1] - crop_size_w)] 51 | img = img.copy() 52 | img[img > 2 ** 10 - 1] = normalizer 53 | 54 | img = np.float32(img / normalizer) 55 | gated_imgs.append(np.expand_dims(img, axis=2)) 56 | img = np.concatenate(gated_imgs, axis=2) 57 | if scale_images: 58 | img = cv2.resize(img, dsize=(scaled_img_width, scaled_img_height), interpolation=cv2.INTER_AREA) 59 | return img 60 | 61 | class Gated2DepthDataset(gated_dataset.GatedDataset): 62 | 63 | def __init__(self, gated_dir, filenames, 64 | height, width, num_scales, depth_normalizer = 150.0, 65 | frame_idxs = [0], 66 | is_train=False): 67 | super().__init__(gated_dir, filenames, height, width, frame_idxs, 68 | num_scales, is_train=is_train) 69 | assert frame_idxs == [0], "Gated2depth dataset has no temporal frames" 70 | self.depth_normalizer = depth_normalizer 71 | self.load_depth = self.check_depth() 72 | self.depth_loader = read_gt_image 73 | self.loader = read_gated_image 74 | 75 | def __getitem__(self, index): 76 | 77 | inputs = {} 78 | do_flip = self.is_train and random.random() > 0.5 79 | 80 | # line = self.filenames[index].split() 81 | line = self.filenames[index].split(',') 82 | frame_index = line[0] 83 | 84 | # there is no temporal data for gated2depth dataset 85 | inputs[("gated", 0, -1)] = self.get_gated(frame_index,do_flip) 86 | inputs["depth_gt"] = self.get_depth(frame_index,do_flip) 87 | 88 | # adjusting intrinsics to match each scale in the pyramid 89 | for scale in range(self.num_scales): 90 | K = self.K.copy() 91 | 92 | K[0, :] *= self.width // (2 ** scale) 93 | K[1, :] *= self.height // (2 ** scale) 94 | 95 | inv_K = np.linalg.pinv(K) 96 | 97 | inputs[("K", scale)] = torch.from_numpy(K) 98 | inputs[("inv_K", scale)] = torch.from_numpy(inv_K) 99 | 100 | color_aug = (lambda x: x) 101 | self.preprocess(inputs, color_aug) 102 | 103 | for i in self.frame_idxs: 104 | del inputs[("gated", i, -1)] 105 | del inputs[("gated_aug", i, -1)] 106 | 107 | 108 | return inputs 109 | 110 | def preprocess(self, inputs, color_aug): 111 | 112 | for k in list(inputs): 113 | frame = inputs[k] 114 | if "gated" in k : 115 | n, im, i = k 116 | for i in range(self.num_scales): 117 | # inputs[(n, im, i)] = self.resize[i](inputs[(n, im, i - 1)]) 118 | s = 2 ** i 119 | scaled_img_width, scaled_img_height = self.width // s, self.height // s 120 | inputs[(n, im, i)] 
= cv2.resize(inputs[(n, im, i - 1)], dsize=(scaled_img_width, scaled_img_height), interpolation=cv2.INTER_AREA) 121 | 122 | for k in list(inputs): 123 | f = inputs[k] 124 | if "gated" in k: 125 | n, im, i = k 126 | inputs[(n, im, i)] = self.to_tensor(f) 127 | inputs[(n + "_aug", im, i)] = self.to_tensor(color_aug(f)) 128 | 129 | def get_depth(self,frame_index,do_flip): 130 | depth_gt,_ = self.depth_loader(self.root_dir, frame_index, 'real', depth_normalizer=self.depth_normalizer) 131 | if do_flip: 132 | depth_gt = np.fliplr(depth_gt).copy() 133 | return depth_gt 134 | 135 | def get_gated(self, frame_index, do_flip): 136 | gated = self.loader(self.root_dir,frame_index) 137 | 138 | if do_flip: 139 | gated = np.fliplr(gated).copy() 140 | 141 | return gated 142 | 143 | def check_depth(self): 144 | return True # Gated2Depth dataset has lidar data -------------------------------------------------------------------------------- /src/dataset/gated_dataset.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import os 4 | import random 5 | import numpy as np 6 | import copy 7 | from PIL import Image # using pillow-simd for increased speed 8 | 9 | import torch 10 | import torch.utils.data as data 11 | from torchvision import transforms 12 | import cv2 13 | 14 | import json 15 | 16 | 17 | def passive_loader(base_dir, img_id, crop_size_h, crop_size_w, cent_fnum, 18 | img_ext='png', 19 | num_bits=10, data_type='real', 20 | scale_images=False, 21 | scaled_img_width=None, scaled_img_height=None): 22 | normalizer = 2 ** num_bits - 1. 23 | 24 | if cent_fnum == 0: 25 | dir = os.path.join(base_dir, 'gated_passive_10bit') 26 | else: 27 | dir = os.path.join(base_dir, 'gated_passive_10bit_history_%d' % (cent_fnum)) 28 | path = os.path.join(dir, img_id + f'.{img_ext}') 29 | assert os.path.exists(path), "No such file : %s" % path 30 | img = cv2.imread(os.path.join(dir, img_id + f'.{img_ext}'), cv2.IMREAD_UNCHANGED) 31 | if data_type == 'real': 32 | img = img[crop_size_h:(img.shape[0] - crop_size_h), 33 | crop_size_w:(img.shape[1] - crop_size_w) 34 | ] 35 | 36 | img = img.copy() 37 | img[img > 2 ** 10 - 1] = normalizer 38 | 39 | img = np.float32(img / normalizer) 40 | if scale_images: 41 | img = cv2.resize(img, dsize=(scaled_img_width, scaled_img_height), interpolation=cv2.INTER_AREA) 42 | return img 43 | 44 | 45 | def gated_loader(base_dir, img_id, crop_size_h, crop_size_w, history=None, 46 | img_ext='png', 47 | num_bits=10, data_type='real', 48 | scale_images=False, 49 | scaled_img_width=None, scaled_img_height=None): 50 | gated_imgs = [] 51 | normalizer = 2 ** num_bits - 1. 
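    # The three gated slices (gated0/1/2, optionally from a temporal "history"
    # frame) are read below, center-cropped via crop_size_h/crop_size_w, clipped
    # to the 10-bit range, normalized to [0, 1], and stacked into an HxWx3 image.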
52 | 53 | 54 | 55 | for gate_id in range(3): 56 | if history is None: 57 | gate_dir = os.path.join(base_dir,'gated%d_10bit' % gate_id) 58 | else: 59 | gate_dir = os.path.join(base_dir,'gated%d_10bit_history_%d'%(gate_id,history)) 60 | path = os.path.join(gate_dir, img_id + f'.{img_ext}') 61 | assert os.path.exists(path),"No such file : %s"%path 62 | img = cv2.imread(os.path.join(gate_dir, img_id + f'.{img_ext}'), cv2.IMREAD_UNCHANGED) 63 | if data_type == 'real': 64 | img = img[ crop_size_h:(img.shape[0] - crop_size_h), 65 | crop_size_w:(img.shape[1] - crop_size_w) 66 | ] 67 | 68 | img = img.copy() 69 | img[img > 2 ** 10 - 1] = normalizer 70 | 71 | img = np.float32(img / normalizer) 72 | gated_imgs.append(np.expand_dims(img, axis=2)) 73 | img = np.concatenate(gated_imgs, axis=2) 74 | if scale_images: 75 | img = cv2.resize(img, dsize=(scaled_img_width, scaled_img_height), interpolation=cv2.INTER_AREA) 76 | return img 77 | 78 | class GatedDataset(data.Dataset): 79 | 80 | def __init__(self, 81 | gated_dir, 82 | filenames, 83 | height, 84 | width, 85 | frame_idxs, 86 | num_scales, 87 | is_train=False, 88 | img_ext='png', 89 | load_passive = False): 90 | super(GatedDataset, self).__init__() 91 | 92 | self.root_dir = gated_dir 93 | self.filenames = filenames 94 | self.height = height 95 | self.width = width 96 | self.num_scales = num_scales 97 | self.img_ext = img_ext 98 | 99 | self.full_res_shape = (1280, 720) 100 | self.crop_size_h, self.crop_size_w = int((self.full_res_shape[1]-self.height)/2), int((self.full_res_shape[0]-self.width)/2), 101 | 102 | self.frame_idxs = frame_idxs 103 | 104 | self.is_train = is_train 105 | 106 | self.loader = gated_loader 107 | self.interp = Image.ANTIALIAS 108 | self.load_passive = load_passive 109 | if self.load_passive: 110 | self.passive_loader = passive_loader 111 | 112 | 113 | self.to_tensor = transforms.ToTensor() 114 | 115 | self.resize = {} 116 | 117 | for i in range(self.num_scales): 118 | s = 2 ** i 119 | self.resize[i] = transforms.Resize((self.height // s, self.width // s), 120 | interpolation=self.interp) 121 | 122 | self.K = np.array([[1.81,0.0, 0.52, 0.0 ], 123 | [0.0, 3.23, 0.36, 0.0 ], 124 | [0.0, 0.0, 1.0, 0.0 ], 125 | [0.0, 0.0, 0.0, 1.0 ]], dtype=np.float32) 126 | 127 | 128 | self.load_depth = self.check_depth() 129 | 130 | def __getitem__(self, index): 131 | 132 | inputs = {} 133 | do_flip = self.is_train and random.random() > 0.5 134 | 135 | # line = self.filenames[index].split() 136 | line = self.filenames[index].split(',') 137 | frame_index = line[0] 138 | cent_fnum = int(line[1]) 139 | 140 | inputs['frame_info'] = "{}-{}".format(frame_index,cent_fnum) 141 | 142 | for i in self.frame_idxs: 143 | history = i + cent_fnum # Get temporal next or previous frame depending on frame_indx 144 | history = None if history == 0 else history 145 | inputs[("gated", i, -1)] = self.get_gated(frame_index,history,do_flip) 146 | 147 | 148 | # adjusting intrinsics to match each scale in the pyramid 149 | for scale in range(self.num_scales): 150 | K = self.K.copy() 151 | 152 | K[0, :] *= self.width // (2 ** scale) 153 | K[1, :] *= self.height // (2 ** scale) 154 | 155 | inv_K = np.linalg.pinv(K) 156 | 157 | inputs[("K", scale)] = torch.from_numpy(K) 158 | inputs[("inv_K", scale)] = torch.from_numpy(inv_K) 159 | 160 | gated_aug = (lambda x: x) 161 | self.preprocess(inputs, gated_aug) 162 | 163 | for i in self.frame_idxs: 164 | del inputs[("gated", i, -1)] 165 | del inputs[("gated_aug", i, -1)] 166 | 167 | if self.load_depth: 168 | depth_gt = 
self.get_depth(frame_index, cent_fnum, do_flip) 169 | inputs["depth_gt"] = torch.from_numpy(depth_gt) 170 | 171 | if self.load_passive: 172 | passive = self.get_passive(frame_index, cent_fnum, do_flip) 173 | inputs["passive"] = torch.from_numpy(passive) 174 | 175 | 176 | 177 | return inputs 178 | 179 | def preprocess(self, inputs, gated_aug): 180 | """ 181 | Resize gated images to the required scales and augment if required 182 | 183 | We create the gated_aug object in advance and apply the same augmentation to all 184 | images in this item. This ensures that all images input to the pose network receive the 185 | same augmentation. 186 | """ 187 | for k in list(inputs): 188 | frame = inputs[k] 189 | if "gated" in k: 190 | n, im, i = k 191 | for i in range(self.num_scales): 192 | # inputs[(n, im, i)] = self.resize[i](inputs[(n, im, i - 1)]) 193 | s = 2 ** i 194 | scaled_img_width, scaled_img_height = self.width // s, self.height // s 195 | inputs[(n, im, i)] = cv2.resize(inputs[(n, im, i - 1)], dsize=(scaled_img_width, scaled_img_height), interpolation=cv2.INTER_AREA) 196 | 197 | for k in list(inputs): 198 | f = inputs[k] 199 | if "gated" in k: 200 | n, im, i = k 201 | inputs[(n, im, i)] = self.to_tensor(f) 202 | inputs[(n + "_aug", im, i)] = self.to_tensor(gated_aug(f)) 203 | 204 | def __len__(self): 205 | return len(self.filenames) 206 | 207 | def get_gated(self, frame_index, history, do_flip): 208 | gated = self.loader(self.root_dir, frame_index, self.crop_size_h, self.crop_size_w, history=history, img_ext=self.img_ext) 209 | if do_flip: 210 | gated = np.fliplr(gated).copy() 211 | return gated 212 | 213 | def get_passive(self, frame_index, cent_fnum, do_flip): 214 | passive = self.passive_loader(self.root_dir, frame_index, self.crop_size_h, self.crop_size_w, cent_fnum=cent_fnum, img_ext=self.img_ext) 215 | if do_flip: 216 | passive = np.fliplr(passive).copy() 217 | passive = np.expand_dims(passive, 0).astype(np.float32) 218 | return passive 219 | 220 | def get_depth(self, frame_index, cent_fnum, do_flip): 221 | if cent_fnum == 0: 222 | lidar_filename = os.path.join(self.root_dir, 'lidar_hdl64_strongest_filtered_gated', frame_index + '.npz') 223 | depth_gt = np.load(lidar_filename)['arr_0'] 224 | depth_gt = depth_gt[self.crop_size_h:self.full_res_shape[1] - self.crop_size_h, self.crop_size_w:self.full_res_shape[0] - self.crop_size_w] 225 | else: 226 | depth_gt = np.zeros((self.height, self.width)) 227 | 228 | if do_flip: 229 | depth_gt = np.fliplr(depth_gt).copy() 230 | 231 | depth_gt = np.expand_dims(depth_gt, 0).astype(np.float32) 232 | return depth_gt 233 | 234 | 235 | 236 | def check_depth(self): 237 | sample = self.filenames[0].split(',')[0] 238 | lidar_filename = os.path.join(self.root_dir, 'lidar_hdl64_strongest_filtered_gated', '{}.npz'.format(sample)) 239 | return os.path.isfile(lidar_filename) 240 | 241 | 242 | 243 | 244 | 245 | -------------------------------------------------------------------------------- /src/eval.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import os 4 | import cv2 5 | import numpy as np 6 | import matplotlib.cm as cm 7 | 8 | import torch 9 | from torch.utils.data import DataLoader 10 | 11 | from layers import disp_to_depth 12 | # from utils import readlines 13 | # from options import MonodepthOptions 14 | import networks 15 | import argparse 16 | 17 | from torchvision.transforms import ToTensor 18 | 19 | gated_transform = ToTensor() 20 | 
from tqdm.contrib import tzip 21 | 22 | import matplotlib.pyplot as plt 23 | 24 | import visualize2D 25 | import math 26 | import PIL.Image as pil 27 | 28 | 29 | cv2.setNumThreads(0) # This speeds up evaluation 5x on our unix systems (OpenCV 3.3.1) 30 | 31 | 32 | def read_sample_files(train_samples_files): 33 | samples = [] 34 | with open(train_samples_files, 'r') as f: 35 | samples += f.read().splitlines() 36 | samples = [sample.replace(',', '_') for sample in samples] 37 | return samples 38 | 39 | 40 | def threshold(y1, y2, thr=1.25): 41 | max_ratio = np.maximum(y1 / y2, y2 / y1) 42 | return np.mean(max_ratio < thr, dtype=np.float64) * 100. 43 | 44 | 45 | def rmse(y1, y2): 46 | diff = y1 - y2 47 | return math.sqrt(np.mean(diff * diff, dtype=np.float64)) 48 | 49 | 50 | def ard(y1, y2): 51 | return np.mean(np.abs(y1 - y2) / y2, dtype=np.float64) 52 | 53 | 54 | def mae(y1, y2): 55 | return np.mean(np.abs(y1 - y2), dtype=np.float64) 56 | 57 | 58 | def compute_errors(groundtruth, output, min_distance=3., max_distance=150.): 59 | output = output[groundtruth > 0] 60 | groundtruth = groundtruth[groundtruth > 0] 61 | output = np.clip(output, min_distance, max_distance) 62 | groundtruth = np.clip(groundtruth, min_distance, max_distance) 63 | 64 | return rmse(output, groundtruth), \ 65 | mae(output, groundtruth), ard(output, groundtruth), \ 66 | threshold(output, groundtruth, thr=1.25), \ 67 | threshold(output, groundtruth, thr=1.25 ** 2), threshold(output, groundtruth, thr=1.25 ** 3) 68 | 69 | 70 | def calc_bins(clip_min, clip_max, nb_bins): 71 | bins = np.linspace(clip_min, clip_max, num=nb_bins + 1) 72 | mean_bins = np.array([0.5 * (bins[i + 1] + bins[i]) for i in range(0, nb_bins)]) 73 | return bins, mean_bins 74 | 75 | 76 | def read_img(img_path, 77 | num_bits=10, 78 | crop_height=512, crop_width=1024, dataset='g2d'): 79 | gated_imgs = [] 80 | normalizer = 2 ** num_bits - 1. 
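    # img_path contains a "{}" placeholder that is filled with the gate index
    # (0-2) below; each slice is center-cropped to crop_height x crop_width,
    # clipped to the 10-bit range, and normalized to [0, 1] before stacking.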
81 | 82 | for gate_id in range(3): 83 | path = img_path.format(gate_id) 84 | assert os.path.exists(path), "No such file : %s" % path 85 | img = cv2.imread(path, cv2.IMREAD_UNCHANGED) 86 | img = img[((img.shape[0] - crop_height) // 2):((img.shape[0] + crop_height) // 2), 87 | ((img.shape[1] - crop_width) // 2):((img.shape[1] + crop_width) // 2)] 88 | img = img.copy() 89 | img[img > 2 ** 10 - 1] = normalizer 90 | img = np.float32(img / normalizer) 91 | gated_imgs.append(np.expand_dims(img, axis=2)) 92 | img = np.concatenate(gated_imgs, axis=2) 93 | return img 94 | 95 | 96 | def evaluate(opt): 97 | """Evaluates a pretrained model using a specified test set 98 | """ 99 | MIN_DEPTH = 3.0 100 | MAX_DEPTH = 80.0 101 | 102 | # Load dataset items 103 | dataset_dir = opt.data_dir 104 | eval_files_name = os.path.basename(opt.eval_files_path).replace('.txt', '') 105 | 106 | val_ids = sorted(read_sample_files(opt.eval_files_path)) 107 | if opt.dataset == 'g2d': 108 | lidar_paths = [os.path.join(dataset_dir, "depth_hdl64_gated_compressed", "{}.npz".format(_id)) for _id in 109 | val_ids] 110 | gated_paths = [os.path.join(dataset_dir, "gated{}_10bit", "{}.{}".format(_id,opt.img_ext)) for _id in val_ids] 111 | elif opt.dataset == 'stf': 112 | lidar_paths = [os.path.join(dataset_dir, "lidar_hdl64_strongest_filtered_gated", "{}.npz".format(_id)) for _id 113 | in val_ids] 114 | gated_paths = [os.path.join(dataset_dir, "gated{}_10bit", "{}.{}".format(_id,opt.img_ext)) for _id in val_ids] 115 | 116 | # Load weights 117 | assert os.path.isdir(opt.load_weights_folder), "Cannot find a folder at {}".format(opt.load_weights_folder) 118 | print("-> Loading weights from {}".format(opt.load_weights_folder)) 119 | depth_path = os.path.join(opt.load_weights_folder, "depth.pth") 120 | depth_dict = torch.load(depth_path) 121 | 122 | depth_net = networks.PackNetSlim01(dropout=0.5, version="1A") 123 | model_dict = depth_net.state_dict() 124 | depth_net.load_state_dict({k: v for k, v in depth_dict.items() if k in model_dict}) 125 | depth_net.cuda() 126 | depth_net.eval() 127 | 128 | print("-> Computing predictions with size {}x{}".format(opt.height, opt.width)) 129 | if opt.g2d_crop: 130 | g2d_width = 980 131 | g2d_height = 420 132 | assert opt.width >= g2d_width and opt.height >= g2d_height, 'Gated2Depth Crop can only be applied for width >= {} and height >= {}'.format( 133 | g2d_height, g2d_height) 134 | print("-> Computing metrics for Gated2Depth crop 420x980".format(opt.height, opt.width)) 135 | 136 | if not os.path.exists(os.path.join(opt.results_dir)): 137 | os.makedirs(os.path.join(opt.results_dir)) 138 | 139 | errors = [] 140 | 141 | if opt.binned_metrics: 142 | average_points = 15000 143 | results_counter = 0 144 | results = np.zeros((average_points * len(lidar_paths), 2), dtype=np.float32) 145 | 146 | with torch.no_grad(): 147 | for lidar_path, gated_path in tzip(lidar_paths, gated_paths): 148 | 149 | img_id = os.path.basename(gated_path).split('.')[0] 150 | 151 | gated_img = read_img(gated_path, crop_height=opt.height, crop_width=opt.width, dataset=opt.dataset) 152 | 153 | lidar = np.load(lidar_path)['arr_0'] 154 | gt_depth = lidar[((lidar.shape[0] - opt.height) // 2):((lidar.shape[0] + opt.height) // 2), 155 | ((lidar.shape[1] - opt.width) // 2):((lidar.shape[1] + opt.width) // 2)] 156 | 157 | input_patch = gated_transform(gated_img).unsqueeze(0).cuda() 158 | output = depth_net(input_patch) 159 | 160 | _, pred_depth = disp_to_depth(output[("disp", 0)], opt.min_depth, opt.max_depth) 161 | pred_depth = pred_depth[0, 
0].cpu().numpy() * opt.depth_normalizer 162 | 163 | ### Generate graphics for results ### 164 | if opt.gen_figs: 165 | # Making directory for storing results 166 | result_dirs = ['gated2gated_imgs', 'all', 'gated2gated'] 167 | for result_folder in result_dirs: 168 | if not os.path.exists(os.path.join(opt.results_dir, result_folder)): 169 | os.makedirs(os.path.join(opt.results_dir, result_folder)) 170 | input_patch = input_patch.permute(0, 2, 3, 1).cpu().numpy() 171 | 172 | # Generate colorized pointcloud from Lidar 173 | depth_lidar1_color = visualize2D.colorize_pointcloud(gt_depth, min_distance=MIN_DEPTH, 174 | max_distance=MAX_DEPTH, radius=3, cmap=cm.plasma) 175 | 176 | # Generate colorized depth map 177 | depth_map_color = visualize2D.colorize_depth(pred_depth, min_distance=MIN_DEPTH, max_distance=MAX_DEPTH, 178 | cmap=cm.plasma) 179 | 180 | in_out_shape = (int(depth_map_color.shape[0] + depth_map_color.shape[0] / 3. + gt_depth.shape[0]), 181 | depth_map_color.shape[1], 3) 182 | 183 | input_output = np.zeros(shape=in_out_shape) 184 | scaled_input = cv2.resize(input_patch[0, :, :, :], 185 | dsize=(int(input_patch.shape[2] / 3), int(input_patch.shape[1] / 3)), 186 | interpolation=cv2.INTER_AREA) * 255 187 | 188 | for i in range(3): 189 | input_output[:scaled_input.shape[0], :scaled_input.shape[1], i] = scaled_input[:, :, 0] 190 | input_output[:scaled_input.shape[0], scaled_input.shape[1]: 2 * scaled_input.shape[1], 191 | i] = scaled_input[:, :, 1] 192 | input_output[:scaled_input.shape[0], scaled_input.shape[1] * 2:scaled_input.shape[1] * 3, 193 | i] = scaled_input[:, :, 2] 194 | 195 | input_output[scaled_input.shape[0]: scaled_input.shape[0] + depth_map_color.shape[0], :, 196 | :] = depth_map_color 197 | input_output[scaled_input.shape[0] + depth_map_color.shape[0]:, :, :] = depth_lidar1_color 198 | depth_map_color = pil.fromarray(depth_map_color.astype(np.uint8)) 199 | input_output = pil.fromarray(input_output.astype(np.uint8)) 200 | depth_map_color.save(os.path.join(opt.results_dir, 'gated2gated_imgs', '{}.jpg'.format(img_id))) 201 | input_output.save(os.path.join(opt.results_dir, 'all', '{}.jpg'.format(img_id))) 202 | 203 | np.savez_compressed(os.path.join(opt.results_dir, 'gated2gated', '{}'.format(img_id)), pred_depth) 204 | 205 | # check whether groundtruth depthmap contains any lidar point 206 | 207 | 208 | if opt.g2d_crop: 209 | gt_depth = gt_depth[((gt_depth.shape[0] - g2d_height) // 2):((gt_depth.shape[0] + g2d_height) // 2), 210 | ((gt_depth.shape[1] - g2d_width) // 2):((gt_depth.shape[1] + g2d_width) // 2)] 211 | pred_depth = pred_depth[ 212 | ((pred_depth.shape[0] - g2d_height) // 2):((pred_depth.shape[0] + g2d_height) // 2), 213 | ((pred_depth.shape[1] - g2d_width) // 2):((pred_depth.shape[1] + g2d_width) // 2)] 214 | 215 | if np.sum(gt_depth > 0.0) > 0.: 216 | 217 | error = compute_errors(gt_depth, pred_depth, min_distance=MIN_DEPTH, max_distance=MAX_DEPTH) 218 | errors.append(error) 219 | 220 | if opt.binned_metrics: 221 | pred_depth = pred_depth[gt_depth > 0] 222 | gt_depth = gt_depth[gt_depth > 0] 223 | 224 | if results_counter + len(gt_depth) > results.shape[0]: 225 | print('Overflow') 226 | break 227 | 228 | results[results_counter:results_counter + len(gt_depth), 0] = gt_depth 229 | results[results_counter:results_counter + len(gt_depth), 1] = pred_depth 230 | 231 | results_counter += len(gt_depth) 232 | 233 | # Print and save metrics 234 | print('### Metrics ###') 235 | res = np.array(errors).mean(0) 236 | metric_str = ['rmse', 'mae', 'ard', 'delta1', 'delta2', 
'delta3'] 237 | res_str = '' 238 | for i in range(res.shape[0]): 239 | res_str += '{}={:.2f} \n'.format(metric_str[i], res[i]) 240 | print(res_str) 241 | with open(os.path.join(opt.results_dir, '{}_results.txt'.format(eval_files_name)), 'w') as f: 242 | f.write(res_str) 243 | with open(os.path.join(opt.results_dir, '{}_results.tex'.format(eval_files_name)), 'w') as f: 244 | f.write(' & '.join(metric_str) + '\n') 245 | f.write(' & '.join(['{:.2f}'.format(r) for r in res])) 246 | 247 | # Print and save binned metrics 248 | if opt.binned_metrics: 249 | print('### Binned Metrics ###') 250 | results = results[results[:, 0] != 0] 251 | 252 | bins = np.linspace(MIN_DEPTH, MAX_DEPTH, num=12) 253 | inds = np.digitize(results[:, 0], bins) 254 | 255 | binned_results = np.zeros((len(bins), 6 + 1)) 256 | for i, bin in enumerate(bins): 257 | metrics = compute_errors(results[inds == i + 1, 0], results[inds == i + 1, 1], min_distance=MIN_DEPTH, 258 | max_distance=MAX_DEPTH) 259 | binned_results[i, 0] = bin 260 | binned_results[i, 1:] = metrics 261 | 262 | with open(os.path.join(opt.results_dir, '{}_binned_distance_results.txt'.format(eval_files_name)), 263 | 'w') as f: 264 | np.savetxt(f, binned_results, delimiter=' ') 265 | 266 | mean_error_binned = np.zeros((6, 1)) 267 | for i in range(0, 6): 268 | mean_error_binned[i] = np.mean(binned_results[~np.isnan(binned_results[:, i + 1]), i + 1]) 269 | res_str = '' 270 | for i in range(res.shape[0]): 271 | res_str += '{}={:.2f} \n'.format(metric_str[i], float(mean_error_binned[i])) 272 | print(res_str) 273 | with open(os.path.join(opt.results_dir, '{}_binned_results.txt'.format(eval_files_name)), 'w') as f: 274 | f.write(res_str) 275 | with open(os.path.join(opt.results_dir, '{}_binned_results.tex'.format(eval_files_name)), 'w') as f: 276 | f.write(' & '.join(metric_str) + '\n') 277 | np.savetxt(f, np.transpose(mean_error_binned), delimiter=' & ', fmt='%.2f') 278 | 279 | 280 | if __name__ == "__main__": 281 | options = argparse.ArgumentParser() 282 | options.add_argument("--data_dir", required=True, 283 | help="Path to the dataset directory") 284 | options.add_argument("--min_depth", default=0.1, 285 | type=float, 286 | help="Minimum depth value to evaluate") 287 | options.add_argument("--max_depth", default=100.0, 288 | type=float, 289 | help="Max depth value to evaluate") 290 | options.add_argument("--height", default=512, 291 | type=int, 292 | help="height of crop for gated image") 293 | options.add_argument("--width", default=1024, 294 | type=int, 295 | help="width of crop for gated image") 296 | options.add_argument("--img_ext", default='png', 297 | help="image extension (without .)") 298 | options.add_argument("--depth_normalizer", default=70.0, 299 | type=float, 300 | help="depth normalizer to multiply predicted depth with") 301 | options.add_argument("--load_weights_folder", required=True, 302 | help="Path where weights are stored") 303 | options.add_argument("--results_dir", required=True, 304 | help="Path where results are stored") 305 | options.add_argument("--gen_figs", action='store_true', 306 | help="Whether to generate figures or not") 307 | options.add_argument("--eval_files_path", 308 | help="Path to file with validation/evaluation file names.", 309 | required=True) 310 | options.add_argument("--dataset", default='stf', 311 | choices=['stf', 'g2d'], 312 | help="Which dataset is used for evaluation.") 313 | options.add_argument('--g2d_crop', help='Use same crop as used for Evaluation in Gated2Depth Paper.', 314 | action='store_true', 
required=False) 315 | options.add_argument('--binned_metrics', help='Calculate additional binned metrics', 316 | action='store_true', required=False) 317 | 318 | options = options.parse_args() 319 | evaluate(options) -------------------------------------------------------------------------------- /src/inference.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | import visualize2D 3 | import networks 4 | 5 | import argparse 6 | import os 7 | import cv2 8 | import numpy as np 9 | import matplotlib.pyplot as plt 10 | from matplotlib.cm import get_cmap 11 | import PIL.Image as pil 12 | import matplotlib.cm as cm 13 | import numpy as np 14 | from layers import disp_to_depth 15 | 16 | 17 | 18 | import torch 19 | from torchvision import transforms 20 | to_tensor = transforms.ToTensor() 21 | 22 | cmap_dict = { 23 | 'jet': cm.jet, 24 | 'jet_r': cm.jet_r, 25 | 'plasma': cm.plasma, 26 | 'plasma_r': cm.plasma_r, 27 | 'magma': cm.magma, 28 | 'magma_r': cm.magma_r, 29 | 'inferno': cm.inferno, 30 | 'inferno_r': cm.inferno_r 31 | } 32 | 33 | def read_gated_image(base_dir, img_id, num_bits=10, data_type='real', 34 | scale_images=False, scaled_img_width=None, crop_size_h=104, crop_size_w=128, scaled_img_height=None): 35 | 36 | gated_imgs = [] 37 | normalizer = 2 ** num_bits - 1. 38 | 39 | for gate_id in range(3): 40 | gate_dir = os.path.join(base_dir, 'gated%d_10bit' % gate_id) 41 | path = os.path.join(gate_dir, img_id + '.png') 42 | assert os.path.exists(path), "No such file : %s" % path 43 | img = cv2.imread(os.path.join( 44 | gate_dir, img_id + '.png'), cv2.IMREAD_UNCHANGED) 45 | if data_type == 'real': 46 | img = img[crop_size_h:(img.shape[0] - crop_size_h), 47 | crop_size_w:(img.shape[1] - crop_size_w)] 48 | img = img.copy() 49 | img[img > 2 ** 10 - 1] = normalizer 50 | 51 | img = np.float32(img / normalizer) 52 | gated_imgs.append(np.expand_dims(img, axis=2)) 53 | img = np.concatenate(gated_imgs, axis=2) 54 | if scale_images: 55 | img = cv2.resize(img, dsize=(scaled_img_width, 56 | scaled_img_height), interpolation=cv2.INTER_AREA) 57 | return img 58 | 59 | 60 | def load_weights(model, pretrained_weights_path): 61 | model_dict = model.state_dict() 62 | assert os.path.isfile(pretrained_weights_path), "{} not found in the location".format( 63 | os.path.basename(pretrained_weights_path)) 64 | pretrained_dict = torch.load(pretrained_weights_path) 65 | pretrained_dict = {k: v for k, 66 | v in pretrained_dict.items() if k in model_dict} 67 | model_dict.update(pretrained_dict) 68 | model.load_state_dict(model_dict) 69 | return model 70 | 71 | 72 | def save_depth_viz(depthmap, save_path, min_depth, max_depth, colormap): 73 | # Generate colorized depth map 74 | depth_map_color = visualize2D.colorize_depth( 75 | depthmap, min_distance=min_depth, max_distance=max_depth, cmap=colormap) 76 | depth_map_color = pil.fromarray(depth_map_color.astype(np.uint8)) 77 | depth_map_color.save(save_path) 78 | 79 | 80 | def inference(options): 81 | 82 | models = {} 83 | 84 | models["depth"] = networks.PackNetSlim01( 85 | dropout=0.5, version="{}{}".format(1, 'A')) 86 | models["depth"].to('cuda') 87 | 88 | models["encoder"] = networks.Encoder(num_convs=4) 89 | models["encoder"].to('cuda') 90 | 91 | models["albedo"] = networks.Decoder( 92 | name="albedo", scales=range(1), out_channels=1) 93 | models["albedo"].to('cuda') 94 | 95 | models["ambient"] = networks.Decoder( 96 | name="ambient", scales=range(1), out_channels=1) 97 | 
models["ambient"].to('cuda') 98 | 99 | # Load model weights 100 | models["depth"] = load_weights( 101 | models["depth"], os.path.join(options.weights_dir, "depth.pth")) 102 | models["encoder"] = load_weights( 103 | models["encoder"], os.path.join(options.weights_dir, "encoder.pth")) 104 | models["albedo"] = load_weights( 105 | models["albedo"], os.path.join(options.weights_dir, "albedo.pth")) 106 | models["ambient"] = load_weights( 107 | models["ambient"], os.path.join(options.weights_dir, "ambient.pth")) 108 | 109 | # Eval Mode 110 | for model in models.values(): 111 | model.eval() 112 | 113 | results_dirs = ["depth", "ambient", "albedo"] 114 | for _dir in results_dirs: 115 | os.makedirs(os.path.join(options.results_dir, _dir), exist_ok=True) 116 | 117 | imgs_names = [sample for sample in os.listdir(os.path.join(options.data_dir, "gated0_10bit")) if '.png' in sample] 118 | img_ids = list(map(lambda x: x.split('.')[0], imgs_names)) 119 | 120 | with torch.no_grad(): 121 | for img_id in img_ids: 122 | gated_img = to_tensor(read_gated_image( 123 | options.data_dir, img_id)).unsqueeze(0).to('cuda') 124 | 125 | # Getting depth 126 | disp = models['depth'](gated_img)[('disp', 0)] 127 | _, pred_depth = disp_to_depth( 128 | disp, options.min_depth, options.max_depth) 129 | pred_depth = pred_depth[0, 0].cpu( 130 | ).numpy() * options.depth_normalizer 131 | pred_depth = np.clip(pred_depth, 0.0, options.clip_depth) 132 | np.savez(os.path.join(options.results_dir, "depth", 133 | "{}.npz".format(img_id)), pred_depth) 134 | save_depth_viz(pred_depth,os.path.join(options.results_dir, "depth", 135 | "{}.png".format(img_id)), 0.0, options.clip_depth, 136 | cmap_dict["inferno_r"]) 137 | 138 | feats = models['encoder'](gated_img) 139 | 140 | # Getting ambient 141 | _ambient = models['ambient'](feats)[('ambient', 0)] 142 | ambient = _ambient[0, 0].cpu().numpy() 143 | ambient = np.clip(ambient, 0.0, 1.0) * 255. 144 | ambient = pil.fromarray(ambient.astype(np.uint8)) 145 | ambient.save(os.path.join(options.results_dir, "ambient", 146 | "{}.png".format(img_id))) 147 | 148 | # Getting albedo 149 | _albedo = models['albedo'](feats)[('albedo', 0)] 150 | albedo = _albedo[0, 0].cpu().numpy() 151 | albedo = np.clip(albedo, 0.0, 1.0) * 255. 
152 | albedo = pil.fromarray(albedo.astype(np.uint8)) 153 | albedo.save(os.path.join(options.results_dir, "albedo", 154 | "{}.png".format(img_id))) 155 | 156 | 157 | if __name__ == "__main__": 158 | options = argparse.ArgumentParser() 159 | options.add_argument("--data_dir", required=True, 160 | help="Path to the dataset directory") 161 | options.add_argument("--min_depth", default=0.1, 162 | type=float, 163 | help="Minimum depth value to evaluate") 164 | options.add_argument("--max_depth", default=100.0, 165 | type=float, 166 | help="Max depth value to evaluate") 167 | options.add_argument("--clip_depth", default=80.0, 168 | type=float, 169 | help="clip depth to this value") 170 | options.add_argument("--height", default=512, 171 | type=int, 172 | help="height of crop for gated image") 173 | options.add_argument("--width", default=1024, 174 | type=int, 175 | help="width of crop for gated image") 176 | options.add_argument("--depth_normalizer", default=70.0, 177 | type=float, 178 | help="depth normalizer to multiply predicted depth with") 179 | options.add_argument("--weights_dir", required=True, 180 | help="Path where weights are stored") 181 | options.add_argument("--results_dir", required=True, 182 | help="Path where results are stored") 183 | options.add_argument("--cmap", default='inferno_r', 184 | choices=['jet', 'jet_r', 'plasma', 'plasma_r', 185 | 'magma', 'magma_r', 'inferno', 'inferno_r'], 186 | help="Which colormap to use for generating results") 187 | 188 | options = options.parse_args() 189 | inference(options) 190 | -------------------------------------------------------------------------------- /src/layers.py: -------------------------------------------------------------------------------- 1 | # Copyright Niantic 2019. Patent Pending. All rights reserved. 2 | # 3 | # This software is licensed under the terms of the Monodepth2 licence 4 | # which allows for non-commercial use only, the full terms of which are made 5 | # available in the LICENSE file. 6 | 7 | from __future__ import absolute_import, division, print_function 8 | 9 | import os 10 | import numpy as np 11 | 12 | 13 | import torch 14 | import torch.nn as nn 15 | import torch.nn.functional as F 16 | 17 | 18 | def disp_to_depth(disp, min_depth, max_depth): 19 | """Convert network's sigmoid output into depth prediction 20 | The formula for this conversion is given in the 'additional considerations' 21 | section of the paper. 
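    Concretely (mirroring the code below): scaled_disp = 1/max_depth + (1/min_depth - 1/max_depth) * disp
    and depth = 1 / scaled_disp, so disp = 0 maps to max_depth and disp = 1 maps to min_depth.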
22 | """ 23 | min_disp = 1 / max_depth 24 | max_disp = 1 / min_depth 25 | scaled_disp = min_disp + (max_disp - min_disp) * disp 26 | depth = 1 / scaled_disp 27 | return scaled_disp, depth 28 | 29 | 30 | def transformation_from_parameters(axisangle, translation, invert=False): 31 | """Convert the network's (axisangle, translation) output into a 4x4 matrix 32 | """ 33 | R = rot_from_axisangle(axisangle) 34 | t = translation.clone() 35 | 36 | if invert: 37 | R = R.transpose(1, 2) 38 | t *= -1 39 | 40 | T = get_translation_matrix(t) 41 | 42 | if invert: 43 | M = torch.matmul(R, T) 44 | else: 45 | M = torch.matmul(T, R) 46 | 47 | return M 48 | 49 | 50 | def get_translation_matrix(translation_vector): 51 | """Convert a translation vector into a 4x4 transformation matrix 52 | """ 53 | T = torch.zeros(translation_vector.shape[0], 4, 4).to(device=translation_vector.device) 54 | 55 | t = translation_vector.contiguous().view(-1, 3, 1) 56 | 57 | T[:, 0, 0] = 1 58 | T[:, 1, 1] = 1 59 | T[:, 2, 2] = 1 60 | T[:, 3, 3] = 1 61 | T[:, :3, 3, None] = t 62 | 63 | return T 64 | 65 | 66 | def rot_from_axisangle(vec): 67 | """Convert an axisangle rotation into a 4x4 transformation matrix 68 | (adapted from https://github.com/Wallacoloo/printipi) 69 | Input 'vec' has to be Bx1x3 70 | """ 71 | angle = torch.norm(vec, 2, 2, True) 72 | axis = vec / (angle + 1e-7) 73 | 74 | ca = torch.cos(angle) 75 | sa = torch.sin(angle) 76 | C = 1 - ca 77 | 78 | x = axis[..., 0].unsqueeze(1) 79 | y = axis[..., 1].unsqueeze(1) 80 | z = axis[..., 2].unsqueeze(1) 81 | 82 | xs = x * sa 83 | ys = y * sa 84 | zs = z * sa 85 | xC = x * C 86 | yC = y * C 87 | zC = z * C 88 | xyC = x * yC 89 | yzC = y * zC 90 | zxC = z * xC 91 | 92 | rot = torch.zeros((vec.shape[0], 4, 4)).to(device=vec.device) 93 | 94 | rot[:, 0, 0] = torch.squeeze(x * xC + ca) 95 | rot[:, 0, 1] = torch.squeeze(xyC - zs) 96 | rot[:, 0, 2] = torch.squeeze(zxC + ys) 97 | rot[:, 1, 0] = torch.squeeze(xyC + zs) 98 | rot[:, 1, 1] = torch.squeeze(y * yC + ca) 99 | rot[:, 1, 2] = torch.squeeze(yzC - xs) 100 | rot[:, 2, 0] = torch.squeeze(zxC - ys) 101 | rot[:, 2, 1] = torch.squeeze(yzC + xs) 102 | rot[:, 2, 2] = torch.squeeze(z * zC + ca) 103 | rot[:, 3, 3] = 1 104 | 105 | return rot 106 | 107 | 108 | class ConvBlock(nn.Module): 109 | """Layer to perform a convolution followed by ELU 110 | """ 111 | def __init__(self, in_channels, out_channels): 112 | super(ConvBlock, self).__init__() 113 | 114 | self.conv = Conv3x3(in_channels, out_channels) 115 | self.nonlin = nn.ELU(inplace=True) 116 | 117 | def forward(self, x): 118 | out = self.conv(x) 119 | out = self.nonlin(out) 120 | return out 121 | 122 | 123 | class Conv3x3(nn.Module): 124 | """Layer to pad and convolve input 125 | """ 126 | def __init__(self, in_channels, out_channels, use_refl=True): 127 | super(Conv3x3, self).__init__() 128 | 129 | if use_refl: 130 | self.pad = nn.ReflectionPad2d(1) 131 | else: 132 | self.pad = nn.ZeroPad2d(1) 133 | self.conv = nn.Conv2d(int(in_channels), int(out_channels), 3) 134 | 135 | def forward(self, x): 136 | out = self.pad(x) 137 | out = self.conv(out) 138 | return out 139 | 140 | 141 | class BackprojectDepth(nn.Module): 142 | """Layer to transform a depth image into a point cloud 143 | """ 144 | def __init__(self, batch_size, height, width): 145 | super(BackprojectDepth, self).__init__() 146 | 147 | self.batch_size = batch_size 148 | self.height = height 149 | self.width = width 150 | 151 | meshgrid = np.meshgrid(range(self.width), range(self.height), indexing='xy') 152 | self.id_coords = 
np.stack(meshgrid, axis=0).astype(np.float32) 153 | self.id_coords = nn.Parameter(torch.from_numpy(self.id_coords), 154 | requires_grad=False) 155 | 156 | self.ones = nn.Parameter(torch.ones(self.batch_size, 1, self.height * self.width), 157 | requires_grad=False) 158 | 159 | self.pix_coords = torch.unsqueeze(torch.stack( 160 | [self.id_coords[0].view(-1), self.id_coords[1].view(-1)], 0), 0) 161 | self.pix_coords = self.pix_coords.repeat(batch_size, 1, 1) 162 | self.pix_coords = nn.Parameter(torch.cat([self.pix_coords, self.ones], 1), 163 | requires_grad=False) 164 | 165 | def forward(self, depth, inv_K): 166 | cam_points = torch.matmul(inv_K[:, :3, :3], self.pix_coords) 167 | cam_points = depth.view(self.batch_size, 1, -1) * cam_points 168 | cam_points = torch.cat([cam_points, self.ones], 1) 169 | 170 | return cam_points 171 | 172 | 173 | class Project3D(nn.Module): 174 | """Layer which projects 3D points into a camera with intrinsics K and at position T 175 | """ 176 | def __init__(self, batch_size, height, width, eps=1e-7): 177 | super(Project3D, self).__init__() 178 | 179 | self.batch_size = batch_size 180 | self.height = height 181 | self.width = width 182 | self.eps = eps 183 | 184 | def forward(self, points, K, T): 185 | P = torch.matmul(K, T)[:, :3, :] 186 | 187 | cam_points = torch.matmul(P, points) 188 | 189 | pix_coords = cam_points[:, :2, :] / (cam_points[:, 2, :].unsqueeze(1) + self.eps) 190 | pix_coords = pix_coords.view(self.batch_size, 2, self.height, self.width) 191 | pix_coords = pix_coords.permute(0, 2, 3, 1) 192 | pix_coords[..., 0] /= self.width - 1 193 | pix_coords[..., 1] /= self.height - 1 194 | pix_coords = (pix_coords - 0.5) * 2 195 | return pix_coords 196 | 197 | 198 | def upsample(x): 199 | """Upsample input tensor by a factor of 2 200 | """ 201 | return F.interpolate(x, scale_factor=2, mode="nearest") 202 | 203 | 204 | def get_smooth_loss(disp, img): 205 | """Computes the smoothness loss for a disparity image 206 | The color image is used for edge-aware smoothness 207 | """ 208 | grad_disp_x = torch.abs(disp[:, :, :, :-1] - disp[:, :, :, 1:]) 209 | grad_disp_y = torch.abs(disp[:, :, :-1, :] - disp[:, :, 1:, :]) 210 | 211 | grad_img_x = torch.mean(torch.abs(img[:, :, :, :-1] - img[:, :, :, 1:]), 1, keepdim=True) 212 | grad_img_y = torch.mean(torch.abs(img[:, :, :-1, :] - img[:, :, 1:, :]), 1, keepdim=True) 213 | 214 | grad_disp_x *= torch.exp(-grad_img_x) 215 | grad_disp_y *= torch.exp(-grad_img_y) 216 | 217 | return grad_disp_x.mean() + grad_disp_y.mean() 218 | 219 | 220 | class SSIM(nn.Module): 221 | """Layer to compute the SSIM loss between a pair of images 222 | """ 223 | def __init__(self): 224 | super(SSIM, self).__init__() 225 | self.mu_x_pool = nn.AvgPool2d(3, 1) 226 | self.mu_y_pool = nn.AvgPool2d(3, 1) 227 | self.sig_x_pool = nn.AvgPool2d(3, 1) 228 | self.sig_y_pool = nn.AvgPool2d(3, 1) 229 | self.sig_xy_pool = nn.AvgPool2d(3, 1) 230 | 231 | self.refl = nn.ReflectionPad2d(1) 232 | 233 | self.C1 = 0.01 ** 2 234 | self.C2 = 0.03 ** 2 235 | 236 | def forward(self, x, y): 237 | x = self.refl(x) 238 | y = self.refl(y) 239 | 240 | mu_x = self.mu_x_pool(x) 241 | mu_y = self.mu_y_pool(y) 242 | 243 | sigma_x = self.sig_x_pool(x ** 2) - mu_x ** 2 244 | sigma_y = self.sig_y_pool(y ** 2) - mu_y ** 2 245 | sigma_xy = self.sig_xy_pool(x * y) - mu_x * mu_y 246 | 247 | SSIM_n = (2 * mu_x * mu_y + self.C1) * (2 * sigma_xy + self.C2) 248 | SSIM_d = (mu_x ** 2 + mu_y ** 2 + self.C1) * (sigma_x + sigma_y + self.C2) 249 | 250 | return torch.clamp((1 - SSIM_n / SSIM_d) / 2, 
0, 1) 251 | 252 | 253 | def compute_depth_errors(gt, pred): 254 | """Computation of error metrics between predicted and ground truth depths 255 | """ 256 | thresh = torch.max((gt / pred), (pred / gt)) 257 | a1 = (thresh < 1.25 ).float().mean() 258 | a2 = (thresh < 1.25 ** 2).float().mean() 259 | a3 = (thresh < 1.25 ** 3).float().mean() 260 | 261 | rmse = (gt - pred) ** 2 262 | rmse = torch.sqrt(rmse.mean()) 263 | 264 | rmse_log = (torch.log(gt) - torch.log(pred)) ** 2 265 | rmse_log = torch.sqrt(rmse_log.mean()) 266 | 267 | abs_rel = torch.mean(torch.abs(gt - pred) / gt) 268 | abs_diff = torch.mean(torch.abs(gt - pred)) 269 | 270 | sq_rel = torch.mean((gt - pred) ** 2 / gt) 271 | 272 | return abs_diff, abs_rel, sq_rel, rmse, rmse_log, a1, a2, a3 273 | 274 | class SimulateGated(nn.Module): 275 | 276 | def __init__(self, cheb_path, dark_levels, depth_normalizer = 150.0, num_bits = 10, min_depth = 0.1, max_depth = 100.0) -> None: 277 | super(SimulateGated,self).__init__() 278 | 279 | self.cheb_path = cheb_path 280 | self.depth_normalizer = depth_normalizer 281 | self.intensity_normalizer = 2.**num_bits -1. 282 | self.min_depth = min_depth 283 | self.max_depth = max_depth 284 | self.c = nn.Parameter(torch.Tensor(np.loadtxt(os.path.join(cheb_path, 'cheb_coef_real_degree6.txt')).reshape(7,1,3,1,1)), 285 | requires_grad=False) 286 | self.dark_levels = nn.Parameter(torch.Tensor(dark_levels),requires_grad=False) 287 | 288 | 289 | def chebval(self, x): 290 | """ 291 | Evaluates a degree-6 Chebyshev polynomial of the first kind on 2D data. 292 | x : tensor of dimension B x 1 x H x W 293 | """ 294 | t0 = torch.ones_like(x) 295 | t1 = x 296 | t2 = 2 * x ** 2 - 1 297 | t3 = 4 * x ** 3 - 3 * x 298 | t4 = 8 * x ** 4 - 8 * x ** 2 + 1 299 | t5 = 16 * x ** 5 - 20 * x ** 3 + 5 * x 300 | t6 = 32 * x ** 6 - 48 * x ** 4 + 18 * x ** 2 - 1 301 | return self.c[0] * t0 + self.c[1] * t1 + self.c[2] * t2 + self.c[3] * t3 + self.c[4] * t4 + self.c[5] * t5 + self.c[6] * t6 302 | 303 | def forward(self, depth, albedo, ambient): 304 | 305 | # depth = torch.clamp(depth * self.depth_normalizer, self.min_depth, self.max_depth) 306 | 307 | albedo = torch.clamp(albedo, 0., 1.) 308 | sim_gated = albedo * self.chebval(depth) * 2 + self.dark_levels 309 | sim_gated = torch.clamp(sim_gated , 0., self.intensity_normalizer)/self.intensity_normalizer + ambient 310 | sim_gated = torch.clamp(sim_gated, 0., 1.)
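        # Formation model used for the cyclic reconstruction: each simulated slice is
        # (up to a fixed gain) albedo times its depth-dependent range-intensity profile,
        # evaluated above via the degree-6 Chebyshev fit, plus the per-slice dark level;
        # the result is clamped to the 10-bit range, normalized to [0, 1], offset by the
        # estimated ambient light, and clamped again before being compared against the
        # captured gated slices.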
311 | return sim_gated,depth,albedo -------------------------------------------------------------------------------- /src/networks/UNet.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import numpy as np 4 | 5 | import torch 6 | import torch.nn as nn 7 | from torch.nn.modules.batchnorm import BatchNorm2d 8 | import torchvision.models as models 9 | import torch.utils.model_zoo as model_zoo 10 | from collections import OrderedDict 11 | from layers import Conv3x3 12 | import torch.nn.functional as F 13 | import math 14 | 15 | 16 | class Conv1x1(nn.Module): 17 | """Layer to pad and convolve input 18 | """ 19 | def __init__(self, in_channels, out_channels): 20 | super(Conv1x1, self).__init__() 21 | self.conv = nn.Conv2d(int(in_channels), int(out_channels), 1) 22 | 23 | def forward(self, x): 24 | out = self.conv(x) 25 | return out 26 | 27 | class ConvBlock(nn.Module): 28 | def __init__(self,in_channels, out_channels): 29 | super(ConvBlock,self).__init__() 30 | self.conv1 = Conv3x3(in_channels,out_channels) 31 | self.conv2 = Conv3x3(out_channels,out_channels) 32 | self.nonlin = nn.LeakyReLU(negative_slope=0.2, inplace=True) 33 | self.batchnorm1 = nn.BatchNorm2d(out_channels) 34 | self.batchnorm2 = nn.BatchNorm2d(out_channels) 35 | 36 | def forward(self,x): 37 | out = self.conv1(x) 38 | out = self.nonlin(out) 39 | out = self.batchnorm1(out) 40 | 41 | out = self.conv2(out) 42 | out = self.nonlin(out) 43 | out = self.batchnorm2(out) 44 | 45 | return out 46 | 47 | class MultiChannelG2D(nn.Module): 48 | def __init__(self, num_convs = 4, scales = range(1), out_channels = 1, 49 | init_channels = 32, height = 512, width = 1024, 50 | use_depth = True): 51 | super(MultiChannelG2D,self).__init__() 52 | 53 | self.scales = scales 54 | self.height = height 55 | self.width = width 56 | 57 | self.encoder = Encoder(num_convs, init_channels) 58 | self.use_depth = use_depth # Whether to use UNet for depth output or not 59 | 60 | if self.use_depth: 61 | self.depth_decoder = Decoder(scales, num_convs, init_channels, out_channels, 'depth') 62 | 63 | self.albedo_decoder = Decoder(scales, num_convs, init_channels, out_channels, 'albedo') 64 | self.ambient_decoder = Decoder(scales, num_convs, init_channels, out_channels, 'ambient') 65 | 66 | 67 | def forward(self,x): 68 | output = {} 69 | 70 | enc_feats = self.encoder(x) 71 | if self.use_depth: 72 | output.update(self.depth_decoder(enc_feats)) 73 | output.update(self.albedo_decoder(enc_feats)) 74 | output.update(self.ambient_decoder(enc_feats)) 75 | 76 | # for scale in self.scales: 77 | # if self.use_depth: 78 | # output[('depth',scale)] = F.interpolate(output[('depth',scale)], [self.height, self.width], mode="bilinear", align_corners=False) 79 | 80 | # output[('albedo',scale)] = F.interpolate(output[('albedo',scale)], [self.height, self.width], mode="bilinear", align_corners=False) 81 | # output[('ambient',scale)] = F.interpolate(output[('ambient',scale)], [self.height, self.width], mode="bilinear", align_corners=False) 82 | return output 83 | 84 | 85 | class Encoder(nn.Module): 86 | 87 | def __init__(self, num_convs = 4, init_channels=32): 88 | """[UNet Encoder for gated images] 89 | 90 | Args: 91 | num_convs (int, optional): [number of up/down levels]. Defaults to 4. 92 | init_channels (int, optional): [initial number of encoding channels]. Defaults to 32. 
93 | """ 94 | super(Encoder, self).__init__() 95 | self.channels = [init_channels*2**(i) for i in range(0,num_convs+1)] 96 | self.channels = [3] + self.channels # number of channels in gated image appended in the beginning 97 | self.enc_blocks = nn.ModuleList([ConvBlock(self.channels[i], self.channels[i+1]) for i in range(len(self.channels)-1)]) 98 | self.maxpool = nn.MaxPool2d(2) 99 | 100 | def forward(self, x): 101 | 102 | skips = [] 103 | for i,enc_block in enumerate(self.enc_blocks): 104 | # print("input shape {} = {}".format(i,x.shape)) 105 | x = enc_block(x) 106 | # print("conv block {} = {}".format(i,x.shape)) 107 | skips.append(x) 108 | x = self.maxpool(x) 109 | # print("maxpool block {} = {}".format(i,x.shape)) 110 | 111 | return skips 112 | 113 | class Decoder(nn.Module): 114 | 115 | def __init__(self, name = "output", scales = range(1), num_convs = 4, init_channels=32, out_channels = 1): 116 | """[UNet Decoder for multi-headed output] 117 | 118 | Args: 119 | scales (list(int), optional): [scales to get output]. Defaults to [0]. 120 | num_convs (int, optional): [number of up/down levels]. Defaults to 4. 121 | init_channels (int, optional): [initial number of encoding channels]. Defaults to 32. 122 | out_channels (int, optional): [number of channels in the output]. Defaults to 1. 123 | name (str, optional): [name of the output]. Defaults to "output". 124 | """ 125 | super(Decoder,self).__init__() 126 | self.channels = [init_channels*2**(i) for i in range(0,num_convs+1)] # [32,64,128,256,512] 127 | self.channels = self.channels[::-1] # Reverse the list to up sample in opposite way # [512,256,128,64,32] 128 | self.scales = scales 129 | self.num_convs = num_convs 130 | self.name = name 131 | self.up_convs = nn.ModuleList([nn.Sequential(nn.ConvTranspose2d(in_channels=self.channels[i], out_channels=self.channels[i+1], kernel_size=2, 132 | stride=2), nn.BatchNorm2d(self.channels[i+1])) for i in range(len(self.channels)-1)]) # [(512->256),(256->128),(128->64),(64->32)] 133 | 134 | # [(256+256 -> 256),(128+128 -> 128),(64+64 -> 64),(32+32 -> 32)] = [(512 -> 256),(256 -> 128),(128 -> 64),(64 -> 32)] 135 | self.conv_blocks = nn.ModuleList([ConvBlock( 136 | self.channels[i], self.channels[i+1]) for i in range(len(self.channels)-1)]) 137 | self.out_convs = nn.ModuleList([Conv1x1(in_channels=self.channels[-( 138 | s+1)], out_channels=out_channels) for s in self.scales]) # in_channels = [32,64,128,256] 139 | 140 | def forward(self,encoder_feats): 141 | output = {} 142 | x = encoder_feats[-1] 143 | 144 | for i in range(len(self.channels)-1): 145 | # print("input shape = {}".format(x.shape)) 146 | x = self.up_convs[i](x) 147 | # print("upsample shape = {}".format(x.shape)) 148 | enc_ftrs = encoder_feats[-(i+2)] 149 | x = torch.cat([x,enc_ftrs],dim=1) 150 | # print("concat shape = {}".format(x.shape)) 151 | x = self.conv_blocks[i](x) 152 | # print("up conv shape = {}".format(x.shape)) 153 | curr_scale = self.num_convs-i-1 154 | if curr_scale in self.scales: 155 | output[(self.name,curr_scale)] = self.out_convs[curr_scale](x) 156 | # print("output shape = {}".format(output[(self.name,curr_scale)].shape)) 157 | return output 158 | 159 | 160 | -------------------------------------------------------------------------------- /src/networks/__init__.py: -------------------------------------------------------------------------------- 1 | from .resnet_encoder import ResnetEncoder 2 | from .depth_decoder import DepthDecoder 3 | from .pose_decoder import PoseDecoder 4 | from .pose_cnn import PoseCNN 5 | 6 | 
from .depth.DepthResNet import DepthResNet 7 | from .depth.PackNet01 import PackNet01 8 | from .depth.PackNetSlim01 import PackNetSlim01 9 | from .depth.PackNetSlim01MultiDecoder import PackNetSlim01MultiDecoder 10 | from .UNet import Encoder, Decoder 11 | # from .depth.PackNetSlim01MultiOutput import PackNetSlim01MultiOutput 12 | 13 | 14 | -------------------------------------------------------------------------------- /src/networks/depth/DepthResNet.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 2 | 3 | import torch.nn as nn 4 | from functools import partial 5 | 6 | from networks.layers.resnet.resnet_encoder import ResnetEncoder 7 | from networks.layers.resnet.depth_decoder import DepthDecoder 8 | from networks.layers.resnet.layers import disp_to_depth 9 | 10 | ######################################################################################################################## 11 | 12 | class DepthResNet(nn.Module): 13 | """ 14 | Inverse depth network based on the ResNet architecture. 15 | 16 | Parameters 17 | ---------- 18 | version : str 19 | Has a XY format, where: 20 | X is the number of residual layers [18, 34, 50] and 21 | Y is an optional ImageNet pretrained flag added by the "pt" suffix 22 | Example: "18pt" initializes a pretrained ResNet18, and "34" initializes a ResNet34 from scratch 23 | kwargs : dict 24 | Extra parameters 25 | """ 26 | def __init__(self, version=None, **kwargs): 27 | super().__init__() 28 | assert version is not None, "DispResNet needs a version" 29 | 30 | num_layers = int(version[:2]) # First two characters are the number of layers 31 | pretrained = version[2:] == 'pt' # If the last characters are "pt", use ImageNet pretraining 32 | assert num_layers in [18, 34, 50], 'ResNet version {} not available'.format(num_layers) 33 | 34 | self.encoder = ResnetEncoder(num_layers=num_layers, pretrained=pretrained) 35 | self.decoder = DepthDecoder(num_ch_enc=self.encoder.num_ch_enc) 36 | self.scale_inv_depth = partial(disp_to_depth, min_depth=0.1, max_depth=100.0) 37 | 38 | def forward(self, x): 39 | """ 40 | Runs the network and returns inverse depth maps 41 | (4 scales if training and 1 if not). 42 | """ 43 | x = self.encoder(x) 44 | x = self.decoder(x) 45 | disps = [x[('disp', i)] for i in range(4)] 46 | outputs = {} 47 | for i in range(4): 48 | outputs[("disp",i)] = self.scale_inv_depth(disps[i])[0] 49 | return outputs 50 | # if self.training: 51 | # return [self.scale_inv_depth(d)[0] for d in disps] 52 | # else: 53 | # return self.scale_inv_depth(disps[0])[0] 54 | 55 | ######################################################################################################################## 56 | -------------------------------------------------------------------------------- /src/networks/depth/PackNet01.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 2 | 3 | import torch 4 | import torch.nn as nn 5 | from networks.layers.packnet.layers01 import \ 6 | PackLayerConv3d, UnpackLayerConv3d, Conv2D, ResidualBlock, InvDepth 7 | 8 | class PackNet01(nn.Module): 9 | """ 10 | PackNet network with 3d convolutions (version 01, from the CVPR paper). 
11 | 12 | https://arxiv.org/abs/1905.02693 13 | 14 | Parameters 15 | ---------- 16 | dropout : float 17 | Dropout value to use 18 | version : str 19 | Has a XY format, where: 20 | X controls upsampling variations (not used at the moment). 21 | Y controls feature stacking (A for concatenation and B for addition) 22 | kwargs : dict 23 | Extra parameters 24 | """ 25 | def __init__(self, dropout=None, version=None, **kwargs): 26 | super().__init__() 27 | self.version = version[1:] 28 | # Input/output channels 29 | in_channels = 3 30 | out_channels = 1 31 | # Hyper-parameters 32 | ni, no = 64, out_channels 33 | n1, n2, n3, n4, n5 = 64, 64, 128, 256, 512 34 | num_blocks = [2, 2, 3, 3] 35 | pack_kernel = [5, 3, 3, 3, 3] 36 | unpack_kernel = [3, 3, 3, 3, 3] 37 | iconv_kernel = [3, 3, 3, 3, 3] 38 | # Initial convolutional layer 39 | self.pre_calc = Conv2D(in_channels, ni, 5, 1) 40 | # Support for different versions 41 | if self.version == 'A': # Channel concatenation 42 | n1o, n1i = n1, n1 + ni + no 43 | n2o, n2i = n2, n2 + n1 + no 44 | n3o, n3i = n3, n3 + n2 + no 45 | n4o, n4i = n4, n4 + n3 46 | n5o, n5i = n5, n5 + n4 47 | elif self.version == 'B': # Channel addition 48 | n1o, n1i = n1, n1 + no 49 | n2o, n2i = n2, n2 + no 50 | n3o, n3i = n3//2, n3//2 + no 51 | n4o, n4i = n4//2, n4//2 52 | n5o, n5i = n5//2, n5//2 53 | else: 54 | raise ValueError('Unknown PackNet version {}'.format(version)) 55 | 56 | # Encoder 57 | 58 | self.pack1 = PackLayerConv3d(n1, pack_kernel[0]) 59 | self.pack2 = PackLayerConv3d(n2, pack_kernel[1]) 60 | self.pack3 = PackLayerConv3d(n3, pack_kernel[2]) 61 | self.pack4 = PackLayerConv3d(n4, pack_kernel[3]) 62 | self.pack5 = PackLayerConv3d(n5, pack_kernel[4]) 63 | 64 | self.conv1 = Conv2D(ni, n1, 7, 1) 65 | self.conv2 = ResidualBlock(n1, n2, num_blocks[0], 1, dropout=dropout) 66 | self.conv3 = ResidualBlock(n2, n3, num_blocks[1], 1, dropout=dropout) 67 | self.conv4 = ResidualBlock(n3, n4, num_blocks[2], 1, dropout=dropout) 68 | self.conv5 = ResidualBlock(n4, n5, num_blocks[3], 1, dropout=dropout) 69 | 70 | # Decoder 71 | 72 | self.unpack5 = UnpackLayerConv3d(n5, n5o, unpack_kernel[0]) 73 | self.unpack4 = UnpackLayerConv3d(n5, n4o, unpack_kernel[1]) 74 | self.unpack3 = UnpackLayerConv3d(n4, n3o, unpack_kernel[2]) 75 | self.unpack2 = UnpackLayerConv3d(n3, n2o, unpack_kernel[3]) 76 | self.unpack1 = UnpackLayerConv3d(n2, n1o, unpack_kernel[4]) 77 | 78 | self.iconv5 = Conv2D(n5i, n5, iconv_kernel[0], 1) 79 | self.iconv4 = Conv2D(n4i, n4, iconv_kernel[1], 1) 80 | self.iconv3 = Conv2D(n3i, n3, iconv_kernel[2], 1) 81 | self.iconv2 = Conv2D(n2i, n2, iconv_kernel[3], 1) 82 | self.iconv1 = Conv2D(n1i, n1, iconv_kernel[4], 1) 83 | 84 | # Depth Layers 85 | 86 | self.unpack_disps = nn.PixelShuffle(2) 87 | self.unpack_disp4 = nn.Upsample(scale_factor=2, mode='nearest', align_corners=None) 88 | self.unpack_disp3 = nn.Upsample(scale_factor=2, mode='nearest', align_corners=None) 89 | self.unpack_disp2 = nn.Upsample(scale_factor=2, mode='nearest', align_corners=None) 90 | 91 | self.disp4_layer = InvDepth(n4, out_channels=out_channels) 92 | self.disp3_layer = InvDepth(n3, out_channels=out_channels) 93 | self.disp2_layer = InvDepth(n2, out_channels=out_channels) 94 | self.disp1_layer = InvDepth(n1, out_channels=out_channels) 95 | 96 | self.init_weights() 97 | 98 | def init_weights(self): 99 | """Initializes network weights.""" 100 | for m in self.modules(): 101 | if isinstance(m, (nn.Conv2d, nn.Conv3d)): 102 | nn.init.xavier_uniform_(m.weight) 103 | if m.bias is not None: 104 | m.bias.data.zero_() 
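    # Illustrative usage sketch (not part of the original file): with version='1A' the
    # decoder concatenates skip connections ('B' would add them), and forward() returns
    # a dict of inverse-depth maps keyed by ('disp', i) for scales i = 0..3, with
    # ('disp', 0) at full input resolution. Input height and width must be divisible by 32, e.g.
    #   net = PackNet01(dropout=0.5, version='1A')
    #   out = net(torch.rand(1, 3, 64, 128))
    #   out[('disp', 0)].shape  # torch.Size([1, 1, 64, 128])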
105 | 106 | def forward(self, x): 107 | """ 108 | Runs the network and returns inverse depth maps 109 | (4 scales if training and 1 if not). 110 | """ 111 | x = self.pre_calc(x) 112 | 113 | # Encoder 114 | 115 | x1 = self.conv1(x) 116 | x1p = self.pack1(x1) 117 | x2 = self.conv2(x1p) 118 | x2p = self.pack2(x2) 119 | x3 = self.conv3(x2p) 120 | x3p = self.pack3(x3) 121 | x4 = self.conv4(x3p) 122 | x4p = self.pack4(x4) 123 | x5 = self.conv5(x4p) 124 | x5p = self.pack5(x5) 125 | 126 | # Skips 127 | 128 | skip1 = x 129 | skip2 = x1p 130 | skip3 = x2p 131 | skip4 = x3p 132 | skip5 = x4p 133 | 134 | # Decoder 135 | 136 | unpack5 = self.unpack5(x5p) 137 | if self.version == 'A': 138 | concat5 = torch.cat((unpack5, skip5), 1) 139 | else: 140 | concat5 = unpack5 + skip5 141 | iconv5 = self.iconv5(concat5) 142 | 143 | unpack4 = self.unpack4(iconv5) 144 | if self.version == 'A': 145 | concat4 = torch.cat((unpack4, skip4), 1) 146 | else: 147 | concat4 = unpack4 + skip4 148 | iconv4 = self.iconv4(concat4) 149 | disp4 = self.disp4_layer(iconv4) 150 | udisp4 = self.unpack_disp4(disp4) 151 | 152 | unpack3 = self.unpack3(iconv4) 153 | if self.version == 'A': 154 | concat3 = torch.cat((unpack3, skip3, udisp4), 1) 155 | else: 156 | concat3 = torch.cat((unpack3 + skip3, udisp4), 1) 157 | iconv3 = self.iconv3(concat3) 158 | disp3 = self.disp3_layer(iconv3) 159 | udisp3 = self.unpack_disp3(disp3) 160 | 161 | unpack2 = self.unpack2(iconv3) 162 | if self.version == 'A': 163 | concat2 = torch.cat((unpack2, skip2, udisp3), 1) 164 | else: 165 | concat2 = torch.cat((unpack2 + skip2, udisp3), 1) 166 | iconv2 = self.iconv2(concat2) 167 | disp2 = self.disp2_layer(iconv2) 168 | udisp2 = self.unpack_disp2(disp2) 169 | 170 | unpack1 = self.unpack1(iconv2) 171 | if self.version == 'A': 172 | concat1 = torch.cat((unpack1, skip1, udisp2), 1) 173 | else: 174 | concat1 = torch.cat((unpack1 + skip1, udisp2), 1) 175 | iconv1 = self.iconv1(concat1) 176 | disp1 = self.disp1_layer(iconv1) 177 | disps = [disp1, disp2, disp3, disp4] 178 | outputs = {} 179 | for i,disp in enumerate(disps): 180 | outputs[("disp",i)] = disp 181 | return outputs 182 | # if self.training: 183 | # return [disp1, disp2, disp3, disp4] 184 | # else: 185 | # return disp1 186 | -------------------------------------------------------------------------------- /src/networks/depth/PackNetSlim01.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 2 | 3 | import torch 4 | import torch.nn as nn 5 | from networks.layers.packnet.layers01 import \ 6 | PackLayerConv3d, UnpackLayerConv3d, Conv2D, ResidualBlock, InvDepth 7 | 8 | class PackNetSlim01(nn.Module): 9 | """ 10 | PackNet network with 3d convolutions (version 01, from the CVPR paper). 11 | Slimmer version, with fewer feature channels 12 | 13 | https://arxiv.org/abs/1905.02693 14 | 15 | Parameters 16 | ---------- 17 | dropout : float 18 | Dropout value to use 19 | version : str 20 | Has a XY format, where: 21 | X controls upsampling variations (not used at the moment). 
22 | Y controls feature stacking (A for concatenation and B for addition) 23 | kwargs : dict 24 | Extra parameters 25 | """ 26 | def __init__(self, dropout=None, version=None, **kwargs): 27 | super().__init__() 28 | self.version = version[1:] 29 | # Input/output channels 30 | in_channels = 3 31 | out_channels = 1 32 | # Hyper-parameters 33 | ni, no = 32, out_channels 34 | n1, n2, n3, n4, n5 = 32, 64, 128, 256, 512 35 | num_blocks = [2, 2, 3, 3] 36 | pack_kernel = [5, 3, 3, 3, 3] 37 | unpack_kernel = [3, 3, 3, 3, 3] 38 | iconv_kernel = [3, 3, 3, 3, 3] 39 | num_3d_feat = 4 40 | # Initial convolutional layer 41 | self.pre_calc = Conv2D(in_channels, ni, 5, 1) 42 | # Support for different versions 43 | if self.version == 'A': # Channel concatenation 44 | n1o, n1i = n1, n1 + ni + no 45 | n2o, n2i = n2, n2 + n1 + no 46 | n3o, n3i = n3, n3 + n2 + no 47 | n4o, n4i = n4, n4 + n3 48 | n5o, n5i = n5, n5 + n4 49 | elif self.version == 'B': # Channel addition 50 | n1o, n1i = n1, n1 + no 51 | n2o, n2i = n2, n2 + no 52 | n3o, n3i = n3//2, n3//2 + no 53 | n4o, n4i = n4//2, n4//2 54 | n5o, n5i = n5//2, n5//2 55 | else: 56 | raise ValueError('Unknown PackNet version {}'.format(version)) 57 | 58 | # Encoder 59 | 60 | self.pack1 = PackLayerConv3d(n1, pack_kernel[0], d=num_3d_feat) 61 | self.pack2 = PackLayerConv3d(n2, pack_kernel[1], d=num_3d_feat) 62 | self.pack3 = PackLayerConv3d(n3, pack_kernel[2], d=num_3d_feat) 63 | self.pack4 = PackLayerConv3d(n4, pack_kernel[3], d=num_3d_feat) 64 | self.pack5 = PackLayerConv3d(n5, pack_kernel[4], d=num_3d_feat) 65 | 66 | self.conv1 = Conv2D(ni, n1, 7, 1) 67 | self.conv2 = ResidualBlock(n1, n2, num_blocks[0], 1, dropout=dropout) 68 | self.conv3 = ResidualBlock(n2, n3, num_blocks[1], 1, dropout=dropout) 69 | self.conv4 = ResidualBlock(n3, n4, num_blocks[2], 1, dropout=dropout) 70 | self.conv5 = ResidualBlock(n4, n5, num_blocks[3], 1, dropout=dropout) 71 | 72 | # Decoder 73 | 74 | self.unpack5 = UnpackLayerConv3d(n5, n5o, unpack_kernel[0], d=num_3d_feat) 75 | self.unpack4 = UnpackLayerConv3d(n5, n4o, unpack_kernel[1], d=num_3d_feat) 76 | self.unpack3 = UnpackLayerConv3d(n4, n3o, unpack_kernel[2], d=num_3d_feat) 77 | self.unpack2 = UnpackLayerConv3d(n3, n2o, unpack_kernel[3], d=num_3d_feat) 78 | self.unpack1 = UnpackLayerConv3d(n2, n1o, unpack_kernel[4], d=num_3d_feat) 79 | 80 | self.iconv5 = Conv2D(n5i, n5, iconv_kernel[0], 1) 81 | self.iconv4 = Conv2D(n4i, n4, iconv_kernel[1], 1) 82 | self.iconv3 = Conv2D(n3i, n3, iconv_kernel[2], 1) 83 | self.iconv2 = Conv2D(n2i, n2, iconv_kernel[3], 1) 84 | self.iconv1 = Conv2D(n1i, n1, iconv_kernel[4], 1) 85 | 86 | # Depth Layers 87 | 88 | self.unpack_disps = nn.PixelShuffle(2) 89 | self.unpack_disp4 = nn.Upsample(scale_factor=2, mode='nearest', align_corners=None) 90 | self.unpack_disp3 = nn.Upsample(scale_factor=2, mode='nearest', align_corners=None) 91 | self.unpack_disp2 = nn.Upsample(scale_factor=2, mode='nearest', align_corners=None) 92 | 93 | self.disp4_layer = InvDepth(n4, out_channels=out_channels) 94 | self.disp3_layer = InvDepth(n3, out_channels=out_channels) 95 | self.disp2_layer = InvDepth(n2, out_channels=out_channels) 96 | self.disp1_layer = InvDepth(n1, out_channels=out_channels) 97 | 98 | self.init_weights() 99 | 100 | def init_weights(self): 101 | """Initializes network weights.""" 102 | for m in self.modules(): 103 | if isinstance(m, (nn.Conv2d, nn.Conv3d)): 104 | nn.init.xavier_uniform_(m.weight) 105 | if m.bias is not None: 106 | m.bias.data.zero_() 107 | 108 | def forward(self, x): 109 | """ 110 | Runs the network 
and returns inverse depth maps 111 | (4 scales if training and 1 if not). 112 | """ 113 | x = self.pre_calc(x) 114 | 115 | # Encoder 116 | 117 | x1 = self.conv1(x) 118 | x1p = self.pack1(x1) 119 | x2 = self.conv2(x1p) 120 | x2p = self.pack2(x2) 121 | x3 = self.conv3(x2p) 122 | x3p = self.pack3(x3) 123 | x4 = self.conv4(x3p) 124 | x4p = self.pack4(x4) 125 | x5 = self.conv5(x4p) 126 | x5p = self.pack5(x5) 127 | 128 | # Skips 129 | 130 | skip1 = x 131 | skip2 = x1p 132 | skip3 = x2p 133 | skip4 = x3p 134 | skip5 = x4p 135 | 136 | # Decoder 137 | 138 | unpack5 = self.unpack5(x5p) 139 | if self.version == 'A': 140 | concat5 = torch.cat((unpack5, skip5), 1) 141 | else: 142 | concat5 = unpack5 + skip5 143 | iconv5 = self.iconv5(concat5) 144 | 145 | unpack4 = self.unpack4(iconv5) 146 | if self.version == 'A': 147 | concat4 = torch.cat((unpack4, skip4), 1) 148 | else: 149 | concat4 = unpack4 + skip4 150 | iconv4 = self.iconv4(concat4) 151 | disp4 = self.disp4_layer(iconv4) 152 | udisp4 = self.unpack_disp4(disp4) 153 | 154 | unpack3 = self.unpack3(iconv4) 155 | if self.version == 'A': 156 | concat3 = torch.cat((unpack3, skip3, udisp4), 1) 157 | else: 158 | concat3 = torch.cat((unpack3 + skip3, udisp4), 1) 159 | iconv3 = self.iconv3(concat3) 160 | disp3 = self.disp3_layer(iconv3) 161 | udisp3 = self.unpack_disp3(disp3) 162 | 163 | unpack2 = self.unpack2(iconv3) 164 | if self.version == 'A': 165 | concat2 = torch.cat((unpack2, skip2, udisp3), 1) 166 | else: 167 | concat2 = torch.cat((unpack2 + skip2, udisp3), 1) 168 | iconv2 = self.iconv2(concat2) 169 | disp2 = self.disp2_layer(iconv2) 170 | udisp2 = self.unpack_disp2(disp2) 171 | 172 | unpack1 = self.unpack1(iconv2) 173 | if self.version == 'A': 174 | concat1 = torch.cat((unpack1, skip1, udisp2), 1) 175 | else: 176 | concat1 = torch.cat((unpack1 + skip1, udisp2), 1) 177 | iconv1 = self.iconv1(concat1) 178 | disp1 = self.disp1_layer(iconv1) 179 | 180 | disps = [disp1, disp2, disp3, disp4] 181 | outputs = {} 182 | for i,disp in enumerate(disps): 183 | outputs[("disp",i)] = disp 184 | return outputs 185 | # if self.training: 186 | # return [disp1, disp2, disp3, disp4] 187 | # else: 188 | # return disp1 189 | -------------------------------------------------------------------------------- /src/networks/depth/PackNetSlim01MultiDecoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from networks.layers.packnet.layers01 import PackLayerConv3d, UnpackLayerConv3d, Conv2D, ResidualBlock, InvDepth, activations 4 | 5 | class Decoder(nn.Module): 6 | def __init__(self, name, version, ni, no, n1, n2, n3, n4, n5, unpack_kernel, iconv_kernel, num_3d_feat, out_channels, activation = 'sigmoid', use_batchnorm=False): 7 | super().__init__() 8 | self.name = name 9 | self.version = version 10 | 11 | # Support for different versions 12 | if self.version == 'A': # Channel concatenation 13 | n1o, n1i = n1, n1 + ni + no 14 | n2o, n2i = n2, n2 + n1 + no 15 | n3o, n3i = n3, n3 + n2 + no 16 | n4o, n4i = n4, n4 + n3 17 | n5o, n5i = n5, n5 + n4 18 | elif self.version == 'B': # Channel addition 19 | n1o, n1i = n1, n1 + no 20 | n2o, n2i = n2, n2 + no 21 | n3o, n3i = n3//2, n3//2 + no 22 | n4o, n4i = n4//2, n4//2 23 | n5o, n5i = n5//2, n5//2 24 | else: 25 | raise ValueError('Unknown PackNet version {}'.format(self.version)) 26 | 27 | # Decoder 28 | self.unpack5 = UnpackLayerConv3d(n5, n5o, unpack_kernel[0], d=num_3d_feat) 29 | self.unpack4 = UnpackLayerConv3d(n5, n4o, unpack_kernel[1], d=num_3d_feat) 
30 | self.unpack3 = UnpackLayerConv3d(n4, n3o, unpack_kernel[2], d=num_3d_feat) 31 | self.unpack2 = UnpackLayerConv3d(n3, n2o, unpack_kernel[3], d=num_3d_feat) 32 | self.unpack1 = UnpackLayerConv3d(n2, n1o, unpack_kernel[4], d=num_3d_feat) 33 | 34 | self.iconv5 = Conv2D(n5i, n5, iconv_kernel[0], 1, use_batchnorm=use_batchnorm) 35 | self.iconv4 = Conv2D(n4i, n4, iconv_kernel[1], 1, use_batchnorm=use_batchnorm) 36 | self.iconv3 = Conv2D(n3i, n3, iconv_kernel[2], 1, use_batchnorm=use_batchnorm) 37 | self.iconv2 = Conv2D(n2i, n2, iconv_kernel[3], 1, use_batchnorm=use_batchnorm) 38 | self.iconv1 = Conv2D(n1i, n1, iconv_kernel[4], 1, use_batchnorm=use_batchnorm) 39 | 40 | # Depth Layers 41 | 42 | self.unpack_out4 = nn.Upsample(scale_factor=2, mode='nearest', align_corners=None) 43 | self.unpack_out3 = nn.Upsample(scale_factor=2, mode='nearest', align_corners=None) 44 | self.unpack_out2 = nn.Upsample(scale_factor=2, mode='nearest', align_corners=None) 45 | 46 | self.out4_layer = InvDepth(n4, out_channels=out_channels, activation=activation) 47 | self.out3_layer = InvDepth(n3, out_channels=out_channels, activation=activation) 48 | self.out2_layer = InvDepth(n2, out_channels=out_channels, activation=activation) 49 | self.out1_layer = InvDepth(n1, out_channels=out_channels, activation=activation) 50 | 51 | 52 | def forward(self, x5p, skip1, skip2, skip3, skip4, skip5): 53 | # Decoder 54 | 55 | unpack5 = self.unpack5(x5p) 56 | if self.version == 'A': 57 | concat5 = torch.cat((unpack5, skip5), 1) 58 | else: 59 | concat5 = unpack5 + skip5 60 | iconv5 = self.iconv5(concat5) 61 | 62 | unpack4 = self.unpack4(iconv5) 63 | if self.version == 'A': 64 | concat4 = torch.cat((unpack4, skip4), 1) 65 | else: 66 | concat4 = unpack4 + skip4 67 | iconv4 = self.iconv4(concat4) 68 | out4 = self.out4_layer(iconv4) 69 | uout4 = self.unpack_out4(out4) 70 | 71 | unpack3 = self.unpack3(iconv4) 72 | if self.version == 'A': 73 | concat3 = torch.cat((unpack3, skip3, uout4), 1) 74 | else: 75 | concat3 = torch.cat((unpack3 + skip3, uout4), 1) 76 | iconv3 = self.iconv3(concat3) 77 | out3 = self.out3_layer(iconv3) 78 | uout3 = self.unpack_out3(out3) 79 | 80 | unpack2 = self.unpack2(iconv3) 81 | if self.version == 'A': 82 | concat2 = torch.cat((unpack2, skip2, uout3), 1) 83 | else: 84 | concat2 = torch.cat((unpack2 + skip2, uout3), 1) 85 | iconv2 = self.iconv2(concat2) 86 | out2 = self.out2_layer(iconv2) 87 | uout2 = self.unpack_out2(out2) 88 | 89 | unpack1 = self.unpack1(iconv2) 90 | if self.version == 'A': 91 | concat1 = torch.cat((unpack1, skip1, uout2), 1) 92 | else: 93 | concat1 = torch.cat((unpack1 + skip1, uout2), 1) 94 | iconv1 = self.iconv1(concat1) 95 | out1 = self.out1_layer(iconv1) 96 | 97 | outs = [out1, out2, out3, out4] 98 | outputs = {} 99 | for i,out in enumerate(outs): 100 | outputs[(self.name,i)] = out 101 | return outputs 102 | 103 | class PackNetSlim01MultiDecoder(nn.Module): 104 | """ 105 | PackNet network with 3d convolutions (version 01, from the CVPR paper). 106 | Slimmer version, with fewer feature channels 107 | https://arxiv.org/abs/1905.02693 108 | Parameters 109 | ---------- 110 | dropout : float 111 | Dropout value to use 112 | version : str 113 | Has a XY format, where: 114 | X controls upsampling variations (not used at the moment). 
115 | Y controls feature stacking (A for concatenation and B for addition) 116 | kwargs : dict 117 | Extra parameters 118 | """ 119 | def __init__(self, dropout=None, version=None, cycle_loss=False, use_batchnorm = False, **kwargs): 120 | super().__init__() 121 | self.version = version[1:] 122 | name = 'depth' if cycle_loss else 'disp' 123 | 124 | # Input/output channels 125 | in_channels = 3 126 | out_channels = 1 127 | # Hyper-parameters 128 | ni, no = 32, out_channels 129 | n1, n2, n3, n4, n5 = 32, 64, 128, 256, 512 130 | num_blocks = [2, 2, 3, 3] 131 | pack_kernel = [5, 3, 3, 3, 3] 132 | unpack_kernel = [3, 3, 3, 3, 3] 133 | iconv_kernel = [3, 3, 3, 3, 3] 134 | num_3d_feat = 4 135 | # Initial convolutional layer 136 | self.pre_calc = Conv2D(in_channels, ni, 5, 1,use_batchnorm=use_batchnorm) 137 | 138 | 139 | # Encoder 140 | 141 | self.pack1 = PackLayerConv3d(n1, pack_kernel[0], d=num_3d_feat) 142 | self.pack2 = PackLayerConv3d(n2, pack_kernel[1], d=num_3d_feat) 143 | self.pack3 = PackLayerConv3d(n3, pack_kernel[2], d=num_3d_feat) 144 | self.pack4 = PackLayerConv3d(n4, pack_kernel[3], d=num_3d_feat) 145 | self.pack5 = PackLayerConv3d(n5, pack_kernel[4], d=num_3d_feat) 146 | 147 | self.conv1 = Conv2D(ni, n1, 7, 1, use_batchnorm=use_batchnorm) 148 | self.conv2 = ResidualBlock(n1, n2, num_blocks[0], 1, dropout=dropout, use_batchnorm=use_batchnorm) 149 | self.conv3 = ResidualBlock(n2, n3, num_blocks[1], 1, dropout=dropout, use_batchnorm=use_batchnorm) 150 | self.conv4 = ResidualBlock(n3, n4, num_blocks[2], 1, dropout=dropout, use_batchnorm=use_batchnorm) 151 | self.conv5 = ResidualBlock(n4, n5, num_blocks[3], 1, dropout=dropout, use_batchnorm=use_batchnorm) 152 | 153 | # Decoder 154 | 155 | self.depth_decoder = Decoder(name, self.version, ni, no, n1, n2, n3, n4, n5, unpack_kernel, iconv_kernel, num_3d_feat, out_channels, activation='sigmoid', use_batchnorm=use_batchnorm) 156 | self.albedo_decoder = Decoder('albedo', self.version, ni, no, n1, n2, n3, n4, n5, unpack_kernel, iconv_kernel, num_3d_feat, out_channels, activation='sigmoid', use_batchnorm=use_batchnorm) 157 | self.ambient_decoder = Decoder('ambient', self.version, ni, no, n1, n2, n3, n4, n5, unpack_kernel, iconv_kernel, num_3d_feat, out_channels, activation='sigmoid', use_batchnorm=use_batchnorm) 158 | 159 | self.init_weights() 160 | 161 | def init_weights(self): 162 | """Initializes network weights.""" 163 | for m in self.modules(): 164 | if isinstance(m, (nn.Conv2d, nn.Conv3d)): 165 | nn.init.xavier_uniform_(m.weight) 166 | if m.bias is not None: 167 | m.bias.data.zero_() 168 | 169 | def forward(self, x): 170 | """ 171 | Runs the network and returns inverse depth maps 172 | (4 scales if training and 1 if not). 
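        In this multi-decoder variant the returned dict holds all four scales of every
        head, keyed by (name, i) with name in {'depth' or 'disp', 'albedo', 'ambient'}.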
173 | """ 174 | x = self.pre_calc(x) 175 | 176 | # Encoder 177 | 178 | x1 = self.conv1(x) 179 | x1p = self.pack1(x1) 180 | x2 = self.conv2(x1p) 181 | x2p = self.pack2(x2) 182 | x3 = self.conv3(x2p) 183 | x3p = self.pack3(x3) 184 | x4 = self.conv4(x3p) 185 | x4p = self.pack4(x4) 186 | x5 = self.conv5(x4p) 187 | x5p = self.pack5(x5) 188 | 189 | # Skips 190 | 191 | skip1 = x 192 | skip2 = x1p 193 | skip3 = x2p 194 | skip4 = x3p 195 | skip5 = x4p 196 | 197 | # Decoder 198 | outputs = {} 199 | outputs.update(self.depth_decoder(x5p, skip1, skip2, skip3, skip4, skip5)) 200 | outputs.update(self.albedo_decoder(x5p, skip1, skip2, skip3, skip4, skip5)) 201 | outputs.update(self.ambient_decoder(x5p, skip1, skip2, skip3, skip4, skip5)) 202 | 203 | return outputs 204 | 205 | 206 | 207 | if __name__ == '__main__': 208 | import sys 209 | import os 210 | sys.path.append("..") 211 | from layers.packnet.layers01 import PackLayerConv3d, UnpackLayerConv3d, Conv2D, ResidualBlock, InvDepth 212 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" 213 | 214 | dropout = 0.5 215 | version = '1A' 216 | model = PackNetSlim01MultiDecoder(dropout,version) 217 | img = torch.rand(1,3, 512, 1024) 218 | outputs = model(img) 219 | print(outputs) -------------------------------------------------------------------------------- /src/networks/depth/depth_decoder.py: -------------------------------------------------------------------------------- 1 | # Copyright Niantic 2019. Patent Pending. All rights reserved. 2 | # 3 | # This software is licensed under the terms of the Monodepth2 licence 4 | # which allows for non-commercial use only, the full terms of which are made 5 | # available in the LICENSE file. 6 | 7 | from __future__ import absolute_import, division, print_function 8 | 9 | import numpy as np 10 | import torch 11 | import torch.nn as nn 12 | 13 | from collections import OrderedDict 14 | from layers import * 15 | 16 | 17 | class DepthDecoder(nn.Module): 18 | def __init__(self, num_ch_enc, scales=range(4), num_output_channels=1, use_skips=True): 19 | super(DepthDecoder, self).__init__() 20 | 21 | self.num_output_channels = num_output_channels 22 | self.use_skips = use_skips 23 | self.upsample_mode = 'nearest' 24 | self.scales = scales 25 | 26 | self.num_ch_enc = num_ch_enc 27 | self.num_ch_dec = np.array([16, 32, 64, 128, 256]) 28 | 29 | # decoder 30 | self.convs = OrderedDict() 31 | for i in range(4, -1, -1): 32 | # upconv_0 33 | num_ch_in = self.num_ch_enc[-1] if i == 4 else self.num_ch_dec[i + 1] 34 | num_ch_out = self.num_ch_dec[i] 35 | self.convs[("upconv", i, 0)] = ConvBlock(num_ch_in, num_ch_out) 36 | 37 | # upconv_1 38 | num_ch_in = self.num_ch_dec[i] 39 | if self.use_skips and i > 0: 40 | num_ch_in += self.num_ch_enc[i - 1] 41 | num_ch_out = self.num_ch_dec[i] 42 | self.convs[("upconv", i, 1)] = ConvBlock(num_ch_in, num_ch_out) 43 | 44 | for s in self.scales: 45 | self.convs[("dispconv", s)] = Conv3x3(self.num_ch_dec[s], self.num_output_channels) 46 | 47 | self.decoder = nn.ModuleList(list(self.convs.values())) 48 | self.sigmoid = nn.Sigmoid() 49 | 50 | def forward(self, input_features): 51 | self.outputs = {} 52 | 53 | # decoder 54 | x = input_features[-1] 55 | for i in range(4, -1, -1): 56 | x = self.convs[("upconv", i, 0)](x) 57 | x = [upsample(x)] 58 | if self.use_skips and i > 0: 59 | x += [input_features[i - 1]] 60 | x = torch.cat(x, 1) 61 | x = self.convs[("upconv", i, 1)](x) 62 | if i in self.scales: 63 | self.outputs[("disp", i)] = self.sigmoid(self.convs[("dispconv", i)](x)) 64 | 65 | return self.outputs 66 | 
-------------------------------------------------------------------------------- /src/networks/depth_decoder.py: -------------------------------------------------------------------------------- 1 | # Copyright Niantic 2019. Patent Pending. All rights reserved. 2 | # 3 | # This software is licensed under the terms of the Monodepth2 licence 4 | # which allows for non-commercial use only, the full terms of which are made 5 | # available in the LICENSE file. 6 | 7 | from __future__ import absolute_import, division, print_function 8 | 9 | import numpy as np 10 | import torch 11 | import torch.nn as nn 12 | 13 | from collections import OrderedDict 14 | from layers import * 15 | 16 | 17 | class DepthDecoder(nn.Module): 18 | def __init__(self, num_ch_enc, scales=range(4), num_output_channels=1, use_skips=True): 19 | super(DepthDecoder, self).__init__() 20 | 21 | self.num_output_channels = num_output_channels 22 | self.use_skips = use_skips 23 | self.upsample_mode = 'nearest' 24 | self.scales = scales 25 | 26 | self.num_ch_enc = num_ch_enc 27 | self.num_ch_dec = np.array([16, 32, 64, 128, 256]) 28 | 29 | # decoder 30 | self.convs = OrderedDict() 31 | for i in range(4, -1, -1): 32 | # upconv_0 33 | num_ch_in = self.num_ch_enc[-1] if i == 4 else self.num_ch_dec[i + 1] 34 | num_ch_out = self.num_ch_dec[i] 35 | self.convs[("upconv", i, 0)] = ConvBlock(num_ch_in, num_ch_out) 36 | 37 | # upconv_1 38 | num_ch_in = self.num_ch_dec[i] 39 | if self.use_skips and i > 0: 40 | num_ch_in += self.num_ch_enc[i - 1] 41 | num_ch_out = self.num_ch_dec[i] 42 | self.convs[("upconv", i, 1)] = ConvBlock(num_ch_in, num_ch_out) 43 | 44 | for s in self.scales: 45 | self.convs[("dispconv", s)] = Conv3x3(self.num_ch_dec[s], self.num_output_channels) 46 | 47 | self.decoder = nn.ModuleList(list(self.convs.values())) 48 | self.sigmoid = nn.Sigmoid() 49 | 50 | def forward(self, input_features): 51 | self.outputs = {} 52 | 53 | # decoder 54 | x = input_features[-1] 55 | for i in range(4, -1, -1): 56 | x = self.convs[("upconv", i, 0)](x) 57 | x = [upsample(x)] 58 | if self.use_skips and i > 0: 59 | x += [input_features[i - 1]] 60 | x = torch.cat(x, 1) 61 | x = self.convs[("upconv", i, 1)](x) 62 | if i in self.scales: 63 | self.outputs[("disp", i)] = self.sigmoid(self.convs[("dispconv", i)](x)) 64 | 65 | return self.outputs -------------------------------------------------------------------------------- /src/networks/layers/packnet/layers01.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 
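# This module collects the building blocks shared by the PackNet depth networks above:
# Conv2D / ResidualConv blocks (GroupNorm or BatchNorm + ELU), the InvDepth output head,
# and the packing / unpacking layers, which trade spatial resolution for channels (the
# space-to-depth inverse of nn.PixelShuffle) so that down- and up-sampling lose as little
# detail as possible.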
2 | 3 | import torch 4 | import torch.nn as nn 5 | from functools import partial 6 | import torch.nn.functional as F 7 | 8 | ######################################################################################################################## 9 | activations = { 10 | 'tanh' : nn.Tanh(), 11 | 'sigmoid': nn.Sigmoid(), 12 | 'relu' : nn.ReLU(inplace=True), 13 | 'linear' : lambda x : x 14 | } 15 | 16 | ######################################################################################################################## 17 | 18 | class Conv2D(nn.Module): 19 | """ 20 | 2D convolution with GroupNorm and ELU 21 | Parameters 22 | ---------- 23 | in_channels : int 24 | Number of input channels 25 | out_channels : int 26 | Number of output channels 27 | kernel_size : int 28 | Kernel size 29 | stride : int 30 | Stride 31 | """ 32 | def __init__(self, in_channels, out_channels, kernel_size, stride, use_batchnorm=False): 33 | super().__init__() 34 | self.kernel_size = kernel_size 35 | self.conv_base = nn.Conv2d( 36 | in_channels, out_channels, kernel_size=kernel_size, stride=stride) 37 | self.pad = nn.ConstantPad2d([kernel_size // 2] * 4, value=0) 38 | if not use_batchnorm: 39 | self.normalize = torch.nn.GroupNorm(16, out_channels) 40 | else: 41 | self.normalize = torch.nn.BatchNorm2d(out_channels) 42 | self.activ = nn.ELU(inplace=True) 43 | 44 | def forward(self, x): 45 | """Runs the Conv2D layer.""" 46 | x = self.conv_base(self.pad(x)) 47 | return self.activ(self.normalize(x)) 48 | 49 | 50 | class ResidualConv(nn.Module): 51 | """2D Convolutional residual block with GroupNorm and ELU""" 52 | def __init__(self, in_channels, out_channels, stride, dropout=None, use_batchnorm=False): 53 | """ 54 | Initializes a ResidualConv object. 55 | Parameters 56 | ---------- 57 | in_channels : int 58 | Number of input channels 59 | out_channels : int 60 | Number of output channels 61 | stride : int 62 | Stride 63 | dropout : float 64 | Dropout value 65 | """ 66 | super().__init__() 67 | self.conv1 = Conv2D(in_channels, out_channels, 3, stride) 68 | self.conv2 = Conv2D(out_channels, out_channels, 3, 1) 69 | self.conv3 = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride) 70 | if not use_batchnorm: 71 | self.normalize = torch.nn.GroupNorm(16, out_channels) 72 | else: 73 | self.normalize = torch.nn.BatchNorm2d(out_channels) 74 | self.activ = nn.ELU(inplace=True) 75 | 76 | if dropout: 77 | self.conv3 = nn.Sequential(self.conv3, nn.Dropout2d(dropout)) 78 | 79 | def forward(self, x): 80 | """Runs the ResidualConv layer.""" 81 | x_out = self.conv1(x) 82 | x_out = self.conv2(x_out) 83 | shortcut = self.conv3(x) 84 | return self.activ(self.normalize(x_out + shortcut)) 85 | 86 | 87 | def ResidualBlock(in_channels, out_channels, num_blocks, stride, dropout=None, use_batchnorm=False): 88 | """ 89 | Returns a ResidualBlock with various ResidualConv layers. 
90 | Parameters 91 | ---------- 92 | in_channels : int 93 | Number of input channels 94 | out_channels : int 95 | Number of output channels 96 | num_blocks : int 97 | Number of residual blocks 98 | stride : int 99 | Stride 100 | dropout : float 101 | Dropout value 102 | """ 103 | layers = [ResidualConv(in_channels, out_channels, stride, dropout=dropout,use_batchnorm=use_batchnorm)] 104 | for i in range(1, num_blocks): 105 | layers.append(ResidualConv(out_channels, out_channels, 1, dropout=dropout)) 106 | return nn.Sequential(*layers) 107 | 108 | 109 | class InvDepth(nn.Module): 110 | """Inverse depth layer""" 111 | def __init__(self, in_channels, out_channels=1, min_depth=0.5, activation = 'sigmoid'): 112 | """ 113 | Initializes an InvDepth object. 114 | Parameters 115 | ---------- 116 | in_channels : int 117 | Number of input channels 118 | out_channels : int 119 | Number of output channels 120 | min_depth : float 121 | Minimum depth value to calculate 122 | """ 123 | super().__init__() 124 | self.min_depth = min_depth 125 | self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1) 126 | self.pad = nn.ConstantPad2d([1] * 4, value=0) 127 | # self.activ = nn.Sigmoid() 128 | self.activ = activations[activation] # changed it to dict because for albedo and ambient should use linear activations 129 | 130 | def forward(self, x): 131 | """Runs the InvDepth layer.""" 132 | x = self.conv1(self.pad(x)) 133 | return self.activ(x) / self.min_depth 134 | 135 | ######################################################################################################################## 136 | 137 | def packing(x, r=2): 138 | """ 139 | Takes a [B,C,H,W] tensor and returns a [B,(r^2)C,H/r,W/r] tensor, by concatenating 140 | neighbor spatial pixels as extra channels. It is the inverse of nn.PixelShuffle 141 | (if you apply both sequentially you should get the same tensor) 142 | Parameters 143 | ---------- 144 | x : torch.Tensor [B,C,H,W] 145 | Input tensor 146 | r : int 147 | Packing ratio 148 | Returns 149 | ------- 150 | out : torch.Tensor [B,(r^2)C,H/r,W/r] 151 | Packed tensor 152 | """ 153 | b, c, h, w = x.shape 154 | out_channel = c * (r ** 2) 155 | out_h, out_w = h // r, w // r 156 | x = x.contiguous().view(b, c, out_h, r, out_w, r) 157 | return x.permute(0, 1, 3, 5, 2, 4).contiguous().view(b, out_channel, out_h, out_w) 158 | 159 | ######################################################################################################################## 160 | 161 | class PackLayerConv2d(nn.Module): 162 | """ 163 | Packing layer with 2d convolutions. Takes a [B,C,H,W] tensor, packs it 164 | into [B,(r^2)C,H/r,W/r] and then convolves it to produce [B,C,H/r,W/r]. 165 | """ 166 | def __init__(self, in_channels, kernel_size, r=2): 167 | """ 168 | Initializes a PackLayerConv2d object. 169 | Parameters 170 | ---------- 171 | in_channels : int 172 | Number of input channels 173 | kernel_size : int 174 | Kernel size 175 | r : int 176 | Packing ratio 177 | """ 178 | super().__init__() 179 | self.conv = Conv2D(in_channels * (r ** 2), in_channels, kernel_size, 1) 180 | self.pack = partial(packing, r=r) 181 | 182 | def forward(self, x): 183 | """Runs the PackLayerConv2d layer.""" 184 | x = self.pack(x) 185 | x = self.conv(x) 186 | return x 187 | 188 | 189 | class UnpackLayerConv2d(nn.Module): 190 | """ 191 | Unpacking layer with 2d convolutions. Takes a [B,C,H,W] tensor, convolves it 192 | to produce [B,(r^2)C,H,W] and then unpacks it to produce [B,C,rH,rW]. 
193 | """ 194 | def __init__(self, in_channels, out_channels, kernel_size, r=2): 195 | """ 196 | Initializes a UnpackLayerConv2d object. 197 | Parameters 198 | ---------- 199 | in_channels : int 200 | Number of input channels 201 | out_channels : int 202 | Number of output channels 203 | kernel_size : int 204 | Kernel size 205 | r : int 206 | Packing ratio 207 | """ 208 | super().__init__() 209 | self.conv = Conv2D(in_channels, out_channels * (r ** 2), kernel_size, 1) 210 | self.unpack = nn.PixelShuffle(r) 211 | 212 | def forward(self, x): 213 | """Runs the UnpackLayerConv2d layer.""" 214 | x = self.conv(x) 215 | x = self.unpack(x) 216 | return x 217 | 218 | ######################################################################################################################## 219 | 220 | class PackLayerConv3d(nn.Module): 221 | """ 222 | Packing layer with 3d convolutions. Takes a [B,C,H,W] tensor, packs it 223 | into [B,(r^2)C,H/r,W/r] and then convolves it to produce [B,C,H/r,W/r]. 224 | """ 225 | def __init__(self, in_channels, kernel_size, r=2, d=8): 226 | """ 227 | Initializes a PackLayerConv3d object. 228 | Parameters 229 | ---------- 230 | in_channels : int 231 | Number of input channels 232 | kernel_size : int 233 | Kernel size 234 | r : int 235 | Packing ratio 236 | d : int 237 | Number of 3D features 238 | """ 239 | super().__init__() 240 | self.conv = Conv2D(in_channels * (r ** 2) * d, in_channels, kernel_size, 1) 241 | self.pack = partial(packing, r=r) 242 | self.conv3d = nn.Conv3d(1, d, kernel_size=(3, 3, 3), 243 | stride=(1, 1, 1), padding=(1, 1, 1)) 244 | 245 | def forward(self, x): 246 | """Runs the PackLayerConv3d layer.""" 247 | x = self.pack(x) 248 | x = x.unsqueeze(1) 249 | x = self.conv3d(x) 250 | b, c, d, h, w = x.shape 251 | x = x.view(b, c * d, h, w) 252 | x = self.conv(x) 253 | return x 254 | 255 | 256 | class UnpackLayerConv3d(nn.Module): 257 | """ 258 | Unpacking layer with 3d convolutions. Takes a [B,C,H,W] tensor, convolves it 259 | to produce [B,(r^2)C,H,W] and then unpacks it to produce [B,C,rH,rW]. 260 | """ 261 | def __init__(self, in_channels, out_channels, kernel_size, r=2, d=8): 262 | """ 263 | Initializes a UnpackLayerConv3d object. 264 | Parameters 265 | ---------- 266 | in_channels : int 267 | Number of input channels 268 | out_channels : int 269 | Number of output channels 270 | kernel_size : int 271 | Kernel size 272 | r : int 273 | Packing ratio 274 | d : int 275 | Number of 3D features 276 | """ 277 | super().__init__() 278 | self.conv = Conv2D(in_channels, out_channels * (r ** 2) // d, kernel_size, 1) 279 | self.unpack = nn.PixelShuffle(r) 280 | self.conv3d = nn.Conv3d(1, d, kernel_size=(3, 3, 3), 281 | stride=(1, 1, 1), padding=(1, 1, 1)) 282 | 283 | def forward(self, x): 284 | """Runs the UnpackLayerConv3d layer.""" 285 | x = self.conv(x) 286 | x = x.unsqueeze(1) 287 | x = self.conv3d(x) 288 | b, c, d, h, w = x.shape 289 | x = x.view(b, c * d, h, w) 290 | x = self.unpack(x) 291 | return x 292 | 293 | ######################################################################################################################## -------------------------------------------------------------------------------- /src/networks/layers/resnet/depth_decoder.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 
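The packing/unpacking layers above are the core of PackNet: `packing` folds every r x r spatial neighbourhood into channels, and `nn.PixelShuffle` undoes it exactly, which lets the network change resolution without the information loss of pooling. The round-trip claim in the docstring can be checked directly; the snippet below copies `packing` from the file above (docstring shortened) and verifies it against `nn.PixelShuffle` on a random tensor whose sizes are arbitrary example values.

```
import torch
import torch.nn as nn

def packing(x, r=2):
    """Fold r x r spatial neighbourhoods into channels (inverse of nn.PixelShuffle)."""
    b, c, h, w = x.shape
    out_channel = c * (r ** 2)
    out_h, out_w = h // r, w // r
    x = x.contiguous().view(b, c, out_h, r, out_w, r)
    return x.permute(0, 1, 3, 5, 2, 4).contiguous().view(b, out_channel, out_h, out_w)

x = torch.randn(2, 4, 16, 16)
packed = packing(x, r=2)
print(packed.shape)                    # torch.Size([2, 16, 8, 8])

unpacked = nn.PixelShuffle(2)(packed)  # exact inverse: only reshapes and permutes
print(torch.equal(unpacked, x))        # True
```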
2 | 3 | # Adapted from monodepth2 4 | # https://github.com/nianticlabs/monodepth2/blob/master/networks/depth_decoder.py 5 | 6 | from __future__ import absolute_import, division, print_function 7 | 8 | import numpy as np 9 | import torch 10 | import torch.nn as nn 11 | 12 | from collections import OrderedDict 13 | from .layers import ConvBlock, Conv3x3, upsample 14 | 15 | 16 | class DepthDecoder(nn.Module): 17 | def __init__(self, num_ch_enc, scales=range(4), num_output_channels=1, use_skips=True): 18 | super(DepthDecoder, self).__init__() 19 | 20 | self.num_output_channels = num_output_channels 21 | self.use_skips = use_skips 22 | self.upsample_mode = 'nearest' 23 | self.scales = scales 24 | 25 | self.num_ch_enc = num_ch_enc 26 | self.num_ch_dec = np.array([16, 32, 64, 128, 256]) 27 | 28 | # decoder 29 | self.convs = OrderedDict() 30 | for i in range(4, -1, -1): 31 | # upconv_0 32 | num_ch_in = self.num_ch_enc[-1] if i == 4 else self.num_ch_dec[i + 1] 33 | num_ch_out = self.num_ch_dec[i] 34 | self.convs[("upconv", i, 0)] = ConvBlock(num_ch_in, num_ch_out) 35 | 36 | # upconv_1 37 | num_ch_in = self.num_ch_dec[i] 38 | if self.use_skips and i > 0: 39 | num_ch_in += self.num_ch_enc[i - 1] 40 | num_ch_out = self.num_ch_dec[i] 41 | self.convs[("upconv", i, 1)] = ConvBlock(num_ch_in, num_ch_out) 42 | 43 | for s in self.scales: 44 | self.convs[("dispconv", s)] = Conv3x3(self.num_ch_dec[s], self.num_output_channels) 45 | 46 | self.decoder = nn.ModuleList(list(self.convs.values())) 47 | self.sigmoid = nn.Sigmoid() 48 | 49 | def forward(self, input_features): 50 | self.outputs = {} 51 | 52 | # decoder 53 | x = input_features[-1] 54 | for i in range(4, -1, -1): 55 | x = self.convs[("upconv", i, 0)](x) 56 | x = [upsample(x)] 57 | if self.use_skips and i > 0: 58 | x += [input_features[i - 1]] 59 | x = torch.cat(x, 1) 60 | x = self.convs[("upconv", i, 1)](x) 61 | if i in self.scales: 62 | self.outputs[("disp", i)] = self.sigmoid(self.convs[("dispconv", i)](x)) 63 | 64 | return self.outputs -------------------------------------------------------------------------------- /src/networks/layers/resnet/layers.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research Institute. All rights reserved. 2 | 3 | # Adapted from monodepth2 4 | # https://github.com/nianticlabs/monodepth2/blob/master/layers.py 5 | 6 | from __future__ import absolute_import, division, print_function 7 | 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | 11 | 12 | def disp_to_depth(disp, min_depth, max_depth): 13 | """Convert network's sigmoid output into depth prediction 14 | The formula for this conversion is given in the 'additional considerations' 15 | section of the paper. 
16 | """ 17 | min_disp = 1 / max_depth 18 | max_disp = 1 / min_depth 19 | scaled_disp = min_disp + (max_disp - min_disp) * disp 20 | depth = 1 / scaled_disp 21 | return scaled_disp, depth 22 | 23 | 24 | class ConvBlock(nn.Module): 25 | """Layer to perform a convolution followed by ELU 26 | """ 27 | def __init__(self, in_channels, out_channels): 28 | super(ConvBlock, self).__init__() 29 | 30 | self.conv = Conv3x3(in_channels, out_channels) 31 | self.nonlin = nn.ELU(inplace=True) 32 | 33 | def forward(self, x): 34 | out = self.conv(x) 35 | out = self.nonlin(out) 36 | return out 37 | 38 | 39 | class Conv3x3(nn.Module): 40 | """Layer to pad and convolve input 41 | """ 42 | def __init__(self, in_channels, out_channels, use_refl=True): 43 | super(Conv3x3, self).__init__() 44 | 45 | if use_refl: 46 | self.pad = nn.ReflectionPad2d(1) 47 | else: 48 | self.pad = nn.ZeroPad2d(1) 49 | self.conv = nn.Conv2d(int(in_channels), int(out_channels), 3) 50 | 51 | def forward(self, x): 52 | out = self.pad(x) 53 | out = self.conv(out) 54 | return out 55 | 56 | 57 | def upsample(x): 58 | """Upsample input tensor by a factor of 2 59 | """ 60 | return F.interpolate(x, scale_factor=2, mode="nearest") -------------------------------------------------------------------------------- /src/networks/layers/resnet/resnet_encoder.py: -------------------------------------------------------------------------------- 1 | # Copyright Niantic 2019. Patent Pending. All rights reserved. 2 | # 3 | # This software is licensed under the terms of the Monodepth2 licence 4 | # which allows for non-commercial use only, the full terms of which are made 5 | # available in the LICENSE file. 6 | 7 | from __future__ import absolute_import, division, print_function 8 | 9 | import numpy as np 10 | 11 | import torch 12 | import torch.nn as nn 13 | import torchvision.models as models 14 | import torch.utils.model_zoo as model_zoo 15 | 16 | 17 | class ResNetMultiImageInput(models.ResNet): 18 | """Constructs a resnet model with varying number of input images. 19 | Adapted from https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py 20 | """ 21 | def __init__(self, block, layers, num_classes=1000, num_input_images=1): 22 | super(ResNetMultiImageInput, self).__init__(block, layers) 23 | self.inplanes = 64 24 | self.conv1 = nn.Conv2d( 25 | num_input_images * 3, 64, kernel_size=7, stride=2, padding=3, bias=False) 26 | self.bn1 = nn.BatchNorm2d(64) 27 | self.relu = nn.ReLU(inplace=True) 28 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 29 | self.layer1 = self._make_layer(block, 64, layers[0]) 30 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 31 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 32 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 33 | 34 | for m in self.modules(): 35 | if isinstance(m, nn.Conv2d): 36 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 37 | elif isinstance(m, nn.BatchNorm2d): 38 | nn.init.constant_(m.weight, 1) 39 | nn.init.constant_(m.bias, 0) 40 | 41 | 42 | def resnet_multiimage_input(num_layers, pretrained=False, num_input_images=1): 43 | """Constructs a ResNet model. 44 | Args: 45 | num_layers (int): Number of resnet layers. 
Must be 18 or 50 46 | pretrained (bool): If True, returns a model pre-trained on ImageNet 47 | num_input_images (int): Number of frames stacked as input 48 | """ 49 | assert num_layers in [18, 50], "Can only run with 18 or 50 layer resnet" 50 | blocks = {18: [2, 2, 2, 2], 50: [3, 4, 6, 3]}[num_layers] 51 | block_type = {18: models.resnet.BasicBlock, 50: models.resnet.Bottleneck}[num_layers] 52 | model = ResNetMultiImageInput(block_type, blocks, num_input_images=num_input_images) 53 | 54 | if pretrained: 55 | loaded = model_zoo.load_url(models.resnet.model_urls['resnet{}'.format(num_layers)]) 56 | loaded['conv1.weight'] = torch.cat( 57 | [loaded['conv1.weight']] * num_input_images, 1) / num_input_images 58 | model.load_state_dict(loaded) 59 | return model 60 | 61 | 62 | class ResnetEncoder(nn.Module): 63 | """Pytorch module for a resnet encoder 64 | """ 65 | def __init__(self, num_layers, pretrained, num_input_images=1): 66 | super(ResnetEncoder, self).__init__() 67 | 68 | self.num_ch_enc = np.array([64, 64, 128, 256, 512]) 69 | 70 | resnets = {18: models.resnet18, 71 | 34: models.resnet34, 72 | 50: models.resnet50, 73 | 101: models.resnet101, 74 | 152: models.resnet152} 75 | 76 | if num_layers not in resnets: 77 | raise ValueError("{} is not a valid number of resnet layers".format(num_layers)) 78 | 79 | if num_input_images > 1: 80 | self.encoder = resnet_multiimage_input(num_layers, pretrained, num_input_images) 81 | else: 82 | self.encoder = resnets[num_layers](pretrained) 83 | 84 | if num_layers > 34: 85 | self.num_ch_enc[1:] *= 4 86 | 87 | def forward(self, input_image): 88 | self.features = [] 89 | x = (input_image - 0.45) / 0.225 90 | x = self.encoder.conv1(x) 91 | x = self.encoder.bn1(x) 92 | self.features.append(self.encoder.relu(x)) 93 | self.features.append(self.encoder.layer1(self.encoder.maxpool(self.features[-1]))) 94 | self.features.append(self.encoder.layer2(self.features[-1])) 95 | self.features.append(self.encoder.layer3(self.features[-1])) 96 | self.features.append(self.encoder.layer4(self.features[-1])) 97 | 98 | return self.features -------------------------------------------------------------------------------- /src/networks/layers/resnet_encoder.py: -------------------------------------------------------------------------------- 1 | # Copyright Niantic 2019. Patent Pending. All rights reserved. 2 | # 3 | # This software is licensed under the terms of the Monodepth2 licence 4 | # which allows for non-commercial use only, the full terms of which are made 5 | # available in the LICENSE file. 6 | 7 | from __future__ import absolute_import, division, print_function 8 | 9 | import numpy as np 10 | 11 | import torch 12 | import torch.nn as nn 13 | import torchvision.models as models 14 | import torch.utils.model_zoo as model_zoo 15 | 16 | 17 | class ResNetMultiImageInput(models.ResNet): 18 | """Constructs a resnet model with varying number of input images. 
19 | Adapted from https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py 20 | """ 21 | def __init__(self, block, layers, num_classes=1000, num_input_images=1): 22 | super(ResNetMultiImageInput, self).__init__(block, layers) 23 | self.inplanes = 64 24 | self.conv1 = nn.Conv2d( 25 | num_input_images * 3, 64, kernel_size=7, stride=2, padding=3, bias=False) 26 | self.bn1 = nn.BatchNorm2d(64) 27 | self.relu = nn.ReLU(inplace=True) 28 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 29 | self.layer1 = self._make_layer(block, 64, layers[0]) 30 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 31 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 32 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 33 | 34 | for m in self.modules(): 35 | if isinstance(m, nn.Conv2d): 36 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 37 | elif isinstance(m, nn.BatchNorm2d): 38 | nn.init.constant_(m.weight, 1) 39 | nn.init.constant_(m.bias, 0) 40 | 41 | 42 | def resnet_multiimage_input(num_layers, pretrained=False, num_input_images=1): 43 | """Constructs a ResNet model. 44 | Args: 45 | num_layers (int): Number of resnet layers. Must be 18 or 50 46 | pretrained (bool): If True, returns a model pre-trained on ImageNet 47 | num_input_images (int): Number of frames stacked as input 48 | """ 49 | assert num_layers in [18, 50], "Can only run with 18 or 50 layer resnet" 50 | blocks = {18: [2, 2, 2, 2], 50: [3, 4, 6, 3]}[num_layers] 51 | block_type = {18: models.resnet.BasicBlock, 50: models.resnet.Bottleneck}[num_layers] 52 | model = ResNetMultiImageInput(block_type, blocks, num_input_images=num_input_images) 53 | 54 | if pretrained: 55 | loaded = model_zoo.load_url(models.resnet.model_urls['resnet{}'.format(num_layers)]) 56 | loaded['conv1.weight'] = torch.cat( 57 | [loaded['conv1.weight']] * num_input_images, 1) / num_input_images 58 | model.load_state_dict(loaded) 59 | return model 60 | 61 | 62 | class ResnetEncoder(nn.Module): 63 | """Pytorch module for a resnet encoder 64 | """ 65 | def __init__(self, num_layers, pretrained, num_input_images=1): 66 | super(ResnetEncoder, self).__init__() 67 | 68 | self.num_ch_enc = np.array([64, 64, 128, 256, 512]) 69 | 70 | resnets = {18: models.resnet18, 71 | 34: models.resnet34, 72 | 50: models.resnet50, 73 | 101: models.resnet101, 74 | 152: models.resnet152} 75 | 76 | if num_layers not in resnets: 77 | raise ValueError("{} is not a valid number of resnet layers".format(num_layers)) 78 | 79 | if num_input_images > 1: 80 | self.encoder = resnet_multiimage_input(num_layers, pretrained, num_input_images) 81 | else: 82 | self.encoder = resnets[num_layers](pretrained) 83 | 84 | if num_layers > 34: 85 | self.num_ch_enc[1:] *= 4 86 | 87 | def forward(self, input_image): 88 | self.features = [] 89 | x = (input_image - 0.45) / 0.225 90 | x = self.encoder.conv1(x) 91 | x = self.encoder.bn1(x) 92 | self.features.append(self.encoder.relu(x)) 93 | self.features.append(self.encoder.layer1(self.encoder.maxpool(self.features[-1]))) 94 | self.features.append(self.encoder.layer2(self.features[-1])) 95 | self.features.append(self.encoder.layer3(self.features[-1])) 96 | self.features.append(self.encoder.layer4(self.features[-1])) 97 | 98 | return self.features 99 | -------------------------------------------------------------------------------- /src/networks/layers01.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Toyota Research 
Institute. All rights reserved. 2 | 3 | import torch 4 | import torch.nn as nn 5 | from functools import partial 6 | import torch.nn.functional as F 7 | 8 | ######################################################################################################################## 9 | activations = { 10 | 'tanh' : nn.Tanh(), 11 | 'sigmoid': nn.Sigmoid(), 12 | 'relu' : nn.ReLU(inplace=True), 13 | 'linear' : lambda x : x 14 | } 15 | 16 | class Conv2D(nn.Module): 17 | """ 18 | 2D convolution with GroupNorm and ELU 19 | Parameters 20 | ---------- 21 | in_channels : int 22 | Number of input channels 23 | out_channels : int 24 | Number of output channels 25 | kernel_size : int 26 | Kernel size 27 | stride : int 28 | Stride 29 | """ 30 | def __init__(self, in_channels, out_channels, kernel_size, stride): 31 | super().__init__() 32 | self.kernel_size = kernel_size 33 | self.conv_base = nn.Conv2d( 34 | in_channels, out_channels, kernel_size=kernel_size, stride=stride) 35 | self.pad = nn.ConstantPad2d([kernel_size // 2] * 4, value=0) 36 | self.normalize = torch.nn.GroupNorm(16, out_channels) 37 | self.activ = nn.ELU(inplace=True) 38 | 39 | def forward(self, x): 40 | """Runs the Conv2D layer.""" 41 | x = self.conv_base(self.pad(x)) 42 | return self.activ(self.normalize(x)) 43 | 44 | 45 | class ResidualConv(nn.Module): 46 | """2D Convolutional residual block with GroupNorm and ELU""" 47 | def __init__(self, in_channels, out_channels, stride, dropout=None): 48 | """ 49 | Initializes a ResidualConv object. 50 | Parameters 51 | ---------- 52 | in_channels : int 53 | Number of input channels 54 | out_channels : int 55 | Number of output channels 56 | stride : int 57 | Stride 58 | dropout : float 59 | Dropout value 60 | """ 61 | super().__init__() 62 | self.conv1 = Conv2D(in_channels, out_channels, 3, stride) 63 | self.conv2 = Conv2D(out_channels, out_channels, 3, 1) 64 | self.conv3 = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride) 65 | self.normalize = torch.nn.GroupNorm(16, out_channels) 66 | self.activ = nn.ELU(inplace=True) 67 | 68 | if dropout: 69 | self.conv3 = nn.Sequential(self.conv3, nn.Dropout2d(dropout)) 70 | 71 | def forward(self, x): 72 | """Runs the ResidualConv layer.""" 73 | x_out = self.conv1(x) 74 | x_out = self.conv2(x_out) 75 | shortcut = self.conv3(x) 76 | return self.activ(self.normalize(x_out + shortcut)) 77 | 78 | 79 | def ResidualBlock(in_channels, out_channels, num_blocks, stride, dropout=None): 80 | """ 81 | Returns a ResidualBlock with various ResidualConv layers. 82 | Parameters 83 | ---------- 84 | in_channels : int 85 | Number of input channels 86 | out_channels : int 87 | Number of output channels 88 | num_blocks : int 89 | Number of residual blocks 90 | stride : int 91 | Stride 92 | dropout : float 93 | Dropout value 94 | """ 95 | layers = [ResidualConv(in_channels, out_channels, stride, dropout=dropout)] 96 | for i in range(1, num_blocks): 97 | layers.append(ResidualConv(out_channels, out_channels, 1, dropout=dropout)) 98 | return nn.Sequential(*layers) 99 | 100 | 101 | class InvDepth(nn.Module): 102 | """Inverse depth layer""" 103 | def __init__(self, in_channels, out_channels=1, min_depth=0.5, activation = 'sigmoid'): 104 | """ 105 | Initializes an InvDepth object. 
106 | Parameters 107 | ---------- 108 | in_channels : int 109 | Number of input channels 110 | out_channels : int 111 | Number of output channels 112 | min_depth : float 113 | Minimum depth value to calculate 114 | """ 115 | super().__init__() 116 | self.min_depth = min_depth 117 | self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1) 118 | self.pad = nn.ConstantPad2d([1] * 4, value=0) 119 | # self.activ = nn.Sigmoid() 120 | self.activ = activations[activation] # changed it to dict because for albedo and ambient should use linear activations 121 | 122 | def forward(self, x): 123 | """Runs the InvDepth layer.""" 124 | x = self.conv1(self.pad(x)) 125 | return self.activ(x) / self.min_depth 126 | 127 | ######################################################################################################################## 128 | 129 | def packing(x, r=2): 130 | """ 131 | Takes a [B,C,H,W] tensor and returns a [B,(r^2)C,H/r,W/r] tensor, by concatenating 132 | neighbor spatial pixels as extra channels. It is the inverse of nn.PixelShuffle 133 | (if you apply both sequentially you should get the same tensor) 134 | Parameters 135 | ---------- 136 | x : torch.Tensor [B,C,H,W] 137 | Input tensor 138 | r : int 139 | Packing ratio 140 | Returns 141 | ------- 142 | out : torch.Tensor [B,(r^2)C,H/r,W/r] 143 | Packed tensor 144 | """ 145 | b, c, h, w = x.shape 146 | out_channel = c * (r ** 2) 147 | out_h, out_w = h // r, w // r 148 | x = x.contiguous().view(b, c, out_h, r, out_w, r) 149 | return x.permute(0, 1, 3, 5, 2, 4).contiguous().view(b, out_channel, out_h, out_w) 150 | 151 | ######################################################################################################################## 152 | 153 | class PackLayerConv2d(nn.Module): 154 | """ 155 | Packing layer with 2d convolutions. Takes a [B,C,H,W] tensor, packs it 156 | into [B,(r^2)C,H/r,W/r] and then convolves it to produce [B,C,H/r,W/r]. 157 | """ 158 | def __init__(self, in_channels, kernel_size, r=2): 159 | """ 160 | Initializes a PackLayerConv2d object. 161 | Parameters 162 | ---------- 163 | in_channels : int 164 | Number of input channels 165 | kernel_size : int 166 | Kernel size 167 | r : int 168 | Packing ratio 169 | """ 170 | super().__init__() 171 | self.conv = Conv2D(in_channels * (r ** 2), in_channels, kernel_size, 1) 172 | self.pack = partial(packing, r=r) 173 | 174 | def forward(self, x): 175 | """Runs the PackLayerConv2d layer.""" 176 | x = self.pack(x) 177 | x = self.conv(x) 178 | return x 179 | 180 | 181 | class UnpackLayerConv2d(nn.Module): 182 | """ 183 | Unpacking layer with 2d convolutions. Takes a [B,C,H,W] tensor, convolves it 184 | to produce [B,(r^2)C,H,W] and then unpacks it to produce [B,C,rH,rW]. 185 | """ 186 | def __init__(self, in_channels, out_channels, kernel_size, r=2): 187 | """ 188 | Initializes a UnpackLayerConv2d object. 
189 | Parameters 190 | ---------- 191 | in_channels : int 192 | Number of input channels 193 | out_channels : int 194 | Number of output channels 195 | kernel_size : int 196 | Kernel size 197 | r : int 198 | Packing ratio 199 | """ 200 | super().__init__() 201 | self.conv = Conv2D(in_channels, out_channels * (r ** 2), kernel_size, 1) 202 | self.unpack = nn.PixelShuffle(r) 203 | 204 | def forward(self, x): 205 | """Runs the UnpackLayerConv2d layer.""" 206 | x = self.conv(x) 207 | x = self.unpack(x) 208 | return x 209 | 210 | ######################################################################################################################## 211 | 212 | class PackLayerConv3d(nn.Module): 213 | """ 214 | Packing layer with 3d convolutions. Takes a [B,C,H,W] tensor, packs it 215 | into [B,(r^2)C,H/r,W/r] and then convolves it to produce [B,C,H/r,W/r]. 216 | """ 217 | def __init__(self, in_channels, kernel_size, r=2, d=8): 218 | """ 219 | Initializes a PackLayerConv3d object. 220 | Parameters 221 | ---------- 222 | in_channels : int 223 | Number of input channels 224 | kernel_size : int 225 | Kernel size 226 | r : int 227 | Packing ratio 228 | d : int 229 | Number of 3D features 230 | """ 231 | super().__init__() 232 | self.conv = Conv2D(in_channels * (r ** 2) * d, in_channels, kernel_size, 1) 233 | self.pack = partial(packing, r=r) 234 | self.conv3d = nn.Conv3d(1, d, kernel_size=(3, 3, 3), 235 | stride=(1, 1, 1), padding=(1, 1, 1)) 236 | 237 | def forward(self, x): 238 | """Runs the PackLayerConv3d layer.""" 239 | x = self.pack(x) 240 | x = x.unsqueeze(1) 241 | x = self.conv3d(x) 242 | b, c, d, h, w = x.shape 243 | x = x.view(b, c * d, h, w) 244 | x = self.conv(x) 245 | return x 246 | 247 | 248 | class UnpackLayerConv3d(nn.Module): 249 | """ 250 | Unpacking layer with 3d convolutions. Takes a [B,C,H,W] tensor, convolves it 251 | to produce [B,(r^2)C,H,W] and then unpacks it to produce [B,C,rH,rW]. 252 | """ 253 | def __init__(self, in_channels, out_channels, kernel_size, r=2, d=8): 254 | """ 255 | Initializes a UnpackLayerConv3d object. 256 | Parameters 257 | ---------- 258 | in_channels : int 259 | Number of input channels 260 | out_channels : int 261 | Number of output channels 262 | kernel_size : int 263 | Kernel size 264 | r : int 265 | Packing ratio 266 | d : int 267 | Number of 3D features 268 | """ 269 | super().__init__() 270 | self.conv = Conv2D(in_channels, out_channels * (r ** 2) // d, kernel_size, 1) 271 | self.unpack = nn.PixelShuffle(r) 272 | self.conv3d = nn.Conv3d(1, d, kernel_size=(3, 3, 3), 273 | stride=(1, 1, 1), padding=(1, 1, 1)) 274 | 275 | def forward(self, x): 276 | """Runs the UnpackLayerConv3d layer.""" 277 | x = self.conv(x) 278 | x = x.unsqueeze(1) 279 | x = self.conv3d(x) 280 | b, c, d, h, w = x.shape 281 | x = x.view(b, c * d, h, w) 282 | x = self.unpack(x) 283 | return x -------------------------------------------------------------------------------- /src/networks/pose/pose_cnn.py: -------------------------------------------------------------------------------- 1 | # Copyright Niantic 2019. Patent Pending. All rights reserved. 2 | # 3 | # This software is licensed under the terms of the Monodepth2 licence 4 | # which allows for non-commercial use only, the full terms of which are made 5 | # available in the LICENSE file. 
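The InvDepth head defined earlier in this file predicts inverse depth: a 3x3 convolution, an activation picked from the `activations` dict, and a division by `min_depth`. With the default sigmoid the output stays in (0, 1/min_depth), so the corresponding depth never falls below `min_depth`; the 'linear' entry leaves the albedo and ambient heads unconstrained. Below is a small self-contained check of that range; the class is a trimmed copy of the source (only the sigmoid and linear activations are kept) and the input sizes are arbitrary example values.

```
import torch
import torch.nn as nn

activations = {'sigmoid': nn.Sigmoid(), 'linear': lambda x: x}

class InvDepth(nn.Module):
    """Trimmed copy of the InvDepth head: 3x3 conv -> activation -> divide by min_depth."""
    def __init__(self, in_channels, out_channels=1, min_depth=0.5, activation='sigmoid'):
        super().__init__()
        self.min_depth = min_depth
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1)
        self.pad = nn.ConstantPad2d([1] * 4, value=0)
        self.activ = activations[activation]

    def forward(self, x):
        return self.activ(self.conv1(self.pad(x))) / self.min_depth

head = InvDepth(in_channels=32, min_depth=0.5)           # sigmoid head -> bounded inverse depth
inv_depth = head(torch.randn(1, 32, 48, 160))
depth = 1.0 / inv_depth
print(inv_depth.max() <= 1.0 / 0.5, depth.min() >= 0.5)  # both True: depth >= min_depth
```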
6 | 7 | from __future__ import absolute_import, division, print_function 8 | 9 | import torch 10 | import torch.nn as nn 11 | 12 | 13 | class PoseCNN(nn.Module): 14 | def __init__(self, num_input_frames): 15 | super(PoseCNN, self).__init__() 16 | 17 | self.num_input_frames = num_input_frames 18 | 19 | self.convs = {} 20 | self.convs[0] = nn.Conv2d(3 * num_input_frames, 16, 7, 2, 3) 21 | self.convs[1] = nn.Conv2d(16, 32, 5, 2, 2) 22 | self.convs[2] = nn.Conv2d(32, 64, 3, 2, 1) 23 | self.convs[3] = nn.Conv2d(64, 128, 3, 2, 1) 24 | self.convs[4] = nn.Conv2d(128, 256, 3, 2, 1) 25 | self.convs[5] = nn.Conv2d(256, 256, 3, 2, 1) 26 | self.convs[6] = nn.Conv2d(256, 256, 3, 2, 1) 27 | 28 | self.pose_conv = nn.Conv2d(256, 6 * (num_input_frames - 1), 1) 29 | 30 | self.num_convs = len(self.convs) 31 | 32 | self.relu = nn.ReLU(True) 33 | 34 | self.net = nn.ModuleList(list(self.convs.values())) 35 | 36 | def forward(self, out): 37 | 38 | for i in range(self.num_convs): 39 | out = self.convs[i](out) 40 | out = self.relu(out) 41 | 42 | out = self.pose_conv(out) 43 | out = out.mean(3).mean(2) 44 | 45 | out = 0.01 * out.view(-1, self.num_input_frames - 1, 1, 6) 46 | 47 | axisangle = out[..., :3] 48 | translation = out[..., 3:] 49 | 50 | return axisangle, translation 51 | -------------------------------------------------------------------------------- /src/networks/pose/pose_decoder.py: -------------------------------------------------------------------------------- 1 | # Copyright Niantic 2019. Patent Pending. All rights reserved. 2 | # 3 | # This software is licensed under the terms of the Monodepth2 licence 4 | # which allows for non-commercial use only, the full terms of which are made 5 | # available in the LICENSE file. 6 | 7 | from __future__ import absolute_import, division, print_function 8 | 9 | import torch 10 | import torch.nn as nn 11 | from collections import OrderedDict 12 | 13 | 14 | class PoseDecoder(nn.Module): 15 | def __init__(self, num_ch_enc, num_input_features, num_frames_to_predict_for=None, stride=1): 16 | super(PoseDecoder, self).__init__() 17 | 18 | self.num_ch_enc = num_ch_enc 19 | self.num_input_features = num_input_features 20 | 21 | if num_frames_to_predict_for is None: 22 | num_frames_to_predict_for = num_input_features - 1 23 | self.num_frames_to_predict_for = num_frames_to_predict_for 24 | 25 | self.convs = OrderedDict() 26 | self.convs[("squeeze")] = nn.Conv2d(self.num_ch_enc[-1], 256, 1) 27 | self.convs[("pose", 0)] = nn.Conv2d(num_input_features * 256, 256, 3, stride, 1) 28 | self.convs[("pose", 1)] = nn.Conv2d(256, 256, 3, stride, 1) 29 | self.convs[("pose", 2)] = nn.Conv2d(256, 6 * num_frames_to_predict_for, 1) 30 | 31 | self.relu = nn.ReLU() 32 | 33 | self.net = nn.ModuleList(list(self.convs.values())) 34 | 35 | def forward(self, input_features): 36 | last_features = [f[-1] for f in input_features] 37 | 38 | cat_features = [self.relu(self.convs["squeeze"](f)) for f in last_features] 39 | cat_features = torch.cat(cat_features, 1) 40 | 41 | out = cat_features 42 | for i in range(3): 43 | out = self.convs[("pose", i)](out) 44 | if i != 2: 45 | out = self.relu(out) 46 | 47 | out = out.mean(3).mean(2) 48 | 49 | out = 0.01 * out.view(-1, self.num_frames_to_predict_for, 1, 6) 50 | 51 | axisangle = out[..., :3] 52 | translation = out[..., 3:] 53 | 54 | return axisangle, translation 55 | -------------------------------------------------------------------------------- /src/networks/pose_cnn.py: -------------------------------------------------------------------------------- 1 | 
# Copyright Niantic 2019. Patent Pending. All rights reserved. 2 | # 3 | # This software is licensed under the terms of the Monodepth2 licence 4 | # which allows for non-commercial use only, the full terms of which are made 5 | # available in the LICENSE file. 6 | 7 | from __future__ import absolute_import, division, print_function 8 | 9 | import torch 10 | import torch.nn as nn 11 | 12 | 13 | class PoseCNN(nn.Module): 14 | def __init__(self, num_input_frames): 15 | super(PoseCNN, self).__init__() 16 | 17 | self.num_input_frames = num_input_frames 18 | 19 | self.convs = {} 20 | self.convs[0] = nn.Conv2d(3 * num_input_frames, 16, 7, 2, 3) 21 | self.convs[1] = nn.Conv2d(16, 32, 5, 2, 2) 22 | self.convs[2] = nn.Conv2d(32, 64, 3, 2, 1) 23 | self.convs[3] = nn.Conv2d(64, 128, 3, 2, 1) 24 | self.convs[4] = nn.Conv2d(128, 256, 3, 2, 1) 25 | self.convs[5] = nn.Conv2d(256, 256, 3, 2, 1) 26 | self.convs[6] = nn.Conv2d(256, 256, 3, 2, 1) 27 | 28 | self.pose_conv = nn.Conv2d(256, 6 * (num_input_frames - 1), 1) 29 | 30 | self.num_convs = len(self.convs) 31 | 32 | self.relu = nn.ReLU(True) 33 | 34 | self.net = nn.ModuleList(list(self.convs.values())) 35 | 36 | def forward(self, out): 37 | 38 | for i in range(self.num_convs): 39 | out = self.convs[i](out) 40 | out = self.relu(out) 41 | 42 | out = self.pose_conv(out) 43 | out = out.mean(3).mean(2) 44 | 45 | out = 0.01 * out.view(-1, self.num_input_frames - 1, 1, 6) 46 | 47 | axisangle = out[..., :3] 48 | translation = out[..., 3:] 49 | 50 | return axisangle, translation 51 | -------------------------------------------------------------------------------- /src/networks/pose_decoder.py: -------------------------------------------------------------------------------- 1 | # Copyright Niantic 2019. Patent Pending. All rights reserved. 2 | # 3 | # This software is licensed under the terms of the Monodepth2 licence 4 | # which allows for non-commercial use only, the full terms of which are made 5 | # available in the LICENSE file. 
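PoseCNN above stacks the input frames along the channel dimension, reduces them with seven stride-2 convolutions, and regresses six values per frame pair: an axis-angle rotation and a translation, both multiplied by 0.01 so the network starts near the identity motion. The tail of that computation is sketched below on a dummy feature map; the batch size, spatial size, and frame count are arbitrary example values standing in for the output of `pose_conv`.

```
import torch

batch, num_input_frames = 2, 2        # e.g. the current frame plus one adjacent gated frame
# Dummy pose_conv output: 6 * (num_input_frames - 1) channels on a coarse grid.
out = torch.randn(batch, 6 * (num_input_frames - 1), 3, 10)

out = out.mean(3).mean(2)                              # global average pool -> [B, 6*(N-1)]
out = 0.01 * out.view(-1, num_input_frames - 1, 1, 6)  # scale down for small initial motions

axisangle = out[..., :3]                               # [B, N-1, 1, 3] axis-angle rotations
translation = out[..., 3:]                             # [B, N-1, 1, 3] translations
print(axisangle.shape, translation.shape)
```

The ResNet-based PoseDecoder that follows ends with the same reshape and split, so both pose backbones hand the trainer an identical [B, N-1, 1, 6] parameterisation.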
6 | 7 | from __future__ import absolute_import, division, print_function 8 | 9 | import torch 10 | import torch.nn as nn 11 | from collections import OrderedDict 12 | 13 | 14 | class PoseDecoder(nn.Module): 15 | def __init__(self, num_ch_enc, num_input_features, num_frames_to_predict_for=None, stride=1): 16 | super(PoseDecoder, self).__init__() 17 | 18 | self.num_ch_enc = num_ch_enc 19 | self.num_input_features = num_input_features 20 | 21 | if num_frames_to_predict_for is None: 22 | num_frames_to_predict_for = num_input_features - 1 23 | self.num_frames_to_predict_for = num_frames_to_predict_for 24 | 25 | self.convs = OrderedDict() 26 | self.convs[("squeeze")] = nn.Conv2d(self.num_ch_enc[-1], 256, 1) 27 | self.convs[("pose", 0)] = nn.Conv2d(num_input_features * 256, 256, 3, stride, 1) 28 | self.convs[("pose", 1)] = nn.Conv2d(256, 256, 3, stride, 1) 29 | self.convs[("pose", 2)] = nn.Conv2d(256, 6 * num_frames_to_predict_for, 1) 30 | 31 | self.relu = nn.ReLU() 32 | 33 | self.net = nn.ModuleList(list(self.convs.values())) 34 | 35 | def forward(self, input_features): 36 | last_features = [f[-1] for f in input_features] 37 | 38 | cat_features = [self.relu(self.convs["squeeze"](f)) for f in last_features] 39 | cat_features = torch.cat(cat_features, 1) 40 | 41 | out = cat_features 42 | for i in range(3): 43 | out = self.convs[("pose", i)](out) 44 | if i != 2: 45 | out = self.relu(out) 46 | 47 | out = out.mean(3).mean(2) 48 | 49 | out = 0.01 * out.view(-1, self.num_frames_to_predict_for, 1, 6) 50 | 51 | axisangle = out[..., :3] 52 | translation = out[..., 3:] 53 | 54 | return axisangle, translation 55 | -------------------------------------------------------------------------------- /src/networks/resnet_encoder.py: -------------------------------------------------------------------------------- 1 | # Copyright Niantic 2019. Patent Pending. All rights reserved. 2 | # 3 | # This software is licensed under the terms of the Monodepth2 licence 4 | # which allows for non-commercial use only, the full terms of which are made 5 | # available in the LICENSE file. 6 | 7 | from __future__ import absolute_import, division, print_function 8 | 9 | import numpy as np 10 | 11 | import torch 12 | import torch.nn as nn 13 | import torchvision.models as models 14 | import torch.utils.model_zoo as model_zoo 15 | 16 | 17 | class ResNetMultiImageInput(models.ResNet): 18 | """Constructs a resnet model with varying number of input images. 
19 | Adapted from https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py 20 | """ 21 | def __init__(self, block, layers, num_classes=1000, num_input_images=1): 22 | super(ResNetMultiImageInput, self).__init__(block, layers) 23 | self.inplanes = 64 24 | self.conv1 = nn.Conv2d( 25 | num_input_images * 3, 64, kernel_size=7, stride=2, padding=3, bias=False) 26 | self.bn1 = nn.BatchNorm2d(64) 27 | self.relu = nn.ReLU(inplace=True) 28 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 29 | self.layer1 = self._make_layer(block, 64, layers[0]) 30 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 31 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 32 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 33 | 34 | for m in self.modules(): 35 | if isinstance(m, nn.Conv2d): 36 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 37 | elif isinstance(m, nn.BatchNorm2d): 38 | nn.init.constant_(m.weight, 1) 39 | nn.init.constant_(m.bias, 0) 40 | 41 | 42 | def resnet_multiimage_input(num_layers, pretrained=False, num_input_images=1): 43 | """Constructs a ResNet model. 44 | Args: 45 | num_layers (int): Number of resnet layers. Must be 18 or 50 46 | pretrained (bool): If True, returns a model pre-trained on ImageNet 47 | num_input_images (int): Number of frames stacked as input 48 | """ 49 | assert num_layers in [18, 50], "Can only run with 18 or 50 layer resnet" 50 | blocks = {18: [2, 2, 2, 2], 50: [3, 4, 6, 3]}[num_layers] 51 | block_type = {18: models.resnet.BasicBlock, 50: models.resnet.Bottleneck}[num_layers] 52 | model = ResNetMultiImageInput(block_type, blocks, num_input_images=num_input_images) 53 | 54 | if pretrained: 55 | loaded = model_zoo.load_url(models.resnet.model_urls['resnet{}'.format(num_layers)]) 56 | loaded['conv1.weight'] = torch.cat( 57 | [loaded['conv1.weight']] * num_input_images, 1) / num_input_images 58 | model.load_state_dict(loaded) 59 | return model 60 | 61 | 62 | class ResnetEncoder(nn.Module): 63 | """Pytorch module for a resnet encoder 64 | """ 65 | def __init__(self, num_layers, pretrained, num_input_images=1): 66 | super(ResnetEncoder, self).__init__() 67 | 68 | self.num_ch_enc = np.array([64, 64, 128, 256, 512]) 69 | 70 | resnets = {18: models.resnet18, 71 | 34: models.resnet34, 72 | 50: models.resnet50, 73 | 101: models.resnet101, 74 | 152: models.resnet152} 75 | 76 | if num_layers not in resnets: 77 | raise ValueError("{} is not a valid number of resnet layers".format(num_layers)) 78 | 79 | if num_input_images > 1: 80 | self.encoder = resnet_multiimage_input(num_layers, pretrained, num_input_images) 81 | else: 82 | self.encoder = resnets[num_layers](pretrained) 83 | 84 | if num_layers > 34: 85 | self.num_ch_enc[1:] *= 4 86 | 87 | def forward(self, input_image): 88 | self.features = [] 89 | x = (input_image - 0.45) / 0.225 90 | x = self.encoder.conv1(x) 91 | x = self.encoder.bn1(x) 92 | self.features.append(self.encoder.relu(x)) 93 | self.features.append(self.encoder.layer1(self.encoder.maxpool(self.features[-1]))) 94 | self.features.append(self.encoder.layer2(self.features[-1])) 95 | self.features.append(self.encoder.layer3(self.features[-1])) 96 | self.features.append(self.encoder.layer4(self.features[-1])) 97 | 98 | return self.features 99 | -------------------------------------------------------------------------------- /src/networks/resnet_encoder2.py: -------------------------------------------------------------------------------- 1 | # Copyright Niantic 2019. 
Patent Pending. All rights reserved. 2 | # 3 | # This software is licensed under the terms of the Monodepth2 licence 4 | # which allows for non-commercial use only, the full terms of which are made 5 | # available in the LICENSE file. 6 | 7 | from __future__ import absolute_import, division, print_function 8 | 9 | import numpy as np 10 | 11 | import torch 12 | import torch.nn as nn 13 | import torchvision.models as models 14 | import torch.utils.model_zoo as model_zoo 15 | 16 | 17 | class ResNetMultiImageInput(models.ResNet): 18 | """Constructs a resnet model with varying number of input images. 19 | Adapted from https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py 20 | """ 21 | def __init__(self, block, layers, num_classes=1000, num_input_images=1): 22 | super(ResNetMultiImageInput, self).__init__(block, layers) 23 | self.inplanes = 64 24 | self.conv1 = nn.Conv2d( 25 | num_input_images * 3, 64, kernel_size=7, stride=2, padding=3, bias=False) 26 | self.bn1 = nn.BatchNorm2d(64) 27 | self.relu = nn.ReLU(inplace=True) 28 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 29 | self.layer1 = self._make_layer(block, 64, layers[0]) 30 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 31 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 32 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 33 | 34 | for m in self.modules(): 35 | if isinstance(m, nn.Conv2d): 36 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 37 | elif isinstance(m, nn.BatchNorm2d): 38 | nn.init.constant_(m.weight, 1) 39 | nn.init.constant_(m.bias, 0) 40 | 41 | 42 | def resnet_multiimage_input(num_layers, pretrained=False, num_input_images=1): 43 | """Constructs a ResNet model. 44 | Args: 45 | num_layers (int): Number of resnet layers. 
Must be 18 or 50 46 | pretrained (bool): If True, returns a model pre-trained on ImageNet 47 | num_input_images (int): Number of frames stacked as input 48 | """ 49 | assert num_layers in [18, 50], "Can only run with 18 or 50 layer resnet" 50 | blocks = {18: [2, 2, 2, 2], 50: [3, 4, 6, 3]}[num_layers] 51 | block_type = {18: models.resnet.BasicBlock, 50: models.resnet.Bottleneck}[num_layers] 52 | model = ResNetMultiImageInput(block_type, blocks, num_input_images=num_input_images) 53 | 54 | if pretrained: 55 | loaded = model_zoo.load_url(models.resnet.model_urls['resnet{}'.format(num_layers)]) 56 | loaded['conv1.weight'] = torch.cat( 57 | [loaded['conv1.weight']] * num_input_images, 1) / num_input_images 58 | model.load_state_dict(loaded) 59 | return model 60 | 61 | 62 | class ResnetEncoder(nn.Module): 63 | """Pytorch module for a resnet encoder 64 | """ 65 | def __init__(self, num_layers, pretrained, num_input_images=1): 66 | super(ResnetEncoder, self).__init__() 67 | 68 | self.num_ch_enc = np.array([64, 64, 128, 256, 512]) 69 | 70 | resnets = {18: models.resnet18, 71 | 34: models.resnet34, 72 | 50: models.resnet50, 73 | 101: models.resnet101, 74 | 152: models.resnet152} 75 | 76 | if num_layers not in resnets: 77 | raise ValueError("{} is not a valid number of resnet layers".format(num_layers)) 78 | 79 | if num_input_images > 1: 80 | self.encoder = resnet_multiimage_input(num_layers, pretrained, num_input_images) 81 | else: 82 | self.encoder = resnets[num_layers](pretrained) 83 | 84 | if num_layers > 34: 85 | self.num_ch_enc[1:] *= 4 86 | 87 | def forward(self, input_image): 88 | self.features = [] 89 | x = (input_image - 0.45) / 0.225 90 | x = self.encoder.conv1(x) 91 | x = self.encoder.bn1(x) 92 | self.features.append(self.encoder.relu(x)) 93 | self.features.append(self.encoder.layer1(self.encoder.maxpool(self.features[-1]))) 94 | self.features.append(self.encoder.layer2(self.features[-1])) 95 | self.features.append(self.encoder.layer3(self.features[-1])) 96 | self.features.append(self.encoder.layer4(self.features[-1])) 97 | 98 | return self.features -------------------------------------------------------------------------------- /src/options.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import os 4 | import argparse 5 | 6 | class GatedOptions: 7 | 8 | def __init__(self) -> None: 9 | self.parser = argparse.ArgumentParser(description="Depth From Gated Profile Options") 10 | 11 | # PATH options 12 | self.parser.add_argument("--data_dir", 13 | type=str, 14 | required=True, 15 | help="directory gated dataset") 16 | self.parser.add_argument("--log_dir", 17 | type=str, 18 | required=True, 19 | help="directory to store logs") 20 | self.parser.add_argument("--coeff_fpath", 21 | type=str, 22 | required=True, 23 | help="file with stored chebychev coefficients") 24 | self.parser.add_argument("--depth_flat_world_fpath", 25 | type=str, 26 | required=False, 27 | help="path to flat world npz file") 28 | 29 | # TRAINING options 30 | self.parser.add_argument("--model_name", 31 | type=str, 32 | help="the name of the folder to save the model in", 33 | default="gated2gated") 34 | self.parser.add_argument("--model_type", 35 | type=str, 36 | help="model structure to use", 37 | default="multinetwork", 38 | choices=["multinetwork","multioutput"]) 39 | self.parser.add_argument("--depth_model", 40 | type=str, 41 | help="depth model to use", 42 | default="packnet", 43 | 
choices=["packnet","resnet","packnet_full"]) 44 | self.parser.add_argument("--img_ext", 45 | type=str, 46 | help="image extension to use", 47 | default="png", 48 | choices=["png","tiff"]) 49 | self.parser.add_argument("--exp_num", 50 | type=int, 51 | help="experiment number", 52 | default=-1) 53 | self.parser.add_argument("--exp_name", 54 | type=str, 55 | help="the name of the folder to save the model in", 56 | default="gated2gated") 57 | self.parser.add_argument("--exp_metainfo", 58 | type=str, 59 | default="Main Experiment", 60 | help="additional info regarding experiment") 61 | self.parser.add_argument("--height", 62 | type=int, 63 | default=512, 64 | help="crop height of the image") 65 | self.parser.add_argument("--width", 66 | type=int, 67 | default=1024, 68 | help="crop width of the image") 69 | self.parser.add_argument("--num_bits", 70 | type=int, 71 | help="number of bits for gated image intensity", 72 | default=10) 73 | self.parser.add_argument("--scales", 74 | nargs="+", 75 | type=int, 76 | help="scales used in the loss", 77 | default=[0,1,2,3]) 78 | self.parser.add_argument("--frame_ids", 79 | nargs="+", 80 | type=int, 81 | help="frames to load", 82 | default=[0, -1, 1]) 83 | self.parser.add_argument("--pose_model_type", 84 | type=str, 85 | help="normal or shared", 86 | default="separate_resnet", 87 | choices=["posecnn", "separate_resnet"]) 88 | self.parser.add_argument("--num_layers", 89 | type=int, 90 | help="number of resnet layers", 91 | default=18, 92 | choices=[18, 34, 50, 101, 152]) 93 | self.parser.add_argument("--weights_init", 94 | type=str, 95 | help="pretrained or scratch", 96 | default="pretrained", 97 | choices=["pretrained", "scratch"]) 98 | self.parser.add_argument("--pose_model_input", 99 | type=str, 100 | help="how many images the pose network gets", 101 | default="pairs", 102 | choices=["pairs", "all"]) 103 | self.parser.add_argument("--depth_normalizer", 104 | type=float, 105 | help="constant to normalize depth", 106 | default=150.0) 107 | self.parser.add_argument("--train_depth_normalizer", 108 | action='store_true', 109 | help="train only a single scalar constant,\ 110 | while freezing depth, pose, ambient, and albedo head") 111 | self.parser.add_argument("--min_depth", 112 | type=float, 113 | help="minimum depth", 114 | default=0.1) 115 | self.parser.add_argument("--max_depth", 116 | type=float, 117 | help="maximum depth", 118 | default=100.0) 119 | self.parser.add_argument("--snr_mask", 120 | action='store_true', 121 | help="whether to use SNR based mask for reprojection loss") 122 | self.parser.add_argument("--intensity_mask", 123 | action='store_true', 124 | help="whether to use Intensity based mask for reprojection loss") 125 | self.parser.add_argument("--min_snr_val", 126 | type=float, 127 | default=0.04, 128 | help="Minimum SNR value for SNR mask") 129 | self.parser.add_argument("--dataset", 130 | type=str, 131 | help="dataset to train on", 132 | default="gated", 133 | choices=["gated"]) 134 | self.parser.add_argument("--split", 135 | type=str, 136 | help="which training split to use", 137 | choices=["gated2gated"], 138 | default="gated2gated") 139 | self.parser.add_argument("--dropout", 140 | type=float, 141 | help="dropout rate for packnet", 142 | default=0.5) 143 | self.parser.add_argument("--feat_stack", 144 | type=str, 145 | help="whether to use concatenation(A) or Addition (B)", 146 | default="A", 147 | choices=["A", "B"]) 148 | self.parser.add_argument("--num_convs", 149 | type=int, 150 | help="number of up/down levels in UNet", 151 | 
default=4) 152 | 153 | # OPTIMIZATION OPTION 154 | self.parser.add_argument("--batch_size", 155 | type=int, 156 | help="batch size", 157 | default=1) 158 | self.parser.add_argument("--learning_rate", 159 | type=float, 160 | help="learning rate", 161 | default=1e-4) 162 | self.parser.add_argument("--start_epoch", 163 | type=int, 164 | help="start epoch to have non-zero starting option for continuing training", 165 | default=0) 166 | self.parser.add_argument("--num_epochs", 167 | type=int, 168 | help="number of epochs", 169 | default=20) 170 | self.parser.add_argument("--scheduler_step_size", 171 | type=int, 172 | help="step size of the scheduler", 173 | default=15) 174 | 175 | # LOADING options 176 | self.parser.add_argument("--load_weights_folder", 177 | type=str, 178 | help="name of model to load") 179 | self.parser.add_argument("--models_to_load", 180 | nargs="+", 181 | type=str, 182 | help="models to load", 183 | default=["depth", "pose_encoder", "pose"]) 184 | 185 | # ABLATION options 186 | self.parser.add_argument("--no_ssim", 187 | action="store_true", 188 | help="if not to use SSIM loss") 189 | self.parser.add_argument("--cycle_loss", 190 | help="if set, cycle loss is used", 191 | action="store_true") 192 | self.parser.add_argument("--cycle_weight", 193 | type=float, 194 | default=0.1, 195 | help="cycle loss weight") 196 | self.parser.add_argument("--temporal_loss", 197 | help="if set, temporal reprojection loss is used", 198 | action="store_true") 199 | self.parser.add_argument("--temporal_weight", 200 | type=float, 201 | default=1.0, 202 | help="temporal loss weight") 203 | self.parser.add_argument("--sim_gated", 204 | action="store_true", 205 | help="whether to generate gated simulation image") 206 | self.parser.add_argument("--disparity_smoothness", 207 | type=float, 208 | default=1e-3, 209 | help="disparity smoothnes weight") 210 | self.parser.add_argument("--v1_multiscale", 211 | help="if set, uses monodepth v1 multiscale", 212 | action="store_true") 213 | self.parser.add_argument("--disable_automasking", 214 | help="if set, doesn't do auto-masking", 215 | action="store_true") 216 | self.parser.add_argument("--avg_reprojection", 217 | help="if set, uses average reprojection loss", 218 | action="store_true") 219 | self.parser.add_argument("--infty_hole_mask", 220 | help="if set, uses a masking scheme to filter out points with infinite depth close to camera", 221 | action="store_true") 222 | self.parser.add_argument("--infty_epoch_start", 223 | type=int, 224 | help="start epoch to use infinity masks", 225 | default=0) 226 | self.parser.add_argument("--close_px_fact", 227 | type=float, 228 | help="factor to select close pixels to the image", 229 | default=0.995) 230 | self.parser.add_argument("--infty_hole_thresh", 231 | type=float, 232 | help="threshold to consider infinity points", 233 | default=0.01) 234 | self.parser.add_argument("--use_batchnorm", 235 | action="store_true", 236 | help="whether to use batchnorm2D in packnet module or not") 237 | self.parser.add_argument("--albedo_offset", 238 | type=float, 239 | default=0.0, 240 | help="constant factor to add to albedo to avoid gradient cutoff") 241 | self.parser.add_argument("--freeze_pose_net", 242 | action="store_true", 243 | help="whether to freeze the training for pose network") 244 | self.parser.add_argument("--clip_depth_grad", 245 | type=float, 246 | default=-1.0, 247 | help="clip depth gradient to a certain value if value > 0") 248 | self.parser.add_argument("--passive_supervision", 249 | action="store_true", 250 | 
help="supervise learning of passive image with real one") 251 | self.parser.add_argument("--passive_weight", 252 | type=float, 253 | default=0.1, 254 | help="passive supervision loss weight") 255 | 256 | 257 | 258 | # LOGGING options 259 | self.parser.add_argument("--log_frequency", 260 | type=int, 261 | help="number of batches between each tensorboard log", 262 | default=250) 263 | self.parser.add_argument("--chkpt_frequency", 264 | type=int, 265 | help="number of batches between each checkpoint", 266 | default=250) 267 | self.parser.add_argument("--save_frequency", 268 | type=int, 269 | help="number of epochs between each save", 270 | default=1) 271 | 272 | # SYSTEM options 273 | self.parser.add_argument("--no_cuda", 274 | action="store_true", 275 | help="whether to train on cpu") 276 | self.parser.add_argument("--num_workers", 277 | type=int, 278 | help="number of dataloader workers", 279 | default=12) 280 | 281 | def parse(self): 282 | self.options = self.parser.parse_args() 283 | return self.options -------------------------------------------------------------------------------- /src/splits/g2d/real_test_night.txt: -------------------------------------------------------------------------------- 1 | 03644 2 | 13755 3 | 04165 4 | 13006 5 | 09623 6 | 03742 7 | 09270 8 | 00874 9 | 05966 10 | 13029 11 | 12142 12 | 00138 13 | 09779 14 | 07951 15 | 12413 16 | 06752 17 | 11152 18 | 11024 19 | 04942 20 | 00964 21 | 12826 22 | 05791 23 | 09303 24 | 01799 25 | 10436 26 | 06197 27 | 09090 28 | 13733 29 | 11777 30 | 10449 31 | 11209 32 | 03295 33 | 02852 34 | 06895 35 | 06575 36 | 01253 37 | 12354 38 | 04187 39 | 01204 40 | 06426 41 | 00945 42 | 00288 43 | 08479 44 | 00413 45 | 08438 46 | 07191 47 | 09894 48 | 11819 49 | 04479 50 | 02473 51 | 08333 52 | 05552 53 | 03305 54 | 08476 55 | 12835 56 | 07141 57 | 08959 58 | 08752 59 | 12602 60 | 13369 61 | 10802 62 | 08617 63 | 10324 64 | 09828 65 | 07534 66 | 05064 67 | 06681 68 | 06976 69 | 01064 70 | 07021 71 | 08254 72 | 03530 73 | 07456 74 | 07022 75 | 02907 76 | 07417 77 | 00202 78 | 09022 79 | 08031 80 | 00592 81 | 13282 82 | 03480 83 | 05289 84 | 14034 85 | 00443 86 | 07800 87 | 06006 88 | 01195 89 | 09731 90 | 09072 91 | 11721 92 | 06015 93 | 03574 94 | 01188 95 | 00855 96 | 11030 97 | 05426 98 | 03502 99 | 05755 100 | 06326 101 | 04955 102 | 08156 103 | 03622 104 | 12140 105 | 07580 106 | 05063 107 | 08492 108 | 10209 109 | 07182 110 | 10817 111 | 11186 112 | 07814 113 | 01001 114 | 13637 115 | 01040 116 | 01049 117 | 09269 118 | 05045 119 | 04138 120 | 01862 121 | 01813 122 | 10329 123 | 07442 124 | 01137 125 | 01770 126 | 08023 127 | 10462 128 | 13049 129 | 06518 130 | 09079 131 | 02287 132 | 10700 133 | 09373 134 | 00556 135 | 01238 136 | 06025 137 | 02027 138 | 00223 139 | 12565 140 | 08494 141 | 11643 142 | 03613 143 | 07175 144 | 00391 145 | 11109 146 | 09506 147 | 08771 148 | 06085 149 | 06279 150 | 04855 151 | 02216 152 | 03833 153 | 09377 154 | 10346 155 | 12553 156 | 00463 157 | 11377 158 | 12235 159 | 05652 160 | 09985 161 | 00757 162 | 12369 163 | 07703 164 | 03786 165 | 13726 166 | 08196 167 | 01093 168 | 09749 169 | 01155 170 | 06659 171 | 09581 172 | 11536 173 | 02253 174 | 05728 175 | 12657 176 | 05614 177 | 09493 178 | 11420 179 | 04264 180 | 12035 181 | 05004 182 | 03885 183 | 06563 184 | 08454 185 | 01996 186 | 00245 187 | 07184 188 | 11153 189 | 05486 190 | 09535 191 | 01035 192 | 13365 193 | 01812 194 | 09965 195 | 01473 196 | 04584 197 | 10787 198 | 12899 199 | 02646 200 | 03605 201 | 08045 202 | 06802 203 | 00346 204 | 
09748 205 | 12089 206 | 07352 207 | 03694 208 | 11317 209 | 11895 210 | 03256 211 | 04938 212 | 09005 213 | 08224 214 | 11114 215 | 13349 216 | 06488 217 | 00049 218 | 07223 219 | 09375 220 | 09781 221 | 01841 222 | 03221 223 | 13339 224 | 06169 225 | 05385 226 | 11710 227 | 10720 228 | 06728 229 | 06259 230 | 05205 231 | 04364 232 | 01741 233 | 01756 234 | 12526 235 | 04836 236 | 11018 237 | 00835 238 | 13400 239 | 09097 240 | 09230 241 | 03873 242 | 09808 243 | 04723 244 | 05113 245 | 08985 246 | 07053 247 | 01157 248 | 08638 249 | 09637 250 | 10099 251 | 13140 252 | 13940 253 | 10851 254 | 09410 255 | 03033 256 | 02863 257 | 06655 258 | 05896 259 | 13051 260 | 12838 261 | 09147 262 | 05663 263 | 07894 264 | 04806 265 | 01976 266 | 06213 267 | 06859 268 | 06520 269 | 13245 270 | 12610 271 | 02681 272 | 02924 273 | 11519 274 | 00038 275 | 01233 276 | 13915 277 | 05760 278 | 07812 279 | 13933 280 | 13898 281 | 06186 282 | 10225 283 | 01207 284 | 03707 285 | 14166 286 | 07843 287 | 08891 288 | 04380 289 | 04746 290 | 01376 291 | 12389 292 | 07981 293 | 07162 294 | 04915 295 | 12400 296 | 10579 297 | 07711 298 | 06458 299 | 11282 300 | 13265 301 | 10913 302 | 02034 303 | 02416 304 | 13126 305 | 00424 306 | 12079 307 | 13845 308 | 13852 309 | 07611 310 | 09666 311 | 04845 312 | 01142 313 | 02259 314 | 07001 315 | 03900 316 | 10155 317 | 09793 318 | 05066 319 | 03593 320 | 07623 321 | 13936 322 | 06072 323 | 10159 324 | 00317 325 | 11598 326 | 00981 327 | 14009 328 | 05003 329 | 11920 330 | 02752 331 | 01790 332 | 05986 333 | 04818 334 | 01044 335 | 08341 336 | 09981 337 | 08839 338 | 03050 339 | 10786 340 | 11028 341 | 00531 342 | 02675 343 | 00262 344 | 07775 345 | 11336 346 | 10316 347 | 12299 348 | 06244 349 | 00931 350 | 13685 351 | 10219 352 | 05023 353 | 02339 354 | 13641 355 | 04106 356 | 00330 357 | 12941 358 | 06863 359 | 10166 360 | 10130 361 | 13743 362 | 05420 363 | 02405 364 | 10707 365 | 00126 366 | 11216 367 | 03405 368 | 08376 369 | 10789 370 | 03532 371 | 05001 372 | 07140 373 | 12867 374 | 11411 375 | 04856 376 | 00411 377 | 06667 378 | 03752 379 | 05255 380 | 03814 381 | 02791 382 | 01763 383 | 12467 384 | 03273 385 | 13213 386 | 03382 387 | 11113 388 | 00515 389 | 12108 390 | 11971 391 | 01472 392 | 02387 393 | 05079 394 | 07507 395 | 08313 396 | 01348 397 | 10147 398 | 03025 399 | 06246 400 | 12932 401 | 11250 402 | 01469 403 | 07201 404 | 08437 405 | 00745 406 | 12272 407 | 00289 408 | 03965 409 | 05790 410 | 05703 411 | 02871 412 | 10342 413 | 04210 414 | 03421 415 | 08741 416 | 09654 417 | 02459 418 | 09372 419 | 13554 420 | 01997 421 | 08838 422 | 02468 423 | 08511 424 | 03464 425 | 04733 426 | 08026 427 | 04554 428 | 13035 429 | 06555 430 | 10526 431 | 14074 432 | 02593 433 | 07626 434 | 05019 435 | 01899 436 | 07226 437 | 01562 438 | 09058 439 | 05425 440 | 05596 441 | 04766 442 | 00769 443 | 03048 444 | 00046 445 | 13899 446 | 01941 447 | 11001 448 | 08750 449 | 00846 450 | 07654 451 | 07503 452 | 06523 453 | 08330 454 | 00956 455 | 09611 456 | 05938 457 | 10542 458 | 04424 459 | 02637 460 | 13355 461 | 11363 462 | 10274 463 | 06691 464 | 00260 465 | 13267 466 | 03330 467 | 13358 468 | 05491 469 | 02081 470 | 08798 471 | 07249 472 | 04778 473 | 05439 474 | 07434 475 | 08169 476 | 12273 477 | 09776 478 | 11376 479 | 11832 480 | 00573 481 | 06249 482 | 03544 483 | 09877 484 | 03261 485 | 08610 486 | 07440 487 | 12300 488 | 04224 489 | 04136 490 | 13048 491 | 07419 492 | 04830 493 | 04058 494 | 09547 495 | 02773 496 | 06528 497 | 04833 498 | 08337 499 | 05917 500 | 
13600 501 | 08822 502 | 04320 503 | 12953 504 | 05695 505 | 03385 506 | 14002 507 | 05964 508 | 08911 509 | 00339 510 | 08325 511 | 07567 512 | 03817 513 | 04100 514 | 05237 515 | 04085 516 | 02330 517 | 02554 518 | 13458 519 | 07104 520 | 06208 521 | 03245 522 | 05612 523 | 06032 524 | 07562 525 | 05139 526 | 11664 527 | 13077 528 | 09105 529 | 10463 530 | 02234 531 | 01944 532 | 11882 533 | 06394 534 | 10759 535 | 04756 536 | 12840 537 | 01696 538 | 10265 539 | 12327 540 | 06041 541 | 07121 542 | 00595 543 | 03040 544 | 00553 545 | 10616 546 | 04172 547 | 05397 548 | 05307 549 | 04227 550 | 02621 551 | -------------------------------------------------------------------------------- /src/splits/stf/test_dense_fog_day.txt: -------------------------------------------------------------------------------- 1 | 2018-10-29_16-12-02,01200 2 | 2018-10-29_14-30-29,02460 3 | 2018-10-29_14-30-29,00590 4 | 2018-10-29_14-30-29,00460 5 | 2018-10-29_14-30-29,00910 6 | 2018-10-08_08-10-40,03020 7 | 2018-10-29_14-30-29,00500 8 | 2018-10-08_08-10-40,00650 9 | 2018-10-29_14-30-29,01680 10 | 2018-10-08_08-10-40,03590 11 | 2018-10-08_08-10-40,03050 12 | 2018-10-08_08-10-40,03500 13 | 2018-10-29_14-30-29,02490 14 | 2018-10-29_14-30-29,00600 15 | 2018-10-08_08-18-59,00130 16 | 2018-10-08_08-10-40,03110 17 | 2018-10-29_14-30-29,01560 18 | 2018-10-08_08-10-40,03680 19 | 2018-10-29_14-30-29,00660 20 | 2018-10-08_08-10-40,03370 21 | 2018-10-08_08-10-40,02900 22 | 2018-10-29_14-30-29,02640 23 | 2018-10-08_08-10-40,03200 24 | 2018-10-29_14-30-29,01200 25 | 2018-10-08_08-10-40,02400 26 | 2018-10-08_08-10-40,04200 27 | 2018-10-29_14-30-29,02420 28 | 2018-10-29_14-30-29,01210 29 | 2018-10-29_14-30-29,01030 30 | 2018-10-29_14-30-29,01740 31 | 2018-10-29_14-30-29,02510 32 | 2018-10-29_14-30-29,01730 33 | 2018-10-29_14-30-29,00550 34 | 2018-10-29_14-30-29,01320 35 | 2018-10-29_14-30-29,02520 36 | 2018-10-29_14-30-29,02450 37 | 2018-10-29_14-30-29,02100 38 | 2018-10-08_08-10-40,03150 39 | 2018-10-29_14-30-29,01630 40 | 2018-10-29_15-15-15,01210 41 | 2018-10-08_08-10-40,03230 42 | 2018-10-08_08-18-59,00300 43 | 2018-10-08_08-10-40,03120 44 | 2018-10-29_14-30-29,00630 45 | 2018-10-29_14-30-29,01550 46 | 2018-10-29_14-30-29,01250 47 | 2018-10-29_14-30-29,02190 48 | 2018-10-08_08-10-40,03160 49 | 2018-10-29_14-30-29,02600 50 | 2018-10-29_14-30-29,02350 51 | 2018-10-08_08-10-40,03420 52 | 2018-10-08_08-10-40,02630 53 | 2018-10-29_14-30-29,02070 54 | 2018-10-29_15-15-15,01500 55 | 2018-10-29_14-30-29,01460 56 | 2018-10-08_08-10-40,04700 57 | 2018-10-29_15-37-43,01800 58 | 2018-10-29_15-15-15,01900 59 | 2018-10-29_14-30-29,00450 60 | 2018-10-29_16-12-02,01300 61 | 2018-10-29_14-30-29,00920 62 | 2018-10-29_14-30-29,01770 63 | 2018-10-29_14-30-29,00510 64 | 2018-10-29_14-30-29,01910 65 | 2018-10-29_14-30-29,01690 66 | 2018-10-29_14-30-29,01810 67 | 2018-10-08_08-10-40,03040 68 | 2018-10-29_14-30-29,02480 69 | 2018-10-08_08-18-59,00120 70 | 2018-10-29_14-30-29,01350 71 | 2018-10-29_14-30-29,02250 72 | 2018-10-29_15-15-15,00900 73 | 2018-10-29_15-15-15,01520 74 | 2018-10-08_08-10-40,03300 75 | 2018-10-08_08-10-40,02640 76 | 2018-10-29_14-30-29,01820 77 | 2018-10-08_08-10-40,03260 78 | 2018-10-08_08-18-59,00370 79 | 2018-10-29_14-30-29,02630 80 | 2018-10-08_08-10-40,03600 81 | 2018-10-29_14-30-29,00700 82 | 2018-10-08_08-10-40,03410 83 | 2018-10-08_08-10-40,03460 84 | 2018-10-29_14-30-29,02580 85 | 2018-10-29_14-30-29,00800 86 | 2018-10-29_14-30-29,01920 87 | 2018-10-29_15-37-43,01200 88 | 2018-10-08_08-10-40,02730 89 | 
2018-10-29_14-30-29,01000 90 | 2018-10-29_15-15-15,01800 91 | 2018-10-29_14-30-29,01790 92 | 2018-10-29_14-30-29,01720 93 | 2018-10-29_14-30-29,00560 94 | 2018-10-29_14-30-29,00480 95 | 2018-10-29_14-30-29,02440 96 | 2018-10-29_14-30-29,01310 97 | 2018-10-29_14-30-29,01640 98 | 2018-10-08_08-10-40,03000 99 | 2018-10-29_15-15-15,01200 100 | 2018-10-08_08-10-40,03640 101 | 2018-10-08_08-10-40,03520 102 | 2018-10-08_08-10-40,03290 103 | 2018-10-08_08-10-40,02320 104 | 2018-10-29_14-30-29,02200 105 | 2018-10-08_08-10-40,02600 106 | 2018-10-08_08-10-40,03730 107 | 2018-10-08_08-10-40,00520 108 | 2018-10-08_08-10-40,02520 109 | 2018-10-29_15-15-15,01510 110 | 2018-10-08_08-10-40,03100 111 | 2018-10-08_08-10-40,02440 112 | 2018-10-29_15-37-43,01400 113 | 2018-10-29_14-30-29,00930 114 | 2018-10-29_14-30-29,02400 115 | 2018-10-29_15-15-15,01560 116 | 2018-10-29_14-30-29,00520 117 | 2018-10-08_08-10-40,04600 118 | 2018-10-29_14-30-29,01760 119 | 2018-10-08_08-10-40,03330 120 | 2018-10-08_08-10-40,03570 121 | 2018-10-29_14-30-29,01900 122 | 2018-10-29_14-30-29,01340 123 | 2018-10-08_08-18-59,00110 124 | 2018-10-29_14-30-29,01220 125 | 2018-10-29_14-30-29,00620 126 | 2018-10-08_08-10-40,03170 127 | 2018-10-08_08-10-40,04500 128 | 2018-10-08_08-10-40,00510 129 | 2018-10-29_15-15-15,00890 130 | 2018-10-29_14-30-29,00710 131 | 2018-10-08_08-10-40,00670 132 | 2018-10-29_14-30-29,00650 133 | 2018-10-08_08-10-40,03670 134 | 2018-10-08_08-10-40,02950 135 | 2018-10-08_08-10-40,04000 136 | 2018-10-08_08-10-40,03400 137 | 2018-10-08_08-10-40,02550 138 | 2018-10-29_14-30-29,01010 139 | 2018-10-08_08-10-40,03900 140 | 2018-10-29_14-30-29,01400 141 | 2018-10-29_14-30-29,02570 142 | 2018-10-29_14-30-29,01780 143 | 2018-10-29_14-30-29,00580 144 | 2018-10-29_14-30-29,00780 145 | 2018-10-29_14-30-29,01710 146 | 2018-10-29_14-30-29,00900 147 | 2018-10-08_08-10-40,03030 148 | 2018-10-29_14-30-29,02470 149 | 2018-10-29_14-30-29,00570 150 | 2018-10-29_14-30-29,01300 151 | 2018-10-29_14-30-29,01650 152 | 2018-10-29_15-15-15,01220 153 | 2018-10-08_08-10-40,00630 154 | 2018-10-29_14-30-29,00610 155 | 2018-10-08_08-10-40,03530 156 | 2018-10-08_08-18-59,00140 157 | 2018-10-29_15-37-43,02800 158 | 2018-10-29_14-30-29,02390 159 | 2018-10-08_08-10-40,03360 160 | 2018-10-29_14-30-29,02300 161 | 2018-10-08_08-10-40,02610 162 | 2018-10-08_08-10-40,03800 163 | 2018-10-29_14-30-29,00720 164 | 2018-10-08_08-18-59,00320 165 | 2018-10-29_14-30-29,02090 166 | 2018-10-29_15-15-15,01190 167 | 2018-10-29_14-30-29,00670 168 | 2018-10-29_14-30-29,02000 169 | 2018-10-29_14-30-29,02430 170 | 2018-10-29_14-30-29,00940 171 | 2018-10-29_16-12-02,01360 172 | 2018-10-29_14-30-29,02530 173 | 2018-10-29_14-30-29,01750 174 | 2018-10-29_14-30-29,00530 175 | 2018-10-29_14-30-29,01450 176 | 2018-10-08_08-10-40,04900 177 | 2018-10-29_14-30-29,00790 178 | 2018-10-29_14-30-29,00540 179 | 2018-10-08_08-18-59,00100 180 | 2018-10-29_14-30-29,01330 181 | 2018-10-29_14-30-29,01620 182 | 2018-10-29_14-30-29,02500 183 | 2018-10-08_08-10-40,03710 184 | 2018-10-08_08-18-59,00310 185 | 2018-10-29_14-30-29,01800 186 | 2018-10-29_14-30-29,02080 187 | 2018-10-29_14-30-29,00640 188 | 2018-10-29_15-37-43,02200 189 | 2018-10-08_08-10-40,02300 190 | 2018-10-29_15-15-15,01910 191 | 2018-10-08_08-10-40,04400 192 | 2018-10-29_16-12-02,01290 193 | 2018-10-08_08-10-40,02530 194 | 2018-10-08_08-18-59,03300 195 | 2018-10-08_08-10-40,03700 196 | 2018-10-08_08-10-40,02620 -------------------------------------------------------------------------------- 
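Note: the STF split files above and below list one sample per line in the form `<recording>,<frame_id>` (e.g. `2018-10-29_14-30-29,01200`), whereas the g2d splits use bare five-digit frame indices. Below is a minimal parsing sketch built on the `readlines` helper from `src/utils.py`; the path layout `<data_root>/<slice_dir>/<recording>_<frame_id>.png` is an assumption for illustration only and may differ from the actual loaders in `src/dataset/`.

```
import os


def readlines(filename):
    """Read all the lines in a text file and return them as a list
    (same helper as in src/utils.py)."""
    with open(filename, 'r') as f:
        return f.read().splitlines()


def parse_stf_split(split_file, data_root,
                    slice_dirs=("gated0_10bit", "gated1_10bit", "gated2_10bit")):
    """Turn an STF split file into a list of samples with candidate slice paths.

    Each split line is '<recording>,<frame_id>'. The filename pattern
    '<recording>_<frame_id>.png' is assumed here for illustration only.
    """
    samples = []
    for line in readlines(split_file):
        recording, frame_id = line.strip().split(",")
        slice_paths = [os.path.join(data_root, d, "{}_{}.png".format(recording, frame_id))
                       for d in slice_dirs]
        samples.append({"recording": recording, "frame": frame_id, "slices": slice_paths})
    return samples


# Usage (placeholder paths):
# samples = parse_stf_split("src/splits/stf/test_dense_fog_day.txt", "/path/to/stf_data")
# print(len(samples), samples[0]["slices"][0])
```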
/src/splits/stf/test_dense_fog_night.txt: -------------------------------------------------------------------------------- 1 | 2018-02-07_18-39-52,00300 2 | 2018-12-12_15-21-22,00300 3 | 2018-12-12_15-21-22,02700 4 | 2018-12-12_15-21-22,00900 5 | 2018-12-12_15-32-51,01000 6 | 2018-12-14_15-36-31,02800 7 | 2018-02-07_18-20-02,00320 8 | 2018-02-07_18-06-31,00150 9 | 2018-12-12_15-32-51,01300 10 | 2019-01-09_08-07-00,00800 11 | 2018-02-07_18-39-52,00380 12 | 2018-12-12_15-30-45,00200 13 | 2018-12-12_15-30-45,00100 14 | 2018-02-07_18-39-52,00200 15 | 2018-12-12_15-21-22,01800 16 | 2018-12-12_15-21-22,01600 17 | 2018-12-12_15-30-45,00000 18 | 2018-12-12_15-30-45,00400 19 | 2018-02-07_18-39-52,00220 20 | 2018-12-14_15-36-31,00900 21 | 2018-12-12_15-21-22,01400 22 | 2018-02-07_18-20-02,00310 23 | 2018-12-12_15-30-45,00300 24 | 2018-12-12_15-21-22,01500 25 | 2018-02-07_18-25-17,00100 26 | 2018-02-07_18-39-52,00390 27 | 2018-02-07_18-06-31,00120 28 | 2018-02-07_18-39-52,00280 29 | 2018-12-14_15-36-31,00600 30 | 2018-02-07_17-56-35,00130 31 | 2018-12-12_15-21-22,01200 32 | 2018-12-14_15-36-31,03300 33 | 2018-02-07_18-25-17,00060 34 | 2018-12-12_15-32-51,00700 35 | 2018-12-14_15-36-31,00300 36 | 2018-12-12_15-21-22,02100 37 | 2018-12-12_15-32-51,00900 38 | 2018-12-12_15-21-22,00000 39 | 2018-02-07_18-39-52,00370 40 | 2018-12-14_15-36-31,02900 41 | 2018-02-07_18-39-52,00250 42 | 2018-02-07_18-39-52,00320 43 | 2018-12-12_15-32-51,00400 44 | 2018-12-12_15-30-45,00800 45 | 2018-12-12_15-21-22,00400 46 | 2018-02-07_18-20-02,00000 47 | 2018-12-12_15-32-51,01200 48 | 2018-12-12_15-21-22,01100 49 | 2018-12-14_15-36-31,01100 50 | 2018-12-12_15-30-45,00700 51 | 2018-02-07_18-20-02,00300 52 | 2018-12-12_15-21-22,03300 53 | 2018-02-07_18-25-17,00170 54 | 2018-02-07_17-56-35,00120 55 | 2018-12-12_15-32-51,00300 56 | 2018-12-12_15-21-22,01000 57 | 2018-12-14_15-36-31,03100 58 | 2018-12-12_15-21-22,02500 59 | 2018-12-14_15-36-31,00400 60 | 2018-12-12_15-32-51,00000 61 | 2018-12-12_15-21-22,02400 62 | 2018-02-07_18-06-31,00130 63 | 2018-12-12_15-21-22,03400 64 | 2018-12-12_15-21-22,02800 65 | 2018-12-12_15-21-22,02200 66 | 2019-01-09_08-09-20,04600 67 | 2018-02-07_18-39-52,00340 68 | 2018-12-14_15-36-31,03400 69 | 2018-02-07_18-39-52,00310 70 | 2018-02-07_18-20-02,00010 71 | 2018-12-12_15-21-22,00200 72 | 2018-12-12_15-21-22,00800 73 | 2018-12-12_15-32-51,01100 74 | 2018-12-12_15-21-22,00500 75 | 2018-12-12_15-30-45,00600 76 | 2018-12-14_15-36-31,01000 77 | 2018-12-12_15-21-22,03200 78 | 2018-12-12_15-30-45,00500 79 | 2018-12-12_15-21-22,01700 80 | 2018-12-14_15-36-31,00100 81 | 2018-12-12_15-21-22,01900 82 | 2018-02-07_17-56-35,00110 83 | 2018-12-14_15-36-31,00800 84 | 2018-12-12_15-21-22,02600 85 | 2018-12-12_15-21-22,01300 86 | 2018-12-14_15-13-08,00000 87 | 2018-12-12_15-21-22,00100 88 | 2018-12-14_15-36-31,00500 89 | 2018-02-07_18-39-52,00270 90 | 2018-12-12_15-21-22,02900 91 | 2018-02-07_18-39-52,00350 92 | 2018-12-12_15-32-51,00100 93 | 2018-02-07_18-39-52,00290 94 | 2018-12-14_15-36-31,03000 95 | 2018-12-12_15-21-22,00600 96 | 2018-12-12_15-21-22,02300 97 | 2018-12-14_15-36-31,00200 98 | 2018-02-07_18-20-02,00020 99 | 2018-12-12_15-32-51,00600 100 | 2018-12-12_15-21-22,02000 101 | 2018-12-12_15-32-51,00800 102 | 2018-02-07_17-56-35,00010 103 | 2018-02-07_17-56-35,00160 104 | 2018-02-07_17-56-35,00050 105 | 2018-02-07_18-06-31,00210 -------------------------------------------------------------------------------- /src/splits/stf/test_light_fog_day.txt: 
-------------------------------------------------------------------------------- 1 | 2018-12-12_09-22-59,06600 2 | 2018-12-12_09-22-59,03400 3 | 2018-12-12_11-27-36,00000 4 | 2018-10-08_08-27-03,03600 5 | 2018-10-29_15-46-53,01200 6 | 2018-12-16_12-31-35,00100 7 | 2018-10-08_08-27-03,05900 8 | 2018-10-29_15-46-53,00500 9 | 2018-12-12_09-22-59,03700 10 | 2018-10-29_16-12-02,00250 11 | 2019-01-09_08-23-49,00200 12 | 2018-10-29_14-35-02,01080 13 | 2018-10-29_15-46-53,00200 14 | 2018-10-29_16-12-02,01000 15 | 2019-01-08_11-12-54,01000 16 | 2019-01-09_08-23-49,00700 17 | 2018-10-29_15-46-53,00800 18 | 2018-10-08_08-27-03,05300 19 | 2018-12-12_09-22-59,04600 20 | 2019-01-09_08-23-49,00900 21 | 2018-12-12_09-22-59,06300 22 | 2018-10-08_08-27-03,01100 23 | 2018-10-08_08-27-03,01250 24 | 2019-01-09_08-27-29,00500 25 | 2018-10-29_16-12-02,01150 26 | 2018-12-12_09-22-59,06000 27 | 2018-12-12_10-21-21,00100 28 | 2019-01-09_09-28-07,00300 29 | 2018-12-12_13-43-01,00100 30 | 2018-12-12_09-22-59,04300 31 | 2018-03-15_09-39-45,00100 32 | 2018-12-16_12-05-56,00700 33 | 2018-10-08_08-27-03,04200 34 | 2019-01-09_09-28-07,00200 35 | 2018-10-08_08-27-03,05200 36 | 2018-12-12_09-22-59,04900 37 | 2018-10-29_16-00-52,05400 38 | 2018-10-29_15-46-53,01000 39 | 2018-03-15_09-39-45,00200 40 | 2018-12-16_12-05-56,00000 41 | 2019-01-09_08-23-49,00400 42 | 2018-12-12_09-22-59,05300 43 | 2018-10-08_08-27-03,05110 44 | 2018-10-29_16-12-02,00450 45 | 2018-10-08_08-27-03,03840 46 | 2019-01-09_08-23-49,01200 47 | 2018-10-08_08-27-03,01700 48 | 2018-03-15_09-28-05,00200 49 | 2018-10-29_16-12-02,00350 50 | 2018-10-08_08-27-03,00000 51 | 2018-10-29_16-12-02,00950 52 | 2018-10-08_08-27-03,01380 53 | 2019-01-09_08-23-49,00000 54 | 2018-10-29_16-12-02,01050 55 | 2018-12-12_09-22-59,05800 56 | 2019-01-09_08-27-29,00300 57 | 2018-12-12_09-22-59,05600 58 | 2018-03-15_09-28-05,00100 59 | 2019-01-08_11-12-54,00700 60 | 2018-10-08_08-27-03,02100 61 | 2018-10-08_08-27-03,04100 62 | 2018-10-08_08-27-03,04000 63 | 2018-10-29_15-46-53,01400 64 | 2019-01-09_09-26-35,00100 65 | 2018-12-12_09-22-59,05500 66 | 2018-10-08_08-27-03,03400 67 | 2018-12-12_09-22-59,03300 68 | 2018-10-29_15-46-53,01100 69 | 2018-10-29_16-12-02,00300 70 | 2018-12-12_11-27-36,00900 71 | 2018-10-08_08-27-03,00850 72 | 2018-12-16_12-31-35,00200 73 | 2018-10-08_08-27-03,05800 74 | 2018-12-12_09-22-59,05200 75 | 2018-03-15_09-40-42,00000 76 | 2018-10-08_08-27-03,01850 77 | 2018-10-29_15-46-53,00400 78 | 2019-01-09_08-23-49,00300 79 | 2018-10-08_08-27-03,01770 80 | 2018-12-12_09-22-59,03600 81 | 2018-10-29_16-12-02,00900 82 | 2018-10-29_14-35-02,00850 83 | 2018-12-12_11-27-36,01200 84 | 2018-12-12_09-55-58,00000 85 | 2018-10-29_15-46-53,01300 86 | 2018-10-08_08-27-03,00750 87 | 2018-10-08_08-27-03,02250 88 | 2018-12-12_09-59-21,00100 89 | 2018-10-08_08-27-03,05400 90 | 2018-10-08_08-27-03,01000 91 | 2018-10-08_08-27-03,01400 92 | 2018-12-12_09-59-21,00000 93 | 2018-10-08_08-27-03,01150 94 | 2018-12-12_09-22-59,06100 95 | 2018-10-29_15-46-53,00000 96 | 2018-10-29_16-12-02,00600 97 | 2018-12-12_09-22-59,03900 98 | 2018-12-12_09-22-59,04000 99 | 2018-12-16_12-05-56,00100 100 | 2019-01-09_08-23-49,00500 101 | 2018-10-29_16-12-02,00500 102 | 2018-10-08_08-27-03,01950 103 | 2018-12-12_10-21-21,00200 104 | 2018-12-21_11-24-17,00000 105 | 2018-10-29_16-12-02,00550 106 | 2018-10-08_08-27-03,05100 107 | 2018-10-08_08-27-03,00150 108 | 2018-10-29_15-46-53,00700 109 | 2018-10-08_08-27-03,00800 110 | 2018-12-12_11-27-36,00300 111 | 2018-10-08_08-27-03,06000 112 | 
2018-02-12_08-56-38,00080 113 | 2018-10-29_15-46-53,00900 114 | 2018-10-08_08-27-03,01780 115 | 2018-10-29_16-12-02,00800 116 | 2018-12-12_09-22-59,06400 117 | 2019-01-09_08-23-49,01100 118 | 2018-10-08_08-27-03,01600 119 | 2018-03-15_09-39-45,00300 120 | 2019-01-08_11-12-54,00300 121 | 2018-12-12_09-55-58,00100 122 | 2018-10-08_08-27-03,03500 123 | 2018-02-04_12-42-39,00000 124 | 2018-10-08_08-27-03,02200 125 | 2019-01-08_11-12-54,00400 126 | 2018-12-12_09-22-59,05100 127 | 2018-03-15_09-39-45,00400 128 | 2018-02-12_08-56-38,00090 129 | 2019-01-08_11-12-54,00600 130 | 2019-01-09_09-26-35,00000 131 | 2018-10-29_15-46-53,01500 132 | 2019-01-09_08-27-29,00000 133 | 2018-10-29_16-12-02,00150 134 | 2018-12-12_09-22-59,05400 135 | 2018-10-08_08-27-03,05130 136 | 2018-10-08_08-27-03,01900 137 | 2018-10-08_08-27-03,01760 138 | 2018-10-08_08-27-03,05500 139 | 2018-12-16_12-31-35,00000 140 | 2018-10-29_15-46-53,01600 141 | 2018-12-12_09-22-59,03200 142 | 2018-02-12_08-56-38,00060 143 | 2018-10-08_08-27-03,05700 144 | 2018-10-29_16-12-02,00850 145 | 2018-12-12_11-27-36,01000 146 | 2018-12-12_09-22-59,03800 147 | 2018-12-12_09-22-59,03500 148 | 2018-12-12_09-22-59,05000 149 | 2018-12-16_12-05-56,00800 150 | 2018-10-29_16-12-02,00750 151 | 2018-12-12_09-22-59,06500 152 | 2019-01-08_11-12-54,00500 153 | 2018-02-04_12-43-00,00000 154 | 2018-12-12_11-27-36,00400 155 | 2018-12-16_12-05-56,00500 156 | 2019-01-09_08-23-49,00100 157 | 2018-10-29_15-46-53,00300 158 | 2018-10-29_16-12-02,00100 159 | 2018-10-08_08-27-03,00700 160 | 2018-10-08_08-27-03,05600 161 | 2019-01-09_08-23-49,01400 162 | 2018-10-08_08-27-03,00050 163 | 2019-01-09_08-23-49,00600 164 | 2019-01-09_08-23-49,00800 165 | 2018-12-12_09-22-59,06200 166 | 2018-02-12_08-56-38,00130 167 | 2018-10-29_14-35-02,01070 168 | 2018-10-08_08-27-03,03200 169 | 2019-01-09_08-27-29,00400 170 | 2018-12-12_09-22-59,02300 171 | 2018-12-12_10-21-21,00000 172 | 2019-01-09_09-28-07,00000 173 | 2019-01-09_08-23-49,01000 174 | 2018-10-08_08-27-03,01050 175 | 2019-01-09_09-28-07,00100 176 | 2018-12-12_09-22-59,06700 177 | 2018-12-12_09-22-59,04200 178 | 2018-10-08_08-27-03,00900 179 | 2018-03-15_09-39-45,00000 180 | 2019-01-09_08-23-49,01300 181 | 2019-01-08_11-12-54,00200 182 | 2019-01-09_14-54-03,01000 183 | 2018-10-29_15-46-53,00100 184 | 2018-03-15_09-30-35,00000 185 | 2018-12-12_09-22-59,04500 186 | 2018-10-29_15-46-53,00600 187 | 2018-10-08_08-27-03,02700 188 | 2018-10-08_08-27-03,03300 189 | 2018-10-08_08-27-03,05120 190 | 2018-12-12_11-27-36,01100 191 | 2019-01-09_08-27-29,00100 192 | 2018-10-08_08-27-03,01750 193 | 2018-10-08_08-27-03,01800 194 | 2018-12-12_09-22-59,05900 195 | 2018-10-08_08-27-03,01200 196 | 2019-01-09_08-27-29,00200 197 | 2018-10-29_16-12-02,00200 198 | 2018-10-08_08-27-03,02000 199 | 2018-10-29_16-12-02,01100 200 | 2018-03-15_09-30-35,00150 201 | 2018-02-04_12-47-32,00000 202 | 2018-02-04_12-47-32,00500 203 | 2018-03-15_10-18-07,00200 204 | 2018-12-19_10-03-38,00300 -------------------------------------------------------------------------------- /src/splits/stf/test_light_fog_night.txt: -------------------------------------------------------------------------------- 1 | 2018-10-29_16-32-57,02900 2 | 2018-02-07_18-33-04,00000 3 | 2018-12-20_15-23-28,00500 4 | 2018-02-07_18-36-39,00300 5 | 2018-10-29_16-32-57,00000 6 | 2018-12-20_15-04-24,00600 7 | 2018-12-17_09-12-03,00500 8 | 2018-02-07_18-21-20,00110 9 | 2018-12-20_15-04-24,00300 10 | 2018-12-17_09-12-03,01100 11 | 2018-02-07_18-21-20,00010 12 | 2018-12-20_16-04-42,00500 13 | 
2018-02-07_18-21-20,00060 14 | 2018-12-20_14-41-07,00100 15 | 2018-10-29_16-32-57,00200 16 | 2018-12-20_15-39-16,00200 17 | 2018-12-20_17-08-31,00300 18 | 2018-12-20_15-55-01,00200 19 | 2018-12-20_15-16-30,00800 20 | 2018-02-07_18-30-24,00200 21 | 2018-12-20_15-04-24,02700 22 | 2018-10-29_16-12-02,01270 23 | 2018-12-20_15-04-24,01600 24 | 2018-12-20_15-04-24,02900 25 | 2018-12-20_16-04-42,00300 26 | 2018-12-20_14-41-07,01700 27 | 2018-10-29_16-32-57,00100 28 | 2018-12-20_15-46-51,00700 29 | 2018-02-07_18-21-20,00050 30 | 2018-12-20_15-46-51,00000 31 | 2018-12-17_09-12-03,01000 32 | 2018-12-20_15-55-01,00100 33 | 2018-02-07_18-21-20,00070 34 | 2018-12-20_15-39-16,00600 35 | 2018-12-20_15-16-30,00200 36 | 2018-12-20_15-39-16,00800 37 | 2018-12-17_21-05-50,00000 38 | 2018-12-20_15-23-28,00400 39 | 2018-12-20_16-04-42,00700 40 | 2018-10-29_16-32-57,02600 41 | 2018-12-17_09-12-03,00600 42 | 2018-02-07_18-33-04,00300 43 | 2018-10-29_16-32-57,02800 44 | 2018-02-07_18-21-20,00270 45 | 2018-10-29_16-32-57,00900 46 | 2018-12-20_15-04-24,00400 47 | 2018-10-29_16-32-57,00300 48 | 2018-12-20_14-41-07,00000 49 | 2018-12-17_09-12-03,01600 50 | 2018-02-07_18-21-20,00000 51 | 2018-12-20_16-04-42,00400 52 | 2018-12-17_09-12-03,00700 53 | 2018-12-20_15-11-27,01600 54 | 2018-12-20_16-04-42,00000 55 | 2018-12-20_16-04-42,00200 56 | 2018-12-20_15-04-24,01500 57 | 2018-10-29_16-32-57,02100 58 | 2018-12-20_15-04-24,01000 59 | 2018-12-20_17-08-31,00100 60 | 2018-12-20_15-04-24,00900 61 | 2018-02-07_18-21-20,00040 62 | 2018-12-20_15-46-51,00100 63 | 2018-12-20_15-46-51,00400 64 | 2018-02-07_18-21-20,00290 65 | 2018-12-20_15-39-16,00500 66 | 2018-12-20_15-16-30,00300 67 | 2018-12-17_09-12-03,01800 68 | 2018-12-20_15-46-51,00600 69 | 2018-10-29_16-32-57,00500 70 | 2018-10-29_16-32-57,01000 71 | 2019-01-09_08-09-20,04300 72 | 2018-02-07_18-36-39,00100 73 | 2018-12-14_15-27-11,01500 74 | 2018-12-20_15-55-01,00000 75 | 2018-12-17_21-05-50,00300 76 | 2018-12-17_09-12-03,00900 77 | 2018-02-07_18-36-39,00200 78 | 2018-02-07_18-21-20,00260 79 | 2018-10-29_16-32-57,02700 80 | 2018-12-20_15-04-24,03000 81 | 2018-02-07_18-33-04,00200 82 | 2018-12-21_16-19-18,00100 83 | 2018-02-07_18-33-04,00100 84 | 2018-12-20_16-00-05,00100 85 | 2018-02-07_18-21-20,00030 86 | 2018-12-20_15-39-16,00300 87 | 2018-12-20_15-04-24,00500 88 | 2018-02-07_18-33-04,00400 89 | 2018-12-20_15-04-24,02500 90 | 2018-12-20_16-04-42,00600 91 | 2018-12-20_16-04-42,00100 92 | 2018-10-29_16-32-57,02000 93 | 2018-02-07_18-21-20,00280 94 | 2018-02-07_18-30-24,00000 95 | 2018-12-20_15-46-51,00200 96 | 2018-12-20_15-39-16,00900 97 | 2018-12-20_15-04-24,02800 98 | 2018-12-20_15-16-30,00700 99 | 2018-12-20_16-04-42,00800 100 | 2018-12-20_15-18-12,00200 101 | 2018-12-20_14-19-53,00200 102 | 2018-12-20_15-04-24,02600 103 | 2018-12-20_15-46-51,00500 104 | 2018-12-20_15-18-12,00100 105 | 2018-12-17_21-05-50,00200 106 | 2018-12-20_15-16-30,00400 107 | 2018-10-29_16-32-57,01600 108 | 2018-02-07_18-21-20,00100 109 | 2018-02-07_18-21-20,00250 110 | 2018-12-20_15-46-51,00800 111 | 2018-02-07_18-30-24,00100 112 | 2019-01-09_08-09-20,04200 113 | 2018-12-17_09-12-03,00800 114 | 2018-12-17_09-12-03,01400 115 | 2018-12-20_15-26-45,00000 116 | 2018-12-20_18-03-59,01600 117 | 2018-02-07_18-21-20,00020 118 | 2018-12-20_15-39-16,00700 119 | 2018-12-20_14-41-07,00400 120 | 2018-12-20_14-41-07,00300 121 | 2018-12-16_14-58-58,04000 122 | 2018-02-07_18-06-31,00320 123 | 2018-02-07_18-06-31,00300 124 | 2018-02-07_18-06-31,00100 125 | 2018-02-07_18-06-31,00450 126 | 
2018-12-19_16-36-54,04200 -------------------------------------------------------------------------------- /src/test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/princeton-computational-imaging/Gated2Gated/5056123c9ea88402dc5c9c9abbb195ab91ba8f6c/src/test.py -------------------------------------------------------------------------------- /src/train.py: -------------------------------------------------------------------------------- 1 | # Copyright Niantic 2019. Patent Pending. All rights reserved. 2 | # 3 | # This software is licensed under the terms of the Monodepth2 licence 4 | # which allows for non-commercial use only, the full terms of which are made 5 | # available in the LICENSE file. 6 | 7 | from __future__ import absolute_import, division, print_function 8 | 9 | from trainer import Trainer 10 | from options import GatedOptions 11 | 12 | options = GatedOptions() 13 | opts = options.parse() 14 | 15 | 16 | if __name__ == "__main__": 17 | trainer = Trainer(opts) 18 | trainer.train() -------------------------------------------------------------------------------- /src/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright Niantic 2019. Patent Pending. All rights reserved. 2 | # 3 | # This software is licensed under the terms of the Monodepth2 licence 4 | # which allows for non-commercial use only, the full terms of which are made 5 | # available in the LICENSE file. 6 | 7 | from __future__ import absolute_import, division, print_function 8 | import os 9 | import hashlib 10 | import zipfile 11 | from six.moves import urllib 12 | import torch 13 | import cv2 14 | import io 15 | import matplotlib.pyplot as plt 16 | import numpy as np 17 | from matplotlib.lines import Line2D 18 | 19 | 20 | def readlines(filename): 21 | """Read all the lines in a text file and return as a list 22 | """ 23 | with open(filename, 'r') as f: 24 | lines = f.read().splitlines() 25 | return lines 26 | 27 | 28 | def normalize_image(x): 29 | """Rescale image pixels to span range [0, 1] 30 | """ 31 | ma = float(x.max().cpu().data) 32 | mi = float(x.min().cpu().data) 33 | d = ma - mi if ma != mi else 1e5 34 | return (x - mi) / d 35 | 36 | 37 | def sec_to_hm(t): 38 | """Convert time in seconds to time in hours, minutes and seconds 39 | e.g. 10239 -> (2, 50, 39) 40 | """ 41 | t = int(t) 42 | s = t % 60 43 | t //= 60 44 | m = t % 60 45 | t //= 60 46 | return t, m, s 47 | 48 | 49 | def sec_to_hm_str(t): 50 | """Convert time in seconds to a nice string 51 | e.g. 
10239 -> '02h50m39s' 52 | """ 53 | h, m, s = sec_to_hm(t) 54 | return "{:02d}h{:02d}m{:02d}s".format(h, m, s) 55 | 56 | 57 | def download_model_if_doesnt_exist(model_name): 58 | """If pretrained kitti model doesn't exist, download and unzip it 59 | """ 60 | # values are tuples of (, ) 61 | download_paths = { 62 | "mono_640x192": 63 | ("https://storage.googleapis.com/niantic-lon-static/research/monodepth2/mono_640x192.zip", 64 | "a964b8356e08a02d009609d9e3928f7c"), 65 | "stereo_640x192": 66 | ("https://storage.googleapis.com/niantic-lon-static/research/monodepth2/stereo_640x192.zip", 67 | "3dfb76bcff0786e4ec07ac00f658dd07"), 68 | "mono+stereo_640x192": 69 | ("https://storage.googleapis.com/niantic-lon-static/research/monodepth2/mono%2Bstereo_640x192.zip", 70 | "c024d69012485ed05d7eaa9617a96b81"), 71 | "mono_no_pt_640x192": 72 | ("https://storage.googleapis.com/niantic-lon-static/research/monodepth2/mono_no_pt_640x192.zip", 73 | "9c2f071e35027c895a4728358ffc913a"), 74 | "stereo_no_pt_640x192": 75 | ("https://storage.googleapis.com/niantic-lon-static/research/monodepth2/stereo_no_pt_640x192.zip", 76 | "41ec2de112905f85541ac33a854742d1"), 77 | "mono+stereo_no_pt_640x192": 78 | ("https://storage.googleapis.com/niantic-lon-static/research/monodepth2/mono%2Bstereo_no_pt_640x192.zip", 79 | "46c3b824f541d143a45c37df65fbab0a"), 80 | "mono_1024x320": 81 | ("https://storage.googleapis.com/niantic-lon-static/research/monodepth2/mono_1024x320.zip", 82 | "0ab0766efdfeea89a0d9ea8ba90e1e63"), 83 | "stereo_1024x320": 84 | ("https://storage.googleapis.com/niantic-lon-static/research/monodepth2/stereo_1024x320.zip", 85 | "afc2f2126d70cf3fdf26b550898b501a"), 86 | "mono+stereo_1024x320": 87 | ("https://storage.googleapis.com/niantic-lon-static/research/monodepth2/mono%2Bstereo_1024x320.zip", 88 | "cdc5fc9b23513c07d5b19235d9ef08f7"), 89 | } 90 | 91 | if not os.path.exists("models"): 92 | os.makedirs("models") 93 | 94 | model_path = os.path.join("models", model_name) 95 | 96 | def check_file_matches_md5(checksum, fpath): 97 | if not os.path.exists(fpath): 98 | return False 99 | with open(fpath, 'rb') as f: 100 | current_md5checksum = hashlib.md5(f.read()).hexdigest() 101 | return current_md5checksum == checksum 102 | 103 | # see if we have the model already downloaded... 
104 | if not os.path.exists(os.path.join(model_path, "encoder.pth")): 105 | 106 | model_url, required_md5checksum = download_paths[model_name] 107 | 108 | if not check_file_matches_md5(required_md5checksum, model_path + ".zip"): 109 | print("-> Downloading pretrained model to {}".format(model_path + ".zip")) 110 | urllib.request.urlretrieve(model_url, model_path + ".zip") 111 | 112 | if not check_file_matches_md5(required_md5checksum, model_path + ".zip"): 113 | print(" Failed to download a file which matches the checksum - quitting") 114 | quit() 115 | 116 | print(" Unzipping model...") 117 | with zipfile.ZipFile(model_path + ".zip", 'r') as f: 118 | f.extractall(model_path) 119 | 120 | print(" Model unzipped to {}".format(model_path)) 121 | 122 | def fig2img(fig_buffer): 123 | buf = io.BytesIO() 124 | plt.axis('off') 125 | plt.savefig(buf, format="png",transparent = True, bbox_inches = 'tight', pad_inches = 0,dpi=100, facecolor=(0, 0, 0)) 126 | buf.seek(0) 127 | img_arr = np.frombuffer(buf.getvalue(), dtype=np.uint8) 128 | buf.close() 129 | img = cv2.imdecode(img_arr, 1) 130 | return img 131 | 132 | def disp_to_mpimg(disp,colormap='jet_r'): 133 | fig = plt.figure(figsize=(30,20),dpi=100) 134 | plt.subplots_adjust(wspace=0.00,hspace=0.00) 135 | 136 | fig.add_subplot(111) 137 | plt.imshow(disp,cmap=colormap) 138 | plt.axis('off') 139 | 140 | img = fig2img(fig) 141 | plt.close() 142 | # im_pil = Image.fromarray(img) 143 | # cv2.imwrite("debug.png",img) 144 | return img 145 | 146 | def snr_binary_mask(gated_img, min_intns = 0.04, max_intns = 0.98): 147 | """[snr_mask_binary calculates a binary mask based on the SNR and the maximum intensity of the input gated image] 148 | 149 | Args: 150 | gated_img ([torch.Tensor]): [gated image of dimension B x 3 x H x W] 151 | Returns: 152 | [torch.Tensor]: [Mask with dimension B x 1 x H x W] 153 | """ 154 | max_intensity,_ = torch.max(gated_img, dim=1, keepdims=True) 155 | min_intensity,_ = torch.min(gated_img, dim=1, keepdims=True) 156 | snr = max_intensity - min_intensity 157 | snr_binary_mask = torch.logical_and(snr > min_intns, max_intensity < max_intns).float() 158 | return snr_binary_mask 159 | 160 | def intensity_mask(gated_img, depth): 161 | """[intensity_mask calculates a mask based on the intensities of the input gated image and the utilized range intensity profiles and the depth of the flat world] 162 | Args: 163 | gated_img ([torch.Tensor]): [gated image of dimension B x 3 x H x W] 164 | Returns: 165 | [torch.Tensor]: [Mask with dimension B x 1 x H x W] 166 | """ 167 | max_intensity,_ = torch.max(gated_img, dim=1, keepdims=True) 168 | mask1 = max_intensity == gated_img[:,0:1,:,:] 169 | mask2 = torch.logical_and(max_intensity == gated_img[:,1:2,:,:], depth > 30. * torch.normal(1., 0.1, size=(depth.size())).to(device=depth.device)) 170 | mask3 = torch.logical_and(max_intensity == gated_img[:,2:3,:,:], depth > 73. * torch.normal(1., 0.1, size=(depth.size())).to(device=depth.device)) 171 | intensity_mask = mask1 + mask2 + mask3 172 | intensity_mask = (intensity_mask > 0.0).float() 173 | return intensity_mask 174 | 175 | def plot_grad_flow(named_parameters): 176 | '''Plots the gradients flowing through different layers in the net during training. 177 | Can be used for checking for possible gradient vanishing / exploding problems. 
178 | 179 | Usage: Plug this function in Trainer class after loss.backwards() as 180 | "plot_grad_flow(self.model.named_parameters())" to visualize the gradient flow''' 181 | ave_grads = [] 182 | max_grads= [] 183 | layers = [] 184 | for n, p in named_parameters: 185 | if(p.requires_grad) and ("bias" not in n) and (p.grad is not None): 186 | layers.append(n) 187 | ave_grads.append(p.grad.abs().detach().cpu().numpy().mean()) 188 | max_grads.append(p.grad.abs().detach().cpu().numpy().max()) 189 | 190 | fig = plt.figure(figsize=(30,20),dpi=100) 191 | plt.subplots_adjust(wspace=0.00,hspace=0.00) 192 | 193 | fig.add_subplot(111) 194 | plt.bar(np.arange(len(max_grads)), max_grads, alpha=0.1, lw=1, color="c") 195 | plt.bar(np.arange(len(max_grads)), ave_grads, alpha=0.1, lw=1, color="b") 196 | plt.hlines(0, 0, len(ave_grads)+1, lw=2, color="k" ) 197 | plt.xticks(range(0,len(ave_grads), 1), layers, rotation="vertical") 198 | plt.xlim(left=0, right=len(ave_grads)) 199 | plt.ylim(bottom = -0.001, top=0.02) # zoom in on the lower gradient regions 200 | plt.xlabel("Layers") 201 | plt.ylabel("average gradient") 202 | plt.title("Gradient flow") 203 | plt.grid(True) 204 | plt.legend([Line2D([0], [0], color="c", lw=4), 205 | Line2D([0], [0], color="b", lw=4), 206 | Line2D([0], [0], color="k", lw=4)], ['max-gradient', 'mean-gradient', 'zero-gradient']) 207 | # plt.savefig('grad.png') 208 | buf = io.BytesIO() 209 | plt.savefig(buf, format="png", bbox_inches = 'tight', pad_inches = 0) 210 | buf.seek(0) 211 | img_arr = np.frombuffer(buf.getvalue(), dtype=np.uint8) 212 | buf.close() 213 | img = cv2.imdecode(img_arr, 1) 214 | # cv2.imwrite('grad_img.png',img) 215 | plt.close() 216 | return img 217 | 218 | def depth_image(depth, min_depth=0.1, max_depth = 100.0, colormap='jet_r'): 219 | fig = plt.figure(figsize=(20,10),dpi=100) 220 | plt.subplots_adjust(wspace=0.00,hspace=0.00) 221 | 222 | depth = np.clip(depth, min_depth, max_depth) 223 | depth[0,0] = min_depth 224 | depth[-1,-1] = max_depth 225 | 226 | 227 | 228 | fig.add_subplot(111) 229 | plt.imshow(depth,cmap=colormap) 230 | plt.axis('off') 231 | plt.colorbar(aspect=80,orientation='horizontal',pad=0.01) 232 | 233 | img = fig2img(fig) 234 | plt.close() 235 | return img -------------------------------------------------------------------------------- /src/visualize2D.py: -------------------------------------------------------------------------------- 1 | import matplotlib as mpl 2 | import matplotlib.cm as cm 3 | import numpy as np 4 | import cv2 5 | 6 | 7 | def colorize_pointcloud(depth, min_distance=3, max_distance=80, radius=3, cmap = cm.jet): 8 | norm = mpl.colors.Normalize(vmin=min_distance, vmax=max_distance) 9 | 10 | m = cm.ScalarMappable(norm=norm, cmap=cmap) 11 | pos = np.argwhere(depth > 0) 12 | 13 | pointcloud_color = np.zeros((depth.shape[0], depth.shape[1], 3), dtype=np.uint8) 14 | for i in range(pos.shape[0]): 15 | color = tuple([int(255 * value) for value in m.to_rgba(depth[pos[i, 0], pos[i, 1]])[0:3]]) 16 | cv2.circle(pointcloud_color, (pos[i, 1], pos[i, 0]), radius, (color[0], color[1], color[2]), -1) 17 | 18 | return pointcloud_color 19 | 20 | 21 | def colorize_depth(depth, min_distance=3, max_distance=80,cmap = cm.jet): 22 | norm = mpl.colors.Normalize(vmin=min_distance, vmax=max_distance) 23 | 24 | m = cm.ScalarMappable(norm=norm, cmap=cmap) 25 | 26 | depth_color = (255 * m.to_rgba(depth)[:, :, 0:3]).astype(np.uint8) 27 | depth_color[depth <= 0] = [0, 0, 0] 28 | depth_color[np.isnan(depth)] = [0, 0, 0] 29 | depth_color[depth == np.inf] = 
[0, 0, 0] 30 | 31 | return depth_color --------------------------------------------------------------------------------
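For reference, here is a minimal usage sketch of the helpers defined above: `snr_binary_mask` and `intensity_mask` from `src/utils.py` take a B x 3 x H x W tensor of gated slices (plus a B x 1 x H x W depth map for the intensity mask), and `colorize_depth` from `src/visualize2D.py` maps an H x W metric depth array to a color image. The random tensors, the image resolution, and the 3-80 m visualization range below are placeholders for illustration, not values taken from the training configuration.

```
import cv2
import torch

# Assumes the interpreter is started from the src/ directory so that
# utils.py and visualize2D.py are importable as top-level modules.
from utils import snr_binary_mask, intensity_mask
from visualize2D import colorize_depth

# Dummy inputs for illustration: one batch of three gated slices in [0, 1]
# and a matching depth map in meters.
gated = torch.rand(1, 3, 512, 1024)
depth = torch.rand(1, 1, 512, 1024) * 80.0

# Pixels with sufficient gated signal and no saturation (B x 1 x H x W, values in {0, 1}).
snr_mask = snr_binary_mask(gated, min_intns=0.04, max_intns=0.98)

# Pixels whose brightest slice is plausible given the (noise-perturbed) depth.
int_mask = intensity_mask(gated, depth)

# Colorize the masked depth for visual inspection; colorize_depth expects an H x W array
# and returns an RGB uint8 image, so convert to BGR before writing with OpenCV.
masked_depth = (depth * snr_mask * int_mask)[0, 0].numpy()
depth_color = colorize_depth(masked_depth, min_distance=3, max_distance=80)
cv2.imwrite("masked_depth_vis.png", cv2.cvtColor(depth_color, cv2.COLOR_RGB2BGR))
```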