├── .gitignore
├── LICENSE
├── README.md
├── bilateral_filter.py
├── depthstillation.py
├── external
│   └── forward_warping
│       ├── compile.sh
│       └── warping.c
├── flow_colors.py
├── geometry.py
├── requirements.txt
└── samples
    ├── d0.png
    ├── dCOCO_file_list.txt
    ├── im0.jpg
    └── s0.png

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
#   However, in case of collaboration, if having platform-specific dependencies or dependencies
#   having no cross-platform support, pipenv may install dependencies that don't work, or not
#   install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------

MIT License

Copyright (c) 2021 mattpoggi

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

# Depthstillation

Demo code for "Learning optical flow from still images", CVPR 2021.

[[Project page]](https://mattpoggi.github.io/projects/cvpr2021aleotti/) - [[Paper]](https://mattpoggi.github.io/assets/papers/aleotti2021cvpr.pdf) - [[Supplementary]](https://mattpoggi.github.io/assets/papers/aleotti2021cvpr_supp.pdf)

This code is provided to replicate the qualitative results shown in the supplementary material, Sections 2-4.
The code has been tested on Ubuntu 20.04 LTS with Python 3.8 and gcc 9.3.0.

![Alt text](https://mattpoggi.github.io/assets/img/depthstillation/depthstillation.png?raw=true "Depthstillation pipeline")

## Reference

If you find this code useful, please cite our work:
```
@inproceedings{Aleotti_CVPR_2021,
  title     = {Learning optical flow from still images},
  author    = {Aleotti, Filippo and
               Poggi, Matteo and
               Mattoccia, Stefano},
  booktitle = {IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  year      = {2021}
}
```

## Contents

1. [Introduction](#introduction)
2. [Usage](#usage)
3. [Supplementary](#supplementary)
4. [Weights](#weights)
5. [Contacts](#contacts)
6. [Acknowledgments](#acknowledgments)
## Introduction

This paper deals with the scarcity of data for training optical flow networks, highlighting the limitations of existing sources such as labeled synthetic datasets or unlabeled real videos. Specifically, we introduce a framework to generate accurate ground-truth optical flow annotations quickly and in large amounts from any readily available single real picture. Given an image, we use an off-the-shelf monocular depth estimation network to build a plausible point cloud for the observed scene. Then, we virtually move the camera in the reconstructed environment with known motion vectors and rotation angles, allowing us to synthesize both a novel view and the corresponding optical flow field connecting each pixel in the input image to the one in the new frame.
When trained with our data, state-of-the-art optical flow networks achieve superior generalization to unseen real data compared to the same models trained either on annotated synthetic datasets or unlabeled videos, and better specialization if combined with synthetic images.

## Usage

Install the project requirements in a new Python 3 environment:

```
virtualenv -p python3 learning_flow_env
source learning_flow_env/bin/activate
pip install -r requirements.txt
```

Compile the `forward_warping` module, written in C (required to handle warping collisions):

```
cd external/forward_warping
bash compile.sh
cd ../..
```

You are now ready to run the `depthstillation.py` script:

```
python depthstillation.py
```
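
The command-line switches detailed below can be freely combined. For instance, to depthstill four virtual motions with independently moving objects enabled:

```
python depthstillation.py --num_motions 4 --segment
```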
By switching some parameters, you can generate all the qualitative results provided in the supplementary material.

These parameters are:
* `num_motions`: changes the number of virtual motions
* `segment`: enables instance segmentation (for independently moving objects)
* `mask_type`: mask selection. Options are `H'` and `H`
* `num_objects`: sets the number of independently moving objects (one, in this example)
* `no_depth`: disables monocular depth and forces depth to assume a constant value
* `no_sharp`: disables depth sharpening
* `change_k`: uses different intrinsics `K`
* `change_motion`: samples a different motion (ignored if `num_motions` is greater than 1)

For instance, to simulate a different `K` setting, just run:

```
python depthstillation.py --change_k
```

The results are saved in the `dCOCO` folder, organized as follows:
* `depth_color`: colored depth map
* `flow`: generated flow labels (in 16-bit KITTI format; see the decoding snippet at the end of this README)
* `flow_color`: colored flow labels
* `H`: H mask
* `H'`: H' mask
* `im0`: real input image
* `im1`: generated virtual image
* `im1_raw`: generated virtual image (pre-inpainting)
* `instances_color`: colored instance map (if `--segment` is enabled)
* `M`: M mask
* `M'`: M' mask
* `P`: P mask

We report the list of files used to depthstill `dCOCO` in `samples/dCOCO_file_list.txt`.

## Supplementary

We report here the list of commands to obtain, in the same order, the figures shown in Sections 2-4 of the supplementary material:
* Section 2 -- the first figure is obtained with default parameters, then we use `--no_depth` and `--no_depth --segment` respectively
* Section 3 -- the first figure is obtained with `--no_sharp`, the remaining figures with default parameters or by setting `--mask_type "H"`
* Section 4 -- we show three times the results obtained with default parameters, followed respectively by figures generated using `--change_k`, `--change_motion` and `--segment` individually

## Weights

We provide the RAFT models trained in our experiments. To run them and reproduce our results, please refer to the [RAFT repository](https://github.com/princeton-vl/RAFT):

* Tab. 4 (C) [dCOCO](https://drive.google.com/file/d/1MIcP0GpAp6KIjL-kr_-nSiEB4axKEidv/view?usp=sharing) (D) [Ch->Th->dCOCO](https://drive.google.com/file/d/15SIwH5Gzo3BSoKgEaBgPuLImuUbfGHPz/view?usp=sharing)
* Tab. 5 (C) [dCOCO (fine-tuned)](https://drive.google.com/file/d/1hpPy781c_fOOQUpQooewgdPuvJJuASwM/view?usp=sharing) (D) [Ch->Th->dCOCO (fine-tuned)](https://drive.google.com/file/d/1dheYMoRIPQLV-8QilXauyep4rclVvlFz/view?usp=sharing)
* Tab. 7 (C) [dDAVIS](https://drive.google.com/file/d/1cYmVozm31ByuwYybC54dnySO4_fYO_rl/view?usp=sharing)
* Tab. 8 (C) [dKITTI](https://drive.google.com/file/d/1c2OqT4YbkZrw7OftPfOOiPn4geREgKnK/view?usp=sharing)

**Errata** - the crop size used to train on dCOCO is 472x368 (instead of 496x368).

## Contacts

m [dot] poggi [at] unibo [dot] it

## Acknowledgments

Thanks to Clément Godard and Niantic for sharing the [monodepth2](https://github.com/nianticlabs/monodepth2) code, used to simulate camera motion.

Our work is inspired by Jamie Watson et al., [Learning Stereo from Single Images](https://arxiv.org/abs/2008.01484).
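
As a practical note, the 16-bit flow maps in `dCOCO/flow` follow the KITTI encoding: `u` and `v` are multiplied by 64 and offset by 2^15, with a third channel acting as a validity bit. A minimal decoding sketch (the file name is one actually produced by `depthstillation.py`):

```python
import cv2
import numpy as np

# Load the 16-bit PNG without conversion; cv2 returns channels in BGR order,
# so channel 2 holds u (red), channel 1 holds v (green) and channel 0 the validity bit (blue)
png = cv2.imread("dCOCO/flow/95022_00.png", cv2.IMREAD_UNCHANGED).astype(np.float32)
valid = png[:, :, 0] > 0
flow = (png[:, :, 2:0:-1] - 2 ** 15) / 64.0  # [H,W,2] array holding (u, v)
```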
--------------------------------------------------------------------------------
/bilateral_filter.py:
--------------------------------------------------------------------------------

#
# Classes and functions in this script are taken from https://github.com/vt-vl-lab/3d-photo-inpainting/blob/master/bilateral_filtering.py
# Licensed under the MIT license
#

import numpy as np
from functools import reduce
import time

__all__ = ["sparse_bilateral_filtering"]


def sparse_bilateral_filtering(
    depth,
    image,
    filter_size,
    sigma_r=0.5,
    sigma_s=4.0,
    depth_threshold=0.04,
    HR=False,
    mask=None,
    gsHR=True,
    edge_id=None,
    num_iter=None,
    num_gs_iter=None,
):
    # Iteratively smooth the depth map with an edge-aware bilateral filter,
    # masking out pixels lying on depth discontinuities at each iteration
    save_discontinuities = []
    vis_depth = depth.copy()
    backup_vis_depth = vis_depth.copy()

    depth_max = vis_depth.max()
    depth_min = vis_depth.min()
    vis_image = image.copy()
    for i in range(num_iter):
        vis_image = image.copy()
        u_over, b_over, l_over, r_over = vis_depth_discontinuity(
            vis_depth, depth_threshold, mask=mask
        )
        vis_image[u_over > 0] = np.array([0, 0, 0])
        vis_image[b_over > 0] = np.array([0, 0, 0])
        vis_image[l_over > 0] = np.array([0, 0, 0])
        vis_image[r_over > 0] = np.array([0, 0, 0])

        discontinuity_map = (u_over + b_over + l_over + r_over).clip(0.0, 1.0)
        discontinuity_map[depth == 0] = 1
        save_discontinuities.append(discontinuity_map)
        if mask is not None:
            discontinuity_map[mask == 0] = 0
        vis_depth = bilateral_filter(
            vis_depth,
            sigma_r=sigma_r,
            sigma_s=sigma_s,
            discontinuity_map=discontinuity_map,
            HR=HR,
            mask=mask,
            window_size=filter_size[i],
        )

    return vis_depth


def vis_depth_discontinuity(
    depth, depth_threshold, vis_diff=False, label=False, mask=None
):
    # Mark pixels whose inverse-depth difference with a neighbor (up, bottom,
    # left, right) exceeds depth_threshold as discontinuities
    if label == False:
        disp = 1.0 / depth
        u_diff = (disp[1:, :] - disp[:-1, :])[:-1, 1:-1]
        b_diff = (disp[:-1, :] - disp[1:, :])[1:, 1:-1]
        l_diff = (disp[:, 1:] - disp[:, :-1])[1:-1, :-1]
        r_diff = (disp[:, :-1] - disp[:, 1:])[1:-1, 1:]
        if mask is not None:
            u_mask = (mask[1:, :] * mask[:-1, :])[:-1, 1:-1]
            b_mask = (mask[:-1, :] * mask[1:, :])[1:, 1:-1]
            l_mask = (mask[:, 1:] * mask[:, :-1])[1:-1, :-1]
            r_mask = (mask[:, :-1] * mask[:, 1:])[1:-1, 1:]
            u_diff = u_diff * u_mask
            b_diff = b_diff * b_mask
            l_diff = l_diff * l_mask
            r_diff = r_diff * r_mask
        u_over = (np.abs(u_diff) > depth_threshold).astype(np.float32)
        b_over = (np.abs(b_diff) > depth_threshold).astype(np.float32)
        l_over = (np.abs(l_diff) > depth_threshold).astype(np.float32)
        r_over = (np.abs(r_diff) > depth_threshold).astype(np.float32)
    else:
        disp = depth
        u_diff = (disp[1:, :] * disp[:-1, :])[:-1, 1:-1]
        b_diff = (disp[:-1, :] * disp[1:, :])[1:, 1:-1]
        l_diff = (disp[:, 1:] * disp[:, :-1])[1:-1, :-1]
        r_diff = (disp[:, :-1] * disp[:, 1:])[1:-1, 1:]
        if mask is not None:
            u_mask = (mask[1:, :] * mask[:-1, :])[:-1, 1:-1]
            b_mask = (mask[:-1, :] * mask[1:, :])[1:, 1:-1]
            l_mask = (mask[:, 1:] * mask[:, :-1])[1:-1, :-1]
            r_mask = (mask[:, :-1] * mask[:, 1:])[1:-1, 1:]
            u_diff = u_diff * u_mask
            b_diff = b_diff * b_mask
            l_diff = l_diff * l_mask
            r_diff = r_diff * r_mask
        u_over = (np.abs(u_diff) > 0).astype(np.float32)
        b_over = (np.abs(b_diff) > 0).astype(np.float32)
        l_over = (np.abs(l_diff) > 0).astype(np.float32)
        r_over = (np.abs(r_diff) > 0).astype(np.float32)
    u_over = np.pad(u_over, 1, mode="constant")
    b_over = np.pad(b_over, 1, mode="constant")
    l_over = np.pad(l_over, 1, mode="constant")
    r_over = np.pad(r_over, 1, mode="constant")
    u_diff = np.pad(u_diff, 1, mode="constant")
    b_diff = np.pad(b_diff, 1, mode="constant")
    l_diff = np.pad(l_diff, 1, mode="constant")
    r_diff = np.pad(r_diff, 1, mode="constant")

    if vis_diff:
        return [u_over, b_over, l_over, r_over], [u_diff, b_diff, l_diff, r_diff]
    else:
        return [u_over, b_over, l_over, r_over]


def bilateral_filter(
    depth,
    sigma_s,
    sigma_r,
    window_size,
    discontinuity_map=None,
    HR=False,
    mask=None,
):
    # Edge-aware bilateral filter: each output pixel is the weighted median of
    # its window, with weights suppressed across depth discontinuities
    sort_time = 0
    replace_time = 0
    filter_time = 0
    init_time = 0
    filtering_time = 0

    midpt = window_size // 2
    ax = np.arange(-midpt, midpt + 1.0)
    xx, yy = np.meshgrid(ax, ax)
    if discontinuity_map is not None:
        spatial_term = np.exp(-(xx ** 2 + yy ** 2) / (2.0 * sigma_s ** 2))

    # padding
    depth = depth[1:-1, 1:-1]
    depth = np.pad(depth, ((1, 1), (1, 1)), "edge")
    pad_depth = np.pad(depth, (midpt, midpt), "edge")
    if discontinuity_map is not None:
        discontinuity_map = discontinuity_map[1:-1, 1:-1]
        discontinuity_map = np.pad(discontinuity_map, ((1, 1), (1, 1)), "edge")
        pad_discontinuity_map = np.pad(discontinuity_map, (midpt, midpt), "edge")
        pad_discontinuity_hole = 1 - pad_discontinuity_map
    # filtering
    output = depth.copy()
    pad_depth_patches = rolling_window(pad_depth, [window_size, window_size], [1, 1])
    if discontinuity_map is not None:
        pad_discontinuity_patches = rolling_window(
            pad_discontinuity_map, [window_size, window_size], [1, 1]
        )
        pad_discontinuity_hole_patches = rolling_window(
            pad_discontinuity_hole, [window_size, window_size], [1, 1]
        )

    if mask is not None:
        pad_mask = np.pad(mask, (midpt, midpt), "constant")
        pad_mask_patches = rolling_window(pad_mask, [window_size, window_size], [1, 1])
    from itertools import product

    if discontinuity_map is not None:
        pH, pW = pad_depth_patches.shape[:2]
        for pi in range(pH):
            for pj in range(pW):
                if mask is not None and mask[pi, pj] == 0:
                    continue
                if discontinuity_map is not None:
                    if bool(pad_discontinuity_patches[pi, pj].any()) is False:
                        continue
                    discontinuity_patch = pad_discontinuity_patches[pi, pj]
                    discontinuity_holes = pad_discontinuity_hole_patches[pi, pj]
                depth_patch = pad_depth_patches[pi, pj]
                depth_order = depth_patch.ravel().argsort()
                patch_midpt = depth_patch[window_size // 2, window_size // 2]
                if discontinuity_map is not None:
                    coef = discontinuity_holes.astype(np.float32)
                    if mask is not None:
                        coef = coef * pad_mask_patches[pi, pj]
                else:
                    range_term = np.exp(
                        -((depth_patch - patch_midpt) ** 2) / (2.0 * sigma_r ** 2)
                    )
                    coef = spatial_term * range_term
                if coef.max() == 0:
                    output[pi, pj] = patch_midpt
                    continue
                if discontinuity_map is not None and (coef.max() == 0):
                    output[pi, pj] = patch_midpt
                else:
                    coef = coef / (coef.sum())
                    coef_order = coef.ravel()[depth_order]
                    cum_coef = np.cumsum(coef_order)
                    # Weighted median: pick the depth value at which the
                    # cumulative weight crosses 0.5
                    ind = np.digitize(0.5, cum_coef)
                    output[pi, pj] = depth_patch.ravel()[depth_order][ind]
    else:
        pH, pW = pad_depth_patches.shape[:2]
        for pi in range(pH):
            for pj in range(pW):
                if discontinuity_map is not None:
                    if (
                        pad_discontinuity_patches[pi, pj][
                            window_size // 2, window_size // 2
                        ]
                        == 1
                    ):
                        continue
                    discontinuity_patch = pad_discontinuity_patches[pi, pj]
                    discontinuity_holes = 1.0 - discontinuity_patch
                depth_patch = pad_depth_patches[pi, pj]
                depth_order = depth_patch.ravel().argsort()
                patch_midpt = depth_patch[window_size // 2, window_size // 2]
                range_term = np.exp(
                    -((depth_patch - patch_midpt) ** 2) / (2.0 * sigma_r ** 2)
                )
                if discontinuity_map is not None:
                    coef = spatial_term * range_term * discontinuity_holes
                else:
                    coef = spatial_term * range_term
                if coef.sum() == 0:
                    output[pi, pj] = patch_midpt
                    continue
                if discontinuity_map is not None and (coef.sum() == 0):
                    output[pi, pj] = patch_midpt
                else:
                    coef = coef / (coef.sum())
                    coef_order = coef.ravel()[depth_order]
                    cum_coef = np.cumsum(coef_order)
                    ind = np.digitize(0.5, cum_coef)
                    output[pi, pj] = depth_patch.ravel()[depth_order][ind]

    return output


def rolling_window(a, window, strides):
    # Stride-trick view of 'a' as overlapping [window]-shaped patches
    assert (
        len(a.shape) == len(window) == len(strides)
    ), "'a', 'window', 'strides' dimension mismatch"
    shape_fn = lambda i, w, s: (a.shape[i] - w) // s + 1
    shape = [shape_fn(i, w, s) for i, (w, s) in enumerate(zip(window, strides))] + list(
        window
    )

    def acc_shape(i):
        if i + 1 >= len(a.shape):
            return 1
        else:
            return reduce(lambda x, y: x * y, a.shape[i + 1 :])

    _strides = [acc_shape(i) * s * a.itemsize for i, s in enumerate(strides)] + list(
        a.strides
    )

    return np.lib.stride_tricks.as_strided(a, shape=shape, strides=_strides)
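
# ---------------------------------------------------------------------------
# Usage sketch (added for illustration; not part of the original file). Runs
# only when the module is executed directly, with the same settings that
# depthstillation.py uses for depth sharpening.
if __name__ == "__main__":
    toy_depth = np.random.uniform(1.0, 10.0, (64, 64)).astype(np.float32)
    toy_rgb = np.random.randint(0, 255, (64, 64, 3)).astype(np.uint8)
    sharp = sparse_bilateral_filtering(
        toy_depth.copy(), toy_rgb.copy(), filter_size=[5, 5], num_iter=2
    )
    print("depth std before/after sharpening:", toy_depth.std(), sharp.std())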
action="store_true", help="Assumes constant depth") 34 | parser.add_argument("--no_sharp", dest="no_sharp", action="store_true", help="Disable depth sharpening") 35 | parser.add_argument("--change_k", dest="change_k", action="store_true", help="Use a different K matrix") 36 | parser.add_argument("--change_motion", dest="change_motion", action="store_true", help="Sample a different random motion") 37 | args = parser.parse_args() 38 | 39 | # if num_motions greater than 1, ignore change_motion setting 40 | if args.num_motions > 1: 41 | args.change_motion = False 42 | 43 | # Init progress bar 44 | pbar = tqdm.tqdm(total=args.num_motions) 45 | 46 | # Create directories to save outputs 47 | if not os.path.exists(os.path.join("dCOCO", "im0")): 48 | os.makedirs(os.path.join("dCOCO", "im0")) 49 | if not os.path.exists(os.path.join("dCOCO", "im1_raw")): 50 | os.makedirs(os.path.join("dCOCO", "im1_raw")) 51 | if not os.path.exists(os.path.join("dCOCO", "im1")): 52 | os.makedirs(os.path.join("dCOCO", "im1")) 53 | if not os.path.exists(os.path.join("dCOCO", "flow")): 54 | os.makedirs(os.path.join("dCOCO", "flow")) 55 | if not os.path.exists(os.path.join("dCOCO", "flow_color")): 56 | os.makedirs(os.path.join("dCOCO", "flow_color")) 57 | if not os.path.exists(os.path.join("dCOCO", "depth_color")): 58 | os.makedirs(os.path.join("dCOCO", "depth_color")) 59 | if not os.path.exists(os.path.join("dCOCO", "instances_color")): 60 | os.makedirs(os.path.join("dCOCO", "instances_color")) 61 | if not os.path.exists(os.path.join("dCOCO", "H")): 62 | os.makedirs(os.path.join("dCOCO", "H")) 63 | if not os.path.exists(os.path.join("dCOCO", "M")): 64 | os.makedirs(os.path.join("dCOCO", "M")) 65 | if not os.path.exists(os.path.join("dCOCO", "M'")): 66 | os.makedirs(os.path.join("dCOCO", "M'")) 67 | if not os.path.exists(os.path.join("dCOCO", "P")): 68 | os.makedirs(os.path.join("dCOCO", "P")) 69 | if not os.path.exists(os.path.join("dCOCO", "H'")): 70 | os.makedirs(os.path.join("dCOCO", "H'")) 71 | 72 | # Fix random seeds 73 | random.seed(1024) 74 | np.random.seed(1024) 75 | 76 | # Open I0 image 77 | rgb = cv2.imread("samples/im0.jpg", -1) 78 | if len(rgb.shape)<3: 79 | h, w = rgb.shape 80 | rgb = np.stack((rgb,rgb,rgb),-1) 81 | else: 82 | h, w, _ = rgb.shape 83 | 84 | # Open D0 (inverse) depth map and resize to I0 85 | depth = cv2.imread("samples/d0.png", -1) / (2**16-1) 86 | if depth.shape[0] != h or depth.shape[1] != w: 87 | depth = cv2.resize(depth, (w, h)) 88 | 89 | # Get depth map and normalize 90 | depth = 1.0 / (depth + 0.005) 91 | depth[depth > 100] = 100 92 | 93 | # Set depth to constant value in case we do not want to use depth 94 | if args.no_depth: 95 | depth = depth * 0. + 1. 

# Depth sharpening (bilateral filter)
if not args.no_sharp:
    depth = sparse_bilateral_filtering(depth.copy(), rgb.copy(), filter_size=[5, 5], num_iter=2)

# Load segmentation mask in case we simulate moving objects
if args.segment:
    labels = []
    instances_mask = cv2.imread("samples/s0.png", -1)

    # Resize instance mask to I0
    if instances_mask.shape[0] != h or instances_mask.shape[1] != w:
        instances_mask = cv2.resize(instances_mask, (w, h))

    # Get total number of objects
    classes = instances_mask.max()

    # Score each object: summing the label values weighs each class's pixel
    # count by its label id and zeroes out the background (label 0)
    areas = np.array([instances_mask[instances_mask == c].sum() for c in range(classes + 1)], np.float32)

    # If we have any object
    if areas.shape[0] > 1:

        # Keep the args.num_objects labels having the largest scores
        labels = areas.argsort()[-args.num_objects:][::-1]
        instances = []

        # For each object kept
        for l in labels:

            # Create a segmentation mask for the single object
            seg_mask = np.zeros_like(instances_mask)

            # Set to 1 pixels having label l
            seg_mask[instances_mask == l] = 1
            seg_mask = np.expand_dims(seg_mask, 0)

            # Cast to pytorch tensor and append to the masks list
            seg_mask = torch.from_numpy(np.stack((seg_mask, seg_mask), -1)).float()
            instances.append(seg_mask)

# Cast I0 and D0 to pytorch tensors
rgb = torch.from_numpy(np.expand_dims(rgb, 0))
depth = torch.from_numpy(np.expand_dims(depth, 0)).float()

# Fix a plausible K matrix
K = np.array([[[0.58, 0, 0.5, 0], [0, 0.58, 0.5, 0], [0, 0, 1, 0], [0, 0, 0, 1]]], dtype=np.float32)

# Fix a different K matrix in case
if args.change_k:
    K = np.array([[[1.16, 0, 0.5, 0], [0, 1.16, 0.5, 0], [0, 0, 1, 0], [0, 0, 0, 1]]], dtype=np.float32)
K[:, 0, :] *= w
K[:, 1, :] *= h
inv_K = torch.from_numpy(np.linalg.pinv(K))
K = torch.from_numpy(K)

# Create objects in charge of 3D projection
backproject_depth = BackprojectDepth(1, h, w)
project_3d = Project3D(1, h, w)

# Prepare p0 coordinates
meshgrid = np.meshgrid(range(w), range(h), indexing="xy")
p0 = np.stack(meshgrid, axis=-1).astype(np.float32)

# Loop over the number of motions
for idm in range(args.num_motions):

    # Initiate masks dictionary
    masks = {}

    # Sample a random motion (twice, if a new one is requested)
    sample_motions = 2 if args.change_motion else 1
    for mot in range(sample_motions):

        # Generate random vector t
        # Random signs
        scx = (-1) ** random.randrange(2)
        scy = (-1) ** random.randrange(2)
        scz = (-1) ** random.randrange(2)
        # Random scalars in [-0.2, 0.2], excluding (-0.1, 0.1) to avoid zeros / very small motions
        cx = (random.random() * 0.1 + 0.1) * scx
        cy = (random.random() * 0.1 + 0.1) * scy
        cz = (random.random() * 0.1 + 0.1) * scz
        camera_mot = [cx, cy, cz]

        # Generate random triplet of Euler angles
        # Random signs
        sax = (-1) ** random.randrange(2)
        say = (-1) ** random.randrange(2)
        saz = (-1) ** random.randrange(2)
        # Random angles in [-pi/18, pi/18], excluding (-pi/36, pi/36) to avoid zeros / very small rotations
        ax = (random.random() * math.pi / 36.0 + math.pi / 36.0) * sax
        ay = (random.random() * math.pi / 36.0 + math.pi / 36.0) * say
        az = (random.random() * math.pi / 36.0 + math.pi / 36.0) * saz
        camera_ang = [ax, ay, az]

        axisangle = torch.from_numpy(np.array([[camera_ang]], dtype=np.float32))
        translation = torch.from_numpy(np.array([[camera_mot]]))

        # Compute (R|t)
        T1 = transformation_from_parameters(axisangle, translation)

    # Back-projection
    cam_points = backproject_depth(depth, inv_K)

    # Apply transformation T_{0->1}
    p1, z1 = project_3d(cam_points, K, T1)
    z1 = z1.reshape(1, h, w)

    # Simulate objects moving independently
    if args.segment:

        # Loop over objects
        for l in range(len(labels)):

            sign = 1
            # We multiply the sign by -1 to obtain a motion similar to the one shown in the
            # supplementary (not exactly the same). Can be removed for general-purpose use
            if not args.no_depth:
                sign = -1

            # Random t (scalars and signs). Zeros and small motions are avoided as before
            cix = (random.random() * 0.05 + 0.05) * (sign * (-1) ** random.randrange(2))
            ciy = (random.random() * 0.05 + 0.05) * (sign * (-1) ** random.randrange(2))
            ciz = (random.random() * 0.05 + 0.05) * (sign * (-1) ** random.randrange(2))
            camerai_mot = [cix, ciy, ciz]

            # Random Euler angles (scalars and signs). Zeros and small rotations are avoided as before
            aix = (random.random() * math.pi / 72.0 + math.pi / 72.0) * (sign * (-1) ** random.randrange(2))
            aiy = (random.random() * math.pi / 72.0 + math.pi / 72.0) * (sign * (-1) ** random.randrange(2))
            aiz = (random.random() * math.pi / 72.0 + math.pi / 72.0) * (sign * (-1) ** random.randrange(2))
            camerai_ang = [aix, aiy, aiz]

            ai = torch.from_numpy(np.array([[camerai_ang]], dtype=np.float32))
            tri = torch.from_numpy(np.array([[camerai_mot]]))

            # Compute (R|t)
            Ti = transformation_from_parameters(axisangle + ai, translation + tri)

            # Apply transformation T_{0->\pi_i}
            pi, zi = project_3d(cam_points, K, Ti)

            # If a pixel belongs to object label l, replace its coordinates in I1...
            p1[instances[l] > 0] = pi[instances[l] > 0]
            # ... and its depth
            zi = zi.reshape(1, h, w)
            z1[instances[l][:, :, :, 0] > 0] = zi[instances[l][:, :, :, 0] > 0]

    # Bring p1 coordinates to [0,W-1]x[0,H-1] format
    p1 = (p1 + 1) / 2
    p1[:, :, :, 0] *= w - 1
    p1[:, :, :, 1] *= h - 1

    # Create auxiliary data for warping
    dlut = torch.ones(1, h, w).float() * 1000  # unused here; the C routine builds its own depth LUT
    safe_y = np.maximum(np.minimum(p1[:, :, :, 1].long(), h - 1), 0)
    safe_x = np.maximum(np.minimum(p1[:, :, :, 0].long(), w - 1), 0)
    warped_arr = np.zeros(h * w * 5).astype(np.uint8)
    img = rgb.reshape(-1)

    # Call the forward warping routine (C code)
    warp(
        c_void_p(img.numpy().ctypes.data),
        c_void_p(safe_x[0].numpy().ctypes.data),
        c_void_p(safe_y[0].numpy().ctypes.data),
        c_void_p(z1.reshape(-1).numpy().ctypes.data),
        c_void_p(warped_arr.ctypes.data),
        c_int(h),
        c_int(w),
    )
    warped_arr = warped_arr.reshape(1, h, w, 5).astype(np.uint8)

    # Warped image
    im1_raw = warped_arr[0, :, :, 0:3]

    # Validity mask H: pixels receiving at least one source pixel
    masks["H"] = warped_arr[0, :, :, 3:4]

    # Collision mask M: valid pixels hit by multiple source pixels
    masks["M"] = warped_arr[0, :, :, 4:5]
    masks["M"] = 1 - (masks["M"] == masks["H"]).astype(np.uint8)

    # Dilated collision mask M'
    kernel = np.ones((3, 3), np.uint8)
    masks["M'"] = cv2.dilate(masks["M"], kernel, iterations=1)

    # Mask P: pixels left untouched by the dilation (outside the enlarged collision area)
    masks["P"] = (np.expand_dims(masks["M'"], -1) == masks["M"]).astype(np.uint8)

    # Final mask H': valid pixels outside the dilated collision area
    masks["H'"] = masks["H"] * masks["P"]

    # Compute flow as p1 - p0
    flow_01 = p1 - p0

    # Get 16-bit flow (KITTI format) and colored flows
    flow_16bit = cv2.cvtColor(
        np.concatenate((flow_01 * 64. + (2 ** 15), np.ones_like(flow_01)[:, :, :, 0:1]), -1)[0],
        cv2.COLOR_BGR2RGB,
    )
    flow_color = flow_to_color(flow_01[0].numpy(), convert_to_bgr=True)

    # Inpaint the holes of the new image
    im1 = cv2.inpaint(im1_raw, 1 - masks[args.mask_type], 3, cv2.INPAINT_TELEA)

    # Save images
    cv2.imwrite(os.path.join("dCOCO", "im0", "95022.jpg"), rgb[0].numpy())
    cv2.imwrite(os.path.join("dCOCO", "im1_raw", "95022_%02d.jpg" % (idm)), im1_raw)
    cv2.imwrite(os.path.join("dCOCO", "im1", "95022_%02d.jpg" % (idm)), im1)
    cv2.imwrite(os.path.join("dCOCO", "flow", "95022_%02d.png" % (idm)), flow_16bit.astype(np.uint16))
    cv2.imwrite(os.path.join("dCOCO", "H", "95022_%02d.png" % (idm)), masks["H"] * 255)
    cv2.imwrite(os.path.join("dCOCO", "M", "95022_%02d.png" % (idm)), masks["M"] * 255)
    cv2.imwrite(os.path.join("dCOCO", "M'", "95022_%02d.png" % (idm)), masks["M'"] * 255)
    cv2.imwrite(os.path.join("dCOCO", "P", "95022_%02d.png" % (idm)), masks["P"] * 255)
    cv2.imwrite(os.path.join("dCOCO", "H'", "95022_%02d.png" % (idm)), masks["H'"] * 255)
    cv2.imwrite(os.path.join("dCOCO", "flow_color", "95022_%02d.png" % (idm)), flow_color)
    plt.imsave(os.path.join("dCOCO", "depth_color", "95022_%02d.png" % (idm)), 1. / depth[0].detach().numpy(), cmap="magma")
    if args.segment:
        plt.imsave(os.path.join("dCOCO", "instances_color", "95022_%02d.png" % (idm)), instances_mask, cmap="magma")

    # Clear the ctypes cache and update the progress bar
    ctypes._reset_cache()
    pbar.update(1)

# Close progress bar, cya!
pbar.close()

--------------------------------------------------------------------------------
/external/forward_warping/compile.sh:
--------------------------------------------------------------------------------

#!/bin/bash
gcc -fPIC -shared -o libwarping.so warping.c

--------------------------------------------------------------------------------
/external/forward_warping/warping.c:
--------------------------------------------------------------------------------

#include <stdio.h>
#include <stdlib.h>

/* Offsets of the validity (channel 3) and collision (channel 4) flags in the h*w*5 output buffer */
#define valid(X, Y, W) ((Y) * (W) * 5 + (X) * 5 + 3)
#define collision(X, Y, W) ((Y) * (W) * 5 + (X) * 5 + 4)

/* Forward-warp the RGB image 'src' to the target coordinates (idx, idy),
   keeping the closest source pixel (smallest z) when several pixels land on
   the same target location. Channel 3 flags targets that received at least
   one source pixel; channel 4 is cleared when a collision is detected. */
void forward_warping(const void *src, const void *idx, const void *idy, const void *z, void *warped, int h, int w)
{
    /* Depth lookup table, initialized to a large value (1000) */
    float *dlut = (float *)calloc(h * w, sizeof(float));
    for (int i = 0; i < h; i++)
        for (int j = 0; j < w; j++)
            dlut[i * w + j] = 1000;

    for (int i = 0; i < h; i++)
        for (int j = 0; j < w; j++)
        {
            int x = ((long *)idx)[i * w + j];
            int y = ((long *)idy)[i * w + j];

            /* Write RGB only if this source pixel is closer than any previous one */
            if (((float *)z)[i * w + j] < dlut[y * w + x])
                for (int c = 0; c < 3; c++)
                    ((unsigned char *)warped)[y * w * 5 + x * 5 + c] = ((unsigned char *)src)[i * w * 3 + j * 3 + c];

            ((unsigned char *)warped)[valid(x, y, w)] = 1;
            if (dlut[y * w + x] != 1000)
                ((unsigned char *)warped)[collision(x, y, w)] = 0;
            else
                ((unsigned char *)warped)[collision(x, y, w)] = 1;
            dlut[y * w + x] = ((float *)z)[i * w + j];
        }

    free(dlut);
    return;
}
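
The snippet below (not part of the repository; a minimal sketch assuming `libwarping.so` has been built with `compile.sh`) shows the 5-channel output layout on a toy 2x2 image where source pixels collide on the same targets:

```python
import numpy as np
from ctypes import cdll, c_void_p, c_int

lib = cdll.LoadLibrary("external/forward_warping/libwarping.so")

h, w = 2, 2
src = np.zeros(h * w * 3, dtype=np.uint8)              # dummy RGB values
idx = np.array([0, 1, 0, 1], dtype=np.int64)           # target x for each source pixel
idy = np.array([0, 0, 0, 0], dtype=np.int64)           # target y: everything lands on row 0
z = np.array([1.0, 1.0, 0.5, 1.0], dtype=np.float32)   # pixel (1,0) is the closest at (0,0)
warped = np.zeros(h * w * 5, dtype=np.uint8)

lib.forward_warping(c_void_p(src.ctypes.data), c_void_p(idx.ctypes.data),
                    c_void_p(idy.ctypes.data), c_void_p(z.ctypes.data),
                    c_void_p(warped.ctypes.data), c_int(h), c_int(w))

out = warped.reshape(h, w, 5)
print(out[:, :, 3])  # H: 1 on row 0 (reached by source pixels), 0 on row 1
print(out[:, :, 4])  # collision flag: cleared where several pixels landed
```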

--------------------------------------------------------------------------------
/flow_colors.py:
--------------------------------------------------------------------------------

#
# Utility functions for coloring optical flow maps
#

import numpy as np


def make_colorwheel():
    """
    Generates a color wheel for optical flow visualization as presented in:
    Baker et al. "A Database and Evaluation Methodology for Optical Flow" (ICCV, 2007)
    URL: http://vision.middlebury.edu/flow/flowEval-iccv07.pdf
    Code follows the original C++ source code of Daniel Scharstein
    and the Matlab source code of Deqing Sun.
    Returns:
        np.ndarray: Color wheel
    """

    RY = 15
    YG = 6
    GC = 4
    CB = 11
    BM = 13
    MR = 6

    ncols = RY + YG + GC + CB + BM + MR
    colorwheel = np.zeros((ncols, 3))
    col = 0

    # RY
    colorwheel[0:RY, 0] = 255
    colorwheel[0:RY, 1] = np.floor(255 * np.arange(0, RY) / RY)
    col = col + RY
    # YG
    colorwheel[col : col + YG, 0] = 255 - np.floor(255 * np.arange(0, YG) / YG)
    colorwheel[col : col + YG, 1] = 255
    col = col + YG
    # GC
    colorwheel[col : col + GC, 1] = 255
    colorwheel[col : col + GC, 2] = np.floor(255 * np.arange(0, GC) / GC)
    col = col + GC
    # CB
    colorwheel[col : col + CB, 1] = 255 - np.floor(255 * np.arange(CB) / CB)
    colorwheel[col : col + CB, 2] = 255
    col = col + CB
    # BM
    colorwheel[col : col + BM, 2] = 255
    colorwheel[col : col + BM, 0] = np.floor(255 * np.arange(0, BM) / BM)
    col = col + BM
    # MR
    colorwheel[col : col + MR, 2] = 255 - np.floor(255 * np.arange(MR) / MR)
    colorwheel[col : col + MR, 0] = 255
    return colorwheel


def flow_uv_to_colors(u, v, convert_to_bgr=False):
    """
    Applies the flow color wheel to (possibly clipped) flow components u and v,
    following the C++ source code of Daniel Scharstein
    and the Matlab source code of Deqing Sun.
    Args:
        u (np.ndarray): Input horizontal flow of shape [H,W]
        v (np.ndarray): Input vertical flow of shape [H,W]
        convert_to_bgr (bool, optional): Convert output image to BGR. Defaults to False.
    Returns:
        np.ndarray: Flow visualization image of shape [H,W,3]
    """
    flow_image = np.zeros((u.shape[0], u.shape[1], 3), np.uint8)
    colorwheel = make_colorwheel()  # shape [55x3]
    ncols = colorwheel.shape[0]
    rad = np.sqrt(np.square(u) + np.square(v))
    a = np.arctan2(-v, -u) / np.pi
    fk = (a + 1) / 2 * (ncols - 1)
    k0 = np.floor(fk).astype(np.int32)
    k1 = k0 + 1
    k1[k1 == ncols] = 0
    f = fk - k0
    for i in range(colorwheel.shape[1]):
        tmp = colorwheel[:, i]
        col0 = tmp[k0] / 255.0
        col1 = tmp[k1] / 255.0
        col = (1 - f) * col0 + f * col1
        idx = rad <= 1
        col[idx] = 1 - rad[idx] * (1 - col[idx])
        col[~idx] = col[~idx] * 0.75  # out of range
        # Note the 2-i => BGR instead of RGB
        ch_idx = 2 - i if convert_to_bgr else i
        flow_image[:, :, ch_idx] = np.floor(255 * col)
    return flow_image

def flow_to_color(flow_uv, clip_flow=None, convert_to_bgr=False):
    """
    Expects a two-dimensional flow image of shape [H,W,2].
    Args:
        flow_uv (np.ndarray): Flow UV image of shape [H,W,2]
        clip_flow (float, optional): Clip maximum of flow values. Defaults to None.
        convert_to_bgr (bool, optional): Convert output image to BGR. Defaults to False.
    Returns:
        np.ndarray: Flow visualization image of shape [H,W,3]
    """
    assert flow_uv.ndim == 3, "input flow must have three dimensions"
    assert flow_uv.shape[2] == 2, "input flow must have shape [H,W,2]"
    if clip_flow is not None:
        flow_uv = np.clip(flow_uv, 0, clip_flow)
    u = flow_uv[:, :, 0]
    v = flow_uv[:, :, 1]
    rad = np.sqrt(np.square(u) + np.square(v))
    rad_max = np.max(rad)
    epsilon = 1e-5
    u = u / (rad_max + epsilon)
    v = v / (rad_max + epsilon)
    return flow_uv_to_colors(u, v, convert_to_bgr)

--------------------------------------------------------------------------------
/geometry.py:
--------------------------------------------------------------------------------

#
# Classes and functions in this script are taken from https://github.com/nianticlabs/monodepth2
# Use conditions available in the LICENSE file at https://github.com/nianticlabs/monodepth2/blob/master/LICENSE
# Copyright © Niantic, Inc. 2018. All rights reserved.
#

from __future__ import absolute_import, division, print_function

import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F

__all__ = ["BackprojectDepth", "Project3D", "transformation_from_parameters"]


class BackprojectDepth(nn.Module):
    """Layer to transform a depth image into a point cloud
    """
    def __init__(self, batch_size, height, width):
        super(BackprojectDepth, self).__init__()

        self.batch_size = batch_size
        self.height = height
        self.width = width

        meshgrid = np.meshgrid(range(self.width), range(self.height), indexing='xy')
        self.id_coords = np.stack(meshgrid, axis=0).astype(np.float32)
        self.id_coords = nn.Parameter(torch.from_numpy(self.id_coords),
                                      requires_grad=False)

        self.ones = nn.Parameter(torch.ones(self.batch_size, 1, self.height * self.width),
                                 requires_grad=False)

        self.pix_coords = torch.unsqueeze(torch.stack(
            [self.id_coords[0].view(-1), self.id_coords[1].view(-1)], 0), 0)
        self.pix_coords = self.pix_coords.repeat(batch_size, 1, 1)
        self.pix_coords = nn.Parameter(torch.cat([self.pix_coords, self.ones], 1),
                                       requires_grad=False)

    def forward(self, depth, inv_K):
        cam_points = torch.matmul(inv_K[:, :3, :3], self.pix_coords)
        cam_points = depth.view(self.batch_size, 1, -1) * cam_points
        cam_points = torch.cat([cam_points, self.ones], 1)

        return cam_points


class Project3D(nn.Module):
    """Layer which projects 3D points into a camera with intrinsics K and at position T
    """
    def __init__(self, batch_size, height, width, eps=1e-7):
        super(Project3D, self).__init__()

        self.batch_size = batch_size
        self.height = height
        self.width = width
        self.eps = eps

    def forward(self, points, K, T):
        P = torch.matmul(K, T)[:, :3, :]

        cam_points = torch.matmul(P, points)

        pix_coords = cam_points[:, :2, :] / (cam_points[:, 2, :].unsqueeze(1) + self.eps)
        pix_coords = pix_coords.view(self.batch_size, 2, self.height, self.width)
        pix_coords = pix_coords.permute(0, 2, 3, 1)
        pix_coords[..., 0] /= self.width - 1
        pix_coords[..., 1] /= self.height - 1
        pix_coords = (pix_coords - 0.5) * 2
        return pix_coords, cam_points[:, 2, :].unsqueeze(1)


def transformation_from_parameters(axisangle, translation, invert=False):
    """Convert the network's (axisangle, translation) output into a 4x4 matrix
    """
    R = rot_from_axisangle(axisangle)
    t = translation.clone()

    if invert:
        R = R.transpose(1, 2)
        t *= -1

    T = get_translation_matrix(t)

    if invert:
        M = torch.matmul(R, T)
    else:
        M = torch.matmul(T, R)

    return M


def get_translation_matrix(translation_vector):
    """Convert a translation vector into a 4x4 transformation matrix
    """
    T = torch.zeros(translation_vector.shape[0], 4, 4).to(device=translation_vector.device)

    t = translation_vector.contiguous().view(-1, 3, 1)

    T[:, 0, 0] = 1
    T[:, 1, 1] = 1
    T[:, 2, 2] = 1
    T[:, 3, 3] = 1
    T[:, :3, 3, None] = t

    return T


def rot_from_axisangle(vec):
    """Convert an axisangle rotation into a 4x4 transformation matrix
    (adapted from https://github.com/Wallacoloo/printipi)
    Input 'vec' has to be Bx1x3
    """
    angle = torch.norm(vec, 2, 2, True)
    axis = vec / (angle + 1e-7)

    ca = torch.cos(angle)
    sa = torch.sin(angle)
    C = 1 - ca

    x = axis[..., 0].unsqueeze(1)
    y = axis[..., 1].unsqueeze(1)
    z = axis[..., 2].unsqueeze(1)

    xs = x * sa
    ys = y * sa
    zs = z * sa
    xC = x * C
    yC = y * C
    zC = z * C
    xyC = x * yC
    yzC = y * zC
    zxC = z * xC

    rot = torch.zeros((vec.shape[0], 4, 4)).to(device=vec.device)

    rot[:, 0, 0] = torch.squeeze(x * xC + ca)
    rot[:, 0, 1] = torch.squeeze(xyC - zs)
    rot[:, 0, 2] = torch.squeeze(zxC + ys)
    rot[:, 1, 0] = torch.squeeze(xyC + zs)
    rot[:, 1, 1] = torch.squeeze(y * yC + ca)
    rot[:, 1, 2] = torch.squeeze(yzC - xs)
    rot[:, 2, 0] = torch.squeeze(zxC - ys)
    rot[:, 2, 1] = torch.squeeze(yzC + xs)
    rot[:, 2, 2] = torch.squeeze(z * zC + ca)
    rot[:, 3, 3] = 1

    return rot
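
# ---------------------------------------------------------------------------
# Usage sketch (added for illustration; not part of the original monodepth2
# code). Shows how depthstillation.py composes the pieces above: back-project
# a depth map with K^-1, move the camera by a small (R|t), then re-project.
if __name__ == "__main__":
    h, w = 4, 6                                      # toy resolution
    depth = torch.ones(1, h, w)                      # fronto-parallel plane at depth 1
    K = np.array([[[0.58 * w, 0, 0.5 * w, 0],
                   [0, 0.58 * h, 0.5 * h, 0],
                   [0, 0, 1, 0],
                   [0, 0, 0, 1]]], dtype=np.float32)
    inv_K = torch.from_numpy(np.linalg.pinv(K))
    K = torch.from_numpy(K)

    axisangle = torch.tensor([[[0.0, 0.02, 0.0]]])   # small rotation around y
    translation = torch.tensor([[[0.1, 0.0, 0.0]]])  # small translation along x
    T = transformation_from_parameters(axisangle, translation)

    cam_points = BackprojectDepth(1, h, w)(depth, inv_K)
    p1, z1 = Project3D(1, h, w)(cam_points, K, T)    # p1 in [-1,1], z1 = new depths
    print(p1.shape, z1.shape)                        # (1, h, w, 2) and (1, 1, h*w)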

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------

Cython==0.29.21
matplotlib==3.3.1
numpy==1.18.5
scipy==1.4.1
torch==1.6.0
tqdm==4.48.2
opencv-python==4.4.0.42

--------------------------------------------------------------------------------
/samples/d0.png:
--------------------------------------------------------------------------------

https://raw.githubusercontent.com/mattpoggi/depthstillation/b74ea4343d8d9f082c82e9f72d9294200aea8bb7/samples/d0.png

--------------------------------------------------------------------------------
/samples/im0.jpg:
--------------------------------------------------------------------------------

https://raw.githubusercontent.com/mattpoggi/depthstillation/b74ea4343d8d9f082c82e9f72d9294200aea8bb7/samples/im0.jpg

--------------------------------------------------------------------------------
/samples/s0.png:
--------------------------------------------------------------------------------

https://raw.githubusercontent.com/mattpoggi/depthstillation/b74ea4343d8d9f082c82e9f72d9294200aea8bb7/samples/s0.png

--------------------------------------------------------------------------------